diff options
11 files changed, 168 insertions, 6 deletions
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/hammingMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/hammingMetric.scala
new file mode 100755
index 0000000..9e8dcfc
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/hammingMetric.scala
@@ -0,0 +1,69 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.{ CaseStringCleaner, HammingMetric, StringCleanerDelegate }
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.cli.command._
+
+/**
+ * The hammingMetric [[org.hashtree.stringmetric.cli.command.Command]]. Counts the positions at which two
+ * equal length strings differ from one another.
+ */
+object hammingMetric extends Command {
+  /**
+   * Parses the arguments and then prints help, runs the comparison, or rejects the syntax.
+   *
+   * @param args raw command line arguments.
+   */
+  override def main(args: Array[String]): Unit = {
+    val options = OptionMapUtility.toOptionMap(args)
+
+    try {
+      val helpRequested = options.contains('h) || options.contains('help)
+      // Only evaluated when help was not requested; execute splits the dashless value on that single space.
+      def twoStringsGiven = options.contains('dashless) && options('dashless).count(_ == ' ') == 1
+
+      if (helpRequested) help()
+      else if (twoStringsGiven) execute(options)
+      else throw new IllegalArgumentException("Expected valid syntax. See --help.")
+
+      // Reached only after help or execute completed; invalid syntax has already thrown.
+      exit(options)
+    } catch {
+      case e => error(e)(options)
+    }
+  }
+
+  /** Prints the description, syntax, and options of this command. */
+  override def help(): Unit = {
+    val ls = sys.props("line.separator")
+    val tab = " "
+    val lines = Seq(
+      "Compares the number of characters that two equal length strings are different from one another.",
+      "",
+      "Syntax:",
+      tab + "hammingMetric [Options] string1 string2...",
+      "",
+      "Options:",
+      tab + "-h, --help",
+      tab + tab + "Outputs description, syntax, and options."
+    )
+
+    println(lines.mkString(ls))
+  }
+
+  /**
+   * Splits the dashless value on spaces, compares the first two pieces with HammingMetric using a
+   * StringCleanerDelegate with CaseStringCleaner, and prints the result or "not comparable" when there is none.
+   *
+   * @param options parsed options; only the 'dashless entry is read.
+   */
+  override def execute(options: OptionMap): Unit = {
+    val operands = options('dashless).split(" ")
+    val first = operands(0)
+    val second = operands(1)
+    val cleaner = new StringCleanerDelegate with CaseStringCleaner
+    val distance = HammingMetric.compare(first, second)(cleaner)
+
+    println(distance.getOrElse("not comparable").toString)
+  }
+}
\ No newline at end of file diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroMetric.scala index 874f3aa..51a4958 100755 --- a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroMetric.scala +++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroMetric.scala @@ -47,6 +47,10 @@ object jaroMetric extends Command { override def execute(options: OptionMap): Unit = { val strings = options('dashless).split(" ") - println(JaroMetric.compare(strings(0), strings(1))(new StringCleanerDelegate with CaseStringCleaner).getOrElse("0.0").toString) + println( + JaroMetric.compare(strings(0), + strings(1))(new StringCleanerDelegate with CaseStringCleaner + ).getOrElse("not comparable").toString + ) } }
\ No newline at end of file diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala index ea1b8a6..af633ae 100755 --- a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala +++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala @@ -47,6 +47,10 @@ object jaroWinklerMetric extends Command { override def execute(options: OptionMap): Unit = { val strings = options('dashless).split(" ") - println(JaroWinklerMetric.compare(strings(0), strings(1))(new StringCleanerDelegate with CaseStringCleaner).getOrElse("0.0").toString) + println( + JaroWinklerMetric.compare(strings(0), + strings(1))(new StringCleanerDelegate with CaseStringCleaner + ).getOrElse("not comparable").toString + ) } }
\ No newline at end of file diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala index 5a44395..f0b204e 100755 --- a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala +++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala @@ -47,6 +47,10 @@ object soundexMetric extends Command { override def execute(options: OptionMap): Unit = { val strings = options('dashless).split(" ") - println(SoundexMetric.compare(strings(0), strings(1))(new StringCleanerDelegate).getOrElse("false").toString) + println( + SoundexMetric.compare(strings(0), + strings(1))(new StringCleanerDelegate + ).getOrElse("not comparable").toString + ) } }
\ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/command/hammingMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/hammingMetricSpec.scala
new file mode 100755
index 0000000..f140505
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/hammingMetricSpec.scala
@@ -0,0 +1,42 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Exercises the hammingMetric command through its main method. */
+@RunWith(classOf[JUnitRunner])
+final class hammingMetricSpec extends ScalaTest {
+  "hammingMetric" should provide {
+    "main method" when passed {
+      "valid dashless arguments" should executes {
+        "print if they are a match" in {
+          // Runs the command on the two strings and returns everything it printed.
+          def printedFor(first: String, second: String): String = {
+            val captured = new java.io.ByteArrayOutputStream()
+
+            Console.withOut(captured) {
+              hammingMetric.main(Array("--unitTest", "--debug", first, second))
+            }
+
+            captured.toString
+          }
+
+          // The strings differ only by case and the command prints 0.
+          printedFor("aBc", "abc") should equal ("0\n")
+          // No position holds the same letter, so the command prints 3.
+          printedFor("aBc", "xyz") should equal ("3\n")
+        }
+      }
+      "no dashless arguments" should throws {
+        "IllegalArgumentException" in {
+          val flagsOnly = Array("--unitTest", "--debug")
+
+          evaluating {
+            hammingMetric.main(flagsOnly)
+          } should produce [IllegalArgumentException]
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
index 80a0a50..4fba289 100755
--- a/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
@@ -30,7 +30,7 @@ final class soundexMetricSpec extends ScalaTest {
             soundexMetric.main(Array("--unitTest", "--debug", "1", "1"))
           )
 
-          out.toString should equal ("false\n")
+          out.toString should equal ("not comparable\n")
           out.reset()
         }
       }
diff --git a/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
new file mode 100755
index 0000000..67fd3e5
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
@@ -0,0 +1,50 @@
+package org.hashtree.stringmetric
+
+/** An implementation of the Hamming [[org.hashtree.stringmetric.StringMetric]]. */
+object HammingMetric extends StringMetric {
+  /**
+   * Counts the positions at which two character arrays differ.
+   *
+   * Both arrays are cleaned once, and the emptiness and length checks are applied to the cleaned
+   * arrays, so the values that are validated are the same values that are compared.
+   *
+   * @param charArray1 first array of characters.
+   * @param charArray2 second array of characters.
+   * @param stringCleaner cleaner applied to both arrays before they are validated and compared.
+   * @return Some(distance), or None when either cleaned array is empty or their lengths differ.
+   */
+  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringCleaner: StringCleaner): Option[Int] = {
+    val cleaned1 = stringCleaner.clean(charArray1)
+    val cleaned2 = stringCleaner.clean(charArray2)
+
+    if (cleaned1.length == 0 || cleaned2.length == 0 || cleaned1.length != cleaned2.length) None
+    else Some(hamming(cleaned1, cleaned2))
+  }
+
+  /**
+   * Counts the positions at which two strings differ.
+   *
+   * The strings are cleaned with stringCleaner here; the array overload is then called with a plain
+   * StringCleanerDelegate (presumably a pass-through cleaner - confirm against its definition).
+   *
+   * @param string1 first string.
+   * @param string2 second string.
+   * @param stringCleaner cleaner applied to both strings before they are compared.
+   * @return Some(distance), or None when the array overload finds the cleaned input not comparable.
+   */
+  override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Int] = {
+    compare(
+      stringCleaner.clean(string1.toCharArray),
+      stringCleaner.clean(string2.toCharArray)
+    )(new StringCleanerDelegate)
+  }
+
+  /** Counts index-wise mismatches; requires two non-empty arrays of equal length. */
+  private[this] def hamming(ca1: Array[Char], ca2: Array[Char]): Int = {
+    require(ca1.length > 0)
+    require(ca2.length > 0)
+    require(ca1.length == ca2.length)
+
+    ca1.zip(ca2).count(t => t._1 != t._2)
+  }
+}
\ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala index bd2b468..fb9a3e4 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala @@ -30,7 +30,9 @@ object JaroMetric extends StringMetric { // Return 1 if strings are an exact match. if (string1.length > 0 && string1 == string2) return Some(1f) - compare(stringCleaner.clean(string1.toCharArray), stringCleaner.clean(string2.toCharArray))(new StringCleanerDelegate) + compare(stringCleaner.clean(string1.toCharArray), + stringCleaner.clean(string2.toCharArray) + )(new StringCleanerDelegate) } private[this] def `match`(ct: CompareTuple) = { diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala index 892da04..8292c55 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala @@ -24,6 +24,8 @@ object JaroWinklerMetric extends StringMetric { // Return 1 if strings are an exact match. if (string1.length > 0 && string1 == string2) return Some(1f) - compare(stringCleaner.clean(string1.toCharArray), stringCleaner.clean(string2.toCharArray))(new StringCleanerDelegate) + compare(stringCleaner.clean(string1.toCharArray), + stringCleaner.clean(string2.toCharArray) + )(new StringCleanerDelegate) } }
\ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala
new file mode 100755
index 0000000..641bbde
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala
@@ -0,0 +1,34 @@
+package org.hashtree.stringmetric
+
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Checks HammingMetric.compare against empty inputs and pairs with known distances. */
+@RunWith(classOf[JUnitRunner])
+final class HammingMetricSpec extends ScalaTest {
+  "HammingMetric" should provide {
+    "compare method" when passed {
+      "valid arguments" should returns {
+        "Int indicating distance" in {
+          // Pairs with an empty string on at least one side: no result is expected.
+          val notComparable = Seq(("", ""), ("abc", ""), ("", "xyz"))
+          // Equal length pairs together with the distance expected for each.
+          val expectedDistances = Seq(
+            ("abc", "abc", 0),
+            ("abc", "xyz", 3),
+            ("toned", "roses", 3),
+            ("1011101", "1001001", 2),
+            ("2173896", "2233796", 3)
+          )
+
+          notComparable.foreach { case (left, right) =>
+            HammingMetric.compare(left, right).isDefined should be (false)
+          }
+          expectedDistances.foreach { case (left, right, distance) =>
+            HammingMetric.compare(left, right).get should be (distance)
+          }
+        }
+      }
+    }
+  }
+}
\ No newline at end of file @@ -1,6 +1,7 @@ #stringmetric A collection of string metrics implemented in Scala. Includes a light-weight core API and CLI for each string metric. The following string metrics are currently supported: +* Hamming * Jaro * Jaro-Winkler * Soundex |