summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xcli/source/core/scala/org/hashtree/stringmetric/cli/command/hammingMetric.scala56
-rwxr-xr-xcli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroMetric.scala6
-rwxr-xr-xcli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala6
-rwxr-xr-xcli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala6
-rwxr-xr-xcli/source/test/scala/org/hashtree/stringmetric/cli/command/hammingMetricSpec.scala39
-rwxr-xr-xcli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala2
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala25
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala4
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala4
-rwxr-xr-xcore/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala25
-rwxr-xr-xreadme.md1
11 files changed, 168 insertions, 6 deletions
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/hammingMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/hammingMetric.scala
new file mode 100755
index 0000000..9e8dcfc
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/hammingMetric.scala
@@ -0,0 +1,56 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.{ CaseStringCleaner, HammingMetric, StringCleanerDelegate }
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.cli.command._
+
+/**
+ * The hammingMetric [[org.hashtree.stringmetric.cli.command.Command]]. Prints the result of comparing two strings with
+ * [[org.hashtree.stringmetric.HammingMetric]], or "not comparable" when the metric yields no result.
+ */
+object hammingMetric extends Command {
+  /**
+   * Entry point. Shows help for -h/--help, runs the comparison when the dashless value contains exactly one space,
+   * and otherwise raises an IllegalArgumentException. Anything thrown inside is handed to error.
+   */
+  override def main(args: Array[String]): Unit = {
+    val options = OptionMapUtility.toOptionMap(args)
+
+    try {
+      val helpRequested = options.contains('h) || options.contains('help)
+      // A def, so the dashless value is only inspected when help was not requested.
+      def twoStringsGiven = options.contains('dashless) && options('dashless).count(_ == ' ') == 1
+
+      if (helpRequested) {
+        help()
+        exit(options)
+      } else if (twoStringsGiven) {
+        execute(options)
+        exit(options)
+      } else {
+        // Neither help nor a usable pair of strings: invalid syntax.
+        throw new IllegalArgumentException("Expected valid syntax. See --help.")
+      }
+    } catch {
+      case e => error(e)(options)
+    }
+  }
+
+  /** Prints the description, syntax, and options, one entry per line, joined by the platform line separator. */
+  override def help(): Unit = {
+    val ls = sys.props("line.separator")
+    val tab = " "
+    val lines = Seq(
+      "Compares the number of characters that two equal length strings are different from one another.",
+      "",
+      "Syntax:",
+      tab + "hammingMetric [Options] string1 string2...",
+      "",
+      "Options:",
+      tab + "-h, --help",
+      tab + tab + "Outputs description, syntax, and options."
+    )
+
+    println(lines.mkString(ls))
+  }
+
+  /** Splits the dashless value on a space, compares the first two parts, and prints the outcome. */
+  override def execute(options: OptionMap): Unit = {
+    val operands = options('dashless).split(" ")
+    val distance = HammingMetric.compare(operands(0), operands(1))(new StringCleanerDelegate with CaseStringCleaner)
+
+    println(distance.map(_.toString).getOrElse("not comparable"))
+  }
+} \ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroMetric.scala
index 874f3aa..51a4958 100755
--- a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroMetric.scala
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroMetric.scala
@@ -47,6 +47,10 @@ object jaroMetric extends Command {
override def execute(options: OptionMap): Unit = {
val strings = options('dashless).split(" ")
- println(JaroMetric.compare(strings(0), strings(1))(new StringCleanerDelegate with CaseStringCleaner).getOrElse("0.0").toString)
+ println(
+ JaroMetric.compare(strings(0),
+ strings(1))(new StringCleanerDelegate with CaseStringCleaner
+ ).getOrElse("not comparable").toString
+ )
}
} \ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala
index ea1b8a6..af633ae 100755
--- a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala
@@ -47,6 +47,10 @@ object jaroWinklerMetric extends Command {
override def execute(options: OptionMap): Unit = {
val strings = options('dashless).split(" ")
- println(JaroWinklerMetric.compare(strings(0), strings(1))(new StringCleanerDelegate with CaseStringCleaner).getOrElse("0.0").toString)
+ println(
+ JaroWinklerMetric.compare(strings(0),
+ strings(1))(new StringCleanerDelegate with CaseStringCleaner
+ ).getOrElse("not comparable").toString
+ )
}
} \ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala
index 5a44395..f0b204e 100755
--- a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala
@@ -47,6 +47,10 @@ object soundexMetric extends Command {
override def execute(options: OptionMap): Unit = {
val strings = options('dashless).split(" ")
- println(SoundexMetric.compare(strings(0), strings(1))(new StringCleanerDelegate).getOrElse("false").toString)
+ println(
+ SoundexMetric.compare(strings(0),
+ strings(1))(new StringCleanerDelegate
+ ).getOrElse("not comparable").toString
+ )
}
} \ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/command/hammingMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/hammingMetricSpec.scala
new file mode 100755
index 0000000..f140505
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/hammingMetricSpec.scala
@@ -0,0 +1,39 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Exercises the hammingMetric command's main method through captured console output. */
+@RunWith(classOf[JUnitRunner])
+final class hammingMetricSpec extends ScalaTest {
+  "hammingMetric" should provide {
+    "main method" when passed {
+      "valid dashless arguments" should executes {
+        "print if they are a match" in {
+          // Runs the command with the given operands and returns everything it printed.
+          def printed(operands: String*): String = {
+            val captured = new java.io.ByteArrayOutputStream()
+
+            Console.withOut(captured)(
+              hammingMetric.main((Seq("--unitTest", "--debug") ++ operands).toArray)
+            )
+
+            captured.toString
+          }
+
+          printed("aBc", "abc") should equal ("0\n")
+          printed("aBc", "xyz") should equal ("3\n")
+        }
+      }
+      "no dashless arguments" should throws {
+        "IllegalArgumentException" in {
+          evaluating {
+            hammingMetric.main(Array("--unitTest", "--debug"))
+          } should produce [IllegalArgumentException]
+        }
+      }
+    }
+  }
+} \ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
index 80a0a50..4fba289 100755
--- a/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
@@ -30,7 +30,7 @@ final class soundexMetricSpec extends ScalaTest {
soundexMetric.main(Array("--unitTest", "--debug", "1", "1"))
)
- out.toString should equal ("false\n")
+ out.toString should equal ("not comparable\n")
out.reset()
}
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
new file mode 100755
index 0000000..67fd3e5
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
@@ -0,0 +1,25 @@
+package org.hashtree.stringmetric
+
+/**
+ * An implementation of the Hamming [[org.hashtree.stringmetric.StringMetric]]: the number of positions at which two
+ * non-empty, equal-length inputs differ.
+ */
+object HammingMetric extends StringMetric {
+  /**
+   * Compares two character arrays after cleaning them with `stringCleaner`.
+   *
+   * @return the number of differing positions, or None when either cleaned array is empty or their lengths differ
+   */
+  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringCleaner: StringCleaner): Option[Int] = {
+    val ca1 = stringCleaner.clean(charArray1)
+    val ca2 = stringCleaner.clean(charArray2)
+
+    // Validate the cleaned arrays, since those are what hamming() receives and its require() calls would
+    // otherwise throw instead of yielding None.
+    if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length)
+      None
+    else
+      Some(hamming(ca1, ca2))
+  }
+
+  /**
+   * Compares two strings: cleans both with `stringCleaner`, then delegates to the character array overload with a
+   * plain StringCleanerDelegate (presumably a pass-through, so the inputs are not cleaned twice; confirm).
+   *
+   * @return the number of differing positions, or None when the strings are not comparable
+   */
+  override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Int] = {
+    compare(stringCleaner.clean(string1.toCharArray),
+      stringCleaner.clean(string2.toCharArray)
+    )(new StringCleanerDelegate)
+  }
+
+  /** Counts the positions at which the two arrays hold different characters. Requires non-empty, equal-length arrays. */
+  private[this] def hamming(ca1: Array[Char], ca2: Array[Char]) = {
+    require(ca1.length > 0)
+    require(ca2.length > 0)
+    require(ca1.length == ca2.length)
+
+    ca1.zip(ca2).count(t => t._1 != t._2)
+  }
+} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
index bd2b468..fb9a3e4 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
@@ -30,7 +30,9 @@ object JaroMetric extends StringMetric {
// Return 1 if strings are an exact match.
if (string1.length > 0 && string1 == string2) return Some(1f)
- compare(stringCleaner.clean(string1.toCharArray), stringCleaner.clean(string2.toCharArray))(new StringCleanerDelegate)
+ compare(stringCleaner.clean(string1.toCharArray),
+ stringCleaner.clean(string2.toCharArray)
+ )(new StringCleanerDelegate)
}
private[this] def `match`(ct: CompareTuple) = {
diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
index 892da04..8292c55 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
@@ -24,6 +24,8 @@ object JaroWinklerMetric extends StringMetric {
// Return 1 if strings are an exact match.
if (string1.length > 0 && string1 == string2) return Some(1f)
- compare(stringCleaner.clean(string1.toCharArray), stringCleaner.clean(string2.toCharArray))(new StringCleanerDelegate)
+ compare(stringCleaner.clean(string1.toCharArray),
+ stringCleaner.clean(string2.toCharArray)
+ )(new StringCleanerDelegate)
}
} \ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala
new file mode 100755
index 0000000..641bbde
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala
@@ -0,0 +1,25 @@
+package org.hashtree.stringmetric
+
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Verifies HammingMetric.compare for empty, unequal-length, and equal-length inputs. */
+@RunWith(classOf[JUnitRunner])
+final class HammingMetricSpec extends ScalaTest {
+  "HammingMetric" should provide {
+    "compare method" when passed {
+      "valid arguments" should returns {
+        "Int indicating distance" in {
+          // Empty input on either side is not comparable.
+          HammingMetric.compare("", "").isDefined should be (false)
+          HammingMetric.compare("abc", "").isDefined should be (false)
+          HammingMetric.compare("", "xyz").isDefined should be (false)
+
+          // Hamming distance is only defined for equal-length inputs, so differing lengths are not comparable.
+          HammingMetric.compare("abc", "ab").isDefined should be (false)
+          HammingMetric.compare("ab", "abc").isDefined should be (false)
+
+          HammingMetric.compare("abc", "abc").get should be (0)
+          HammingMetric.compare("abc", "xyz").get should be (3)
+          HammingMetric.compare("toned", "roses").get should be (3)
+          HammingMetric.compare("1011101", "1001001").get should be (2)
+          HammingMetric.compare("2173896", "2233796").get should be (3)
+        }
+      }
+    }
+  }
+} \ No newline at end of file
diff --git a/readme.md b/readme.md
index 13eb075..78bfe34 100755
--- a/readme.md
+++ b/readme.md
@@ -1,6 +1,7 @@
#stringmetric
A collection of string metrics implemented in Scala. Includes a light-weight core API and CLI for each string metric. The following string metrics are currently supported:
+* Hamming
* Jaro
* Jaro-Winkler
* Soundex