From 0db51ce1fe68371efb9010810ce93d93f48c50d0 Mon Sep 17 00:00:00 2001
From: Rocky Madden
Date: Mon, 15 Oct 2012 21:53:21 -0600
Subject: Created HammingMetric, spec, and command.

---
 .../org/hashtree/stringmetric/HammingMetric.scala  | 40 ++++++++++++++++++++++
 .../org/hashtree/stringmetric/JaroMetric.scala     |  4 ++-
 .../hashtree/stringmetric/JaroWinklerMetric.scala  |  4 ++-
 .../hashtree/stringmetric/HammingMetricSpec.scala  | 32 +++++++++++++++++
 4 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100755 core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
 create mode 100755 core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala

(limited to 'core')

diff --git a/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
new file mode 100755
index 0000000..67fd3e5
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
@@ -0,0 +1,40 @@
+package org.hashtree.stringmetric
+
+/** An implementation of the Hamming [[org.hashtree.stringmetric.StringMetric]]. */
+object HammingMetric extends StringMetric {
+	/** Counts the positions at which the two cleaned character arrays differ.
+	  *
+	  * Validation is applied to the cleaner's output rather than to the raw input, so that the
+	  * preconditions of hamming hold whatever the cleaner does to the arrays.
+	  *
+	  * @return None if either cleaned array is empty or their lengths differ, otherwise Some
+	  *         holding the number of differing positions.
+	  */
+	override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringCleaner: StringCleaner): Option[Int] = {
+		val ca1 = stringCleaner.clean(charArray1)
+		val ca2 = stringCleaner.clean(charArray2)
+
+		if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length)
+			None
+		else
+			Some(hamming(ca1, ca2))
+	}
+
+	/** Cleans the characters of both strings with the supplied cleaner, then delegates to the
+	  * character array overload, passing a new StringCleanerDelegate as that call's cleaner.
+	  */
+	override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Int] = {
+		compare(stringCleaner.clean(string1.toCharArray),
+			stringCleaner.clean(string2.toCharArray)
+		)(new StringCleanerDelegate)
+	}
+
+	/** Counts the indexes at which two non-empty arrays of equal length hold different characters. */
+	private[this] def hamming(ca1: Array[Char], ca2: Array[Char]): Int = {
+		require(ca1.length > 0)
+		require(ca2.length > 0)
+		require(ca1.length == ca2.length)
+
+		ca1.zip(ca2).count(t => t._1 != t._2)
+	}
+}
\ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
index bd2b468..fb9a3e4 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
@@ -30,7 +30,9 @@ object JaroMetric extends StringMetric {
 		// Return 1 if strings are an exact match.
 		if (string1.length > 0 && string1 == string2) return Some(1f)
 
-		compare(stringCleaner.clean(string1.toCharArray), stringCleaner.clean(string2.toCharArray))(new StringCleanerDelegate)
+		compare(stringCleaner.clean(string1.toCharArray),
+			stringCleaner.clean(string2.toCharArray)
+		)(new StringCleanerDelegate)
 	}
 
 	private[this] def `match`(ct: CompareTuple) = {
diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
index 892da04..8292c55 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
@@ -24,6 +24,8 @@ object JaroWinklerMetric extends StringMetric {
 		// Return 1 if strings are an exact match.
 		if (string1.length > 0 && string1 == string2) return Some(1f)
 
-		compare(stringCleaner.clean(string1.toCharArray), stringCleaner.clean(string2.toCharArray))(new StringCleanerDelegate)
+		compare(stringCleaner.clean(string1.toCharArray),
+			stringCleaner.clean(string2.toCharArray)
+		)(new StringCleanerDelegate)
 	}
 }
\ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala
new file mode 100755
index 0000000..641bbde
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/HammingMetricSpec.scala
@@ -0,0 +1,32 @@
+package org.hashtree.stringmetric
+
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Exercises HammingMetric.compare with empty, unequal length, and equal length arguments. */
+@RunWith(classOf[JUnitRunner])
+final class HammingMetricSpec extends ScalaTest {
+	"HammingMetric" should provide {
+		"compare method" when passed {
+			"valid arguments" should returns {
+				"Int indicating distance" in {
+					// Empty arguments have no distance.
+					HammingMetric.compare("", "").isDefined should be (false)
+					HammingMetric.compare("abc", "").isDefined should be (false)
+					HammingMetric.compare("", "xyz").isDefined should be (false)
+
+					// Arguments of unequal length have no distance either.
+					HammingMetric.compare("abc", "wxyz").isDefined should be (false)
+					HammingMetric.compare("wxyz", "abc").isDefined should be (false)
+					HammingMetric.compare("abc".toCharArray, "wxyz".toCharArray).isDefined should be (false)
+
+					HammingMetric.compare("abc", "abc").get should be (0)
+					HammingMetric.compare("abc", "xyz").get should be (3)
+					HammingMetric.compare("toned", "roses").get should be (3)
+					HammingMetric.compare("1011101", "1001001").get should be (2)
+					HammingMetric.compare("2173896", "2233796").get should be (3)
+				}
+			}
+		}
+	}
+}
\ No newline at end of file
-- 
cgit v1.2.3