diff options
author | Rocky Madden <git@rockymadden.com> | 2012-10-16 17:07:48 -0600 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2012-10-16 17:07:48 -0600 |
commit | 4379844e4b8a4fd3f5723cd6af869cf157d98fc9 (patch) | |
tree | 3af9c101559e4965db018909ba8509dddf352b4a /core | |
parent | 61f2ddf07826d3b278eb17d31ffbb5bfdbdc56ca (diff) | |
download | stringmetric-4379844e4b8a4fd3f5723cd6af869cf157d98fc9.tar.gz stringmetric-4379844e4b8a4fd3f5723cd6af869cf157d98fc9.tar.bz2 stringmetric-4379844e4b8a4fd3f5723cd6af869cf157d98fc9.zip |
Character array cleaning now done prior to length and equality evaluations.
Diffstat (limited to 'core')
4 files changed, 21 insertions, 10 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
index 687ffba..85795b6 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/HammingMetric.scala
@@ -5,14 +5,18 @@ object HammingMetric extends StringMetric {
 	implicit val stringCleaner = new StringCleanerDelegate with CaseStringCleaner
 
 	override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringCleaner: StringCleaner): Option[Int] = {
-		if (charArray1.length == 0 || charArray2.length == 0 || charArray2.length != charArray2.length)
+		val ca1 = stringCleaner.clean(charArray1)
+		val ca2 = stringCleaner.clean(charArray2)
+
+		if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length)
 			None
 		else
-			Some(hamming(stringCleaner.clean(charArray1), stringCleaner.clean(charArray2)))
+			Some(hamming(ca1, ca2))
 	}
 
 	override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Int] = {
-		compare(stringCleaner.clean(string1.toCharArray),
+		compare(
+			stringCleaner.clean(string1.toCharArray),
 			stringCleaner.clean(string2.toCharArray)
 		)(new StringCleanerDelegate)
 	}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
index 4db130c..7af4933 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
@@ -32,7 +32,8 @@ object JaroMetric extends StringMetric {
 		// Return 1 if strings are an exact match.
 		if (string1.length > 0 && string1 == string2) return Some(1f)
 
-		compare(stringCleaner.clean(string1.toCharArray),
+		compare(
+			stringCleaner.clean(string1.toCharArray),
 			stringCleaner.clean(string2.toCharArray)
 		)(new StringCleanerDelegate)
 	}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
index 8fc554c..85a0736 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
@@ -26,7 +26,8 @@ object JaroWinklerMetric extends StringMetric {
 		// Return 1 if strings are an exact match.
 		if (string1.length > 0 && string1 == string2) return Some(1f)
 
-		compare(stringCleaner.clean(string1.toCharArray),
+		compare(
+			stringCleaner.clean(string1.toCharArray),
 			stringCleaner.clean(string2.toCharArray)
 		)(new StringCleanerDelegate)
 	}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala
index 714f2f7..3c3ee5b 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala
@@ -7,14 +7,17 @@ object SoundexMetric extends StringMetric {
 	implicit val stringCleaner = new StringCleanerDelegate
 
 	override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringCleaner: StringCleaner): Option[Boolean] = {
-		val se1 = if (charArray1.length > 0) soundex(stringCleaner.clean(charArray1)) else None
-		val se2 = if (charArray2.length > 0) soundex(stringCleaner.clean(charArray2)) else None
+		val ca1 = stringCleaner.clean(charArray1)
+		val ca2 = stringCleaner.clean(charArray2)
+		val se1 = if (ca1.length > 0) soundex(ca1) else None
+		val se2 = if (ca2.length > 0) soundex(ca2) else None
 
 		if (!se1.isDefined || !se2.isDefined) None else Some(se1.get == se2.get)
 	}
 
 	override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Boolean] = {
-		compare(stringCleaner.clean(string1.toCharArray),
+		compare(
+			stringCleaner.clean(string1.toCharArray),
 			stringCleaner.clean(string2.toCharArray)
 		)(new StringCleanerDelegate)
 	}
@@ -61,7 +64,8 @@ object SoundexMetric extends StringMetric {
 					case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c)
 					// Code once.
 					case _ => {
-						m1(c,
+						m1(
+							c,
 							o.last match {
 								case '1' | '2' | '3' | '4' | '5' | '6' => o.last
 								case _ => m2(o.last)
@@ -81,7 +85,8 @@ object SoundexMetric extends StringMetric {
 				if (ca.length - 1 == l._2) Some(l._1 + "000")
 				else {
 					Some(
-						code(ca.takeRight(ca.length - (l._2 + 1)),
+						code(
+							ca.takeRight(ca.length - (l._2 + 1)),
 							l._1, // Pass first letter.
 							Array(l._1) // Pass array with first letter.
 						).mkString.padTo(4, '0')