diff options
author | Rocky Madden <git@rockymadden.com> | 2012-10-20 15:52:28 -0600 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2012-10-20 15:52:28 -0600 |
commit | 89d8b433959af9d5ea8c8a9379c4c7a8d7b0dd47 (patch) | |
tree | 8b3cbbb760701d1d84dc2fa3c640036bf5c1b796 /core | |
parent | e5888541fc1c76d73c159a1380fc33e3c3d9e2ce (diff) | |
download | stringmetric-89d8b433959af9d5ea8c8a9379c4c7a8d7b0dd47.tar.gz stringmetric-89d8b433959af9d5ea8c8a9379c4c7a8d7b0dd47.tar.bz2 stringmetric-89d8b433959af9d5ea8c8a9379c4c7a8d7b0dd47.zip |
Better handling for zero-length arrays, remove the need for `return` keywords, and apply equality checks more consistently in the string compare methods.
Diffstat (limited to 'core')
4 files changed, 43 insertions, 47 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/distance/HammingMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/distance/HammingMetric.scala index 657c818..f40062e 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/distance/HammingMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/distance/HammingMetric.scala @@ -8,24 +8,24 @@ object HammingMetric extends StringMetric { val ca1 = stringCleaner.clean(charArray1) val ca2 = stringCleaner.clean(charArray2) - if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length) - None - else - Some(hamming(ca1, ca2)) + if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length) None + else Some(hamming(ca1, ca2)) } override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Int] = { - compare( - stringCleaner.clean(string1.toCharArray), - stringCleaner.clean(string2.toCharArray) - )(new StringCleanerDelegate) + if (string1.length > 0 && string1.length == string2.length && string1 == string2) Some(0) + else + compare( + stringCleaner.clean(string1.toCharArray), + stringCleaner.clean(string2.toCharArray) + )(new StringCleanerDelegate) } private[this] def hamming(ct: CompareTuple[Char]) = { - require(ct._1.length > 0) - require(ct._2.length > 0) require(ct._1.length == ct._2.length) - ct._1.zip(ct._2).count(t => t._1 != t._2) + if (ct._1.length == 0) 0 + else + ct._1.zip(ct._2).count(t => t._1 != t._2) } }
\ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/distance/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/distance/JaroMetric.scala index a77811b..5da024e 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/distance/JaroMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/distance/JaroMetric.scala @@ -13,30 +13,28 @@ object JaroMetric extends StringMetric { val ca1 = stringCleaner.clean(charArray1) val ca2 = stringCleaner.clean(charArray2) - // Return None if either character array lacks length. - if (ca1.length == 0 || ca2.length == 0) return None - - val mt = `match`((ca1, ca2)) - val ms = scoreMatches((mt._1, mt._2)) - val ts = scoreTranspositions((mt._1, mt._2)) - - // Return 0 if matches score is 0. - if (ms == 0) return Some(0f) - - Some(((ms.toFloat / ca1.length) + (ms.toFloat / ca2.length) + ((ms.toFloat - ts) / ms)) / 3) + if (ca1.length == 0 || ca2.length == 0) None + else { + val mt = `match`((ca1, ca2)) + val ms = scoreMatches((mt._1, mt._2)) + val ts = scoreTranspositions((mt._1, mt._2)) + + if (ms == 0) Some(0f) + else + Some(((ms.toFloat / ca1.length) + (ms.toFloat / ca2.length) + ((ms.toFloat - ts) / ms)) / 3) + } } override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Float] = { - // Return 1 if strings are an exact match. 
- if (string1.length > 0 && string1 == string2) return Some(1f) - - compare( - stringCleaner.clean(string1.toCharArray), - stringCleaner.clean(string2.toCharArray) - )(new StringCleanerDelegate) + if (string1.length > 0 && string1.length == string2.length && string1 == string2) Some(1f) + else + compare( + stringCleaner.clean(string1.toCharArray), + stringCleaner.clean(string2.toCharArray) + )(new StringCleanerDelegate) } - private[this] def `match`(ct: CompareTuple[Char]) = { + private[this] def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = { val window = math.abs((math.max(ct._1.length, ct._2.length) / 2f).floor.toInt - 1) val one = ArrayBuffer.empty[Int] val two = HashSet.empty[Int] diff --git a/core/source/core/scala/org/hashtree/stringmetric/distance/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/distance/JaroWinklerMetric.scala index 568a69e..a8ae494 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/distance/JaroWinklerMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/distance/JaroWinklerMetric.scala @@ -23,12 +23,11 @@ object JaroWinklerMetric extends StringMetric { } override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Float] = { - // Return 1 if strings are an exact match. - if (string1.length > 0 && string1 == string2) return Some(1f) - - compare( - stringCleaner.clean(string1.toCharArray), - stringCleaner.clean(string2.toCharArray) - )(new StringCleanerDelegate) + if (string1.length > 0 && string1.length == string2.length && string1 == string2) Some(1f) + else + compare( + stringCleaner.clean(string1.toCharArray), + stringCleaner.clean(string2.toCharArray) + )(new StringCleanerDelegate) } }
\ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/distance/LevenshteinMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/distance/LevenshteinMetric.scala index ade1178..4c58ff1 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/distance/LevenshteinMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/distance/LevenshteinMetric.scala @@ -8,8 +8,7 @@ object LevenshteinMetric extends StringMetric { val ca1 = stringCleaner.clean(charArray1) val ca2 = stringCleaner.clean(charArray2) - if (ca1.length == 0 && ca2.length == 0) - None + if (ca1.length == 0 && ca2.length == 0) None else { val levenshteinMemoize = Memoize.Y(levenshtein) @@ -18,17 +17,17 @@ object LevenshteinMetric extends StringMetric { } override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Int] = { - compare( - stringCleaner.clean(string1.toCharArray), - stringCleaner.clean(string2.toCharArray) - )(new StringCleanerDelegate) + if (string1.length > 0 && string1.length == string2.length && string1 == string2) Some(0) + else + compare( + stringCleaner.clean(string1.toCharArray), + stringCleaner.clean(string2.toCharArray) + )(new StringCleanerDelegate) } private[this] def levenshtein(f: CompareTuple[Char] => Int)(ct: CompareTuple[Char]): Int = { - if (ct._1.length == 0) - ct._2.length - else if (ct._2.length == 0) - ct._1.length + if (ct._1.length == 0) ct._2.length + else if (ct._2.length == 0) ct._1.length else { math.min( math.min( |