summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
author: Rocky Madden <git@rockymadden.com> 2012-10-20 15:52:28 -0600
committer: Rocky Madden <git@rockymadden.com> 2012-10-20 15:52:28 -0600
commit: 89d8b433959af9d5ea8c8a9379c4c7a8d7b0dd47 (patch)
tree: 8b3cbbb760701d1d84dc2fa3c640036bf5c1b796 /core
parent: e5888541fc1c76d73c159a1380fc33e3c3d9e2ce (diff)
download: stringmetric-89d8b433959af9d5ea8c8a9379c4c7a8d7b0dd47.tar.gz
stringmetric-89d8b433959af9d5ea8c8a9379c4c7a8d7b0dd47.tar.bz2
stringmetric-89d8b433959af9d5ea8c8a9379c4c7a8d7b0dd47.zip
Better handling for zero length arrays, remove need for return keywords, and more consistent application of equality checks on string compare methods.
Diffstat (limited to 'core')
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/distance/HammingMetric.scala 22
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/distance/JaroMetric.scala 36
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/distance/JaroWinklerMetric.scala 13
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/distance/LevenshteinMetric.scala 19
4 files changed, 43 insertions, 47 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/distance/HammingMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/distance/HammingMetric.scala
index 657c818..f40062e 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/distance/HammingMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/distance/HammingMetric.scala
@@ -8,24 +8,24 @@ object HammingMetric extends StringMetric {
val ca1 = stringCleaner.clean(charArray1)
val ca2 = stringCleaner.clean(charArray2)
- if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length)
- None
- else
- Some(hamming(ca1, ca2))
+ if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length) None
+ else Some(hamming(ca1, ca2))
}
override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Int] = {
- compare(
- stringCleaner.clean(string1.toCharArray),
- stringCleaner.clean(string2.toCharArray)
- )(new StringCleanerDelegate)
+ if (string1.length > 0 && string1.length == string2.length && string1 == string2) Some(0)
+ else
+ compare(
+ stringCleaner.clean(string1.toCharArray),
+ stringCleaner.clean(string2.toCharArray)
+ )(new StringCleanerDelegate)
}
private[this] def hamming(ct: CompareTuple[Char]) = {
- require(ct._1.length > 0)
- require(ct._2.length > 0)
require(ct._1.length == ct._2.length)
- ct._1.zip(ct._2).count(t => t._1 != t._2)
+ if (ct._1.length == 0) 0
+ else
+ ct._1.zip(ct._2).count(t => t._1 != t._2)
}
} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/distance/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/distance/JaroMetric.scala
index a77811b..5da024e 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/distance/JaroMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/distance/JaroMetric.scala
@@ -13,30 +13,28 @@ object JaroMetric extends StringMetric {
val ca1 = stringCleaner.clean(charArray1)
val ca2 = stringCleaner.clean(charArray2)
- // Return None if either character array lacks length.
- if (ca1.length == 0 || ca2.length == 0) return None
-
- val mt = `match`((ca1, ca2))
- val ms = scoreMatches((mt._1, mt._2))
- val ts = scoreTranspositions((mt._1, mt._2))
-
- // Return 0 if matches score is 0.
- if (ms == 0) return Some(0f)
-
- Some(((ms.toFloat / ca1.length) + (ms.toFloat / ca2.length) + ((ms.toFloat - ts) / ms)) / 3)
+ if (ca1.length == 0 || ca2.length == 0) None
+ else {
+ val mt = `match`((ca1, ca2))
+ val ms = scoreMatches((mt._1, mt._2))
+ val ts = scoreTranspositions((mt._1, mt._2))
+
+ if (ms == 0) Some(0f)
+ else
+ Some(((ms.toFloat / ca1.length) + (ms.toFloat / ca2.length) + ((ms.toFloat - ts) / ms)) / 3)
+ }
}
override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Float] = {
- // Return 1 if strings are an exact match.
- if (string1.length > 0 && string1 == string2) return Some(1f)
-
- compare(
- stringCleaner.clean(string1.toCharArray),
- stringCleaner.clean(string2.toCharArray)
- )(new StringCleanerDelegate)
+ if (string1.length > 0 && string1.length == string2.length && string1 == string2) Some(1f)
+ else
+ compare(
+ stringCleaner.clean(string1.toCharArray),
+ stringCleaner.clean(string2.toCharArray)
+ )(new StringCleanerDelegate)
}
- private[this] def `match`(ct: CompareTuple[Char]) = {
+ private[this] def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = {
val window = math.abs((math.max(ct._1.length, ct._2.length) / 2f).floor.toInt - 1)
val one = ArrayBuffer.empty[Int]
val two = HashSet.empty[Int]
diff --git a/core/source/core/scala/org/hashtree/stringmetric/distance/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/distance/JaroWinklerMetric.scala
index 568a69e..a8ae494 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/distance/JaroWinklerMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/distance/JaroWinklerMetric.scala
@@ -23,12 +23,11 @@ object JaroWinklerMetric extends StringMetric {
}
override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Float] = {
- // Return 1 if strings are an exact match.
- if (string1.length > 0 && string1 == string2) return Some(1f)
-
- compare(
- stringCleaner.clean(string1.toCharArray),
- stringCleaner.clean(string2.toCharArray)
- )(new StringCleanerDelegate)
+ if (string1.length > 0 && string1.length == string2.length && string1 == string2) Some(1f)
+ else
+ compare(
+ stringCleaner.clean(string1.toCharArray),
+ stringCleaner.clean(string2.toCharArray)
+ )(new StringCleanerDelegate)
}
} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/distance/LevenshteinMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/distance/LevenshteinMetric.scala
index ade1178..4c58ff1 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/distance/LevenshteinMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/distance/LevenshteinMetric.scala
@@ -8,8 +8,7 @@ object LevenshteinMetric extends StringMetric {
val ca1 = stringCleaner.clean(charArray1)
val ca2 = stringCleaner.clean(charArray2)
- if (ca1.length == 0 && ca2.length == 0)
- None
+ if (ca1.length == 0 && ca2.length == 0) None
else {
val levenshteinMemoize = Memoize.Y(levenshtein)
@@ -18,17 +17,17 @@ object LevenshteinMetric extends StringMetric {
}
override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Int] = {
- compare(
- stringCleaner.clean(string1.toCharArray),
- stringCleaner.clean(string2.toCharArray)
- )(new StringCleanerDelegate)
+ if (string1.length > 0 && string1.length == string2.length && string1 == string2) Some(0)
+ else
+ compare(
+ stringCleaner.clean(string1.toCharArray),
+ stringCleaner.clean(string2.toCharArray)
+ )(new StringCleanerDelegate)
}
private[this] def levenshtein(f: CompareTuple[Char] => Int)(ct: CompareTuple[Char]): Int = {
- if (ct._1.length == 0)
- ct._2.length
- else if (ct._2.length == 0)
- ct._1.length
+ if (ct._1.length == 0) ct._2.length
+ else if (ct._2.length == 0) ct._1.length
else {
math.min(
math.min(