summary refs log tree commit diff
path: root/core
diff options
context:
space:
mode:
author Rocky Madden <git@rockymadden.com> 2012-10-07 03:00:06 -0600
committer Rocky Madden <git@rockymadden.com> 2012-10-07 03:00:06 -0600
commit ef31d1cc8391803d4a9991bbbbf91b45d1add14c (patch)
tree 497d0e7c28bfb6876b6d02be0222f61a1a7399ec /core
parent 7530433968effe6a4f5b20898ded7290db9f995f (diff)
download stringmetric-ef31d1cc8391803d4a9991bbbbf91b45d1add14c.tar.gz
stringmetric-ef31d1cc8391803d4a9991bbbbf91b45d1add14c.tar.bz2
stringmetric-ef31d1cc8391803d4a9991bbbbf91b45d1add14c.zip
Refactored the StringMetric trait to force implementors to provide a compare method which accepts character arrays. Character array arguments passed to the compare method are assumed to already be clean. This allowed the removal of duplicate calls to string cleaning methods.
Diffstat (limited to 'core')
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala 20
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala 13
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/StringMetric.scala 2
3 files changed, 20 insertions, 15 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
index bd5f850..bcea174 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/JaroMetric.scala
@@ -9,21 +9,23 @@ import scala.util.control.Breaks.{ break, breakable }
* matched in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios.
*/
object JaroMetric extends StringMetric {
- override def compare(string1: String, string2: String): Float = {
- val ca1 = string1.replaceAllLiterally(" ", "").toLowerCase.toCharArray
- val ca2 = string2.replaceAllLiterally(" ", "").toLowerCase.toCharArray
-
+ override def compare(charArray1: Array[Char], charArray2: Array[Char]): Float = {
// Return 0 if either character array lacks length.
- if (ca1.length == 0 || ca2.length == 0) return 0f
+ if (charArray1.length == 0 || charArray2.length == 0) return 0f
- val mt = `match`(ca1, ca2)
- val ms = scoreMatches(mt._1, mt._2)
- val ts = scoreTranspositions(mt._1, mt._2)
+ val mt = `match`((charArray1, charArray2))
+ val ms = scoreMatches((mt._1, mt._2))
+ val ts = scoreTranspositions((mt._1, mt._2))
// Return 0 if matches score is 0.
if (ms == 0) return 0f
- ((ms.toFloat / ca1.length) + (ms.toFloat / ca2.length) + ((ms.toFloat - ts) / ms)) / 3
+ ((ms.toFloat / charArray1.length) + (ms.toFloat / charArray2.length) + ((ms.toFloat - ts) / ms)) / 3
+ }
+
+ override def compare(string1: String, string2: String): Float = {
+ compare(string1.replaceAllLiterally(" ", "").toLowerCase.toCharArray,
+ string2.replaceAllLiterally(" ", "").toLowerCase.toCharArray)
}
private[this] def `match`(ct: CompareTuple): MatchTuple = {
diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
index 545dc42..dec2cbe 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
@@ -10,12 +10,15 @@ import scala.util.control.Breaks.{ break, breakable }
* scenarios (e.g. comparing henka and henkan distance is 0.9666 versus the typical 0.9722).
*/
object JaroWinklerMetric extends StringMetric {
- override def compare(string1: String, string2: String): Float = {
- val ca1 = string1.replaceAllLiterally(" ", "").toLowerCase.toCharArray
- val ca2 = string2.replaceAllLiterally(" ", "").toLowerCase.toCharArray
- val prefix = ca1.zip(ca2).takeWhile(t => t._1 == t._2).map(_._1)
- val jaro = JaroMetric.compare(string1, string2)
+ override def compare(charArray1: Array[Char], charArray2: Array[Char]): Float = {
+ val prefix = charArray1.zip(charArray2).takeWhile(t => t._1 == t._2).map(_._1)
+ val jaro = JaroMetric.compare(charArray1, charArray2)
jaro + ((if (prefix.length <= 4) prefix.length else 4) * (0.1f * (1 - jaro)))
}
+
+ override def compare(string1: String, string2: String): Float = {
+ compare(string1.replaceAllLiterally(" ", "").toLowerCase.toCharArray,
+ string2.replaceAllLiterally(" ", "").toLowerCase.toCharArray)
+ }
} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/StringMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/StringMetric.scala
index 792aeba..2e92292 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/StringMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/StringMetric.scala
@@ -2,5 +2,5 @@ package org.hashtree.stringmetric
/** Marks those which leverage traits of a string based Metric. */
trait StringMetric extends Metric[String] {
-
+ def compare(ca1: Array[Char], ca2: Array[Char]): AnyVal
} \ No newline at end of file