summary refs log tree commit diff
path: root/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
diff options
context:
space:
mode:
author Rocky Madden <git@rockymadden.com> 2014-01-02 13:47:43 -0700
committer Rocky Madden <git@rockymadden.com> 2014-01-02 13:47:43 -0700
commit 49de854bb464f1be37fbb27f942b9b65e52df751 (patch)
tree 6c9a27ac1264648f67eba9c8707fa87d3dc5b3cd /core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
parent 42b990a1523a68717afcbdbc2cc4968c041451ec (diff)
download stringmetric-49de854bb464f1be37fbb27f942b9b65e52df751.tar.gz
stringmetric-49de854bb464f1be37fbb27f942b9b65e52df751.tar.bz2
stringmetric-49de854bb464f1be37fbb27f942b9b65e52df751.zip
Moved from gradle to sbt.
Diffstat (limited to 'core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala')
-rwxr-xr-x core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala 23
1 files changed, 23 insertions, 0 deletions
diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
new file mode 100755
index 0000000..e83f73f
--- /dev/null
+++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
@@ -0,0 +1,23 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.Metric.StringMetric
+
+/**
+ * An implementation of the Jaro-Winkler metric: the Jaro score plus a bonus for a shared prefix of up to 4 characters.
+ * One differing detail in this implementation is that once a character is matched in string2, it cannot be matched
+ * again. This gives a more penalized similarity in these scenarios (e.g. henka vs henkan is 0.9666, not the typical 0.9722).
+ */
+case object JaroWinklerMetric extends StringMetric[Double] {
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] = // empty exactly when JaroMetric.compare is empty
+ JaroMetric.compare(a, b).map { // adjust the Jaro score only if JaroMetric produced one
+ case 0d => 0d // a Jaro score of exactly 0 is returned as-is
+ case 1d => 1d // a Jaro score of exactly 1 is returned as-is
+ case jaro => {
+ val prefix = a.zip(b).takeWhile(t => t._1 == t._2) // leading run of equal pairs; NOTE(review): zips all of a and b
+
+ jaro + ((if (prefix.length <= 4) prefix.length else 4) * 0.1d * (1 - jaro)) // jaro + min(prefix, 4) * 0.1 * (1 - jaro)
+ }
+ }
+
+ override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) // delegates to the Array[Char] overload
+}