summaryrefslogtreecommitdiff
path: root/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
diff options
context:
space:
mode:
Diffstat (limited to 'core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala')
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala58
1 files changed, 58 insertions, 0 deletions
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
new file mode 100755
index 0000000..47dff23
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
@@ -0,0 +1,58 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+
+/** An implementation of the Levenshtein metric. */
+class LevenshteinMetric extends StringMetric[DummyImplicit, Int] { this: StringFilter =>
+ final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+ (implicit di: DummyImplicit): Option[Int] = {
+
+ val fca1 = filter(charArray1)
+ lazy val fca2 = filter(charArray2)
+
+ if (fca1.length == 0 || fca2.length == 0) None
+ else if (fca1.sameElements(fca2)) Some(0)
+ else Some(levenshtein(fca1, fca2))
+ }
+
+ final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Int] =
+ compare(string1.toCharArray, string2.toCharArray)
+
+ private[this] def levenshtein(ct: CompareTuple[Char]) = {
+ val m = Array.fill[Int](ct._1.length + 1, ct._2.length + 1)(-1)
+
+ def distance(t: (Int, Int)): Int = {
+ t match {
+ case (r, 0) => r
+ case (0, c) => c
+ case (r, c) if m(r)(c) != -1 => m(r)(c)
+ case (r, c) => {
+ val min =
+ if (ct._1(r - 1) == ct._2(c - 1)) distance(r - 1, c - 1)
+ else math.min(
+ math.min(
+ distance(r - 1, c) + 1, // Delete (left).
+ distance(r, c - 1) + 1 // Insert (up).
+ ),
+ distance(r - 1, c - 1) + 1 // Substitute (left-up).
+ )
+
+ m(r)(c) = min
+ min
+ }
+ }
+ }
+
+ distance(ct._1.length, ct._2.length)
+ }
+}
+
+object LevenshteinMetric {
+ private lazy val self = apply()
+
+ def apply(): LevenshteinMetric = new LevenshteinMetric with StringFilter
+
+ def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+ def compare(string1: String, string2: String) = self.compare(string1, string2)
+}