summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorRocky Madden <git@rockymadden.com>2013-01-10 12:59:59 -0700
committerRocky Madden <git@rockymadden.com>2013-01-10 12:59:59 -0700
commit2b1ee6b4f3d548414e0513149c727a640f4548fe (patch)
tree97963f86cb3563b801fd5c6de5f6d9b259e35334 /core
parent5ba0d76e0266b6ad7ac9b1d3db4e04f04550ef50 (diff)
downloadstringmetric-2b1ee6b4f3d548414e0513149c727a640f4548fe.tar.gz
stringmetric-2b1ee6b4f3d548414e0513149c727a640f4548fe.tar.bz2
stringmetric-2b1ee6b4f3d548414e0513149c727a640f4548fe.zip
Created Ratcliff/Obershelp metric, command, and specs.
Diffstat (limited to 'core')
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala55
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala42
2 files changed, 97 insertions, 0 deletions
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
new file mode 100755
index 0000000..94e44d0
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{ CompareTuple, FilterableStringMetric, StringFilter, StringMetric }
+
+/** An implementation of the Ratcliff/Obershelp [[com.rockymadden.stringmetric.StringMetric]]. */
+object RatcliffObershelpMetric extends StringMetric with FilterableStringMetric {
+ type CompareReturn = Double
+
+ override def compare(charArray1: Array[Char], charArray2: Array[Char])
+ (implicit stringFilter: StringFilter): Option[CompareReturn] = {
+
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
+
+ if (fca1.length == 0 || fca2.length == 0) None
+ else if (fca1.sameElements(fca2)) Some(1d)
+ else Some(2d * commonSequences(fca1, fca2).foldLeft(0)(_ + _.length) / (fca1.length + fca2.length))
+ }
+
+ override def compare(string1: String, string2: String)
+ (implicit stringFilter: StringFilter): Option[CompareReturn] =
+
+ compare(
+ stringFilter.filter(string1.toCharArray),
+ stringFilter.filter(string2.toCharArray)
+ )
+
+ private[this] def longestCommonSubsequence(ct: CompareTuple[Char]) = {
+ val m = Array.ofDim[Int](ct._1.length + 1, ct._2.length + 1)
+ var lrc = (0, 0, 0) // Length, row, column.
+
+ for (r <- 0 to ct._1.length - 1; c <- 0 to ct._2.length - 1) {
+ if (ct._1(r) == ct._2(c)) {
+ val l = m(r)(c) + 1
+ m(r + 1)(c + 1) = l
+
+ if (l > lrc._1) lrc = (l, r + 1, c + 1)
+ }
+ }
+
+ lrc
+ }
+
+ private[this] def commonSequences(ct: CompareTuple[Char]): Array[Array[Char]] = {
+ val lcs = longestCommonSubsequence(ct)
+
+ if (lcs._1 == 0) Array.empty
+ else {
+ val sct1 = (ct._1.take(lcs._2 - lcs._1), ct._1.takeRight(ct._1.length - lcs._2))
+ val sct2 = (ct._2.take(lcs._3 - lcs._1), ct._2.takeRight(ct._2.length - lcs._3))
+
+ Array(ct._1.slice(lcs._2 - lcs._1, lcs._2)) ++ commonSequences(sct1._1, sct2._1) ++ commonSequences(sct1._2, sct2._2)
+ }
+ }
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
new file mode 100755
index 0000000..e1ff9cf
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
@@ -0,0 +1,42 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class RatcliffObershelpMetricSpec extends ScalaTest {
+ "RatcliffObershelpMetric" should provide {
+ "compare method" when passed {
+ "empty arguments" should returns {
+ "None" in {
+ RatcliffObershelpMetric.compare("", "").isDefined should be (false)
+ RatcliffObershelpMetric.compare("abc", "").isDefined should be (false)
+ RatcliffObershelpMetric.compare("", "xyz").isDefined should be (false)
+ }
+ }
+ "equal arguments" should returns {
+ "0" in {
+ RatcliffObershelpMetric.compare("abc", "abc").get should be (1)
+ RatcliffObershelpMetric.compare("123", "123").get should be (1)
+ }
+ }
+ "unequal arguments" should returns {
+ "Double indicating distance" in {
+ RatcliffObershelpMetric.compare("abc", "xyz").get should be (0)
+ RatcliffObershelpMetric.compare("123", "456").get should be (0)
+ }
+ }
+ "valid arguments" should returns {
+ "Double indicating distance" in {
+ RatcliffObershelpMetric.compare("aleksander", "alexandre").get should be (0.7368421052631579)
+ RatcliffObershelpMetric.compare("alexandre", "aleksander").get should be (0.7368421052631579)
+ RatcliffObershelpMetric.compare("pennsylvania", "pencilvaneya").get should be (0.6666666666666666)
+ RatcliffObershelpMetric.compare("pencilvaneya", "pennsylvania").get should be (0.6666666666666666)
+ RatcliffObershelpMetric.compare("abcefglmn", "abefglmo").get should be (0.8235294117647058)
+ RatcliffObershelpMetric.compare("abefglmo", "abcefglmn").get should be (0.8235294117647058)
+ }
+ }
+ }
+ }
+}