summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
author: Rocky Madden <git@rockymadden.com> 2013-04-29 18:27:19 -0600
committer: Rocky Madden <git@rockymadden.com> 2013-04-29 18:27:19 -0600
commit: 6068b4bb8d59c30e22863793ef2220c35f51d456 (patch)
tree: 87a22c6f32e87fc91a982cf90d794a94467e7ffa /core
parent: 2075c9febdab7c00984509db311d86f3c20e72ca (diff)
download: stringmetric-6068b4bb8d59c30e22863793ef2220c35f51d456.tar.gz
stringmetric-6068b4bb8d59c30e22863793ef2220c35f51d456.tar.bz2
stringmetric-6068b4bb8d59c30e22863793ef2220c35f51d456.zip
Fixed missing subtraction operation of intersection set length.
Diffstat (limited to 'core')
-rwxr-xr-x core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala | 8
-rwxr-xr-x core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala | 26
2 files changed, 16 insertions, 18 deletions
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
index 3b9b418..6941b10 100755
--- a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
@@ -1,6 +1,6 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
+import com.rockymadden.stringmetric.{ StringMetric, MatchTuple, StringFilter }
import com.rockymadden.stringmetric.tokenization.NGramTokenizer
/* An implementation of the Jaccard metric. */
@@ -15,17 +15,15 @@ class JaccardMetric extends StringMetric[Int, Double] { this: StringFilter =>
else if (fca1.sameElements(fca2)) Some(1d)
else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg =>
NGramTokenizer.tokenize(fca2)(n).map { ca2bg =>
- val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString))
+ val i = (ca1bg.map(_.mkString) intersect ca2bg.map(_.mkString)).length
- ms.toDouble / (ca1bg.length + ca2bg.length)
+ i.toDouble / (ca1bg.length + ca2bg.length - i)
}
}
}
final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
compare(string1.toCharArray, string2.toCharArray)(n: Int)
-
- private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
}
object JaccardMetric {
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala
index a3aeb5e..17bc3ef 100755
--- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala
@@ -41,22 +41,22 @@ final class JaccardMetricSpec extends ScalaTest {
}
"valid arguments" should returns {
"Double indicating distance" in {
- Metric.compare("night", "nacht")(1).get should be (0.3)
- Metric.compare("night", "naght")(1).get should be (0.4)
- Metric.compare("context", "contact")(1).get should be (0.35714285714285715)
+ Metric.compare("night", "nacht")(1).get should be (0.42857142857142855)
+ Metric.compare("night", "naght")(1).get should be (0.6666666666666666)
+ Metric.compare("context", "contact")(1).get should be (0.5555555555555556)
- Metric.compare("night", "nacht")(2).get should be (0.125)
- Metric.compare("night", "naght")(2).get should be (0.25)
- Metric.compare("context", "contact")(2).get should be (0.25)
- Metric.compare("contextcontext", "contact")(2).get should be (0.15789473684210525)
- Metric.compare("context", "contactcontact")(2).get should be (0.15789473684210525)
- Metric.compare("ht", "nacht")(2).get should be (0.2)
+ Metric.compare("night", "nacht")(2).get should be (0.14285714285714285)
+ Metric.compare("night", "naght")(2).get should be (0.3333333333333333)
+ Metric.compare("context", "contact")(2).get should be (0.3333333333333333)
+ Metric.compare("contextcontext", "contact")(2).get should be (0.1875)
+ Metric.compare("context", "contactcontact")(2).get should be (0.1875)
+ Metric.compare("ht", "nacht")(2).get should be (0.25)
Metric.compare("xp", "nacht")(2).get should be (0)
- Metric.compare("ht", "hththt")(2).get should be (0.16666666666666666)
+ Metric.compare("ht", "hththt")(2).get should be (0.2)
Metric.compare("night", "nacht")(3).get should be (0)
- Metric.compare("night", "naght")(3).get should be (0.16666666666666666)
- Metric.compare("context", "contact")(3).get should be (0.2)
+ Metric.compare("night", "naght")(3).get should be (0.2)
+ Metric.compare("context", "contact")(3).get should be (0.25)
}
}
}
@@ -64,7 +64,7 @@ final class JaccardMetricSpec extends ScalaTest {
"JaccardMetric companion object" should provide {
"pass-through compare method" should returns {
"same value as class" in {
- JaccardMetric.compare("context", "contact")(3).get should be (0.2)
+ JaccardMetric.compare("context", "contact")(3).get should be (0.25)
}
}
}