diff options
author | Rocky Madden <git@rockymadden.com> | 2013-04-29 18:27:19 -0600 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2013-04-29 18:27:19 -0600 |
commit | 6068b4bb8d59c30e22863793ef2220c35f51d456 (patch) | |
tree | 87a22c6f32e87fc91a982cf90d794a94467e7ffa /core/source | |
parent | 2075c9febdab7c00984509db311d86f3c20e72ca (diff) | |
download | stringmetric-6068b4bb8d59c30e22863793ef2220c35f51d456.tar.gz stringmetric-6068b4bb8d59c30e22863793ef2220c35f51d456.tar.bz2 stringmetric-6068b4bb8d59c30e22863793ef2220c35f51d456.zip |
Fixed the Jaccard similarity denominator, which was missing the subtraction of the intersection set length.
Diffstat (limited to 'core/source')
-rwxr-xr-x | core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala | 8 | ||||
-rwxr-xr-x | core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala | 26 |
2 files changed, 16 insertions, 18 deletions
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala index 3b9b418..6941b10 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala @@ -1,6 +1,6 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter} +import com.rockymadden.stringmetric.{ StringMetric, MatchTuple, StringFilter } import com.rockymadden.stringmetric.tokenization.NGramTokenizer /* An implementation of the Jaccard metric. */ @@ -15,17 +15,15 @@ class JaccardMetric extends StringMetric[Int, Double] { this: StringFilter => else if (fca1.sameElements(fca2)) Some(1d) else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => NGramTokenizer.tokenize(fca2)(n).map { ca2bg => - val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) + val i = (ca1bg.map(_.mkString) intersect ca2bg.map(_.mkString)).length - ms.toDouble / (ca1bg.length + ca2bg.length) + i.toDouble / (ca1bg.length + ca2bg.length - i) } } } final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] = compare(string1.toCharArray, string2.toCharArray)(n: Int) - - private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length } object JaccardMetric { diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala index a3aeb5e..17bc3ef 100755 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala @@ -41,22 +41,22 @@ final class JaccardMetricSpec extends ScalaTest { } "valid arguments" should returns { 
"Double indicating distance" in { - Metric.compare("night", "nacht")(1).get should be (0.3) - Metric.compare("night", "naght")(1).get should be (0.4) - Metric.compare("context", "contact")(1).get should be (0.35714285714285715) + Metric.compare("night", "nacht")(1).get should be (0.42857142857142855) + Metric.compare("night", "naght")(1).get should be (0.6666666666666666) + Metric.compare("context", "contact")(1).get should be (0.5555555555555556) - Metric.compare("night", "nacht")(2).get should be (0.125) - Metric.compare("night", "naght")(2).get should be (0.25) - Metric.compare("context", "contact")(2).get should be (0.25) - Metric.compare("contextcontext", "contact")(2).get should be (0.15789473684210525) - Metric.compare("context", "contactcontact")(2).get should be (0.15789473684210525) - Metric.compare("ht", "nacht")(2).get should be (0.2) + Metric.compare("night", "nacht")(2).get should be (0.14285714285714285) + Metric.compare("night", "naght")(2).get should be (0.3333333333333333) + Metric.compare("context", "contact")(2).get should be (0.3333333333333333) + Metric.compare("contextcontext", "contact")(2).get should be (0.1875) + Metric.compare("context", "contactcontact")(2).get should be (0.1875) + Metric.compare("ht", "nacht")(2).get should be (0.25) Metric.compare("xp", "nacht")(2).get should be (0) - Metric.compare("ht", "hththt")(2).get should be (0.16666666666666666) + Metric.compare("ht", "hththt")(2).get should be (0.2) Metric.compare("night", "nacht")(3).get should be (0) - Metric.compare("night", "naght")(3).get should be (0.16666666666666666) - Metric.compare("context", "contact")(3).get should be (0.2) + Metric.compare("night", "naght")(3).get should be (0.2) + Metric.compare("context", "contact")(3).get should be (0.25) } } } @@ -64,7 +64,7 @@ final class JaccardMetricSpec extends ScalaTest { "JaccardMetric companion object" should provide { "pass-through compare method" should returns { "same value as class" in { - 
JaccardMetric.compare("context", "contact")(3).get should be (0.2) + JaccardMetric.compare("context", "contact")(3).get should be (0.25) } } } |