diff options
author | Rocky Madden <git@rockymadden.com> | 2014-01-02 13:47:43 -0700 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2014-01-02 13:47:43 -0700 |
commit | 49de854bb464f1be37fbb27f942b9b65e52df751 (patch) | |
tree | 6c9a27ac1264648f67eba9c8707fa87d3dc5b3cd /core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala | |
parent | 42b990a1523a68717afcbdbc2cc4968c041451ec (diff) | |
download | stringmetric-49de854bb464f1be37fbb27f942b9b65e52df751.tar.gz stringmetric-49de854bb464f1be37fbb27f942b9b65e52df751.tar.bz2 stringmetric-49de854bb464f1be37fbb27f942b9b65e52df751.zip |
Moved from gradle to sbt.
Diffstat (limited to 'core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala')
-rwxr-xr-x | core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala | 20 |
1 files changed, 20 insertions, 0 deletions
// diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala (new file mode 100755, index 0000000..6ec5db4)
package com.rockymadden.stringmetric.similarity

import com.rockymadden.stringmetric.Metric.StringMetric

/**
 * Jaccard-style similarity computed over the n-gram tokens of two inputs.
 *
 * @param n n-gram size handed to the tokenizer; comparisons yield None unless it is positive
 */
final case class JaccardMetric(n: Int) extends StringMetric[Double] {
  import com.rockymadden.stringmetric.Tokenize.NGramTokenizer

  /**
   * Scores two character arrays against each other.
   *
   * @param a first input
   * @param b second input
   * @return None when n is not positive or either input is shorter than n; Some(1.0) when both
   *         inputs hold the same elements in the same order; otherwise the number of shared
   *         n-grams divided by (total n-grams in both inputs minus the shared count), provided
   *         the tokenizer produces tokens for both inputs
   */
  override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
    // An n-gram can only be formed when n is positive and each input has at least n characters.
    val comparable = n > 0 && a.length >= n && b.length >= n

    if (!comparable) None
    else if (a.sameElements(b)) Some(1d) // Identical inputs: skip tokenization altogether.
    else
      for {
        gramsOfA <- NGramTokenizer(n).tokenize(a)
        gramsOfB <- NGramTokenizer(n).tokenize(b)
      } yield {
        // Tokens are turned into Strings before intersecting so they are matched by content.
        val shared = gramsOfA.map(_.mkString).intersect(gramsOfB.map(_.mkString)).length

        shared.toDouble / (gramsOfA.length + gramsOfB.length - shared)
      }
  }

  /**
   * String convenience overload: converts both arguments to character arrays and delegates to
   * the array-based comparison.
   */
  override def compare(a: String, b: String): Option[Double] =
    compare(a.toCharArray, b.toCharArray)
}