diff options
author | Rocky Madden <git@rockymadden.com> | 2013-12-30 13:21:47 -0700 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2013-12-30 13:21:47 -0700 |
commit | 09ee8d1438c84e45b3cccf9a205b5234da68afcf (patch) | |
tree | faa4f4536a7e94a798c4d83d7ab2b42320acfaeb /core/source/main/scala/com/rockymadden | |
parent | 521ebea1fa797519f43f4401b5b1b8ec2d5d55a4 (diff) | |
download | stringmetric-09ee8d1438c84e45b3cccf9a205b5234da68afcf.tar.gz stringmetric-09ee8d1438c84e45b3cccf9a205b5234da68afcf.tar.bz2 stringmetric-09ee8d1438c84e45b3cccf9a205b5234da68afcf.zip |
Merged n-gram evaluation.
Diffstat (limited to 'core/source/main/scala/com/rockymadden')
4 files changed, 8 insertions, 20 deletions
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala index a9617d5..1e07432 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala @@ -10,10 +10,8 @@ final case class DiceSorensenMetric(n: Int) extends StringMetric[Double] { import com.rockymadden.stringmetric.tokenize.NGramTokenizer import com.rockymadden.stringmetric.MatchTuple - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = { - if (n <= 0) return None - - if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. else if (a.sameElements(b)) Some(1d) else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => NGramTokenizer(n).tokenize(b).map { ca2bg => @@ -22,7 +20,6 @@ final case class DiceSorensenMetric(n: Int) extends StringMetric[Double] { (2d * ms) / (ca1bg.length + ca2bg.length) } } - } override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala index d3c350b..629eaa0 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala @@ -5,10 +5,8 @@ import com.rockymadden.stringmetric.Metric.StringMetric final case class JaccardMetric(n: Int) extends StringMetric[Double] { import com.rockymadden.stringmetric.tokenize.NGramTokenizer - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = { - if (n <= 0) return None - - if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. else if (a.sameElements(b)) Some(1d) else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => NGramTokenizer(n).tokenize(b).map { ca2bg => @@ -17,7 +15,6 @@ final case class JaccardMetric(n: Int) extends StringMetric[Double] { i.toDouble / (ca1bg.length + ca2bg.length - i) } } - } override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala index 119c8c7..d712738 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala @@ -7,10 +7,8 @@ final case class NGramMetric(n: Int) extends StringMetric[Double] { import com.rockymadden.stringmetric.tokenize.NGramTokenizer import scala.math - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = { - if (n <= 0) return None - - if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. else if (a.sameElements(b)) Some(1d) else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => NGramTokenizer(n).tokenize(b).map { ca2bg => @@ -19,7 +17,6 @@ final case class NGramMetric(n: Int) extends StringMetric[Double] { ms.toDouble / math.max(ca1bg.length, ca2bg.length) } } - } override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala index b378190..cc33a26 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala @@ -7,10 +7,8 @@ final case class OverlapMetric(n: Int) extends StringMetric[Double] { import com.rockymadden.stringmetric.tokenize.NGramTokenizer import scala.math - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = { - if (n <= 0) return None - - if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. else if (a.sameElements(b)) Some(1d) else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => NGramTokenizer(n).tokenize(b).map { ca2bg => @@ -19,7 +17,6 @@ final case class OverlapMetric(n: Int) extends StringMetric[Double] { ms.toDouble / math.min(ca1bg.length, ca2bg.length) } } - } override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) |