summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Rocky Madden <git@rockymadden.com> 2013-12-30 13:21:47 -0700
committer Rocky Madden <git@rockymadden.com> 2013-12-30 13:21:47 -0700
commit 09ee8d1438c84e45b3cccf9a205b5234da68afcf (patch)
tree faa4f4536a7e94a798c4d83d7ab2b42320acfaeb
parent 521ebea1fa797519f43f4401b5b1b8ec2d5d55a4 (diff)
download stringmetric-09ee8d1438c84e45b3cccf9a205b5234da68afcf.tar.gz
stringmetric-09ee8d1438c84e45b3cccf9a205b5234da68afcf.tar.bz2
stringmetric-09ee8d1438c84e45b3cccf9a205b5234da68afcf.zip
Merged n-gram evaluation.
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala 7
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala 7
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala 7
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala 7
4 files changed, 8 insertions, 20 deletions
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
index a9617d5..1e07432 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
@@ -10,10 +10,8 @@ final case class DiceSorensenMetric(n: Int) extends StringMetric[Double] {
import com.rockymadden.stringmetric.tokenize.NGramTokenizer
import com.rockymadden.stringmetric.MatchTuple
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
- if (n <= 0) return None
-
- if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
+ if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
else if (a.sameElements(b)) Some(1d)
else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
NGramTokenizer(n).tokenize(b).map { ca2bg =>
@@ -22,7 +20,6 @@ final case class DiceSorensenMetric(n: Int) extends StringMetric[Double] {
(2d * ms) / (ca1bg.length + ca2bg.length)
}
}
- }
override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
index d3c350b..629eaa0 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
@@ -5,10 +5,8 @@ import com.rockymadden.stringmetric.Metric.StringMetric
final case class JaccardMetric(n: Int) extends StringMetric[Double] {
import com.rockymadden.stringmetric.tokenize.NGramTokenizer
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
- if (n <= 0) return None
-
- if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
+ if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
else if (a.sameElements(b)) Some(1d)
else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
NGramTokenizer(n).tokenize(b).map { ca2bg =>
@@ -17,7 +15,6 @@ final case class JaccardMetric(n: Int) extends StringMetric[Double] {
i.toDouble / (ca1bg.length + ca2bg.length - i)
}
}
- }
override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
index 119c8c7..d712738 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
@@ -7,10 +7,8 @@ final case class NGramMetric(n: Int) extends StringMetric[Double] {
import com.rockymadden.stringmetric.tokenize.NGramTokenizer
import scala.math
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
- if (n <= 0) return None
-
- if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
+ if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
else if (a.sameElements(b)) Some(1d)
else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
NGramTokenizer(n).tokenize(b).map { ca2bg =>
@@ -19,7 +17,6 @@ final case class NGramMetric(n: Int) extends StringMetric[Double] {
ms.toDouble / math.max(ca1bg.length, ca2bg.length)
}
}
- }
override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
index b378190..cc33a26 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
@@ -7,10 +7,8 @@ final case class OverlapMetric(n: Int) extends StringMetric[Double] {
import com.rockymadden.stringmetric.tokenize.NGramTokenizer
import scala.math
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
- if (n <= 0) return None
-
- if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
+ if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
else if (a.sameElements(b)) Some(1d)
else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
NGramTokenizer(n).tokenize(b).map { ca2bg =>
@@ -19,7 +17,6 @@ final case class OverlapMetric(n: Int) extends StringMetric[Double] {
ms.toDouble / math.min(ca1bg.length, ca2bg.length)
}
}
- }
override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)