diff options
author | Rocky Madden <git@rockymadden.com> | 2013-12-28 11:49:33 -0700 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2013-12-28 11:49:33 -0700 |
commit | ef997c15f2ac152b53f05669f1d6bb9aa00aded7 (patch) | |
tree | 571f2eb996940b6f39dc7ab073bbc2d20ae9c93c /core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala | |
parent | 001b756ff5fb88e295b0ff4e4ee89bf326a4f83e (diff) | |
download | stringmetric-ef997c15f2ac152b53f05669f1d6bb9aa00aded7.tar.gz stringmetric-ef997c15f2ac152b53f05669f1d6bb9aa00aded7.tar.bz2 stringmetric-ef997c15f2ac152b53f05669f1d6bb9aa00aded7.zip |
A more functional structure.
Diffstat (limited to 'core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala')
-rwxr-xr-x | core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala | 37 |
1 files changed, 12 insertions, 25 deletions
```diff
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
index e74e8eb..8025f38 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
@@ -1,21 +1,19 @@
 package com.rockymadden.stringmetric.similarity
 
-import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
-import com.rockymadden.stringmetric.tokenization.NGramTokenizer
-import scala.math
+import com.rockymadden.stringmetric.Metric.StringMetricLike
 
-/** An implementation of the N-Gram metric. */
-class NGramMetric extends StringMetric[Int, Double] { this: StringFilter =>
-  final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
-    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+final case class NGramMetric(private val n: Int) extends StringMetricLike[Double] {
+  import com.rockymadden.stringmetric.MatchTuple
+  import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+  import scala.math
 
-    val fca1 = filter(charArray1)
-    lazy val fca2 = filter(charArray2)
+  override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
+    if (n <= 0) return None
 
-    if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare.
-    else if (fca1.sameElements(fca2)) Some(1d)
-    else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg =>
-      NGramTokenizer.tokenize(fca2)(n).map { ca2bg =>
+    if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
+    else if (a.sameElements(b)) Some(1d)
+    else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
+      NGramTokenizer(n).tokenize(b).map { ca2bg =>
         val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString)))
 
         ms.toDouble / math.max(ca1bg.length, ca2bg.length)
@@ -23,18 +21,7 @@ class NGramMetric extends StringMetric[Int, Double] { this: StringFilter =>
       }
     }
   }
-  final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
-    compare(string1.toCharArray, string2.toCharArray)(n)
+  override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
 
   private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
 }
-
-object NGramMetric {
-  private lazy val self = apply()
-
-  def apply(): NGramMetric = new NGramMetric with StringFilter
-
-  def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
-
-  def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
-}
```