summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
Diffstat (limited to 'core')
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala23
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala2
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala2
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala2
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala2
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizer.scala19
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala (renamed from core/source/test/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerSpec.scala)7
7 files changed, 28 insertions, 29 deletions
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala b/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala
index 36b7eef..00b173d 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala
@@ -1,19 +1,36 @@
package com.rockymadden.stringmetric
object Tokenize {
- trait Tokenizer[A] {
+ sealed trait Tokenizer[A] { // Sealed, so every direct implementation has to be declared in this source file.
def tokenize(a: A): Option[Array[A]]
}
- trait StringTokenizer extends Tokenizer[Array[Char]] {
+ sealed trait StringTokenizer extends Tokenizer[Array[Char]] { // Character-array tokenizer with an extra String overload; sealed, so direct implementations stay in this source file.
def tokenize(a: String): Option[Array[String]]
}
object StringTokenizer {
- val NGram = tokenize.NGramTokenizer
+ val NGram = NGramTokenizer // Alias for the NGramTokenizer defined in this object's enclosing scope, so NGram(n) builds a tokenizer.
def tokenizeWithNGram(n: Int)(charArray: Array[Char]) = NGram(n).tokenize(charArray)
}
+
+
+ /** N-gram tokenizer: emits every contiguous run of `n` characters of the input. */
+ final case class NGramTokenizer(n: Int) extends StringTokenizer {
+   /**
+    * Returns None when `n` is not positive or `a` has fewer than `n` characters;
+    * otherwise the a.length - n + 1 windows of length `n`, in order of position.
+    * Uses `sliding` instead of a self-referencing function value: tail-call rewriting
+    * only applies to methods, so that recursion used a stack frame per character and
+    * re-copied both arrays on every step.
+    */
+   override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] =
+     if (n <= 0 || a.length < n) None else Some(a.sliding(n).toArray)
+
+   /** String overload: tokenizes the characters of `a` and rebuilds each gram as a String. */
+   override def tokenize(a: String): Option[Array[String]] = tokenize(a.toCharArray).map(_.map(_.mkString))
+ }
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
index 1e07432..0ad3915 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
@@ -7,7 +7,7 @@ import com.rockymadden.stringmetric.Metric.StringMetric
* Traditionally, the algorithm uses bigrams.
*/
final case class DiceSorensenMetric(n: Int) extends StringMetric[Double] {
- import com.rockymadden.stringmetric.tokenize.NGramTokenizer
+ import com.rockymadden.stringmetric.Tokenize.NGramTokenizer
import com.rockymadden.stringmetric.MatchTuple
override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
index 629eaa0..6ec5db4 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
@@ -3,7 +3,7 @@ package com.rockymadden.stringmetric.similarity
import com.rockymadden.stringmetric.Metric.StringMetric
final case class JaccardMetric(n: Int) extends StringMetric[Double] {
- import com.rockymadden.stringmetric.tokenize.NGramTokenizer
+ import com.rockymadden.stringmetric.Tokenize.NGramTokenizer
override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
index d712738..8c194ce 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
@@ -4,7 +4,7 @@ import com.rockymadden.stringmetric.Metric.StringMetric
final case class NGramMetric(n: Int) extends StringMetric[Double] {
import com.rockymadden.stringmetric.MatchTuple
- import com.rockymadden.stringmetric.tokenize.NGramTokenizer
+ import com.rockymadden.stringmetric.Tokenize.NGramTokenizer
import scala.math
override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
index cc33a26..8f0418b 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
@@ -4,7 +4,7 @@ import com.rockymadden.stringmetric.Metric.StringMetric
final case class OverlapMetric(n: Int) extends StringMetric[Double] {
import com.rockymadden.stringmetric.MatchTuple
- import com.rockymadden.stringmetric.tokenize.NGramTokenizer
+ import com.rockymadden.stringmetric.Tokenize.NGramTokenizer
import scala.math
override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizer.scala b/core/source/main/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizer.scala
deleted file mode 100755
index aa89b31..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizer.scala
+++ /dev/null
@@ -1,19 +0,0 @@
-package com.rockymadden.stringmetric.tokenize
-
-import com.rockymadden.stringmetric.Tokenize.StringTokenizer
-
-final case class NGramTokenizer(n: Int) extends StringTokenizer {
- override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] = {
- if (n <= 0) return None
-
- if (a.length < n) None
- else Some(sequence(a, Array.empty[Array[Char]], n))
- }
-
- override def tokenize(a: String): Option[Array[String]] = tokenize(a.toCharArray).map(_.map(_.mkString))
-
- @annotation.tailrec
- private val sequence: ((Array[Char], Array[Array[Char]], Int) => Array[Array[Char]]) = (i, o, n) =>
- if (i.length <= n) o :+ i
- else sequence(i.tail, o :+ i.take(n), n)
-}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala
index 01636f0..cfba0f7 100755
--- a/core/source/test/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerSpec.scala
+++ b/core/source/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala
@@ -1,11 +1,12 @@
-package com.rockymadden.stringmetric.tokenize
+package com.rockymadden.stringmetric
-import com.rockymadden.stringmetric.ScalaTest
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@RunWith(classOf[JUnitRunner])
-final class NGramTokenizerSpec extends ScalaTest { "NGramTokenizer" should provide {
+final class TokenizeSpec extends ScalaTest { "NGramTokenizer" should provide {
+ import Tokenize._
+
"tokenize method" when passed {
"empty argument" should returns {
"None" in {