diff options
author | Rocky Madden <git@rockymadden.com> | 2014-03-26 16:09:24 -0600 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2014-03-26 16:09:24 -0600 |
commit | 3c806b88e9169ed742ae5740d81fbbb24f0ca768 (patch) | |
tree | 4b6a75a1042000841200b6af031abe62b542ab71 | |
parent | a25d9e70528a0b2bab96cc55ede9232076fd4299 (diff) | |
download | stringmetric-3c806b88e9169ed742ae5740d81fbbb24f0ca768.tar.gz stringmetric-3c806b88e9169ed742ae5740d81fbbb24f0ca768.tar.bz2 stringmetric-3c806b88e9169ed742ae5740d81fbbb24f0ca768.zip |
Removed module structure.
40 files changed, 585 insertions, 406 deletions
diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/package.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/package.scala index 67cd639..6c37f70 100755 --- a/cli/src/main/scala/com/rockymadden/stringmetric/cli/package.scala +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/package.scala @@ -1,13 +1,11 @@ package com.rockymadden.stringmetric +import scala.collection.immutable.Map +import scala.language.implicitConversions // Some things might look sloppy (e.g. access modifiers, broad imports, repetitive imports, etc), but are required // because of the way "scalascript" is ultimately compiled. package object cli { - import scala.collection.immutable.Map - import scala.language.implicitConversions - - implicit def optionStringToArray(os: OptionString): Array[String] = if (os.get.length == 0) Array.empty[String] else os.get.split(" ") implicit def optionStringToBigDecimal(os: OptionString): BigDecimal = BigDecimal(os.get) @@ -19,14 +17,11 @@ package object cli { implicit def optionStringToShort(os: OptionString): Short = os.get.toShort implicit def optionStringToString(os: OptionString): String = os.get - val Ls = sys.props("line.separator") val Tab = " " - class OptionString(val get: String) - object OptionString { implicit def fromString(s: String): OptionString = OptionString(s) @@ -36,7 +31,6 @@ package object cli { type OptionMap = Map[Symbol, OptionString] - object OptionMap { def apply(as: Array[String]): OptionMap = apply(as: _*) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Algorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/Algorithm.scala index 21a6ad8..0e9a5ab 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/Algorithm.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/Algorithm.scala @@ -1,74 +1,5 @@ package com.rockymadden.stringmetric -object Algorithm { - import scala.collection.immutable.Map - import Transform._ - - - trait Algorithm[A] { - def compute(a: A): Option[A] - } - - - object Algorithm { - implicit def toStringAlgorithmDecorator(sa: StringAlgorithm): StringAlgorithmDecorator = - new StringAlgorithmDecorator(sa) - } - - - trait StringAlgorithm extends Algorithm[Array[Char]] { - def compute(a: String): Option[String] - } - - - object StringAlgorithm { - val Metaphone = phonetic.MetaphoneAlgorithm - val Nysiis = phonetic.NysiisAlgorithm - val RefinedNysiis = phonetic.RefinedNysiisAlgorithm - val RefinedSoundex = phonetic.RefinedSoundexAlgorithm - val Soundex = phonetic.SoundexAlgorithm - - def computeWithMetaphone(a: Array[Char]) = Metaphone.compute(a) - - def computeWithNysiis(a: Array[Char]) = Nysiis.compute(a) - - def computeWithRefinedNysiis(a: Array[Char]) = RefinedNysiis.compute(a) - - def computeWithRefinedSoundex(a: Array[Char]) = RefinedSoundex.compute(a) - - def computeWithSoundex(a: Array[Char]) = Soundex.compute(a) - } - - - sealed trait AlgorithmDecorator[A] { - val withMemoization: Algorithm[A] - - val withTransform: (Transform[A] => Algorithm[A]) - } - - - final case class StringAlgorithmDecorator(sa: StringAlgorithm) extends AlgorithmDecorator[Array[Char]] { - override val withMemoization: StringAlgorithm = new StringAlgorithm { - private val base: StringAlgorithm = sa - private var memo: Map[String, Option[String]] = Map() - - override def compute(a: Array[Char]): Option[Array[Char]] = compute(a.toString).map(_.toCharArray) - - override def compute(a: String): Option[String] = - if (memo.contains(a)) memo(a) - else { - memo = memo + (a -> base.compute(a)) - memo(a) - } - } - - override val withTransform: (StringTransform => StringAlgorithm) = (st) => new StringAlgorithm { - private val base: StringAlgorithm = sa - private val transform: StringTransform = st - - override def compute(a: Array[Char]): Option[Array[Char]] = base.compute(transform(a)) - - override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) - } - } +trait Algorithm[A] { + def compute(a: A): Option[A] } diff --git a/core/src/main/scala/com/rockymadden/stringmetric/AlgorithmDecorator.scala b/core/src/main/scala/com/rockymadden/stringmetric/AlgorithmDecorator.scala new file mode 100644 index 0000000..fad2d64 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/AlgorithmDecorator.scala @@ -0,0 +1,35 @@ +package com.rockymadden.stringmetric + +import scala.collection.immutable.Map + +sealed trait AlgorithmDecorator[A] { + val withMemoization: Algorithm[A] + + val withTransform: (Transform[A] => Algorithm[A]) +} + + +final case class StringAlgorithmDecorator(sa: StringAlgorithm) extends AlgorithmDecorator[Array[Char]] { + override val withMemoization: StringAlgorithm = new StringAlgorithm { + private val base: StringAlgorithm = sa + private var memo: Map[String, Option[String]] = Map() + + override def compute(a: Array[Char]): Option[Array[Char]] = compute(a.toString).map(_.toCharArray) + + override def compute(a: String): Option[String] = + if (memo.contains(a)) memo(a) + else { + memo = memo + (a -> base.compute(a)) + memo(a) + } + } + + override val withTransform: (StringTransform => StringAlgorithm) = (st) => new StringAlgorithm { + private val base: StringAlgorithm = sa + private val transform: StringTransform = st + + override def compute(a: Array[Char]): Option[Array[Char]] = base.compute(transform(a)) + + override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) + } +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Alphabet.scala b/core/src/main/scala/com/rockymadden/stringmetric/Alphabet.scala index 5e666d2..965a4e1 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/Alphabet.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/Alphabet.scala @@ -2,41 +2,64 @@ package com.rockymadden.stringmetric import scala.collection.immutable.Set -object Alphabet { - sealed abstract class AlphabetSet(val chars: Set[Char]) { - def isSuperset(a: Char): Boolean = chars.contains(a) +sealed trait Alphabet { + val chars: Set[Char] - def isSuperset(a: Array[Char]): Boolean = a.length > 0 && a.takeWhile(chars.contains).length == a.length + def isSuperset(a: Char): Boolean = chars.contains(a) - def isSuperset(a: String): Boolean = isSuperset(a.toCharArray) - } + def isSuperset(a: Array[Char]): Boolean = a.length > 0 && a.takeWhile(chars.contains).length == a.length + def isSuperset(a: String): Boolean = isSuperset(a.toCharArray) +} - case object LowercaseConsonant extends AlphabetSet( - Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x' ,'z') - ) +object Alphabet { + case object LowercaseConsonant extends Alphabet { + val chars = + Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z') + } - case object UppercaseConsonant extends AlphabetSet( - Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X' ,'Z') - ) + case object UppercaseConsonant extends Alphabet { + val chars = + Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z') + } - case object Consonant extends AlphabetSet(LowercaseConsonant.chars ++ UppercaseConsonant.chars) + case object Consonant extends Alphabet { + val chars = LowercaseConsonant.chars ++ UppercaseConsonant.chars + } - case object LowercaseVowel extends AlphabetSet(Set('a', 'e', 'i', 'o', 'u')) + case object LowercaseVowel extends Alphabet { + val chars = Set('a', 'e', 'i', 'o', 'u') + } - case object UppercaseVowel extends AlphabetSet(Set('A', 'E', 'I', 'O', 'U')) + case object UppercaseVowel extends Alphabet { + val chars = Set('A', 'E', 'I', 'O', 'U') + } - case object Vowel extends AlphabetSet(LowercaseVowel.chars ++ UppercaseVowel.chars) + case object Vowel extends Alphabet { + val chars = LowercaseVowel.chars ++ UppercaseVowel.chars + } - case object LowercaseY extends AlphabetSet(Set('y')) + case object LowercaseY extends Alphabet { + val chars = Set('y') + } - case object UppercaseY extends AlphabetSet(Set('Y')) + case object UppercaseY extends Alphabet { + val chars = Set('Y') + } - case object Y extends AlphabetSet(LowercaseY.chars ++ UppercaseY.chars) + case object Y extends Alphabet { + val chars = LowercaseY.chars ++ UppercaseY.chars + } - case object LowercaseAlpha extends AlphabetSet(LowercaseConsonant.chars ++ LowercaseVowel.chars ++ LowercaseY.chars) + case object LowercaseAlpha extends Alphabet { + val chars = LowercaseConsonant.chars ++ LowercaseVowel.chars ++ LowercaseY.chars + } - case object UppercaseAlpha extends AlphabetSet(UppercaseConsonant.chars ++ UppercaseVowel.chars ++ UppercaseY.chars) + case object UppercaseAlpha extends Alphabet { + val chars = UppercaseConsonant.chars ++ UppercaseVowel.chars ++ UppercaseY.chars + } - case object Alpha extends AlphabetSet(LowercaseAlpha.chars ++ UppercaseAlpha.chars) + case object Alpha extends Alphabet { + val chars = LowercaseAlpha.chars ++ UppercaseAlpha.chars + } } diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Metric.scala b/core/src/main/scala/com/rockymadden/stringmetric/Metric.scala index 6ac8880..b192ed9 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/Metric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/Metric.scala @@ -1,106 +1,5 @@ package com.rockymadden.stringmetric -object Metric { - import Transform._ - - - trait Metric[A, B] { - def compare(a: A, b: A): Option[B] - } - - - object Metric { - implicit def toStringMetricDecorator[A](sa: StringMetric[A]): StringMetricDecorator[A] = - new StringMetricDecorator[A](sa) - } - - - trait StringMetric[A] extends Metric[Array[Char], A] { - def compare(a: String, b: String): Option[A] - } - - - object StringMetric { - val DiceSorensen = similarity.DiceSorensenMetric - val Hamming = similarity.HammingMetric - val Jaccard = similarity.JaccardMetric - val Jaro = similarity.JaroMetric - val JaroWinkler = similarity.JaroWinklerMetric - val Levenshtein = similarity.LevenshteinMetric - val Metaphone = phonetic.MetaphoneMetric - val NGram = similarity.NGramMetric - val Nysiis = phonetic.NysiisMetric - val Overlap = similarity.OverlapMetric - val RefinedNysiis = phonetic.RefinedNysiisMetric - val RefinedSoundex = phonetic.RefinedSoundexMetric - val Soundex = phonetic.SoundexMetric - val WeightedLevenshtein = similarity.WeightedLevenshteinMetric - - def compareWithDiceSorensen(n: Int)(a: Array[Char], b: Array[Char]) = DiceSorensen(n).compare(a, b) - - def compareWithHamming(a: Array[Char], b: Array[Char]) = Hamming.compare(a, b) - - def compareWithJaccard(n: Int)(a: Array[Char], b: Array[Char]) = Jaccard(n).compare(a, b) - - def compareWithJaro(a: Array[Char], b: Array[Char]) = Jaro.compare(a, b) - - def compareWithJaroWinkler(a: Array[Char], b: Array[Char]) = JaroWinkler.compare(a, b) - - def compareWithLevenshtein(a: Array[Char], b: Array[Char]) = Levenshtein.compare(a, b) - - def compareWithMetaphone(a: Array[Char], b: Array[Char]) = Metaphone.compare(a, b) - - def compareWithNGram(n: Int)(a: Array[Char], b: Array[Char]) = NGram(n).compare(a, b) - - def compareWithNysiis(a: Array[Char], b: Array[Char]) = Nysiis.compare(a, b) - - def compareWithOverlap(n: Int)(a: Array[Char], b: Array[Char]) = Overlap(n).compare(a, b) - - def compareWithRefinedNysiis(a: Array[Char], b: Array[Char]) = RefinedNysiis.compare(a, b) - - def compareWithRefinedSoundex(a: Array[Char], b: Array[Char]) = RefinedSoundex.compare(a, b) - - def compareWithSoundex(a: Array[Char], b: Array[Char]) = Soundex.compare(a, b) - - def compareWithWeightedLevenshtein(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal) - (a: Array[Char], b: Array[Char]) = - - WeightedLevenshtein(delete, insert, substitute).compare(a, b) - } - - - sealed trait MetricDecorator[A, B] { - val withMemoization: Metric[A, B] - - val withTransform: (Transform[A] => Metric[A, B]) - } - - - final case class StringMetricDecorator[A](sm: StringMetric[A]) extends MetricDecorator[Array[Char], A] { - override val withMemoization: StringMetric[A] = new StringMetric[A] { - private val base: StringMetric[A] = sm - private var memo: Map[(String, String), Option[A]] = Map() - - override def compare(a: Array[Char], b: Array[Char]): Option[A] = compare(a.toString, b.toString) - - override def compare(a: String, b: String): Option[A] = { - val t = (a, b) - - if (memo.contains(t)) memo(t) - else { - memo = memo + (t -> base.compare(a, b)) - memo(t) - } - } - } - - override val withTransform: (StringTransform => StringMetric[A]) = (st) => new StringMetric[A] { - private val base: StringMetric[A] = sm - private val transform: StringTransform = st - - override def compare(a: Array[Char], b: Array[Char]): Option[A] = base.compare(transform(a), transform(b)) - - override def compare(a: String, b: String): Option[A] = compare(a.toCharArray, b.toCharArray) - } - } +trait Metric[A, B] { + def compare(a: A, b: A): Option[B] } diff --git a/core/src/main/scala/com/rockymadden/stringmetric/MetricDecorator.scala b/core/src/main/scala/com/rockymadden/stringmetric/MetricDecorator.scala new file mode 100644 index 0000000..e14db86 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/MetricDecorator.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric + +import scala.collection.immutable.Map + +sealed trait MetricDecorator[A, B] { + val withMemoization: Metric[A, B] + + val withTransform: (Transform[A] => Metric[A, B]) +} + +final case class StringMetricDecorator[A](sm: StringMetric[A]) extends MetricDecorator[Array[Char], A] { + override val withMemoization: StringMetric[A] = new StringMetric[A] { + private val base: StringMetric[A] = sm + private var memo: Map[(String, String), Option[A]] = Map() + + override def compare(a: Array[Char], b: Array[Char]): Option[A] = compare(a.toString, b.toString) + + override def compare(a: String, b: String): Option[A] = { + val t = (a, b) + + if (memo.contains(t)) memo(t) + else { + memo = memo + (t -> base.compare(a, b)) + memo(t) + } + } + } + + override val withTransform: (StringTransform => StringMetric[A]) = (st) => new StringMetric[A] { + private val base: StringMetric[A] = sm + private val transform: StringTransform = st + + override def compare(a: Array[Char], b: Array[Char]): Option[A] = base.compare(transform(a), transform(b)) + + override def compare(a: String, b: String): Option[A] = compare(a.toCharArray, b.toCharArray) + } +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala new file mode 100644 index 0000000..e5571de --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala @@ -0,0 +1,28 @@ +package com.rockymadden.stringmetric + +import scala.language.implicitConversions + +trait StringAlgorithm extends Algorithm[Array[Char]] { + def compute(a: String): Option[String] +} + +object StringAlgorithm { + val Metaphone = phonetic.MetaphoneAlgorithm + val Nysiis = phonetic.NysiisAlgorithm + val RefinedNysiis = phonetic.RefinedNysiisAlgorithm + val RefinedSoundex = phonetic.RefinedSoundexAlgorithm + val Soundex = phonetic.SoundexAlgorithm + + implicit def toStringAlgorithmDecorator(sa: StringAlgorithm): StringAlgorithmDecorator = + new StringAlgorithmDecorator(sa) + + def computeWithMetaphone(a: Array[Char]) = Metaphone.compute(a) + + def computeWithNysiis(a: Array[Char]) = Nysiis.compute(a) + + def computeWithRefinedNysiis(a: Array[Char]) = RefinedNysiis.compute(a) + + def computeWithRefinedSoundex(a: Array[Char]) = RefinedSoundex.compute(a) + + def computeWithSoundex(a: Array[Char]) = Soundex.compute(a) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/StringMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/StringMetric.scala new file mode 100644 index 0000000..f488141 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/StringMetric.scala @@ -0,0 +1,58 @@ +package com.rockymadden.stringmetric + +import scala.language.implicitConversions + +trait StringMetric[A] extends Metric[Array[Char], A] { + def compare(a: String, b: String): Option[A] +} + +object StringMetric { + val DiceSorensen = similarity.DiceSorensenMetric + val Hamming = similarity.HammingMetric + val Jaccard = similarity.JaccardMetric + val Jaro = similarity.JaroMetric + val JaroWinkler = similarity.JaroWinklerMetric + val Levenshtein = similarity.LevenshteinMetric + val Metaphone = phonetic.MetaphoneMetric + val NGram = similarity.NGramMetric + val Nysiis = phonetic.NysiisMetric + val Overlap = similarity.OverlapMetric + val RefinedNysiis = phonetic.RefinedNysiisMetric + val RefinedSoundex = phonetic.RefinedSoundexMetric + val Soundex = phonetic.SoundexMetric + val WeightedLevenshtein = similarity.WeightedLevenshteinMetric + + implicit def toStringMetricDecorator[A](sa: StringMetric[A]): StringMetricDecorator[A] = + new StringMetricDecorator[A](sa) + + def compareWithDiceSorensen(n: Int)(a: Array[Char], b: Array[Char]) = DiceSorensen(n).compare(a, b) + + def compareWithHamming(a: Array[Char], b: Array[Char]) = Hamming.compare(a, b) + + def compareWithJaccard(n: Int)(a: Array[Char], b: Array[Char]) = Jaccard(n).compare(a, b) + + def compareWithJaro(a: Array[Char], b: Array[Char]) = Jaro.compare(a, b) + + def compareWithJaroWinkler(a: Array[Char], b: Array[Char]) = JaroWinkler.compare(a, b) + + def compareWithLevenshtein(a: Array[Char], b: Array[Char]) = Levenshtein.compare(a, b) + + def compareWithMetaphone(a: Array[Char], b: Array[Char]) = Metaphone.compare(a, b) + + def compareWithNGram(n: Int)(a: Array[Char], b: Array[Char]) = NGram(n).compare(a, b) + + def compareWithNysiis(a: Array[Char], b: Array[Char]) = Nysiis.compare(a, b) + + def compareWithOverlap(n: Int)(a: Array[Char], b: Array[Char]) = Overlap(n).compare(a, b) + + def compareWithRefinedNysiis(a: Array[Char], b: Array[Char]) = RefinedNysiis.compare(a, b) + + def compareWithRefinedSoundex(a: Array[Char], b: Array[Char]) = RefinedSoundex.compare(a, b) + + def compareWithSoundex(a: Array[Char], b: Array[Char]) = Soundex.compare(a, b) + + def compareWithWeightedLevenshtein(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal) + (a: Array[Char], b: Array[Char]) = + + WeightedLevenshtein(delete, insert, substitute).compare(a, b) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala b/core/src/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala new file mode 100644 index 0000000..0e956a8 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala @@ -0,0 +1,19 @@ +package com.rockymadden.stringmetric + +sealed trait StringTokenizer extends Tokenizer[Array[Char]] { + def tokenize(a: String): Option[Array[String]] +} + + +final case class NGramTokenizer(n: Int) extends StringTokenizer { + override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] = + if (n <= 0 || a.length < n) None + else Some(sequence(a, Array.empty[Array[Char]], n)) + + override def tokenize(a: String): Option[Array[String]] = tokenize(a.toCharArray).map(_.map(_.mkString)) + + @annotation.tailrec + private val sequence: ((Array[Char], Array[Array[Char]], Int) => Array[Array[Char]]) = (i, o, n) => + if (i.length <= n) o :+ i + else sequence(i.tail, o :+ i.take(n), n) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Tokenize.scala b/core/src/main/scala/com/rockymadden/stringmetric/Tokenize.scala deleted file mode 100755 index a011c96..0000000 --- a/core/src/main/scala/com/rockymadden/stringmetric/Tokenize.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.rockymadden.stringmetric - -object Tokenize { - sealed trait Tokenizer[A] { - def tokenize(a: A): Option[Array[A]] - } - - - sealed trait StringTokenizer extends Tokenizer[Array[Char]] { - def tokenize(a: String): Option[Array[String]] - } - - - object StringTokenizer { - val NGram = NGramTokenizer - - def tokenizeWithNGram(n: Int)(charArray: Array[Char]) = NGram(n).tokenize(charArray) - } - - - final case class NGramTokenizer(n: Int) extends StringTokenizer { - override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] = - if (n <= 0 || a.length < n) None - else Some(sequence(a, Array.empty[Array[Char]], n)) - - override def tokenize(a: String): Option[Array[String]] = tokenize(a.toCharArray).map(_.map(_.mkString)) - - @annotation.tailrec - private val sequence: ((Array[Char], Array[Array[Char]], Int) => Array[Array[Char]]) = (i, o, n) => - if (i.length <= n) o :+ i - else sequence(i.tail, o :+ i.take(n), n) - } -} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Tokenizer.scala b/core/src/main/scala/com/rockymadden/stringmetric/Tokenizer.scala new file mode 100755 index 0000000..7a2544c --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/Tokenizer.scala @@ -0,0 +1,5 @@ +package com.rockymadden.stringmetric + +trait Tokenizer[A] { + def tokenize(a: A): Option[Array[A]] +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Transform.scala b/core/src/main/scala/com/rockymadden/stringmetric/Transform.scala index af8bf45..c3c5afc 100644 --- a/core/src/main/scala/com/rockymadden/stringmetric/Transform.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/Transform.scala @@ -1,72 +1,66 @@ package com.rockymadden.stringmetric -object Transform { - import scala.collection.immutable.NumericRange +import scala.collection.immutable.NumericRange +trait transform { + private val Ascii = NumericRange(0x00, 0x7F, 1) + private val ExtendedAscii = NumericRange(0x00, 0x7F, 1) + private val Latin = NumericRange(0x00, 0x24F, 1) + private val LowerCase = NumericRange(0x61, 0x7A, 1) + private val Numbers = NumericRange(0x30, 0x39, 1) + private val UpperCase = NumericRange(0x41, 0x5A, 1) - type Transform[A] = (A => A) - type StringTransform = Transform[Array[Char]] + private val filter: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => + ca.filter(c => f(c)).mkString + private val filterNot: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => + ca.filterNot(c => f(c)).mkString - object StringTransform { - private val Ascii = NumericRange(0x00, 0x7F, 1) - private val ExtendedAscii = NumericRange(0x00, 0x7F, 1) - private val Latin = NumericRange(0x00, 0x24F, 1) - private val LowerCase = NumericRange(0x61, 0x7A, 1) - private val Numbers = NumericRange(0x30, 0x39, 1) - private val UpperCase = NumericRange(0x41, 0x5A, 1) + val filterAlpha: StringTransform = (ca) => filter(ca, c => { + val ci = c.toInt + LowerCase.contains(ci) || UpperCase.contains(ci) + }) + val filterNotAlpha: StringTransform = (ca) => filterNot(ca, c => { + val ci = c.toInt + LowerCase.contains(ci) || UpperCase.contains(ci) + }) - private val filter: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => - ca.filter(c => f(c)).mkString + val filterAlphaNumeric: StringTransform = (ca) => filter(ca, c => { + val ci = c.toInt + LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) + }) - private val filterNot: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => - ca.filterNot(c => f(c)).mkString + val filterNotAlphaNumeric: StringTransform = (ca) => filterNot(ca, c => { + val ci = c.toInt + LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) + }) - val filterAlpha: StringTransform = (ca) => filter(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) - }) + val filterAscii: StringTransform = (ca) => filter(ca, c => Ascii.contains(c.toInt)) - val filterNotAlpha: StringTransform = (ca) => filterNot(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) - }) + val filterNotAscii: StringTransform = (ca) => filterNot(ca, c => Ascii.contains(c.toInt)) - val filterAlphaNumeric: StringTransform = (ca) => filter(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) - }) + val filterExtendedAscii: StringTransform = (ca) => filter(ca, c => ExtendedAscii.contains(c.toInt)) - val filterNotAlphaNumeric: StringTransform = (ca) => filterNot(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) - }) + val filterNotExtendedAscii: StringTransform = (ca) => filterNot(ca, c => ExtendedAscii.contains(c.toInt)) - val filterAscii: StringTransform = (ca) => filter(ca, c => Ascii.contains(c.toInt)) + val filterLatin: StringTransform = (ca) => filter(ca, c => Latin.contains(c.toInt)) - val filterNotAscii: StringTransform = (ca) => filterNot(ca, c => Ascii.contains(c.toInt)) + val filterNotLatin: StringTransform = (ca) => filterNot(ca, c => Latin.contains(c.toInt)) - val filterExtendedAscii: StringTransform = (ca) => filter(ca, c => ExtendedAscii.contains(c.toInt)) + val filterLowerCase: StringTransform = (ca) => filter(ca, c => LowerCase.contains(c.toInt)) - val filterNotExtendedAscii: StringTransform = (ca) => filterNot(ca, c => ExtendedAscii.contains(c.toInt)) + val filterNotLowerCase: StringTransform = (ca) => filterNot(ca, c => LowerCase.contains(c.toInt)) - val filterLatin: StringTransform = (ca) => filter(ca, c => Latin.contains(c.toInt)) + val filterNumeric: StringTransform = (ca) => filter(ca, c => Numbers.contains(c.toInt)) - val filterNotLatin: StringTransform = (ca) => filterNot(ca, c => Latin.contains(c.toInt)) + val filterNotNumeric: StringTransform = (ca) => filterNot(ca, c => Numbers.contains(c.toInt)) - val filterLowerCase: StringTransform = (ca) => filter(ca, c => LowerCase.contains(c.toInt)) + val filterUpperCase: StringTransform = (ca) => filter(ca, c => UpperCase.contains(c.toInt)) - val filterNotLowerCase: StringTransform = (ca) => filterNot(ca, c => LowerCase.contains(c.toInt)) + val filterNotUpperCase: StringTransform = (ca) => filterNot(ca, c => UpperCase.contains(c.toInt)) - val filterNumeric: StringTransform = (ca) => filter(ca, c => Numbers.contains(c.toInt)) - - val filterNotNumeric: StringTransform = (ca) => filterNot(ca, c => Numbers.contains(c.toInt)) - - val filterUpperCase: StringTransform = (ca) => filter(ca, c => UpperCase.contains(c.toInt)) - - val filterNotUpperCase: StringTransform = (ca) => filterNot(ca, c => UpperCase.contains(c.toInt)) - - val ignoreAlphaCase: StringTransform = (ca) => ca.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c) - } + val ignoreAlphaCase: StringTransform = (ca) => ca.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c) } + +object transform extends transform diff --git a/core/src/main/scala/com/rockymadden/stringmetric/package.scala b/core/src/main/scala/com/rockymadden/stringmetric/package.scala index 54bef55..82efb0c 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/package.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/package.scala @@ -3,10 +3,10 @@ package com.rockymadden package object stringmetric { import scala.language.implicitConversions - type CompareTuple[T] = (Array[T], Array[T]) type MatchTuple[T] = (Array[T], Array[T]) - + type StringTransform = Transform[Array[Char]] + type Transform[A] = (A => A) implicit def stringToCharArray(s: String): Array[Char] = s.toCharArray } diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala index 3abe7cc..bf1240c 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object MetaphoneAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} - override def compute(a: Array[Char]): Option[Array[Char]] = if (a.length == 0 || !(Alpha isSuperset a.head)) None else { diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala index d06f774..dea1765 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object MetaphoneMetric extends StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None else MetaphoneAlgorithm.compute(a).filter(_.length > 0).flatMap { mp1 => diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala index 3e46675..5f00497 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object NysiisAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} - override def compute(a: Array[Char]): Option[Array[Char]] = if (a.length == 0 || !(Alpha isSuperset a.head)) None else { diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala index c9a0914..96ea3f1 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object NysiisMetric extends StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = { val unequal: ((Char, Char) => Boolean) = (c1, c2) => { val lc1 = c1.toLower diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala index 9976847..d5055a2 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object RefinedNysiisAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} - override def compute(a: Array[Char]): Option[Array[Char]] = if (a.length == 0 || !(Alpha isSuperset a.head)) None else { diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala index 488f261..57378ac 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object RefinedNysiisMetric extends StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = { val unequal = (c1: Char, c2: Char) => { val lc1 = c1.toLower diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala index e8f3af6..ab7db8f 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object RefinedSoundexAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.Alpha - override def compute(a: Array[Char]): Option[Array[Char]] = if (a.length == 0 || !(Alpha isSuperset a.head)) None else Some(transcode(a, Array(a.head.toLower))) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala index 289fe29..51b9456 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object RefinedSoundexMetric extends StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None else if (a.head.toLower != b.head.toLower) Some(false) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala index b211908..7d24e2c 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object SoundexAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.Alpha - override def compute(a: Array[Char]): Option[Array[Char]] = if (a.length == 0 || !(Alpha isSuperset a.head)) None else { diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala index eca32db..242cbf0 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ +import com.rockymadden.stringmetric.Alphabet._ case object SoundexMetric extends StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None else if (a.head.toLower != b.head.toLower) Some(false) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala index 0ad3915..e8be07c 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala @@ -1,15 +1,12 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ /** * An implementation of the Dice/Sorensen metric. This implementation differs in that n-gram size is required. * Traditionally, the algorithm uses bigrams. */ final case class DiceSorensenMetric(n: Int) extends StringMetric[Double] { - import com.rockymadden.stringmetric.Tokenize.NGramTokenizer - import com.rockymadden.stringmetric.MatchTuple - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. else if (a.sameElements(b)) Some(1d) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala index 4a90f32..027d23d 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala @@ -1,10 +1,8 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ case object HammingMetric extends StringMetric[Int] { - import com.rockymadden.stringmetric.CompareTuple - override def compare(a: Array[Char], b: Array[Char]): Option[Int] = if (a.length == 0 || b.length == 0 || a.length != b.length) None else if (a.sameElements(b)) Some(0) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala index 6ec5db4..f234c39 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala @@ -1,10 +1,8 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ final case class JaccardMetric(n: Int) extends StringMetric[Double] { - import com.rockymadden.stringmetric.Tokenize.NGramTokenizer - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. else if (a.sameElements(b)) Some(1d) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala index 575d67a..e992e92 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala @@ -1,16 +1,13 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric -import scala.Some +import com.rockymadden.stringmetric._ +import scala.collection.mutable.{ArrayBuffer, HashSet} /** * An implementation of the Jaro metric. One differing detail in this implementation is that if a character is matched * in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios. */ case object JaroMetric extends StringMetric[Double] { - import com.rockymadden.stringmetric.{CompareTuple, MatchTuple} - import scala.collection.mutable.{ArrayBuffer, HashSet} - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = if (a.length == 0 || b.length == 0) None else if (a.sameElements(b)) Some(1d) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala index e83f73f..0ba4953 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala @@ -1,6 +1,6 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ /** * An implementation of the Jaro-Winkler metric. One differing detail in this implementation is that if a character is diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala index fb90cdc..15c3372 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala @@ -1,10 +1,8 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ case object LevenshteinMetric extends StringMetric[Int] { - import com.rockymadden.stringmetric.CompareTuple - override def compare(a: Array[Char], b: Array[Char]): Option[Int] = if (a.length == 0 || b.length == 0) None else if (a.sameElements(b)) Some(0) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala index 8c194ce..eadc853 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala @@ -1,12 +1,9 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ +import scala.math final case class NGramMetric(n: Int) extends StringMetric[Double] { - import com.rockymadden.stringmetric.MatchTuple - import com.rockymadden.stringmetric.Tokenize.NGramTokenizer - import scala.math - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. else if (a.sameElements(b)) Some(1d) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala index 8f0418b..6702101 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala @@ -1,12 +1,9 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ +import scala.math final case class OverlapMetric(n: Int) extends StringMetric[Double] { - import com.rockymadden.stringmetric.MatchTuple - import com.rockymadden.stringmetric.Tokenize.NGramTokenizer - import scala.math - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. else if (a.sameElements(b)) Some(1d) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala index fa113bc..e712015 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala @@ -1,10 +1,8 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ case object RatcliffObershelpMetric extends StringMetric[Double] { - import com.rockymadden.stringmetric.CompareTuple - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = if (a.length == 0 || b.length == 0) None else if (a.sameElements(b)) Some(1d) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala index ae6f49c..6ce96e3 100755 --- a/core/src/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala @@ -1,12 +1,10 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.Metric.StringMetric +import com.rockymadden.stringmetric._ final case class WeightedLevenshteinMetric(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal) extends StringMetric[Double] { - import com.rockymadden.stringmetric.CompareTuple - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = if (a.length == 0 || b.length == 0) None else if (a.sameElements(b)) Some(0d) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala new file mode 100644 index 0000000..c3c5afc --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala @@ -0,0 +1,66 @@ +package com.rockymadden.stringmetric + +import scala.collection.immutable.NumericRange + +trait transform { + private val Ascii = NumericRange(0x00, 0x7F, 1) + private val ExtendedAscii = NumericRange(0x00, 0x7F, 1) + private val Latin = NumericRange(0x00, 0x24F, 1) + private val LowerCase = NumericRange(0x61, 0x7A, 1) + private val Numbers = NumericRange(0x30, 0x39, 1) + private val UpperCase = NumericRange(0x41, 0x5A, 1) + + private val filter: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => + ca.filter(c => f(c)).mkString + + private val filterNot: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => + ca.filterNot(c => f(c)).mkString + + val filterAlpha: StringTransform = (ca) => filter(ca, c => { + val ci = c.toInt + LowerCase.contains(ci) || UpperCase.contains(ci) + }) + + val filterNotAlpha: StringTransform = (ca) => filterNot(ca, c => { + val ci = c.toInt + LowerCase.contains(ci) || UpperCase.contains(ci) + }) + + val filterAlphaNumeric: StringTransform = (ca) => filter(ca, c => { + val ci = c.toInt + LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) + }) + + val filterNotAlphaNumeric: StringTransform = (ca) => filterNot(ca, c => { + val ci = c.toInt + LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) + }) + + val filterAscii: StringTransform = (ca) => filter(ca, c => Ascii.contains(c.toInt)) + + val filterNotAscii: StringTransform = (ca) => filterNot(ca, c => Ascii.contains(c.toInt)) + + val filterExtendedAscii: StringTransform = (ca) => filter(ca, c => ExtendedAscii.contains(c.toInt)) + + val filterNotExtendedAscii: StringTransform = (ca) => filterNot(ca, c => ExtendedAscii.contains(c.toInt)) + + val filterLatin: StringTransform = (ca) => filter(ca, c => Latin.contains(c.toInt)) + + val filterNotLatin: StringTransform = (ca) => filterNot(ca, c => Latin.contains(c.toInt)) + + val filterLowerCase: StringTransform = (ca) => filter(ca, c => LowerCase.contains(c.toInt)) + + val filterNotLowerCase: StringTransform = (ca) => filterNot(ca, c => LowerCase.contains(c.toInt)) + + val filterNumeric: StringTransform = (ca) => filter(ca, c => Numbers.contains(c.toInt)) + + val filterNotNumeric: StringTransform = (ca) => filterNot(ca, c => Numbers.contains(c.toInt)) + + val filterUpperCase: StringTransform = (ca) => filter(ca, c => UpperCase.contains(c.toInt)) + + val filterNotUpperCase: StringTransform = (ca) => filterNot(ca, c => UpperCase.contains(c.toInt)) + + val ignoreAlphaCase: StringTransform = (ca) => ca.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c) +} + +object transform extends transform diff --git a/core/src/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala index 43e5d80..0145aea 100755 --- a/core/src/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala +++ b/core/src/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala @@ -1,8 +1,8 @@ package com.rockymadden.stringmetric -object AlphabetSpec extends org.specs2.mutable.SpecificationWithJUnit { - import Alphabet.{Alpha, Vowel} +import com.rockymadden.stringmetric.Alphabet._ +object AlphabetSpec extends org.specs2.mutable.SpecificationWithJUnit { "AlphabetSet isSuperset()" should { "return false with non-alphabet argument" in { Alpha isSuperset '0' must beFalse diff --git a/core/src/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala index 15110dd..6366685 100644 --- a/core/src/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala +++ b/core/src/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala @@ -1,10 +1,9 @@ package com.rockymadden.stringmetric -object AlgorithmSpec extends org.specs2.mutable.SpecificationWithJUnit { - import phonetic._ - import Algorithm._ - import Transform._ +import com.rockymadden.stringmetric.phonetic._ +import com.rockymadden.stringmetric.transform._ +object StringAlgorithmSpec extends org.specs2.mutable.SpecificationWithJUnit { "StringAlgorithm convenience methods" should { "pass through" in { StringAlgorithm.computeWithMetaphone("testone").get must @@ -35,7 +34,7 @@ object AlgorithmSpec extends org.specs2.mutable.SpecificationWithJUnit { "StringAlgorithmDecorator withTransform()" should { "transform" in { - (MetaphoneAlgorithm withTransform StringTransform.filterAlpha).compute("abc123").get must + (MetaphoneAlgorithm withTransform filterAlpha).compute("abc123").get must beEqualTo(MetaphoneAlgorithm.compute("abc").get) } } diff --git a/core/src/test/scala/com/rockymadden/stringmetric/MetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala index a72889e..fbdc536 100644 --- a/core/src/test/scala/com/rockymadden/stringmetric/MetricSpec.scala +++ b/core/src/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala @@ -1,11 +1,10 @@ package com.rockymadden.stringmetric -object MetricSpec extends org.specs2.mutable.SpecificationWithJUnit { - import phonetic._ - import similarity._ - import Metric._ - import Transform._ +import com.rockymadden.stringmetric.phonetic._ +import com.rockymadden.stringmetric.similarity._ +import com.rockymadden.stringmetric.transform._ +object StringMetricSpec extends org.specs2.mutable.SpecificationWithJUnit { "StringMetric convenience methods" should { "pass through" in { StringMetric.compareWithDiceSorensen(1)("testone", "testtwo").get must @@ -54,9 +53,9 @@ object MetricSpec extends org.specs2.mutable.SpecificationWithJUnit { "StringMetricDecorator withTransform()" should { "transform" in { - (MetaphoneMetric withTransform StringTransform.filterAlpha).compare("abc123", "abc456").get must + (MetaphoneMetric withTransform filterAlpha).compare("abc123", "abc456").get must beEqualTo(true) - (DiceSorensenMetric(1) withTransform StringTransform.filterAlpha).compare("abc123", "abc456").get must + (DiceSorensenMetric(1) withTransform filterAlpha).compare("abc123", "abc456").get must beEqualTo(1.0) } } diff --git a/core/src/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala index c133c66..0a994b7 100755 --- a/core/src/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala +++ b/core/src/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala @@ -1,8 +1,6 @@ package com.rockymadden.stringmetric -object TokenizeSpec extends org.specs2.mutable.SpecificationWithJUnit { - import Tokenize._ - +object StringTokenizerSpec extends org.specs2.mutable.SpecificationWithJUnit { "NGramTokenizer tokenize()" should { "return None with empty argument" in { NGramTokenizer(1).tokenize("").isDefined must beEqualTo(false) diff --git a/core/src/test/scala/com/rockymadden/stringmetric/TransformSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/TransformSpec.scala index c9c5029..01fa3a3 100644 --- a/core/src/test/scala/com/rockymadden/stringmetric/TransformSpec.scala +++ b/core/src/test/scala/com/rockymadden/stringmetric/TransformSpec.scala @@ -1,19 +1,19 @@ package com.rockymadden.stringmetric -object TransformSpec extends org.specs2.mutable.SpecificationWithJUnit { - import Transform._ +import com.rockymadden.stringmetric.transform._ - "StringTransform filterAlpha()" should { +object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { + "filterAlpha()" should { "return transformed" in { - StringTransform.filterAlpha( + filterAlpha( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo("aBc".toCharArray) } } - "StringTransform filterNotAlpha()" should { + "filterNotAlpha()" should { "return transformed" in { - StringTransform.filterNotAlpha( + filterNotAlpha( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo( ("123" + 0x250.toChar).toCharArray @@ -21,17 +21,17 @@ object TransformSpec extends org.specs2.mutable.SpecificationWithJUnit { } } - "StringTransform filterAlphaNumeric()" should { + "filterAlphaNumeric()" should { "return transformed" in { - StringTransform.filterAlphaNumeric( + filterAlphaNumeric( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo("aBc123".toCharArray) } } - "StringTransform filterNotAlphaNumeric()" should { + "filterNotAlphaNumeric()" should { "return transformed" in { - StringTransform.filterNotAlphaNumeric( + filterNotAlphaNumeric( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo( ("" + 0x250.toChar).toCharArray @@ -39,17 +39,17 @@ object TransformSpec extends org.specs2.mutable.SpecificationWithJUnit { } } - "StringTransform filterAscii()" should { + "filterAscii()" should { "return transformed" in { - StringTransform.filterAscii( + filterAscii( ("aBc" + 0x80.toChar).toCharArray ) must beEqualTo("aBc".toCharArray) } } - "StringTransform filterNotAscii()" should { + "filterNotAscii()" should { "return transformed" in { - StringTransform.filterNotAscii( + filterNotAscii( ("aBc" + 0x100.toChar).toCharArray ) must beEqualTo( ("" + 0x100.toChar).toCharArray @@ -57,17 +57,17 @@ object TransformSpec extends org.specs2.mutable.SpecificationWithJUnit { } } - "StringTransform filterExtendedAscii()" should { + "filterExtendedAscii()" should { "return transformed" in { - StringTransform.filterExtendedAscii( + filterExtendedAscii( ("aBc" + 0x100.toChar).toCharArray ) must beEqualTo("aBc".toCharArray) } } - "StringTransform filterNotExtendedAscii()" should { + "filterNotExtendedAscii()" should { "return transformed" in { - StringTransform.filterNotExtendedAscii( + filterNotExtendedAscii( ("aBc" + 0x250.toChar).toCharArray ) must beEqualTo( ("" + 0x250.toChar).toCharArray @@ -75,17 +75,17 @@ object TransformSpec extends org.specs2.mutable.SpecificationWithJUnit { } } - "StringTransform filterLatin()" should { + "filterLatin()" should { "return transformed" in { - StringTransform.filterLatin( + filterLatin( ("aBc" + 0x250.toChar).toCharArray ) must beEqualTo("aBc".toCharArray) } } - "StringTransform filterNotLatin()" should { + "filterNotLatin()" should { "return transformed" in { - StringTransform.filterNotLatin( + filterNotLatin( ("aBc" + 0x300.toChar).toCharArray ) must beEqualTo( ("" + 0x300.toChar).toCharArray @@ -93,17 +93,17 @@ object TransformSpec extends org.specs2.mutable.SpecificationWithJUnit { } } - "StringTransform filterLowerCase()" should { + "filterLowerCase()" should { "return transformed" in { - StringTransform.filterLowerCase( + filterLowerCase( "aBc123" + 0x250.toChar ) must beEqualTo("ac".toCharArray) } } - "StringTransform filterNotLowerCase()" should { + "filterNotLowerCase()" should { "return transformed" in { - StringTransform.filterNotLowerCase( + filterNotLowerCase( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo( ("B123" + 0x250.toChar).toCharArray @@ -111,17 +111,17 @@ object TransformSpec extends org.specs2.mutable.SpecificationWithJUnit { } } - "StringTransform filterNumeric()" should { + "filterNumeric()" should { "return transformed" in { - StringTransform.filterNumeric( + filterNumeric( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo("123".toCharArray) } } - "StringTransform filterNotNumeric()" should { + "filterNotNumeric()" should { "return transformed" in { - StringTransform.filterNotNumeric( + filterNotNumeric( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo( ("aBc" + 0x250.toChar).toCharArray @@ -129,17 +129,17 @@ object TransformSpec extends org.specs2.mutable.SpecificationWithJUnit { } } - "StringTransform filterUpperCase()" should { + "filterUpperCase()" should { "return transformed" in { - StringTransform.filterUpperCase( + filterUpperCase( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo("B".toCharArray) } } - "StringTransform filterNotUpperCase()" should { + "filterNotUpperCase()" should { "return transformed" in { - StringTransform.filterNotUpperCase( + filterNotUpperCase( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo( ("ac123" + 0x250.toChar).toCharArray @@ -147,9 +147,9 @@ object TransformSpec extends org.specs2.mutable.SpecificationWithJUnit { } } - "StringTransform ignoreAlphaCase()" should { + "ignoreAlphaCase()" should { "return transformed" in { - StringTransform.ignoreAlphaCase( + ignoreAlphaCase( ("aBc123" + 0x250.toChar).toCharArray ) must beEqualTo( ("abc123" + 0x250.toChar).toCharArray diff --git a/core/src/test/scala/com/rockymadden/stringmetric/transformSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/transformSpec.scala new file mode 100644 index 0000000..01fa3a3 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/transformSpec.scala @@ -0,0 +1,159 @@ +package com.rockymadden.stringmetric + +import com.rockymadden.stringmetric.transform._ + +object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { + "filterAlpha()" should { + "return transformed" in { + filterAlpha( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo("aBc".toCharArray) + } + } + + "filterNotAlpha()" should { + "return transformed" in { + filterNotAlpha( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo( + ("123" + 0x250.toChar).toCharArray + ) + } + } + + "filterAlphaNumeric()" should { + "return transformed" in { + filterAlphaNumeric( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo("aBc123".toCharArray) + } + } + + "filterNotAlphaNumeric()" should { + "return transformed" in { + filterNotAlphaNumeric( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo( + ("" + 0x250.toChar).toCharArray + ) + } + } + + "filterAscii()" should { + "return transformed" in { + filterAscii( + ("aBc" + 0x80.toChar).toCharArray + ) must beEqualTo("aBc".toCharArray) + } + } + + "filterNotAscii()" should { + "return transformed" in { + filterNotAscii( + ("aBc" + 0x100.toChar).toCharArray + ) must beEqualTo( + ("" + 0x100.toChar).toCharArray + ) + } + } + + "filterExtendedAscii()" should { + "return transformed" in { + filterExtendedAscii( + ("aBc" + 0x100.toChar).toCharArray + ) must beEqualTo("aBc".toCharArray) + } + } + + "filterNotExtendedAscii()" should { + "return transformed" in { + filterNotExtendedAscii( + ("aBc" + 0x250.toChar).toCharArray + ) must beEqualTo( + ("" + 0x250.toChar).toCharArray + ) + } + } + + "filterLatin()" should { + "return transformed" in { + filterLatin( + ("aBc" + 0x250.toChar).toCharArray + ) must beEqualTo("aBc".toCharArray) + } + } + + "filterNotLatin()" should { + "return transformed" in { + filterNotLatin( + ("aBc" + 0x300.toChar).toCharArray + ) must beEqualTo( + ("" + 0x300.toChar).toCharArray + ) + } + } + + "filterLowerCase()" should { + "return transformed" in { + filterLowerCase( + "aBc123" + 0x250.toChar + ) must beEqualTo("ac".toCharArray) + } + } + + "filterNotLowerCase()" should { + "return transformed" in { + filterNotLowerCase( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo( + ("B123" + 0x250.toChar).toCharArray + ) + } + } + + "filterNumeric()" should { + "return transformed" in { + filterNumeric( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo("123".toCharArray) + } + } + + "filterNotNumeric()" should { + "return transformed" in { + filterNotNumeric( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo( + ("aBc" + 0x250.toChar).toCharArray + ) + } + } + + "filterUpperCase()" should { + "return transformed" in { + filterUpperCase( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo("B".toCharArray) + } + } + + "filterNotUpperCase()" should { + "return transformed" in { + filterNotUpperCase( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo( + ("ac123" + 0x250.toChar).toCharArray + ) + } + } + + "ignoreAlphaCase()" should { + "return transformed" in { + ignoreAlphaCase( + ("aBc123" + 0x250.toChar).toCharArray + ) must beEqualTo( + ("abc123" + 0x250.toChar).toCharArray + ) + } + } +} |