From ef997c15f2ac152b53f05669f1d6bb9aa00aded7 Mon Sep 17 00:00:00 2001 From: Rocky Madden Date: Sat, 28 Dec 2013 11:49:33 -0700 Subject: A more functional structure. --- .../com/rockymadden/stringmetric/Algorithm.scala | 11 +- .../com/rockymadden/stringmetric/Alphabet.scala | 77 ++++++------- .../com/rockymadden/stringmetric/Filter.scala | 5 - .../com/rockymadden/stringmetric/Filterable.scala | 5 - .../com/rockymadden/stringmetric/Metric.scala | 11 +- .../rockymadden/stringmetric/StringAlgorithm.scala | 42 -------- .../rockymadden/stringmetric/StringFilter.scala | 45 -------- .../stringmetric/StringFilterable.scala | 5 - .../rockymadden/stringmetric/StringMetric.scala | 120 --------------------- .../rockymadden/stringmetric/StringTokenizer.scala | 14 --- .../com/rockymadden/stringmetric/Tokenizer.scala | 11 +- .../stringmetric/filter/AsciiControlFilter.scala | 11 -- .../filter/AsciiControlOnlyFilter.scala | 11 -- .../stringmetric/filter/AsciiLetterFilter.scala | 11 -- .../filter/AsciiLetterNumberFilter.scala | 15 --- .../filter/AsciiLetterNumberOnlyFilter.scala | 15 --- .../filter/AsciiLetterOnlyFilter.scala | 11 -- .../stringmetric/filter/AsciiNumberFilter.scala | 11 -- .../filter/AsciiNumberOnlyFilter.scala | 11 -- .../stringmetric/filter/AsciiSpaceFilter.scala | 10 -- .../stringmetric/filter/AsciiSymbolFilter.scala | 15 --- .../filter/AsciiSymbolOnlyFilter.scala | 15 --- .../filter/IgnoreAsciiLetterCaseFilter.scala | 11 -- .../stringmetric/filter/StringFilterDelegate.scala | 9 -- .../stringmetric/phonetic/MetaphoneAlgorithm.scala | 34 ++---- .../stringmetric/phonetic/MetaphoneMetric.scala | 33 ++---- .../stringmetric/phonetic/NysiisAlgorithm.scala | 40 +++---- .../stringmetric/phonetic/NysiisMetric.scala | 34 ++---- .../phonetic/RefinedNysiisAlgorithm.scala | 44 +++----- .../phonetic/RefinedNysiisMetric.scala | 34 ++---- .../phonetic/RefinedSoundexAlgorithm.scala | 31 ++---- .../phonetic/RefinedSoundexMetric.scala | 35 ++---- .../stringmetric/phonetic/SoundexAlgorithm.scala | 33 ++---- .../stringmetric/phonetic/SoundexMetric.scala | 35 ++---- .../similarity/DiceSorensenMetric.scala | 34 ++---- .../stringmetric/similarity/HammingMetric.scala | 37 ++----- .../stringmetric/similarity/JaccardMetric.scala | 34 ++---- .../stringmetric/similarity/JaroMetric.scala | 49 +++------ .../similarity/JaroWinklerMetric.scala | 29 ++--- .../similarity/LevenshteinMetric.scala | 34 ++---- .../stringmetric/similarity/NGramMetric.scala | 37 +++---- .../stringmetric/similarity/OverlapMetric.scala | 39 +++---- .../similarity/RatcliffObershelpMetric.scala | 36 ++----- .../similarity/WeightedLevenshteinMetric.scala | 44 ++------ .../stringmetric/tokenization/NGramTokenizer.scala | 28 ++--- 45 files changed, 266 insertions(+), 990 deletions(-) delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/Filter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/Filterable.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/StringFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/StringFilterable.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/StringMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala (limited to 'core/source/main') diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala index 10bc2cd..bb823aa 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala @@ -1,5 +1,12 @@ package com.rockymadden.stringmetric -trait Algorithm[A, B, C] { - def compute(a: A)(implicit b: B): Option[C] +object Algorithm { + trait AlgorithmLike[A] { + def compute(a: A): Option[A] + } + + + trait StringAlgorithmLike extends AlgorithmLike[Array[Char]] { + def compute(a: String): Option[String] + } } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala b/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala index d2ede81..6d12dd4 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala @@ -3,53 +3,40 @@ package com.rockymadden.stringmetric import scala.collection.immutable.Set object Alphabet { - protected sealed abstract class AlphabetSet { - protected[Alphabet] val Chars: Set[Char] + sealed abstract class AlphabetLike(protected[Alphabet] val chars: Set[Char]) { + def isSuperset(a: Char): Boolean = chars.contains(a) - def isSuperset(char: Char): Boolean = Chars.contains(char) + def isSuperset(a: Array[Char]): Boolean = a.length > 0 && a.takeWhile(chars.contains).length == a.length - def isSuperset(charArray: Array[Char]): Boolean = - charArray.length > 0 && charArray.takeWhile(Chars.contains(_)).length == charArray.length - - def isSuperset(string: String): Boolean = isSuperset(string.toCharArray) + def isSuperset(a: String): Boolean = isSuperset(a.toCharArray) } - case object LowercaseConsonant extends AlphabetSet { - override protected[Alphabet] final val Chars = - Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x' ,'z') - } - case object UppercaseConsonant extends AlphabetSet { - override protected[Alphabet] final val Chars = - Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X' ,'Z') - } - case object Consonant extends AlphabetSet { - override protected[Alphabet] final val Chars = LowercaseConsonant.Chars ++ UppercaseConsonant.Chars - } - case object LowercaseVowel extends AlphabetSet { - override protected[Alphabet] final val Chars = Set('a', 'e', 'i', 'o', 'u') - } - case object UppercaseVowel extends AlphabetSet { - override protected[Alphabet] final val Chars = Set('A', 'E', 'I', 'O', 'U') - } - case object Vowel extends AlphabetSet { - override protected[Alphabet] final val Chars = LowercaseVowel.Chars ++ UppercaseVowel.Chars - } - case object LowercaseY extends AlphabetSet { - override protected[Alphabet] final val Chars = Set('y') - } - case object UppercaseY extends AlphabetSet { - override protected[Alphabet] final val Chars = Set('Y') - } - case object Y extends AlphabetSet { - override protected[Alphabet] final val Chars = LowercaseY.Chars ++ UppercaseY.Chars - } - case object LowercaseAlpha extends AlphabetSet { - override protected[Alphabet] final val Chars = LowercaseConsonant.Chars ++ LowercaseVowel.Chars ++ LowercaseY.Chars - } - case object UppercaseAlpha extends AlphabetSet { - override protected[Alphabet] final val Chars = UppercaseConsonant.Chars ++ UppercaseVowel.Chars ++ UppercaseY.Chars - } - case object Alpha extends AlphabetSet { - override protected[Alphabet] final val Chars = LowercaseAlpha.Chars ++ UppercaseAlpha.Chars - } + + case object LowercaseConsonant extends AlphabetLike( + Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x' ,'z') + ) + + case object UppercaseConsonant extends AlphabetLike( + Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X' ,'Z') + ) + + case object Consonant extends AlphabetLike(LowercaseConsonant.chars ++ UppercaseConsonant.chars) + + case object LowercaseVowel extends AlphabetLike(Set('a', 'e', 'i', 'o', 'u')) + + case object UppercaseVowel extends AlphabetLike(Set('A', 'E', 'I', 'O', 'U')) + + case object Vowel extends AlphabetLike(LowercaseVowel.chars ++ UppercaseVowel.chars) + + case object LowercaseY extends AlphabetLike(Set('y')) + + case object UppercaseY extends AlphabetLike(Set('Y')) + + case object Y extends AlphabetLike(LowercaseY.chars ++ UppercaseY.chars) + + case object LowercaseAlpha extends AlphabetLike(LowercaseConsonant.chars ++ LowercaseVowel.chars ++ LowercaseY.chars) + + case object UppercaseAlpha extends AlphabetLike(UppercaseConsonant.chars ++ UppercaseVowel.chars ++ UppercaseY.chars) + + case object Alpha extends AlphabetLike(LowercaseAlpha.chars ++ UppercaseAlpha.chars) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Filter.scala b/core/source/main/scala/com/rockymadden/stringmetric/Filter.scala deleted file mode 100755 index 2a02f6b..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/Filter.scala +++ /dev/null @@ -1,5 +0,0 @@ -package com.rockymadden.stringmetric - -trait Filter[A] extends Filterable[A] { - override def filter(a: A): A = a -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Filterable.scala b/core/source/main/scala/com/rockymadden/stringmetric/Filterable.scala deleted file mode 100755 index 77dc0bf..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/Filterable.scala +++ /dev/null @@ -1,5 +0,0 @@ -package com.rockymadden.stringmetric - -trait Filterable[A] { - def filter(a: A): A -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala b/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala index 6862321..3bbed88 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala @@ -1,5 +1,12 @@ package com.rockymadden.stringmetric -trait Metric[A, B, C] { - def compare(a1: A, a2: A)(implicit b: B): Option[C] +object Metric { + trait MetricLike[A, B] { + def compare(a1: A, a2: A): Option[B] + } + + + trait StringMetricLike[A] extends MetricLike[Array[Char], A] { + def compare(string1: String, string2: String): Option[A] + } } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala deleted file mode 100755 index 0d194da..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala +++ /dev/null @@ -1,42 +0,0 @@ -package com.rockymadden.stringmetric - -trait StringAlgorithm[A, B] extends Algorithm[String, A, B] { - def compute(charArray: Array[Char])(implicit a: A): Option[Array[Char]] -} - -object StringAlgorithm { - type Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm - val Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm - - type Nysiis = com.rockymadden.stringmetric.phonetic.NysiisAlgorithm - val Nysiis = com.rockymadden.stringmetric.phonetic.NysiisAlgorithm - - type RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm - val RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm - - type RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm - val RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm - - type Soundex = com.rockymadden.stringmetric.phonetic.SoundexAlgorithm - val Soundex = com.rockymadden.stringmetric.phonetic.SoundexAlgorithm - - def computeWithMetaphone(charArray: Array[Char]) = Metaphone.compute(charArray) - - def computeWithMetaphone(string: String) = Metaphone.compute(string) - - def computeWithNysiis(charArray: Array[Char]) = Nysiis.compute(charArray) - - def computeWithNysiis(string: String) = Nysiis.compute(string) - - def computeWithRefinedNysiis(charArray: Array[Char]) = RefinedNysiis.compute(charArray) - - def computeWithRefinedNysiis(string: String) = RefinedNysiis.compute(string) - - def computeWithRefinedSoundex(charArray: Array[Char]) = RefinedSoundex.compute(charArray) - - def computeWithRefinedSoundex(string: String) = RefinedSoundex.compute(string) - - def computeWithSoundex(charArray: Array[Char]) = Soundex.compute(charArray) - - def computeWithSoundex(string: String) = Soundex.compute(string) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringFilter.scala deleted file mode 100755 index 1430d34..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/StringFilter.scala +++ /dev/null @@ -1,45 +0,0 @@ -package com.rockymadden.stringmetric - -import com.rockymadden.stringmetric.filter.StringFilterDelegate - -trait StringFilter extends Filter[String] with StringFilterable { - override def filter(charArray: Array[Char]): Array[Char] = charArray -} - -object StringFilter { - type AsciiControl = com.rockymadden.stringmetric.filter.AsciiControlFilter - lazy val asciiControl = new StringFilterDelegate with AsciiControl - - type AsciiControlOnly = com.rockymadden.stringmetric.filter.AsciiControlOnlyFilter - lazy val asciiControlOnly = new StringFilterDelegate with AsciiControlOnly - - type AsciiLetterNumber = com.rockymadden.stringmetric.filter.AsciiLetterNumberFilter - lazy val asciiLetterNumber = new StringFilterDelegate with AsciiLetterNumber - - type AsciiLetterNumberOnly = com.rockymadden.stringmetric.filter.AsciiLetterNumberOnlyFilter - lazy val asciiLetterNumberOnly = new StringFilterDelegate with AsciiLetterNumberOnly - - type AsciiLetter = com.rockymadden.stringmetric.filter.AsciiLetterFilter - lazy val asciiLetter = new StringFilterDelegate with AsciiLetter - - type AsciiLetterOnly = com.rockymadden.stringmetric.filter.AsciiLetterOnlyFilter - lazy val asciiLetterOnly = new StringFilterDelegate with AsciiLetterOnly - - type AsciiNumber = com.rockymadden.stringmetric.filter.AsciiNumberFilter - lazy val asciiNumber = new StringFilterDelegate with AsciiNumber - - type AsciiNumberOnly = com.rockymadden.stringmetric.filter.AsciiNumberOnlyFilter - lazy val asciiNumberOnly = new StringFilterDelegate with AsciiNumberOnly - - type AsciiSpace = com.rockymadden.stringmetric.filter.AsciiSpaceFilter - lazy val asciiSpace = new StringFilterDelegate with AsciiSpace - - type AsciiSymbol = com.rockymadden.stringmetric.filter.AsciiSymbolFilter - lazy val asciiSymbol = new StringFilterDelegate with AsciiSymbol - - type AsciiSymbolOnly = com.rockymadden.stringmetric.filter.AsciiSymbolOnlyFilter - lazy val asciiSymbolOnly = new StringFilterDelegate with AsciiSymbolOnly - - type IgnoreAsciiLetterCase = com.rockymadden.stringmetric.filter.IgnoreAsciiLetterCaseFilter - lazy val ignoreAsciiLetterCase = new StringFilterDelegate with IgnoreAsciiLetterCase -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringFilterable.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringFilterable.scala deleted file mode 100755 index d639dfb..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/StringFilterable.scala +++ /dev/null @@ -1,5 +0,0 @@ -package com.rockymadden.stringmetric - -trait StringFilterable extends Filterable[String] { - def filter(charArray: Array[Char]): Array[Char] -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringMetric.scala deleted file mode 100755 index 212f76d..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/StringMetric.scala +++ /dev/null @@ -1,120 +0,0 @@ -package com.rockymadden.stringmetric - -trait StringMetric[A, B] extends Metric[String, A, B] { - def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit a: A): Option[B] -} - -object StringMetric { - type DiceSorensen = com.rockymadden.stringmetric.similarity.DiceSorensenMetric - val DiceSorensen = com.rockymadden.stringmetric.similarity.DiceSorensenMetric - - type Hamming = com.rockymadden.stringmetric.similarity.HammingMetric - val Hamming = com.rockymadden.stringmetric.similarity.HammingMetric - - type Jaccard = com.rockymadden.stringmetric.similarity.JaccardMetric - val Jaccard = com.rockymadden.stringmetric.similarity.JaccardMetric - - type Jaro = com.rockymadden.stringmetric.similarity.JaroMetric - val Jaro = com.rockymadden.stringmetric.similarity.JaroMetric - - type JaroWinkler = com.rockymadden.stringmetric.similarity.JaroWinklerMetric - val JaroWinkler = com.rockymadden.stringmetric.similarity.JaroWinklerMetric - - type Levenshtein = com.rockymadden.stringmetric.similarity.LevenshteinMetric - val Levenshtein = com.rockymadden.stringmetric.similarity.LevenshteinMetric - - type Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneMetric - val Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneMetric - - type NGram = com.rockymadden.stringmetric.similarity.NGramMetric - val NGram = com.rockymadden.stringmetric.similarity.NGramMetric - - type Nysiis = com.rockymadden.stringmetric.phonetic.NysiisMetric - val Nysiis = com.rockymadden.stringmetric.phonetic.NysiisMetric - - type Overlap = com.rockymadden.stringmetric.similarity.OverlapMetric - val Overlap = com.rockymadden.stringmetric.similarity.OverlapMetric - - type RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric - val RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric - - type RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric - val RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric - - type Soundex = com.rockymadden.stringmetric.phonetic.SoundexMetric - val Soundex = com.rockymadden.stringmetric.phonetic.SoundexMetric - - type WeightedLevenshtein = com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric - val WeightedLevenshtein = com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric - - def compareWithDiceSorensen(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = - DiceSorensen.compare(charArray1, charArray2)(n) - - def compareWithDiceSorensen(string1: String, string2: String)(n: Int) = DiceSorensen.compare(string1, string2)(n) - - def compareWithHamming(charArray1: Array[Char], charArray2: Array[Char]) = Hamming.compare(charArray1, charArray2) - - def compareWithHamming(string1: String, string2: String)= Hamming.compare(string1, string2) - - def compareWithJaccard(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = - Jaccard.compare(charArray1, charArray2)(n) - - def compareWithJaccard(string1: String, string2: String)(n: Int) = Jaccard.compare(string1, string2)(n) - - def compareWithJaro(charArray1: Array[Char], charArray2: Array[Char]) = Jaro.compare(charArray1, charArray2) - - def compareWithJaro(string1: String, string2: String) = Jaro.compare(string1, string2) - - def compareWithJaroWinkler(charArray1: Array[Char], charArray2: Array[Char]) = - JaroWinkler.compare(charArray1, charArray2) - - def compareWithJaroWinkler(string1: String, string2: String) = JaroWinkler.compare(string1, string2) - - def compareWithLevenshtein(charArray1: Array[Char], charArray2: Array[Char]) = - Levenshtein.compare(charArray1, charArray2) - - def compareWithLevenshtein(string1: String, string2: String) = Levenshtein.compare(string1, string2) - - def compareWithMetaphone(charArray1: Array[Char], charArray2: Array[Char]) = - Metaphone.compare(charArray1, charArray2) - - def compareWithMetaphone(string1: String, string2: String) = Metaphone.compare(string1, string2) - - def compareWithNGram(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = - NGram.compare(charArray1, charArray2)(n) - - def compareWithNGram(string1: String, string2: String)(n: Int) = NGram.compare(string1, string2)(n) - - def compareWithNysiis(charArray1: Array[Char], charArray2: Array[Char]) = Nysiis.compare(charArray1, charArray2) - - def compareWithNysiis(string1: String, string2: String) = Nysiis.compare(string1, string2) - - def compareWithOverlap(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = - Overlap.compare(charArray1, charArray2)(n) - - def compareWithOverlap(string1: String, string2: String)(n: Int) = Overlap.compare(string1, string2)(n) - - def compareWithRefinedNysiis(charArray1: Array[Char], charArray2: Array[Char]) = - RefinedNysiis.compare(charArray1, charArray2) - - def compareWithRefinedNysiis(string1: String, string2: String) = RefinedNysiis.compare(string1, string2) - - def compareWithRefinedSoundex(charArray1: Array[Char], charArray2: Array[Char]) = - RefinedSoundex.compare(charArray1, charArray2) - - def compareWithRefinedSoundex(string1: String, string2: String) = RefinedSoundex.compare(string1, string2) - - def compareWithSoundex(charArray1: Array[Char], charArray2: Array[Char]) = Soundex.compare(charArray1, charArray2) - - def compareWithSoundex(string1: String, string2: String) = Soundex.compare(string1, string2) - - def compareWithWeightedLevenshtein(charArray1: Array[Char], charArray2: Array[Char]) - (options: (BigDecimal, BigDecimal, BigDecimal)) = - - WeightedLevenshtein.compare(charArray1, charArray2)(options) - - def compareWithWeightedLevenshtein(string1: String, string2: String) - (options: (BigDecimal, BigDecimal, BigDecimal)) = - - WeightedLevenshtein.compare(string1, string2)(options) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala deleted file mode 100755 index bef56d9..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala +++ /dev/null @@ -1,14 +0,0 @@ -package com.rockymadden.stringmetric - -trait StringTokenizer[A, B] extends Tokenizer[String, A, B] { - def tokenize(charArray: Array[Char])(implicit a: A): Option[Array[Array[Char]]] -} - -object StringTokenizer { - type NGram = com.rockymadden.stringmetric.tokenization.NGramTokenizer - val NGram = com.rockymadden.stringmetric.tokenization.NGramTokenizer - - def tokenizeWithNGram(charArray: Array[Char])(n: Int) = NGram.tokenize(charArray)(n) - - def tokenizeWithNGram(string: String)(n: Int) = NGram.tokenize(string)(n) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Tokenizer.scala b/core/source/main/scala/com/rockymadden/stringmetric/Tokenizer.scala index c9edae5..aae9742 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/Tokenizer.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/Tokenizer.scala @@ -1,5 +1,12 @@ package com.rockymadden.stringmetric -trait Tokenizer[A, B, C] { - def tokenize(a: A)(implicit b: B): Option[C] +object Tokenizer { + trait TokenizerLike[A] { + def tokenize(a: A): Option[Array[A]] + } + + + trait StringTokenizerLike extends TokenizerLike[Array[Char]] { + def tokenize(a: String): Option[Array[String]] + } } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala deleted file mode 100755 index bd45ecf..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures ASCII controls do not matter. */ -trait AsciiControlFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter(charArray.filter(c => !(c <= 31 || c == 127))) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala deleted file mode 100755 index c08b686..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures only ASCII control characters matter. */ -trait AsciiControlOnlyFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter(charArray.filter(c => (c <= 31 || c == 127))) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala deleted file mode 100755 index 24509cb..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures ASCII letters do not matter. */ -trait AsciiLetterFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter(charArray.filter(c => !((c >= 65 && c <= 90 ) || (c >= 97 && c <= 122)))) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala deleted file mode 100755 index e17c715..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala +++ /dev/null @@ -1,15 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures ASCII letters and numbers do not matter. */ -trait AsciiLetterNumberFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter( - charArray.filter(c => - !((c >= 48 && c <= 57 ) || (c >= 65 && c <= 90 ) || (c >= 97 && c <= 122)) - ) - ) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala deleted file mode 100755 index 7cf97ba..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala +++ /dev/null @@ -1,15 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures only ASCII letters and numbers matter. */ -trait AsciiLetterNumberOnlyFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter( - charArray.filter(c => - ((c >= 48 && c <= 57 ) || (c >= 65 && c <= 90 ) || (c >= 97 && c <= 122)) - ) - ) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala deleted file mode 100755 index 70032d9..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures only ASCII letters matter. */ -trait AsciiLetterOnlyFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter(charArray.filter(c => ((c >= 65 && c <= 90 ) || (c >= 97 && c <= 122)))) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala deleted file mode 100755 index 42fe77e..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures ASCII numbers do not matter. */ -trait AsciiNumberFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter(charArray.filter(c => !(c >= 48 && c <= 57))) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala deleted file mode 100755 index 3f17099..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures only ASCII numbers matter. */ -trait AsciiNumberOnlyFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter(charArray.filter(c => (c >= 48 && c <= 57 ))) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala deleted file mode 100755 index 538107d..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala +++ /dev/null @@ -1,10 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures ASCII spaces do not matter. */ -trait AsciiSpaceFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = super.filter(charArray.filter(_ != ' ')) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala deleted file mode 100755 index 7b0c810..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala +++ /dev/null @@ -1,15 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures ASCII symbols do not matter. */ -trait AsciiSymbolFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter( - charArray.filter(c => - !((c >= 32 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126)) - ) - ) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala deleted file mode 100755 index 5cb5e94..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala +++ /dev/null @@ -1,15 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures only ASCII symbols matter. */ -trait AsciiSymbolOnlyFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter( - charArray.filter(c => - ((c >= 32 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126)) - ) - ) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala deleted file mode 100755 index 54fe66f..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -/** Ensures ASCII letter case-sensitivity does not matter. */ -trait IgnoreAsciiLetterCaseFilter extends StringFilter { - abstract override def filter(charArray: Array[Char]): Array[Char] = - super.filter(charArray.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c)) - - abstract override def filter(string: String): String = filter(string.toCharArray).mkString -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala deleted file mode 100755 index 8ece42d..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala +++ /dev/null @@ -1,9 +0,0 @@ -package com.rockymadden.stringmetric.filter - -import com.rockymadden.stringmetric.StringFilter - -class StringFilterDelegate extends StringFilter { - override def filter(charArray: Array[Char]): Array[Char] = charArray - - override def filter(string: String): String = string -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala index 655a3a5..126f170 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala @@ -1,31 +1,27 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter} -import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} +import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike -/** An implementation of the Metaphone algorithm. */ -class MetaphoneAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter => - final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = { - val fca = filter(charArray) +case object MetaphoneAlgorithm extends StringAlgorithmLike { + import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} - if (fca.length == 0 || !(Alpha isSuperset fca.head)) None + override def compute(a: Array[Char]): Option[Array[Char]] = + if (a.length == 0 || !(Alpha isSuperset a.head)) None else { - val th = deduplicate(transcodeHead(fca.map(_.toLower))) + val th = deduplicate(transcodeHead(a.map(_.toLower))) val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char]) if (t.length == 0) None else Some(t) // Single Y or W would have 0 length. } - } - final override def compute(string: String)(implicit di: DummyImplicit): Option[String] = - compute(string.toCharArray).map(_.mkString) + override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) - private[this] def deduplicate(ca: Array[Char]) = + private def deduplicate(ca: Array[Char]) = if (ca.length <= 1) ca else ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last @annotation.tailrec - private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { + private def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { if (c == '\0' && r.length == 0) o else { def shift(d: Int, ca: Array[Char]) = { @@ -93,7 +89,7 @@ class MetaphoneAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: } } - private[this] def transcodeHead(ca: Array[Char]) = { + private def transcodeHead(ca: Array[Char]) = { (ca.length: @annotation.switch) match { case 0 => ca case 1 => if (ca.head == 'x') Array('s') else ca @@ -109,13 +105,3 @@ class MetaphoneAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: } } } - -object MetaphoneAlgorithm { - private lazy val self = apply() - - def apply(): MetaphoneAlgorithm = new MetaphoneAlgorithm with StringFilter - - def compute(charArray: Array[Char]) = self.compute(charArray) - - def compute(string: String) = self.compute(string) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala index 2975ad3..083016c 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala @@ -1,32 +1,15 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringFilter, StringMetric} -import com.rockymadden.stringmetric.Alphabet.Alpha +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of the Metaphone metric. */ -class MetaphoneMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Boolean] = { +case object MetaphoneMetric extends StringMetricLike[Boolean] { + import com.rockymadden.stringmetric.Alphabet.Alpha - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) - - if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None - else MetaphoneAlgorithm.compute(fca1).filter(_.length > 0).flatMap(mp1 => - MetaphoneAlgorithm.compute(fca2).filter(_.length > 0).map(mp1.sameElements(_)) + override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = + if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None + else MetaphoneAlgorithm.compute(a).filter(_.length > 0).flatMap(mp1 => + MetaphoneAlgorithm.compute(b).filter(_.length > 0).map(mp1.sameElements(_)) ) - } - - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = - compare(string1.toCharArray, string2.toCharArray) -} - -object MetaphoneMetric { - private lazy val self = apply() - - def apply(): MetaphoneMetric = new MetaphoneMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) - def compare(string1: String, string2: String) = self.compare(string1, string2) + override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala index cf16bbc..43c2bc2 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala @@ -1,16 +1,14 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter} -import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} +import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike -/** An implementation of the NYSIIS algorithm. */ -class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter => - final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = { - val fca = filter(charArray) +case object NysiisAlgorithm extends StringAlgorithmLike { + import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} - if (fca.length == 0 || !(Alpha isSuperset fca.head)) None + override def compute(a: Array[Char]): Option[Array[Char]] = + if (a.length == 0 || !(Alpha isSuperset a.head)) None else { - val tr = transcodeRight(fca.map(_.toLower)) + val tr = transcodeRight(a.map(_.toLower)) val tl = transcodeLeft(tr._1) val t = if (tl._2.length == 0) tl._1 ++ tr._2 @@ -24,26 +22,24 @@ class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: Str if (t.length == 1) Some(t) else Some(t.head +: deduplicate(cleanTerminal(cleanLast(t.tail)))) } - } - final override def compute(string: String)(implicit di: DummyImplicit): Option[String] = - compute(string.toCharArray).map(_.mkString) + override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) - private[this] def cleanLast(ca: Array[Char]) = + private def cleanLast(ca: Array[Char]) = if (ca.length == 0) ca else if(ca.last == 'a' || ca.last == 's') ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length) else ca - private[this] def cleanTerminal(ca: Array[Char]) = + private def cleanTerminal(ca: Array[Char]) = if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y' else ca - private[this] def deduplicate(ca: Array[Char]) = + private def deduplicate(ca: Array[Char]) = if (ca.length <= 1) ca else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last @annotation.tailrec - private[this] def transcodeCenter(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { + private def transcodeCenter(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { if (c == '\0' && r.length == 0) o else { def shift(d: Int, ca: Array[Char]) = { @@ -86,7 +82,7 @@ class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: Str } } - private[this] def transcodeLeft(ca: Array[Char]) = { + private def transcodeLeft(ca: Array[Char]) = { if (ca.length == 0) (Array.empty[Char], ca) else { lazy val tr2 = ca.takeRight(ca.length - 2) @@ -103,7 +99,7 @@ class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: Str } } - private[this] def transcodeRight(ca: Array[Char]) = { + private def transcodeRight(ca: Array[Char]) = { if (ca.length >= 2) { val lc = ca(ca.length - 1) val lcm1 = ca(ca.length - 2) @@ -118,13 +114,3 @@ class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: Str } else (ca, Array.empty[Char]) } } - -object NysiisAlgorithm { - private lazy val self = apply() - - def apply(): NysiisAlgorithm = new NysiisAlgorithm with StringFilter - - def compute(charArray: Array[Char]) = self.compute(charArray) - - def compute(string: String) = self.compute(string) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala index 6d1c22c..6316981 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala @@ -1,13 +1,11 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringFilter, StringMetric} -import com.rockymadden.stringmetric.Alphabet.Alpha +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of the NYSIIS metric. */ -class NysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Boolean] = { +case object NysiisMetric extends StringMetricLike[Boolean] { + import com.rockymadden.stringmetric.Alphabet.Alpha + override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = { val unequal = (c1: Char, c2: Char) => { val lc1 = c1.toLower val lc2 = c2.toLower @@ -15,26 +13,12 @@ class NysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFi (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2) } - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) - - if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None - else if (unequal(fca1.head, fca2.head)) Some(false) - else NysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(ny1 => - NysiisAlgorithm.compute(fca2).filter(_.length > 0).map(ny1.sameElements(_)) + if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None + else if (unequal(a.head, b.head)) Some(false) + else NysiisAlgorithm.compute(a).filter(_.length > 0).flatMap(ny1 => + NysiisAlgorithm.compute(b).filter(_.length > 0).map(ny1.sameElements(_)) ) } - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = - compare(string1.toCharArray, string2.toCharArray) -} - -object NysiisMetric { - private lazy val self = apply() - - def apply(): NysiisMetric = new NysiisMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) - - def compare(string1: String, string2: String) = self.compare(string1, string2) + override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala index 196681c..72bd84e 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala @@ -1,42 +1,38 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter} -import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} +import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike -/** An implementation of the refined NYSIIS algorithm. */ -class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter => - final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = { - val fca = filter(charArray) +case object RefinedNysiisAlgorithm extends StringAlgorithmLike { + import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} - if (fca.length == 0 || !(Alpha isSuperset fca.head)) None + override def compute(a: Array[Char]): Option[Array[Char]] = + if (a.length == 0 || !(Alpha isSuperset a.head)) None else { - val lfca = fca.map(_.toLower) - val tlh = transcodeLast(transcodeHead(lfca.head +: cleanLast(lfca.tail, Set('s', 'z')))) + val lca = a.map(_.toLower) + val tlh = transcodeLast(transcodeHead(lca.head +: cleanLast(lca.tail, Set('s', 'z')))) val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char]) if (t.length == 1) Some(t) else Some(deduplicate(t.head +: cleanTerminal(cleanLast(t.tail, Set('a'))))) } - } - final override def compute(string: String)(implicit di: DummyImplicit): Option[String] = - compute(string.toCharArray).map(_.mkString) + override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString) - private[this] def cleanLast(ca: Array[Char], s: Set[Char]) = + private def cleanLast(ca: Array[Char], s: Set[Char]) = if (ca.length == 0) ca else if(s.contains(ca.last)) ca.dropRight(ca.reverseIterator.takeWhile(c => s.contains(c)).length) else ca - private[this] def cleanTerminal(ca: Array[Char]) = + private def cleanTerminal(ca: Array[Char]) = if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y' else ca - private[this] def deduplicate(ca: Array[Char]) = + private def deduplicate(ca: Array[Char]) = if (ca.length <= 1) ca else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last @annotation.tailrec - private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { + private def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { if (c == '\0' && r.length == 0) o else { def shift(d: Int, ca: Array[Char]) = { @@ -95,7 +91,7 @@ class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { th } } - private[this] def transcodeHead(ca: Array[Char]) = { + private def transcodeHead(ca: Array[Char]) = if (ca.length == 0) ca else (ca.head: @annotation.switch) match { @@ -103,9 +99,8 @@ class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { th case 'p' if (ca.length >= 2 && ca(1) == 'f') => 'f' +: ca.takeRight(ca.length - 2) case _ => ca } - } - private[this] def transcodeLast(ca: Array[Char]) = { + private def transcodeLast(ca: Array[Char]) = if (ca.length >= 2) { val lc = ca(ca.length - 1) val lcm1 = ca(ca.length - 2) @@ -120,15 +115,4 @@ class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { th case _ => ca } } else ca - } -} - -object RefinedNysiisAlgorithm { - private lazy val self = apply() - - def apply(): RefinedNysiisAlgorithm = new RefinedNysiisAlgorithm with StringFilter - - def compute(charArray: Array[Char]) = self.compute(charArray) - - def compute(string: String) = self.compute(string) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala index c96cc52..73795a0 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala @@ -1,13 +1,11 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringFilter, StringMetric} -import com.rockymadden.stringmetric.Alphabet.Alpha +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of the refined NYSIIS metric. */ -class RefinedNysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Boolean] = { +case object RefinedNysiisMetric extends StringMetricLike[Boolean] { + import com.rockymadden.stringmetric.Alphabet.Alpha + override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = { val unequal = (c1: Char, c2: Char) => { val lc1 = c1.toLower val lc2 = c2.toLower @@ -15,26 +13,12 @@ class RefinedNysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: S (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2) } - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) - - if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None - else if (unequal(fca1.head, fca2.head)) Some(false) - else RefinedNysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rny1 => - RefinedNysiisAlgorithm.compute(fca2).filter(_.length > 0).map(rny1.sameElements(_)) + if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None + else if (unequal(a.head, b.head)) Some(false) + else RefinedNysiisAlgorithm.compute(a).filter(_.length > 0).flatMap(rny1 => + RefinedNysiisAlgorithm.compute(b).filter(_.length > 0).map(rny1.sameElements(_)) ) } - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = - compare(string1.toCharArray, string2.toCharArray) -} - -object RefinedNysiisMetric { - private lazy val self = apply() - - def apply(): RefinedNysiisMetric = new RefinedNysiisMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) - - def compare(string1: String, string2: String) = self.compare(string1, string2) + override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala index c1b0a6e..9f7fce9 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala @@ -1,22 +1,18 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter} -import com.rockymadden.stringmetric.Alphabet.Alpha +import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike -/** An implementation of the refined Soundex algorithm. */ -class RefinedSoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter => - final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = { - val fca = filter(charArray) +case object RefinedSoundexAlgorithm extends StringAlgorithmLike { + import com.rockymadden.stringmetric.Alphabet.Alpha - if (fca.length == 0 || !(Alpha isSuperset fca.head)) None - else Some(transcode(fca, Array(fca.head.toLower))) - } + override def compute(a: Array[Char]): Option[Array[Char]] = + if (a.length == 0 || !(Alpha isSuperset a.head)) None + else Some(transcode(a, Array(a.head.toLower))) - final override def compute(string: String)(implicit di: DummyImplicit): Option[String] = - compute(string.toCharArray).map(_.mkString) + override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) @annotation.tailrec - private[this] def transcode(i: Array[Char], o: Array[Char]): Array[Char] = { + private def transcode(i: Array[Char], o: Array[Char]): Array[Char] = if (i.length == 0) o else { val c = i.head.toLower @@ -60,15 +56,4 @@ class RefinedSoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { t transcode(i.tail, if (a != '\0') o :+ a else o) } - } -} - -object RefinedSoundexAlgorithm { - private lazy val self = apply() - - def apply(): RefinedSoundexAlgorithm = new RefinedSoundexAlgorithm with StringFilter - - def compute(charArray: Array[Char]) = self.compute(charArray) - - def compute(string: String) = self.compute(string) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala index eb2f01e..5ad0e30 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala @@ -1,33 +1,16 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringFilter, StringMetric} -import com.rockymadden.stringmetric.Alphabet.Alpha +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of the refined Soundex metric. */ -class RefinedSoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Boolean] = { +case object RefinedSoundexMetric extends StringMetricLike[Boolean] { + import com.rockymadden.stringmetric.Alphabet.Alpha - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) - - if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None - else if (fca1.head.toLower != fca2.head.toLower) Some(false) - else RefinedSoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rse1 => - RefinedSoundexAlgorithm.compute(fca2).filter(_.length > 0).map(rse1.sameElements(_)) + override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = + if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None + else if (a.head.toLower != b.head.toLower) Some(false) + else RefinedSoundexAlgorithm.compute(a).filter(_.length > 0).flatMap(rse1 => + RefinedSoundexAlgorithm.compute(b).filter(_.length > 0).map(rse1.sameElements(_)) ) - } - - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = - compare(string1.toCharArray, string2.toCharArray) -} - -object RefinedSoundexMetric { - private lazy val self = apply() - - def apply(): RefinedSoundexMetric = new RefinedSoundexMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) - def compare(string1: String, string2: String) = self.compare(string1, string2) + override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala index 2f2bf99..d615144 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala @@ -1,26 +1,22 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter} -import com.rockymadden.stringmetric.Alphabet.Alpha +import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike -/** An implementation of the Soundex algorithm. */ -class SoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter => - final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = { - val fca = filter(charArray) +case object SoundexAlgorithm extends StringAlgorithmLike { + import com.rockymadden.stringmetric.Alphabet.Alpha - if (fca.length == 0 || !(Alpha isSuperset fca.head)) None + override def compute(a: Array[Char]): Option[Array[Char]] = + if (a.length == 0 || !(Alpha isSuperset a.head)) None else { - val fc = fca.head.toLower + val fc = a.head.toLower - Some(transcode(fca.tail, fc, Array(fc)).padTo(4, '0')) + Some(transcode(a.tail, fc, Array(fc)).padTo(4, '0')) } - } - final override def compute(string: String)(implicit di: DummyImplicit): Option[String] = - compute(string.toCharArray).map(_.mkString) + override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString) @annotation.tailrec - private[this] def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] = { + private def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] = if (i.length == 0) o else { val c = i.head.toLower @@ -58,15 +54,4 @@ class SoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: St if (o.length == 3 && a != '\0') o :+ a else transcode(i.tail, c, if (a != '\0') o :+ a else o) } - } -} - -object SoundexAlgorithm { - private lazy val self = apply() - - def apply(): SoundexAlgorithm = new SoundexAlgorithm with StringFilter - - def compute(charArray: Array[Char]) = self.compute(charArray) - - def compute(string: String) = self.compute(string) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala index e4daa17..7e0bf5c 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala @@ -1,33 +1,16 @@ package com.rockymadden.stringmetric.phonetic -import com.rockymadden.stringmetric.{StringFilter, StringMetric} -import com.rockymadden.stringmetric.Alphabet.Alpha +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of the Soundex metric. */ -class SoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Boolean] = { +case object SoundexMetric extends StringMetricLike[Boolean] { + import com.rockymadden.stringmetric.Alphabet.Alpha - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) - - if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None - else if (fca1.head.toLower != fca2.head.toLower) Some(false) - else SoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(se1 => - SoundexAlgorithm.compute(fca2).filter(_.length > 0).map(se1.sameElements(_)) + override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = + if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None + else if (a.head.toLower != b.head.toLower) Some(false) + else SoundexAlgorithm.compute(a).filter(_.length > 0).flatMap(se1 => + SoundexAlgorithm.compute(b).filter(_.length > 0).map(se1.sameElements(_)) ) - } - - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = - compare(string1.toCharArray, string2.toCharArray) -} - -object SoundexMetric { - private lazy val self = apply() - - def apply(): SoundexMetric = new SoundexMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) - def compare(string1: String, string2: String) = self.compare(string1, string2) + final override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala index 5e01bb1..8381921 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala @@ -1,23 +1,22 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter} -import com.rockymadden.stringmetric.tokenization.NGramTokenizer +import com.rockymadden.stringmetric.Metric.StringMetricLike /** * An implementation of the Dice/Sorensen metric. This implementation differs in that n-gram size is required. * Traditionally, the algorithm uses bigrams. */ -class DiceSorensenMetric extends StringMetric[Int, Double] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = { - if (n <= 0) throw new IllegalArgumentException("Expected valid n.") +final case class DiceSorensenMetric(private val n: Int) extends StringMetricLike[Double] { + import com.rockymadden.stringmetric.tokenization.NGramTokenizer + import com.rockymadden.stringmetric.MatchTuple - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = { + if (n <= 0) return None - if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. - else if (fca1.sameElements(fca2)) Some(1d) - else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => - NGramTokenizer.tokenize(fca2)(n).map { ca2bg => + if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + else if (a.sameElements(b)) Some(1d) + else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => + NGramTokenizer(n).tokenize(b).map { ca2bg => val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) (2d * ms) / (ca1bg.length + ca2bg.length) @@ -25,18 +24,7 @@ class DiceSorensenMetric extends StringMetric[Int, Double] { this: StringFilter } } - final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] = - compare(string1.toCharArray, string2.toCharArray)(n: Int) + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length } - -object DiceSorensenMetric { - private lazy val self = apply() - - def apply(): DiceSorensenMetric = new DiceSorensenMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n) - - def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala index 95ff203..09b62bc 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala @@ -1,37 +1,18 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric} +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of the Hamming metric. */ -class HammingMetric extends StringMetric[DummyImplicit, Int] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Int] = { +case object HammingMetric extends StringMetricLike[Int] { + import com.rockymadden.stringmetric.CompareTuple - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) + override def compare(a: Array[Char], b: Array[Char]): Option[Int] = + if (a.length == 0 || b.length == 0 || a.length != b.length) None + else if (a.sameElements(b)) Some(0) + else Some(hamming(a, b)) - if (fca1.length == 0 || fca2.length == 0 || fca1.length != fca2.length) None - else if (fca1.sameElements(fca2)) Some(0) - else Some(hamming(fca1, fca2)) - } - - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Int] = - compare(string1.toCharArray, string2.toCharArray) - - private[this] def hamming(ct: CompareTuple[Char]) = { - require(ct._1.length == ct._2.length) + override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray) + private def hamming(ct: CompareTuple[Char]) = if (ct._1.length == 0) 0 else ct._1.zip(ct._2).count(t => t._1 != t._2) - } -} - -object HammingMetric { - private lazy val self = apply() - - def apply(): HammingMetric = new HammingMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) - - def compare(string1: String, string2: String) = self.compare(string1, string2) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala index e32c926..e1fd4ed 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala @@ -1,20 +1,17 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{StringMetric, StringFilter} -import com.rockymadden.stringmetric.tokenization.NGramTokenizer +import com.rockymadden.stringmetric.Metric.StringMetricLike -/* An implementation of the Jaccard metric. */ -class JaccardMetric extends StringMetric[Int, Double] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = { - if (n <= 0) throw new IllegalArgumentException("Expected valid n.") +final case class JaccardMetric(private val n: Int) extends StringMetricLike[Double] { + import com.rockymadden.stringmetric.tokenization.NGramTokenizer - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = { + if (n <= 0) return None - if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. - else if (fca1.sameElements(fca2)) Some(1d) - else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => - NGramTokenizer.tokenize(fca2)(n).map { ca2bg => + if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + else if (a.sameElements(b)) Some(1d) + else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => + NGramTokenizer(n).tokenize(b).map { ca2bg => val i = (ca1bg.map(_.mkString) intersect ca2bg.map(_.mkString)).length i.toDouble / (ca1bg.length + ca2bg.length - i) @@ -22,16 +19,5 @@ class JaccardMetric extends StringMetric[Int, Double] { this: StringFilter => } } - final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] = - compare(string1.toCharArray, string2.toCharArray)(n: Int) -} - -object JaccardMetric { - private lazy val self = apply() - - def apply(): JaccardMetric = new JaccardMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n) - - def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n) + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala index b7ce2c5..b9d1434 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala @@ -1,38 +1,33 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{CompareTuple, MatchTuple, StringFilter, StringMetric} -import scala.collection.mutable.{ArrayBuffer, HashSet} +import com.rockymadden.stringmetric.Metric.StringMetricLike /** * An implementation of the Jaro metric. One differing detail in this implementation is that if a character is matched * in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios. */ -class JaroMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Double] = { +case object JaroMetric extends StringMetricLike[Double] { + import com.rockymadden.stringmetric.{CompareTuple, MatchTuple} + import scala.collection.mutable.{ArrayBuffer, HashSet} - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) - - if (fca1.length == 0 || fca2.length == 0) None - else if (fca1.sameElements(fca2)) Some(1d) + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (a.length == 0 || b.length == 0) None + else if (a.sameElements(b)) Some(1d) else { - val mt = `match`(fca1, fca2) + val mt = `match`(a, b) val ms = scoreMatches(mt._1, mt._2) if (ms == 0) Some(0d) else { val ts = scoreTranspositions(mt._1, mt._2) - Some(((ms.toDouble / fca1.length) + (ms.toDouble / fca2.length) + ((ms.toDouble - ts) / ms)) / 3) + Some(((ms.toDouble / a.length) + (ms.toDouble / b.length) + ((ms.toDouble - ts) / ms)) / 3) } } - } - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] = - compare(string1.toCharArray, string2.toCharArray) + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) - private[this] def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = { + private def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = { lazy val window = math.abs((math.max(ct._1.length, ct._2.length) / 2d).floor.toInt - 1) val one = ArrayBuffer.empty[Int] val two = HashSet.empty[Int] @@ -63,25 +58,7 @@ class JaroMetric extends StringMetric[DummyImplicit, Double] { this: StringFilte (one.toArray.map(ct._1(_)), two.toArray.sortWith(_ < _).map(ct._2(_))) } - private[this] def scoreMatches(mt: MatchTuple[Char]) = { - require(mt._1.length == mt._2.length) - - mt._1.length - } - - private[this] def scoreTranspositions(mt: MatchTuple[Char]) = { - require(mt._1.length == mt._2.length) - - (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt - } -} - -object JaroMetric { - private lazy val self = apply() - - def apply(): JaroMetric = new JaroMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) + private def scoreMatches(mt: MatchTuple[Char]) = mt._1.length - def compare(string1: String, string2: String) = self.compare(string1, string2) + private def scoreTranspositions(mt: MatchTuple[Char]) = (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala index 4e9aebd..8d5d724 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala @@ -1,40 +1,23 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{StringFilter, StringMetric} +import com.rockymadden.stringmetric.Metric.StringMetricLike /** * An implementation of the Jaro-Winkler metric. One differing detail in this implementation is that if a character is * matched in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios * (e.g. comparing henka and henkan distance is 0.9666 versus the typical 0.9722). */ -class JaroWinklerMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Double] = { - - val fca1 = filter(charArray1) - val fca2 = filter(charArray2) - - JaroMetric.compare(fca1, fca2).map { +case object JaroWinklerMetric extends StringMetricLike[Double] { + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + JaroMetric.compare(a, b).map { case 0d => 0d case 1d => 1d case jaro => { - val prefix = fca1.zip(fca2).takeWhile(t => t._1 == t._2) + val prefix = a.zip(b).takeWhile(t => t._1 == t._2) jaro + ((if (prefix.length <= 4) prefix.length else 4) * 0.1d * (1 - jaro)) } } - } - - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] = - compare(string1.toCharArray, string2.toCharArray) -} - -object JaroWinklerMetric { - private lazy val self = apply() - - def apply(): JaroWinklerMetric = new JaroWinklerMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) - def compare(string1: String, string2: String) = self.compare(string1, string2) + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) } diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala index 47dff23..9f78aed 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala @@ -1,24 +1,18 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric} +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of the Levenshtein metric. */ -class LevenshteinMetric extends StringMetric[DummyImplicit, Int] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Int] = { +case object LevenshteinMetric extends StringMetricLike[Int] { + import com.rockymadden.stringmetric.CompareTuple - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) + override def compare(a: Array[Char], b: Array[Char]): Option[Int] = + if (a.length == 0 || b.length == 0) None + else if (a.sameElements(b)) Some(0) + else Some(levenshtein(a, b)) - if (fca1.length == 0 || fca2.length == 0) None - else if (fca1.sameElements(fca2)) Some(0) - else Some(levenshtein(fca1, fca2)) - } - - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Int] = - compare(string1.toCharArray, string2.toCharArray) + override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray) - private[this] def levenshtein(ct: CompareTuple[Char]) = { + private def levenshtein(ct: CompareTuple[Char]) = { val m = Array.fill[Int](ct._1.length + 1, ct._2.length + 1)(-1) def distance(t: (Int, Int)): Int = { @@ -46,13 +40,3 @@ class LevenshteinMetric extends StringMetric[DummyImplicit, Int] { this: StringF distance(ct._1.length, ct._2.length) } } - -object LevenshteinMetric { - private lazy val self = apply() - - def apply(): LevenshteinMetric = new LevenshteinMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) - - def compare(string1: String, string2: String) = self.compare(string1, string2) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala index e74e8eb..8025f38 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala @@ -1,21 +1,19 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter} -import com.rockymadden.stringmetric.tokenization.NGramTokenizer -import scala.math +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of the N-Gram metric. */ -class NGramMetric extends StringMetric[Int, Double] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = { - if (n <= 0) throw new IllegalArgumentException("Expected valid n.") +final case class NGramMetric(private val n: Int) extends StringMetricLike[Double] { + import com.rockymadden.stringmetric.MatchTuple + import com.rockymadden.stringmetric.tokenization.NGramTokenizer + import scala.math - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = { + if (n <= 0) return None - if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. - else if (fca1.sameElements(fca2)) Some(1d) - else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => - NGramTokenizer.tokenize(fca2)(n).map { ca2bg => + if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + else if (a.sameElements(b)) Some(1d) + else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => + NGramTokenizer(n).tokenize(b).map { ca2bg => val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString))) ms.toDouble / math.max(ca1bg.length, ca2bg.length) @@ -23,18 +21,7 @@ class NGramMetric extends StringMetric[Int, Double] { this: StringFilter => } } - final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] = - compare(string1.toCharArray, string2.toCharArray)(n) + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length } - -object NGramMetric { - private lazy val self = apply() - - def apply(): NGramMetric = new NGramMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n) - - def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala index a543a7e..3bfe604 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala @@ -1,40 +1,27 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter} -import com.rockymadden.stringmetric.tokenization.NGramTokenizer -import scala.math +import com.rockymadden.stringmetric.Metric.StringMetricLike -/* An implementation of the overlap metric. */ -class OverlapMetric extends StringMetric[Int, Double] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = { - if (n <= 0) throw new IllegalArgumentException("Expected valid n.") +final case class OverlapMetric(private val n: Int) extends StringMetricLike[Double] { + import com.rockymadden.stringmetric.MatchTuple + import com.rockymadden.stringmetric.tokenization.NGramTokenizer + import scala.math - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = { + if (n <= 0) return None - if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. - else if (fca1.sameElements(fca2)) Some(1d) - else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => - NGramTokenizer.tokenize(fca2)(n).map { ca2bg => + if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + else if (a.sameElements(b)) Some(1d) + else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => + NGramTokenizer(n).tokenize(b).map { ca2bg => val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) - ms.toDouble / (math.min(ca1bg.length, ca2bg.length)) + ms.toDouble / math.min(ca1bg.length, ca2bg.length) } } } - final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] = - compare(string1.toCharArray, string2.toCharArray)(n: Int) + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length } - -object OverlapMetric { - private lazy val self = apply() - - def apply(): OverlapMetric = new OverlapMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n) - - def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala index 1017b1f..197e59a 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala @@ -1,24 +1,18 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric} +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of the Ratcliff/Obershelp metric. */ -class RatcliffObershelpMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter => - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit di: DummyImplicit): Option[Double] = { +case object RatcliffObershelpMetric extends StringMetricLike[Double] { + import com.rockymadden.stringmetric.CompareTuple - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (a.length == 0 || b.length == 0) None + else if (a.sameElements(b)) Some(1d) + else Some(2d * commonSequences(a, b).foldLeft(0)(_ + _.length) / (a.length + b.length)) - if (fca1.length == 0 || fca2.length == 0) None - else if (fca1.sameElements(fca2)) Some(1d) - else Some(2d * commonSequences(fca1, fca2).foldLeft(0)(_ + _.length) / (fca1.length + fca2.length)) - } - - final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] = - compare(string1.toCharArray, string2.toCharArray) + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) - private[this] def longestCommonSubsequence(ct: CompareTuple[Char]) = { + private def longestCommonSubsequence(ct: CompareTuple[Char]) = { val m = Array.ofDim[Int](ct._1.length + 1, ct._2.length + 1) var lrc = (0, 0, 0) // Length, row, column. @@ -33,7 +27,7 @@ class RatcliffObershelpMetric extends StringMetric[DummyImplicit, Double] { this lrc } - private[this] def commonSequences(ct: CompareTuple[Char]): Array[Array[Char]] = { + private def commonSequences(ct: CompareTuple[Char]): Array[Array[Char]] = { val lcs = longestCommonSubsequence(ct) if (lcs._1 == 0) Array.empty @@ -45,13 +39,3 @@ class RatcliffObershelpMetric extends StringMetric[DummyImplicit, Double] { this } } } - -object RatcliffObershelpMetric { - private lazy val self = apply() - - def apply(): RatcliffObershelpMetric = new RatcliffObershelpMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) - - def compare(string1: String, string2: String) = self.compare(string1, string2) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala index 976b01a..2564eb7 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala @@ -1,32 +1,18 @@ package com.rockymadden.stringmetric.similarity -import com.rockymadden.stringmetric.{CompareTuple, StringMetric, StringFilter} -import scala.math.BigDecimal +import com.rockymadden.stringmetric.Metric.StringMetricLike -/** An implementation of a weighted Levenshtein metric. */ -class WeightedLevenshteinMetric - extends StringMetric[(BigDecimal, BigDecimal, BigDecimal), Double] { this: StringFilter => +final case class WeightedLevenshteinMetric(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal) + extends StringMetricLike[Double] { - /** Options order is delete, insert, then substitute weight. */ - final override def compare(charArray1: Array[Char], charArray2: Array[Char]) - (implicit options: (BigDecimal, BigDecimal, BigDecimal)): Option[Double] = { + import com.rockymadden.stringmetric.CompareTuple - if (options._1 < 0 || options._2 < 0 || options._3 < 0) - throw new IllegalArgumentException("Expected valid weight options.") + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (a.length == 0 || b.length == 0) None + else if (a.sameElements(b)) Some(0d) + else Some(weightedLevenshtein((a, b), (delete, insert, substitute)).toDouble) - val fca1 = filter(charArray1) - lazy val fca2 = filter(charArray2) - - if (fca1.length == 0 || fca2.length == 0) None - else if (fca1.sameElements(fca2)) Some(0d) - else Some(weightedLevenshtein((fca1, fca2), options).toDouble) - } - - /** Options order is delete, insert, then substitute weight. */ - final override def compare(string1: String, string2: String) - (implicit options: (BigDecimal, BigDecimal, BigDecimal)): Option[Double] = - - compare(string1.toCharArray, string2.toCharArray)(options) + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) private[this] def weightedLevenshtein(ct: CompareTuple[Char], w: (BigDecimal, BigDecimal, BigDecimal)) = { val m = Array.ofDim[BigDecimal](ct._1.length + 1, ct._2.length + 1) @@ -47,15 +33,3 @@ class WeightedLevenshteinMetric m(ct._1.length)(ct._2.length) } } - -object WeightedLevenshteinMetric { - private lazy val self = apply() - - def apply(): WeightedLevenshteinMetric = new WeightedLevenshteinMetric with StringFilter - - def compare(charArray1: Array[Char], charArray2: Array[Char])(options: (BigDecimal, BigDecimal, BigDecimal)) = - self.compare(charArray1, charArray2)(options) - - def compare(string1: String, string2: String)(options: (BigDecimal, BigDecimal, BigDecimal)) = - self.compare(string1, string2)(options) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala b/core/source/main/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala index d858a59..209288f 100755 --- a/core/source/main/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala +++ b/core/source/main/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala @@ -1,20 +1,16 @@ package com.rockymadden.stringmetric.tokenization -import com.rockymadden.stringmetric.{StringFilter, StringTokenizer} +import com.rockymadden.stringmetric.Tokenizer.StringTokenizerLike -/** An implementation of the N-Gram tokenizer. */ -class NGramTokenizer extends StringTokenizer[Int, Array[String]] { this: StringFilter => - final override def tokenize(charArray: Array[Char])(implicit n: Int): Option[Array[Array[Char]]] = { - if (n <= 0) throw new IllegalArgumentException("Expected valid n.") +final case class NGramTokenizer(private val n: Int) extends StringTokenizerLike { + override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] = { + if (n <= 0) return None - val fca = filter(charArray) - - if (fca.length < n) None - else Some(sequence(fca, Array.empty[Array[Char]], n)) + if (a.length < n) None + else Some(sequence(a, Array.empty[Array[Char]], n)) } - final override def tokenize(string: String)(implicit n: Int): Option[Array[String]] = - tokenize(string.toCharArray)(n).map(_.map(_.mkString)) + override def tokenize(a: String): Option[Array[String]] = tokenize(a.toCharArray).map(_.map(_.mkString)) @annotation.tailrec private[this] def sequence(i: Array[Char], o: Array[Array[Char]], n: Int): Array[Array[Char]] = { @@ -24,13 +20,3 @@ class NGramTokenizer extends StringTokenizer[Int, Array[String]] { this: StringF else sequence(i.tail, o :+ i.take(n), n) } } - -object NGramTokenizer { - private lazy val self = apply() - - def apply(): NGramTokenizer = new NGramTokenizer with StringFilter - - def tokenize(charArray: Array[Char])(n: Int) = self.tokenize(charArray)(n) - - def tokenize(string: String)(n: Int) = self.tokenize(string)(n) -} -- cgit v1.2.3