summary | refs | log | tree | commit | diff
path: root/core/source/main/scala/com
diff options
context:
space:
mode:
Diffstat (limited to 'core/source/main/scala/com')
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala 77
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/Filter.scala 5
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/Filterable.scala 5
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/Metric.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala 42
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/StringFilter.scala 45
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/StringFilterable.scala 5
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/StringMetric.scala 120
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala 14
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/Tokenizer.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala 15
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala 15
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala 10
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala 15
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala 15
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala 11
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala 9
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala 34
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala 33
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala 40
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala 34
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala 44
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala 34
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala 31
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala 35
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala 33
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala 35
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala 34
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala 37
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala 34
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala 49
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala 29
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala 34
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala 37
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala 39
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala 36
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala 44
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala 28
45 files changed, 266 insertions, 990 deletions
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala
index 10bc2cd..bb823aa 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala
@@ -1,5 +1,12 @@
package com.rockymadden.stringmetric
-trait Algorithm[A, B, C] {
- def compute(a: A)(implicit b: B): Option[C]
+object Algorithm {
+ trait AlgorithmLike[A] {
+ def compute(a: A): Option[A]
+ }
+
+
+ trait StringAlgorithmLike extends AlgorithmLike[Array[Char]] {
+ def compute(a: String): Option[String]
+ }
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala b/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala
index d2ede81..6d12dd4 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala
@@ -3,53 +3,40 @@ package com.rockymadden.stringmetric
import scala.collection.immutable.Set
object Alphabet {
- protected sealed abstract class AlphabetSet {
- protected[Alphabet] val Chars: Set[Char]
+ sealed abstract class AlphabetLike(protected[Alphabet] val chars: Set[Char]) {
+ def isSuperset(a: Char): Boolean = chars.contains(a)
- def isSuperset(char: Char): Boolean = Chars.contains(char)
+ def isSuperset(a: Array[Char]): Boolean = a.length > 0 && a.takeWhile(chars.contains).length == a.length
- def isSuperset(charArray: Array[Char]): Boolean =
- charArray.length > 0 && charArray.takeWhile(Chars.contains(_)).length == charArray.length
-
- def isSuperset(string: String): Boolean = isSuperset(string.toCharArray)
+ def isSuperset(a: String): Boolean = isSuperset(a.toCharArray)
}
- case object LowercaseConsonant extends AlphabetSet {
- override protected[Alphabet] final val Chars =
- Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x' ,'z')
- }
- case object UppercaseConsonant extends AlphabetSet {
- override protected[Alphabet] final val Chars =
- Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X' ,'Z')
- }
- case object Consonant extends AlphabetSet {
- override protected[Alphabet] final val Chars = LowercaseConsonant.Chars ++ UppercaseConsonant.Chars
- }
- case object LowercaseVowel extends AlphabetSet {
- override protected[Alphabet] final val Chars = Set('a', 'e', 'i', 'o', 'u')
- }
- case object UppercaseVowel extends AlphabetSet {
- override protected[Alphabet] final val Chars = Set('A', 'E', 'I', 'O', 'U')
- }
- case object Vowel extends AlphabetSet {
- override protected[Alphabet] final val Chars = LowercaseVowel.Chars ++ UppercaseVowel.Chars
- }
- case object LowercaseY extends AlphabetSet {
- override protected[Alphabet] final val Chars = Set('y')
- }
- case object UppercaseY extends AlphabetSet {
- override protected[Alphabet] final val Chars = Set('Y')
- }
- case object Y extends AlphabetSet {
- override protected[Alphabet] final val Chars = LowercaseY.Chars ++ UppercaseY.Chars
- }
- case object LowercaseAlpha extends AlphabetSet {
- override protected[Alphabet] final val Chars = LowercaseConsonant.Chars ++ LowercaseVowel.Chars ++ LowercaseY.Chars
- }
- case object UppercaseAlpha extends AlphabetSet {
- override protected[Alphabet] final val Chars = UppercaseConsonant.Chars ++ UppercaseVowel.Chars ++ UppercaseY.Chars
- }
- case object Alpha extends AlphabetSet {
- override protected[Alphabet] final val Chars = LowercaseAlpha.Chars ++ UppercaseAlpha.Chars
- }
+
+ case object LowercaseConsonant extends AlphabetLike(
+ Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x' ,'z')
+ )
+
+ case object UppercaseConsonant extends AlphabetLike(
+ Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X' ,'Z')
+ )
+
+ case object Consonant extends AlphabetLike(LowercaseConsonant.chars ++ UppercaseConsonant.chars)
+
+ case object LowercaseVowel extends AlphabetLike(Set('a', 'e', 'i', 'o', 'u'))
+
+ case object UppercaseVowel extends AlphabetLike(Set('A', 'E', 'I', 'O', 'U'))
+
+ case object Vowel extends AlphabetLike(LowercaseVowel.chars ++ UppercaseVowel.chars)
+
+ case object LowercaseY extends AlphabetLike(Set('y'))
+
+ case object UppercaseY extends AlphabetLike(Set('Y'))
+
+ case object Y extends AlphabetLike(LowercaseY.chars ++ UppercaseY.chars)
+
+ case object LowercaseAlpha extends AlphabetLike(LowercaseConsonant.chars ++ LowercaseVowel.chars ++ LowercaseY.chars)
+
+ case object UppercaseAlpha extends AlphabetLike(UppercaseConsonant.chars ++ UppercaseVowel.chars ++ UppercaseY.chars)
+
+ case object Alpha extends AlphabetLike(LowercaseAlpha.chars ++ UppercaseAlpha.chars)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Filter.scala b/core/source/main/scala/com/rockymadden/stringmetric/Filter.scala
deleted file mode 100755
index 2a02f6b..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/Filter.scala
+++ /dev/null
@@ -1,5 +0,0 @@
-package com.rockymadden.stringmetric
-
-trait Filter[A] extends Filterable[A] {
- override def filter(a: A): A = a
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Filterable.scala b/core/source/main/scala/com/rockymadden/stringmetric/Filterable.scala
deleted file mode 100755
index 77dc0bf..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/Filterable.scala
+++ /dev/null
@@ -1,5 +0,0 @@
-package com.rockymadden.stringmetric
-
-trait Filterable[A] {
- def filter(a: A): A
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala b/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala
index 6862321..3bbed88 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala
@@ -1,5 +1,12 @@
package com.rockymadden.stringmetric
-trait Metric[A, B, C] {
- def compare(a1: A, a2: A)(implicit b: B): Option[C]
+object Metric {
+ trait MetricLike[A, B] {
+ def compare(a1: A, a2: A): Option[B]
+ }
+
+
+ trait StringMetricLike[A] extends MetricLike[Array[Char], A] {
+ def compare(string1: String, string2: String): Option[A]
+ }
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala
deleted file mode 100755
index 0d194da..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/StringAlgorithm.scala
+++ /dev/null
@@ -1,42 +0,0 @@
-package com.rockymadden.stringmetric
-
-trait StringAlgorithm[A, B] extends Algorithm[String, A, B] {
- def compute(charArray: Array[Char])(implicit a: A): Option[Array[Char]]
-}
-
-object StringAlgorithm {
- type Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm
- val Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm
-
- type Nysiis = com.rockymadden.stringmetric.phonetic.NysiisAlgorithm
- val Nysiis = com.rockymadden.stringmetric.phonetic.NysiisAlgorithm
-
- type RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm
- val RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm
-
- type RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm
- val RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm
-
- type Soundex = com.rockymadden.stringmetric.phonetic.SoundexAlgorithm
- val Soundex = com.rockymadden.stringmetric.phonetic.SoundexAlgorithm
-
- def computeWithMetaphone(charArray: Array[Char]) = Metaphone.compute(charArray)
-
- def computeWithMetaphone(string: String) = Metaphone.compute(string)
-
- def computeWithNysiis(charArray: Array[Char]) = Nysiis.compute(charArray)
-
- def computeWithNysiis(string: String) = Nysiis.compute(string)
-
- def computeWithRefinedNysiis(charArray: Array[Char]) = RefinedNysiis.compute(charArray)
-
- def computeWithRefinedNysiis(string: String) = RefinedNysiis.compute(string)
-
- def computeWithRefinedSoundex(charArray: Array[Char]) = RefinedSoundex.compute(charArray)
-
- def computeWithRefinedSoundex(string: String) = RefinedSoundex.compute(string)
-
- def computeWithSoundex(charArray: Array[Char]) = Soundex.compute(charArray)
-
- def computeWithSoundex(string: String) = Soundex.compute(string)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringFilter.scala
deleted file mode 100755
index 1430d34..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/StringFilter.scala
+++ /dev/null
@@ -1,45 +0,0 @@
-package com.rockymadden.stringmetric
-
-import com.rockymadden.stringmetric.filter.StringFilterDelegate
-
-trait StringFilter extends Filter[String] with StringFilterable {
- override def filter(charArray: Array[Char]): Array[Char] = charArray
-}
-
-object StringFilter {
- type AsciiControl = com.rockymadden.stringmetric.filter.AsciiControlFilter
- lazy val asciiControl = new StringFilterDelegate with AsciiControl
-
- type AsciiControlOnly = com.rockymadden.stringmetric.filter.AsciiControlOnlyFilter
- lazy val asciiControlOnly = new StringFilterDelegate with AsciiControlOnly
-
- type AsciiLetterNumber = com.rockymadden.stringmetric.filter.AsciiLetterNumberFilter
- lazy val asciiLetterNumber = new StringFilterDelegate with AsciiLetterNumber
-
- type AsciiLetterNumberOnly = com.rockymadden.stringmetric.filter.AsciiLetterNumberOnlyFilter
- lazy val asciiLetterNumberOnly = new StringFilterDelegate with AsciiLetterNumberOnly
-
- type AsciiLetter = com.rockymadden.stringmetric.filter.AsciiLetterFilter
- lazy val asciiLetter = new StringFilterDelegate with AsciiLetter
-
- type AsciiLetterOnly = com.rockymadden.stringmetric.filter.AsciiLetterOnlyFilter
- lazy val asciiLetterOnly = new StringFilterDelegate with AsciiLetterOnly
-
- type AsciiNumber = com.rockymadden.stringmetric.filter.AsciiNumberFilter
- lazy val asciiNumber = new StringFilterDelegate with AsciiNumber
-
- type AsciiNumberOnly = com.rockymadden.stringmetric.filter.AsciiNumberOnlyFilter
- lazy val asciiNumberOnly = new StringFilterDelegate with AsciiNumberOnly
-
- type AsciiSpace = com.rockymadden.stringmetric.filter.AsciiSpaceFilter
- lazy val asciiSpace = new StringFilterDelegate with AsciiSpace
-
- type AsciiSymbol = com.rockymadden.stringmetric.filter.AsciiSymbolFilter
- lazy val asciiSymbol = new StringFilterDelegate with AsciiSymbol
-
- type AsciiSymbolOnly = com.rockymadden.stringmetric.filter.AsciiSymbolOnlyFilter
- lazy val asciiSymbolOnly = new StringFilterDelegate with AsciiSymbolOnly
-
- type IgnoreAsciiLetterCase = com.rockymadden.stringmetric.filter.IgnoreAsciiLetterCaseFilter
- lazy val ignoreAsciiLetterCase = new StringFilterDelegate with IgnoreAsciiLetterCase
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringFilterable.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringFilterable.scala
deleted file mode 100755
index d639dfb..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/StringFilterable.scala
+++ /dev/null
@@ -1,5 +0,0 @@
-package com.rockymadden.stringmetric
-
-trait StringFilterable extends Filterable[String] {
- def filter(charArray: Array[Char]): Array[Char]
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringMetric.scala
deleted file mode 100755
index 212f76d..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/StringMetric.scala
+++ /dev/null
@@ -1,120 +0,0 @@
-package com.rockymadden.stringmetric
-
-trait StringMetric[A, B] extends Metric[String, A, B] {
- def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit a: A): Option[B]
-}
-
-object StringMetric {
- type DiceSorensen = com.rockymadden.stringmetric.similarity.DiceSorensenMetric
- val DiceSorensen = com.rockymadden.stringmetric.similarity.DiceSorensenMetric
-
- type Hamming = com.rockymadden.stringmetric.similarity.HammingMetric
- val Hamming = com.rockymadden.stringmetric.similarity.HammingMetric
-
- type Jaccard = com.rockymadden.stringmetric.similarity.JaccardMetric
- val Jaccard = com.rockymadden.stringmetric.similarity.JaccardMetric
-
- type Jaro = com.rockymadden.stringmetric.similarity.JaroMetric
- val Jaro = com.rockymadden.stringmetric.similarity.JaroMetric
-
- type JaroWinkler = com.rockymadden.stringmetric.similarity.JaroWinklerMetric
- val JaroWinkler = com.rockymadden.stringmetric.similarity.JaroWinklerMetric
-
- type Levenshtein = com.rockymadden.stringmetric.similarity.LevenshteinMetric
- val Levenshtein = com.rockymadden.stringmetric.similarity.LevenshteinMetric
-
- type Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneMetric
- val Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneMetric
-
- type NGram = com.rockymadden.stringmetric.similarity.NGramMetric
- val NGram = com.rockymadden.stringmetric.similarity.NGramMetric
-
- type Nysiis = com.rockymadden.stringmetric.phonetic.NysiisMetric
- val Nysiis = com.rockymadden.stringmetric.phonetic.NysiisMetric
-
- type Overlap = com.rockymadden.stringmetric.similarity.OverlapMetric
- val Overlap = com.rockymadden.stringmetric.similarity.OverlapMetric
-
- type RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric
- val RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric
-
- type RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric
- val RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric
-
- type Soundex = com.rockymadden.stringmetric.phonetic.SoundexMetric
- val Soundex = com.rockymadden.stringmetric.phonetic.SoundexMetric
-
- type WeightedLevenshtein = com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric
- val WeightedLevenshtein = com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric
-
- def compareWithDiceSorensen(charArray1: Array[Char], charArray2: Array[Char])(n: Int) =
- DiceSorensen.compare(charArray1, charArray2)(n)
-
- def compareWithDiceSorensen(string1: String, string2: String)(n: Int) = DiceSorensen.compare(string1, string2)(n)
-
- def compareWithHamming(charArray1: Array[Char], charArray2: Array[Char]) = Hamming.compare(charArray1, charArray2)
-
- def compareWithHamming(string1: String, string2: String)= Hamming.compare(string1, string2)
-
- def compareWithJaccard(charArray1: Array[Char], charArray2: Array[Char])(n: Int) =
- Jaccard.compare(charArray1, charArray2)(n)
-
- def compareWithJaccard(string1: String, string2: String)(n: Int) = Jaccard.compare(string1, string2)(n)
-
- def compareWithJaro(charArray1: Array[Char], charArray2: Array[Char]) = Jaro.compare(charArray1, charArray2)
-
- def compareWithJaro(string1: String, string2: String) = Jaro.compare(string1, string2)
-
- def compareWithJaroWinkler(charArray1: Array[Char], charArray2: Array[Char]) =
- JaroWinkler.compare(charArray1, charArray2)
-
- def compareWithJaroWinkler(string1: String, string2: String) = JaroWinkler.compare(string1, string2)
-
- def compareWithLevenshtein(charArray1: Array[Char], charArray2: Array[Char]) =
- Levenshtein.compare(charArray1, charArray2)
-
- def compareWithLevenshtein(string1: String, string2: String) = Levenshtein.compare(string1, string2)
-
- def compareWithMetaphone(charArray1: Array[Char], charArray2: Array[Char]) =
- Metaphone.compare(charArray1, charArray2)
-
- def compareWithMetaphone(string1: String, string2: String) = Metaphone.compare(string1, string2)
-
- def compareWithNGram(charArray1: Array[Char], charArray2: Array[Char])(n: Int) =
- NGram.compare(charArray1, charArray2)(n)
-
- def compareWithNGram(string1: String, string2: String)(n: Int) = NGram.compare(string1, string2)(n)
-
- def compareWithNysiis(charArray1: Array[Char], charArray2: Array[Char]) = Nysiis.compare(charArray1, charArray2)
-
- def compareWithNysiis(string1: String, string2: String) = Nysiis.compare(string1, string2)
-
- def compareWithOverlap(charArray1: Array[Char], charArray2: Array[Char])(n: Int) =
- Overlap.compare(charArray1, charArray2)(n)
-
- def compareWithOverlap(string1: String, string2: String)(n: Int) = Overlap.compare(string1, string2)(n)
-
- def compareWithRefinedNysiis(charArray1: Array[Char], charArray2: Array[Char]) =
- RefinedNysiis.compare(charArray1, charArray2)
-
- def compareWithRefinedNysiis(string1: String, string2: String) = RefinedNysiis.compare(string1, string2)
-
- def compareWithRefinedSoundex(charArray1: Array[Char], charArray2: Array[Char]) =
- RefinedSoundex.compare(charArray1, charArray2)
-
- def compareWithRefinedSoundex(string1: String, string2: String) = RefinedSoundex.compare(string1, string2)
-
- def compareWithSoundex(charArray1: Array[Char], charArray2: Array[Char]) = Soundex.compare(charArray1, charArray2)
-
- def compareWithSoundex(string1: String, string2: String) = Soundex.compare(string1, string2)
-
- def compareWithWeightedLevenshtein(charArray1: Array[Char], charArray2: Array[Char])
- (options: (BigDecimal, BigDecimal, BigDecimal)) =
-
- WeightedLevenshtein.compare(charArray1, charArray2)(options)
-
- def compareWithWeightedLevenshtein(string1: String, string2: String)
- (options: (BigDecimal, BigDecimal, BigDecimal)) =
-
- WeightedLevenshtein.compare(string1, string2)(options)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala b/core/source/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala
deleted file mode 100755
index bef56d9..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/StringTokenizer.scala
+++ /dev/null
@@ -1,14 +0,0 @@
-package com.rockymadden.stringmetric
-
-trait StringTokenizer[A, B] extends Tokenizer[String, A, B] {
- def tokenize(charArray: Array[Char])(implicit a: A): Option[Array[Array[Char]]]
-}
-
-object StringTokenizer {
- type NGram = com.rockymadden.stringmetric.tokenization.NGramTokenizer
- val NGram = com.rockymadden.stringmetric.tokenization.NGramTokenizer
-
- def tokenizeWithNGram(charArray: Array[Char])(n: Int) = NGram.tokenize(charArray)(n)
-
- def tokenizeWithNGram(string: String)(n: Int) = NGram.tokenize(string)(n)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Tokenizer.scala b/core/source/main/scala/com/rockymadden/stringmetric/Tokenizer.scala
index c9edae5..aae9742 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/Tokenizer.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/Tokenizer.scala
@@ -1,5 +1,12 @@
package com.rockymadden.stringmetric
-trait Tokenizer[A, B, C] {
- def tokenize(a: A)(implicit b: B): Option[C]
+object Tokenizer {
+ trait TokenizerLike[A] {
+ def tokenize(a: A): Option[Array[A]]
+ }
+
+
+ trait StringTokenizerLike extends TokenizerLike[Array[Char]] {
+ def tokenize(a: String): Option[Array[String]]
+ }
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala
deleted file mode 100755
index bd45ecf..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures ASCII controls do not matter. */
-trait AsciiControlFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(charArray.filter(c => !(c <= 31 || c == 127)))
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala
deleted file mode 100755
index c08b686..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures only ASCII control characters matter. */
-trait AsciiControlOnlyFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(charArray.filter(c => (c <= 31 || c == 127)))
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala
deleted file mode 100755
index 24509cb..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures ASCII letters do not matter. */
-trait AsciiLetterFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(charArray.filter(c => !((c >= 65 && c <= 90 ) || (c >= 97 && c <= 122))))
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala
deleted file mode 100755
index e17c715..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures ASCII letters and numbers do not matter. */
-trait AsciiLetterNumberFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(
- charArray.filter(c =>
- !((c >= 48 && c <= 57 ) || (c >= 65 && c <= 90 ) || (c >= 97 && c <= 122))
- )
- )
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala
deleted file mode 100755
index 7cf97ba..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures only ASCII letters and numbers matter. */
-trait AsciiLetterNumberOnlyFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(
- charArray.filter(c =>
- ((c >= 48 && c <= 57 ) || (c >= 65 && c <= 90 ) || (c >= 97 && c <= 122))
- )
- )
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala
deleted file mode 100755
index 70032d9..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures only ASCII letters matter. */
-trait AsciiLetterOnlyFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(charArray.filter(c => ((c >= 65 && c <= 90 ) || (c >= 97 && c <= 122))))
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala
deleted file mode 100755
index 42fe77e..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures ASCII numbers do not matter. */
-trait AsciiNumberFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(charArray.filter(c => !(c >= 48 && c <= 57)))
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala
deleted file mode 100755
index 3f17099..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures only ASCII numbers matter. */
-trait AsciiNumberOnlyFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(charArray.filter(c => (c >= 48 && c <= 57 )))
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala
deleted file mode 100755
index 538107d..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala
+++ /dev/null
@@ -1,10 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures ASCII spaces do not matter. */
-trait AsciiSpaceFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] = super.filter(charArray.filter(_ != ' '))
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala
deleted file mode 100755
index 7b0c810..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures ASCII symbols do not matter. */
-trait AsciiSymbolFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(
- charArray.filter(c =>
- !((c >= 32 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126))
- )
- )
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala
deleted file mode 100755
index 5cb5e94..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures only ASCII symbols matter. */
-trait AsciiSymbolOnlyFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(
- charArray.filter(c =>
- ((c >= 32 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126))
- )
- )
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala
deleted file mode 100755
index 54fe66f..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-/** Ensures ASCII letter case-sensitivity does not matter. */
-trait IgnoreAsciiLetterCaseFilter extends StringFilter {
- abstract override def filter(charArray: Array[Char]): Array[Char] =
- super.filter(charArray.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c))
-
- abstract override def filter(string: String): String = filter(string.toCharArray).mkString
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala b/core/source/main/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala
deleted file mode 100755
index 8ece42d..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala
+++ /dev/null
@@ -1,9 +0,0 @@
-package com.rockymadden.stringmetric.filter
-
-import com.rockymadden.stringmetric.StringFilter
-
-class StringFilterDelegate extends StringFilter {
- override def filter(charArray: Array[Char]): Array[Char] = charArray
-
- override def filter(string: String): String = string
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
index 655a3a5..126f170 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
@@ -1,31 +1,27 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
-import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
+import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike
-/** An implementation of the Metaphone algorithm. */
-class MetaphoneAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
- final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
- val fca = filter(charArray)
+case object MetaphoneAlgorithm extends StringAlgorithmLike {
+ import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
- if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+ override def compute(a: Array[Char]): Option[Array[Char]] =
+ if (a.length == 0 || !(Alpha isSuperset a.head)) None
else {
- val th = deduplicate(transcodeHead(fca.map(_.toLower)))
+ val th = deduplicate(transcodeHead(a.map(_.toLower)))
val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char])
if (t.length == 0) None else Some(t) // Single Y or W would have 0 length.
}
- }
- final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
- compute(string.toCharArray).map(_.mkString)
+ override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString)
- private[this] def deduplicate(ca: Array[Char]) =
+ private def deduplicate(ca: Array[Char]) =
if (ca.length <= 1) ca
else ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
@annotation.tailrec
- private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ private def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
if (c == '\0' && r.length == 0) o
else {
def shift(d: Int, ca: Array[Char]) = {
@@ -93,7 +89,7 @@ class MetaphoneAlgorithm extends StringAlgorithm[DummyImplicit, String] { this:
}
}
- private[this] def transcodeHead(ca: Array[Char]) = {
+ private def transcodeHead(ca: Array[Char]) = {
(ca.length: @annotation.switch) match {
case 0 => ca
case 1 => if (ca.head == 'x') Array('s') else ca
@@ -109,13 +105,3 @@ class MetaphoneAlgorithm extends StringAlgorithm[DummyImplicit, String] { this:
}
}
}
-
-object MetaphoneAlgorithm {
- private lazy val self = apply()
-
- def apply(): MetaphoneAlgorithm = new MetaphoneAlgorithm with StringFilter
-
- def compute(charArray: Array[Char]) = self.compute(charArray)
-
- def compute(string: String) = self.compute(string)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
index 2975ad3..083016c 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
@@ -1,32 +1,15 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringFilter, StringMetric}
-import com.rockymadden.stringmetric.Alphabet.Alpha
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of the Metaphone metric. */
-class MetaphoneMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Boolean] = {
+case object MetaphoneMetric extends StringMetricLike[Boolean] {
+ import com.rockymadden.stringmetric.Alphabet.Alpha
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
-
- if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None
- else MetaphoneAlgorithm.compute(fca1).filter(_.length > 0).flatMap(mp1 =>
- MetaphoneAlgorithm.compute(fca2).filter(_.length > 0).map(mp1.sameElements(_))
+ override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] =
+ if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
+ else MetaphoneAlgorithm.compute(a).filter(_.length > 0).flatMap(mp1 =>
+ MetaphoneAlgorithm.compute(b).filter(_.length > 0).map(mp1.sameElements(_))
)
- }
-
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
- compare(string1.toCharArray, string2.toCharArray)
-}
-
-object MetaphoneMetric {
- private lazy val self = apply()
-
- def apply(): MetaphoneMetric = new MetaphoneMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
- def compare(string1: String, string2: String) = self.compare(string1, string2)
+ override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
index cf16bbc..43c2bc2 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
@@ -1,16 +1,14 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
-import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
+import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike
-/** An implementation of the NYSIIS algorithm. */
-class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
- final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
- val fca = filter(charArray)
+case object NysiisAlgorithm extends StringAlgorithmLike {
+ import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
- if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+ override def compute(a: Array[Char]): Option[Array[Char]] =
+ if (a.length == 0 || !(Alpha isSuperset a.head)) None
else {
- val tr = transcodeRight(fca.map(_.toLower))
+ val tr = transcodeRight(a.map(_.toLower))
val tl = transcodeLeft(tr._1)
val t =
if (tl._2.length == 0) tl._1 ++ tr._2
@@ -24,26 +22,24 @@ class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: Str
if (t.length == 1) Some(t)
else Some(t.head +: deduplicate(cleanTerminal(cleanLast(t.tail))))
}
- }
- final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
- compute(string.toCharArray).map(_.mkString)
+ override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString)
- private[this] def cleanLast(ca: Array[Char]) =
+ private def cleanLast(ca: Array[Char]) =
if (ca.length == 0) ca
else if(ca.last == 'a' || ca.last == 's') ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length)
else ca
- private[this] def cleanTerminal(ca: Array[Char]) =
+ private def cleanTerminal(ca: Array[Char]) =
if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
else ca
- private[this] def deduplicate(ca: Array[Char]) =
+ private def deduplicate(ca: Array[Char]) =
if (ca.length <= 1) ca
else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
@annotation.tailrec
- private[this] def transcodeCenter(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ private def transcodeCenter(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
if (c == '\0' && r.length == 0) o
else {
def shift(d: Int, ca: Array[Char]) = {
@@ -86,7 +82,7 @@ class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: Str
}
}
- private[this] def transcodeLeft(ca: Array[Char]) = {
+ private def transcodeLeft(ca: Array[Char]) = {
if (ca.length == 0) (Array.empty[Char], ca)
else {
lazy val tr2 = ca.takeRight(ca.length - 2)
@@ -103,7 +99,7 @@ class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: Str
}
}
- private[this] def transcodeRight(ca: Array[Char]) = {
+ private def transcodeRight(ca: Array[Char]) = {
if (ca.length >= 2) {
val lc = ca(ca.length - 1)
val lcm1 = ca(ca.length - 2)
@@ -118,13 +114,3 @@ class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: Str
} else (ca, Array.empty[Char])
}
}
-
-object NysiisAlgorithm {
- private lazy val self = apply()
-
- def apply(): NysiisAlgorithm = new NysiisAlgorithm with StringFilter
-
- def compute(charArray: Array[Char]) = self.compute(charArray)
-
- def compute(string: String) = self.compute(string)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
index 6d1c22c..6316981 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
@@ -1,13 +1,11 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringFilter, StringMetric}
-import com.rockymadden.stringmetric.Alphabet.Alpha
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of the NYSIIS metric. */
-class NysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Boolean] = {
+case object NysiisMetric extends StringMetricLike[Boolean] {
+ import com.rockymadden.stringmetric.Alphabet.Alpha
+ override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = {
val unequal = (c1: Char, c2: Char) => {
val lc1 = c1.toLower
val lc2 = c2.toLower
@@ -15,26 +13,12 @@ class NysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFi
(if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2)
}
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
-
- if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None
- else if (unequal(fca1.head, fca2.head)) Some(false)
- else NysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(ny1 =>
- NysiisAlgorithm.compute(fca2).filter(_.length > 0).map(ny1.sameElements(_))
+ if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
+ else if (unequal(a.head, b.head)) Some(false)
+ else NysiisAlgorithm.compute(a).filter(_.length > 0).flatMap(ny1 =>
+ NysiisAlgorithm.compute(b).filter(_.length > 0).map(ny1.sameElements(_))
)
}
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
- compare(string1.toCharArray, string2.toCharArray)
-}
-
-object NysiisMetric {
- private lazy val self = apply()
-
- def apply(): NysiisMetric = new NysiisMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
-
- def compare(string1: String, string2: String) = self.compare(string1, string2)
+ override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
index 196681c..72bd84e 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
@@ -1,42 +1,38 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
-import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
+import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike
-/** An implementation of the refined NYSIIS algorithm. */
-class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
- final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
- val fca = filter(charArray)
+case object RefinedNysiisAlgorithm extends StringAlgorithmLike {
+ import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
- if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+ override def compute(a: Array[Char]): Option[Array[Char]] =
+ if (a.length == 0 || !(Alpha isSuperset a.head)) None
else {
- val lfca = fca.map(_.toLower)
- val tlh = transcodeLast(transcodeHead(lfca.head +: cleanLast(lfca.tail, Set('s', 'z'))))
+ val lca = a.map(_.toLower)
+ val tlh = transcodeLast(transcodeHead(lca.head +: cleanLast(lca.tail, Set('s', 'z'))))
val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char])
if (t.length == 1) Some(t)
else Some(deduplicate(t.head +: cleanTerminal(cleanLast(t.tail, Set('a')))))
}
- }
- final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
- compute(string.toCharArray).map(_.mkString)
+ override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString)
- private[this] def cleanLast(ca: Array[Char], s: Set[Char]) =
+ private def cleanLast(ca: Array[Char], s: Set[Char]) =
if (ca.length == 0) ca
else if(s.contains(ca.last)) ca.dropRight(ca.reverseIterator.takeWhile(c => s.contains(c)).length)
else ca
- private[this] def cleanTerminal(ca: Array[Char]) =
+ private def cleanTerminal(ca: Array[Char]) =
if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
else ca
- private[this] def deduplicate(ca: Array[Char]) =
+ private def deduplicate(ca: Array[Char]) =
if (ca.length <= 1) ca
else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
@annotation.tailrec
- private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ private def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
if (c == '\0' && r.length == 0) o
else {
def shift(d: Int, ca: Array[Char]) = {
@@ -95,7 +91,7 @@ class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { th
}
}
- private[this] def transcodeHead(ca: Array[Char]) = {
+ private def transcodeHead(ca: Array[Char]) =
if (ca.length == 0) ca
else
(ca.head: @annotation.switch) match {
@@ -103,9 +99,8 @@ class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { th
case 'p' if (ca.length >= 2 && ca(1) == 'f') => 'f' +: ca.takeRight(ca.length - 2)
case _ => ca
}
- }
- private[this] def transcodeLast(ca: Array[Char]) = {
+ private def transcodeLast(ca: Array[Char]) =
if (ca.length >= 2) {
val lc = ca(ca.length - 1)
val lcm1 = ca(ca.length - 2)
@@ -120,15 +115,4 @@ class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { th
case _ => ca
}
} else ca
- }
-}
-
-object RefinedNysiisAlgorithm {
- private lazy val self = apply()
-
- def apply(): RefinedNysiisAlgorithm = new RefinedNysiisAlgorithm with StringFilter
-
- def compute(charArray: Array[Char]) = self.compute(charArray)
-
- def compute(string: String) = self.compute(string)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
index c96cc52..73795a0 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
@@ -1,13 +1,11 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringFilter, StringMetric}
-import com.rockymadden.stringmetric.Alphabet.Alpha
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of the refined NYSIIS metric. */
-class RefinedNysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Boolean] = {
+case object RefinedNysiisMetric extends StringMetricLike[Boolean] {
+ import com.rockymadden.stringmetric.Alphabet.Alpha
+ override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = {
val unequal = (c1: Char, c2: Char) => {
val lc1 = c1.toLower
val lc2 = c2.toLower
@@ -15,26 +13,12 @@ class RefinedNysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: S
(if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2)
}
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
-
- if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None
- else if (unequal(fca1.head, fca2.head)) Some(false)
- else RefinedNysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rny1 =>
- RefinedNysiisAlgorithm.compute(fca2).filter(_.length > 0).map(rny1.sameElements(_))
+ if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
+ else if (unequal(a.head, b.head)) Some(false)
+ else RefinedNysiisAlgorithm.compute(a).filter(_.length > 0).flatMap(rny1 =>
+ RefinedNysiisAlgorithm.compute(b).filter(_.length > 0).map(rny1.sameElements(_))
)
}
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
- compare(string1.toCharArray, string2.toCharArray)
-}
-
-object RefinedNysiisMetric {
- private lazy val self = apply()
-
- def apply(): RefinedNysiisMetric = new RefinedNysiisMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
-
- def compare(string1: String, string2: String) = self.compare(string1, string2)
+ override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
index c1b0a6e..9f7fce9 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
@@ -1,22 +1,18 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
-import com.rockymadden.stringmetric.Alphabet.Alpha
+import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike
-/** An implementation of the refined Soundex algorithm. */
-class RefinedSoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
- final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
- val fca = filter(charArray)
+case object RefinedSoundexAlgorithm extends StringAlgorithmLike {
+ import com.rockymadden.stringmetric.Alphabet.Alpha
- if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
- else Some(transcode(fca, Array(fca.head.toLower)))
- }
+ override def compute(a: Array[Char]): Option[Array[Char]] =
+ if (a.length == 0 || !(Alpha isSuperset a.head)) None
+ else Some(transcode(a, Array(a.head.toLower)))
- final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
- compute(string.toCharArray).map(_.mkString)
+ override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString)
@annotation.tailrec
- private[this] def transcode(i: Array[Char], o: Array[Char]): Array[Char] = {
+ private def transcode(i: Array[Char], o: Array[Char]): Array[Char] =
if (i.length == 0) o
else {
val c = i.head.toLower
@@ -60,15 +56,4 @@ class RefinedSoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { t
transcode(i.tail, if (a != '\0') o :+ a else o)
}
- }
-}
-
-object RefinedSoundexAlgorithm {
- private lazy val self = apply()
-
- def apply(): RefinedSoundexAlgorithm = new RefinedSoundexAlgorithm with StringFilter
-
- def compute(charArray: Array[Char]) = self.compute(charArray)
-
- def compute(string: String) = self.compute(string)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
index eb2f01e..5ad0e30 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
@@ -1,33 +1,16 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringFilter, StringMetric}
-import com.rockymadden.stringmetric.Alphabet.Alpha
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of the refined Soundex metric. */
-class RefinedSoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Boolean] = {
+case object RefinedSoundexMetric extends StringMetricLike[Boolean] {
+ import com.rockymadden.stringmetric.Alphabet.Alpha
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
-
- if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None
- else if (fca1.head.toLower != fca2.head.toLower) Some(false)
- else RefinedSoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rse1 =>
- RefinedSoundexAlgorithm.compute(fca2).filter(_.length > 0).map(rse1.sameElements(_))
+ override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] =
+ if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
+ else if (a.head.toLower != b.head.toLower) Some(false)
+ else RefinedSoundexAlgorithm.compute(a).filter(_.length > 0).flatMap(rse1 =>
+ RefinedSoundexAlgorithm.compute(b).filter(_.length > 0).map(rse1.sameElements(_))
)
- }
-
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
- compare(string1.toCharArray, string2.toCharArray)
-}
-
-object RefinedSoundexMetric {
- private lazy val self = apply()
-
- def apply(): RefinedSoundexMetric = new RefinedSoundexMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
- def compare(string1: String, string2: String) = self.compare(string1, string2)
+ override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
index 2f2bf99..d615144 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
@@ -1,26 +1,22 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
-import com.rockymadden.stringmetric.Alphabet.Alpha
+import com.rockymadden.stringmetric.Algorithm.StringAlgorithmLike
-/** An implementation of the Soundex algorithm. */
-class SoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
- final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
- val fca = filter(charArray)
+case object SoundexAlgorithm extends StringAlgorithmLike {
+ import com.rockymadden.stringmetric.Alphabet.Alpha
- if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+ override def compute(a: Array[Char]): Option[Array[Char]] =
+ if (a.length == 0 || !(Alpha isSuperset a.head)) None
else {
- val fc = fca.head.toLower
+ val fc = a.head.toLower
- Some(transcode(fca.tail, fc, Array(fc)).padTo(4, '0'))
+ Some(transcode(a.tail, fc, Array(fc)).padTo(4, '0'))
}
- }
- final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
- compute(string.toCharArray).map(_.mkString)
+ override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString)
@annotation.tailrec
- private[this] def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] = {
+ private def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] =
if (i.length == 0) o
else {
val c = i.head.toLower
@@ -58,15 +54,4 @@ class SoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: St
if (o.length == 3 && a != '\0') o :+ a
else transcode(i.tail, c, if (a != '\0') o :+ a else o)
}
- }
-}
-
-object SoundexAlgorithm {
- private lazy val self = apply()
-
- def apply(): SoundexAlgorithm = new SoundexAlgorithm with StringFilter
-
- def compute(charArray: Array[Char]) = self.compute(charArray)
-
- def compute(string: String) = self.compute(string)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
index e4daa17..7e0bf5c 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
@@ -1,33 +1,16 @@
package com.rockymadden.stringmetric.phonetic
-import com.rockymadden.stringmetric.{StringFilter, StringMetric}
-import com.rockymadden.stringmetric.Alphabet.Alpha
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of the Soundex metric. */
-class SoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Boolean] = {
+case object SoundexMetric extends StringMetricLike[Boolean] {
+ import com.rockymadden.stringmetric.Alphabet.Alpha
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
-
- if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None
- else if (fca1.head.toLower != fca2.head.toLower) Some(false)
- else SoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(se1 =>
- SoundexAlgorithm.compute(fca2).filter(_.length > 0).map(se1.sameElements(_))
+ override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] =
+ if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
+ else if (a.head.toLower != b.head.toLower) Some(false)
+ else SoundexAlgorithm.compute(a).filter(_.length > 0).flatMap(se1 =>
+ SoundexAlgorithm.compute(b).filter(_.length > 0).map(se1.sameElements(_))
)
- }
-
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
- compare(string1.toCharArray, string2.toCharArray)
-}
-
-object SoundexMetric {
- private lazy val self = apply()
-
- def apply(): SoundexMetric = new SoundexMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
- def compare(string1: String, string2: String) = self.compare(string1, string2)
+ final override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
index 5e01bb1..8381921 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
@@ -1,23 +1,22 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
-import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+import com.rockymadden.stringmetric.Metric.StringMetricLike
/**
* An implementation of the Dice/Sorensen metric. This implementation differs in that n-gram size is required.
* Traditionally, the algorithm uses bigrams.
*/
-class DiceSorensenMetric extends StringMetric[Int, Double] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
- if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+final case class DiceSorensenMetric(private val n: Int) extends StringMetricLike[Double] {
+ import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+ import com.rockymadden.stringmetric.MatchTuple
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
+ if (n <= 0) return None
- if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare.
- else if (fca1.sameElements(fca2)) Some(1d)
- else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg =>
- NGramTokenizer.tokenize(fca2)(n).map { ca2bg =>
+ if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
+ else if (a.sameElements(b)) Some(1d)
+ else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
+ NGramTokenizer(n).tokenize(b).map { ca2bg =>
val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString))
(2d * ms) / (ca1bg.length + ca2bg.length)
@@ -25,18 +24,7 @@ class DiceSorensenMetric extends StringMetric[Int, Double] { this: StringFilter
}
}
- final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
- compare(string1.toCharArray, string2.toCharArray)(n: Int)
+ override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
}
-
-object DiceSorensenMetric {
- private lazy val self = apply()
-
- def apply(): DiceSorensenMetric = new DiceSorensenMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
-
- def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
index 95ff203..09b62bc 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
@@ -1,37 +1,18 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of the Hamming metric. */
-class HammingMetric extends StringMetric[DummyImplicit, Int] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Int] = {
+case object HammingMetric extends StringMetricLike[Int] {
+ import com.rockymadden.stringmetric.CompareTuple
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
+ override def compare(a: Array[Char], b: Array[Char]): Option[Int] =
+ if (a.length == 0 || b.length == 0 || a.length != b.length) None
+ else if (a.sameElements(b)) Some(0)
+ else Some(hamming(a, b))
- if (fca1.length == 0 || fca2.length == 0 || fca1.length != fca2.length) None
- else if (fca1.sameElements(fca2)) Some(0)
- else Some(hamming(fca1, fca2))
- }
-
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Int] =
- compare(string1.toCharArray, string2.toCharArray)
-
- private[this] def hamming(ct: CompareTuple[Char]) = {
- require(ct._1.length == ct._2.length)
+ override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray)
+ private def hamming(ct: CompareTuple[Char]) =
if (ct._1.length == 0) 0
else ct._1.zip(ct._2).count(t => t._1 != t._2)
- }
-}
-
-object HammingMetric {
- private lazy val self = apply()
-
- def apply(): HammingMetric = new HammingMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
-
- def compare(string1: String, string2: String) = self.compare(string1, string2)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
index e32c926..e1fd4ed 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
@@ -1,20 +1,17 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{StringMetric, StringFilter}
-import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/* An implementation of the Jaccard metric. */
-class JaccardMetric extends StringMetric[Int, Double] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
- if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+final case class JaccardMetric(private val n: Int) extends StringMetricLike[Double] {
+ import com.rockymadden.stringmetric.tokenization.NGramTokenizer
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
+ if (n <= 0) return None
- if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare.
- else if (fca1.sameElements(fca2)) Some(1d)
- else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg =>
- NGramTokenizer.tokenize(fca2)(n).map { ca2bg =>
+ if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
+ else if (a.sameElements(b)) Some(1d)
+ else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
+ NGramTokenizer(n).tokenize(b).map { ca2bg =>
val i = (ca1bg.map(_.mkString) intersect ca2bg.map(_.mkString)).length
i.toDouble / (ca1bg.length + ca2bg.length - i)
@@ -22,16 +19,5 @@ class JaccardMetric extends StringMetric[Int, Double] { this: StringFilter =>
}
}
- final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
- compare(string1.toCharArray, string2.toCharArray)(n: Int)
-}
-
-object JaccardMetric {
- private lazy val self = apply()
-
- def apply(): JaccardMetric = new JaccardMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
-
- def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
+ override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
index b7ce2c5..b9d1434 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
@@ -1,38 +1,33 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{CompareTuple, MatchTuple, StringFilter, StringMetric}
-import scala.collection.mutable.{ArrayBuffer, HashSet}
+import com.rockymadden.stringmetric.Metric.StringMetricLike
/**
* An implementation of the Jaro metric. One differing detail in this implementation is that if a character is matched
* in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios.
*/
-class JaroMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Double] = {
+case object JaroMetric extends StringMetricLike[Double] {
+ import com.rockymadden.stringmetric.{CompareTuple, MatchTuple}
+ import scala.collection.mutable.{ArrayBuffer, HashSet}
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
-
- if (fca1.length == 0 || fca2.length == 0) None
- else if (fca1.sameElements(fca2)) Some(1d)
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
+ if (a.length == 0 || b.length == 0) None
+ else if (a.sameElements(b)) Some(1d)
else {
- val mt = `match`(fca1, fca2)
+ val mt = `match`(a, b)
val ms = scoreMatches(mt._1, mt._2)
if (ms == 0) Some(0d)
else {
val ts = scoreTranspositions(mt._1, mt._2)
- Some(((ms.toDouble / fca1.length) + (ms.toDouble / fca2.length) + ((ms.toDouble - ts) / ms)) / 3)
+ Some(((ms.toDouble / a.length) + (ms.toDouble / b.length) + ((ms.toDouble - ts) / ms)) / 3)
}
}
- }
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] =
- compare(string1.toCharArray, string2.toCharArray)
+ override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
- private[this] def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = {
+ private def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = {
lazy val window = math.abs((math.max(ct._1.length, ct._2.length) / 2d).floor.toInt - 1)
val one = ArrayBuffer.empty[Int]
val two = HashSet.empty[Int]
@@ -63,25 +58,7 @@ class JaroMetric extends StringMetric[DummyImplicit, Double] { this: StringFilte
(one.toArray.map(ct._1(_)), two.toArray.sortWith(_ < _).map(ct._2(_)))
}
- private[this] def scoreMatches(mt: MatchTuple[Char]) = {
- require(mt._1.length == mt._2.length)
-
- mt._1.length
- }
-
- private[this] def scoreTranspositions(mt: MatchTuple[Char]) = {
- require(mt._1.length == mt._2.length)
-
- (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt
- }
-}
-
-object JaroMetric {
- private lazy val self = apply()
-
- def apply(): JaroMetric = new JaroMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+ private def scoreMatches(mt: MatchTuple[Char]) = mt._1.length
- def compare(string1: String, string2: String) = self.compare(string1, string2)
+ private def scoreTranspositions(mt: MatchTuple[Char]) = (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
index 4e9aebd..8d5d724 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
@@ -1,40 +1,23 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Metric.StringMetricLike
/**
* An implementation of the Jaro-Winkler metric. One differing detail in this implementation is that if a character is
* matched in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios
* (e.g. comparing henka and henkan distance is 0.9666 versus the typical 0.9722).
*/
-class JaroWinklerMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Double] = {
-
- val fca1 = filter(charArray1)
- val fca2 = filter(charArray2)
-
- JaroMetric.compare(fca1, fca2).map {
+case object JaroWinklerMetric extends StringMetricLike[Double] {
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
+ JaroMetric.compare(a, b).map {
case 0d => 0d
case 1d => 1d
case jaro => {
- val prefix = fca1.zip(fca2).takeWhile(t => t._1 == t._2)
+ val prefix = a.zip(b).takeWhile(t => t._1 == t._2)
jaro + ((if (prefix.length <= 4) prefix.length else 4) * 0.1d * (1 - jaro))
}
}
- }
-
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] =
- compare(string1.toCharArray, string2.toCharArray)
-}
-
-object JaroWinklerMetric {
- private lazy val self = apply()
-
- def apply(): JaroWinklerMetric = new JaroWinklerMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
- def compare(string1: String, string2: String) = self.compare(string1, string2)
+ override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
index 47dff23..9f78aed 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
@@ -1,24 +1,18 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of the Levenshtein metric. */
-class LevenshteinMetric extends StringMetric[DummyImplicit, Int] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Int] = {
+case object LevenshteinMetric extends StringMetricLike[Int] {
+ import com.rockymadden.stringmetric.CompareTuple
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
+ override def compare(a: Array[Char], b: Array[Char]): Option[Int] =
+ if (a.length == 0 || b.length == 0) None
+ else if (a.sameElements(b)) Some(0)
+ else Some(levenshtein(a, b))
- if (fca1.length == 0 || fca2.length == 0) None
- else if (fca1.sameElements(fca2)) Some(0)
- else Some(levenshtein(fca1, fca2))
- }
-
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Int] =
- compare(string1.toCharArray, string2.toCharArray)
+ override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray)
- private[this] def levenshtein(ct: CompareTuple[Char]) = {
+ private def levenshtein(ct: CompareTuple[Char]) = {
val m = Array.fill[Int](ct._1.length + 1, ct._2.length + 1)(-1)
def distance(t: (Int, Int)): Int = {
@@ -46,13 +40,3 @@ class LevenshteinMetric extends StringMetric[DummyImplicit, Int] { this: StringF
distance(ct._1.length, ct._2.length)
}
}
-
-object LevenshteinMetric {
- private lazy val self = apply()
-
- def apply(): LevenshteinMetric = new LevenshteinMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
-
- def compare(string1: String, string2: String) = self.compare(string1, string2)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
index e74e8eb..8025f38 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
@@ -1,21 +1,19 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
-import com.rockymadden.stringmetric.tokenization.NGramTokenizer
-import scala.math
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of the N-Gram metric. */
-class NGramMetric extends StringMetric[Int, Double] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
- if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+final case class NGramMetric(private val n: Int) extends StringMetricLike[Double] {
+ import com.rockymadden.stringmetric.MatchTuple
+ import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+ import scala.math
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
+ if (n <= 0) return None
- if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare.
- else if (fca1.sameElements(fca2)) Some(1d)
- else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg =>
- NGramTokenizer.tokenize(fca2)(n).map { ca2bg =>
+ if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
+ else if (a.sameElements(b)) Some(1d)
+ else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
+ NGramTokenizer(n).tokenize(b).map { ca2bg =>
val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString)))
ms.toDouble / math.max(ca1bg.length, ca2bg.length)
@@ -23,18 +21,7 @@ class NGramMetric extends StringMetric[Int, Double] { this: StringFilter =>
}
}
- final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
- compare(string1.toCharArray, string2.toCharArray)(n)
+ override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
}
-
-object NGramMetric {
- private lazy val self = apply()
-
- def apply(): NGramMetric = new NGramMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
-
- def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
index a543a7e..3bfe604 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
@@ -1,40 +1,27 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
-import com.rockymadden.stringmetric.tokenization.NGramTokenizer
-import scala.math
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/* An implementation of the overlap metric. */
-class OverlapMetric extends StringMetric[Int, Double] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
- if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+final case class OverlapMetric(private val n: Int) extends StringMetricLike[Double] {
+ import com.rockymadden.stringmetric.MatchTuple
+ import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+ import scala.math
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
+ if (n <= 0) return None
- if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare.
- else if (fca1.sameElements(fca2)) Some(1d)
- else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg =>
- NGramTokenizer.tokenize(fca2)(n).map { ca2bg =>
+ if (a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
+ else if (a.sameElements(b)) Some(1d)
+ else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
+ NGramTokenizer(n).tokenize(b).map { ca2bg =>
val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString))
- ms.toDouble / (math.min(ca1bg.length, ca2bg.length))
+ ms.toDouble / math.min(ca1bg.length, ca2bg.length)
}
}
}
- final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
- compare(string1.toCharArray, string2.toCharArray)(n: Int)
+ override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
}
-
-object OverlapMetric {
- private lazy val self = apply()
-
- def apply(): OverlapMetric = new OverlapMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
-
- def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
index 1017b1f..197e59a 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
@@ -1,24 +1,18 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of the Ratcliff/Obershelp metric. */
-class RatcliffObershelpMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter =>
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit di: DummyImplicit): Option[Double] = {
+case object RatcliffObershelpMetric extends StringMetricLike[Double] {
+ import com.rockymadden.stringmetric.CompareTuple
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
+ if (a.length == 0 || b.length == 0) None
+ else if (a.sameElements(b)) Some(1d)
+ else Some(2d * commonSequences(a, b).foldLeft(0)(_ + _.length) / (a.length + b.length))
- if (fca1.length == 0 || fca2.length == 0) None
- else if (fca1.sameElements(fca2)) Some(1d)
- else Some(2d * commonSequences(fca1, fca2).foldLeft(0)(_ + _.length) / (fca1.length + fca2.length))
- }
-
- final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] =
- compare(string1.toCharArray, string2.toCharArray)
+ override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
- private[this] def longestCommonSubsequence(ct: CompareTuple[Char]) = {
+ private def longestCommonSubsequence(ct: CompareTuple[Char]) = {
val m = Array.ofDim[Int](ct._1.length + 1, ct._2.length + 1)
var lrc = (0, 0, 0) // Length, row, column.
@@ -33,7 +27,7 @@ class RatcliffObershelpMetric extends StringMetric[DummyImplicit, Double] { this
lrc
}
- private[this] def commonSequences(ct: CompareTuple[Char]): Array[Array[Char]] = {
+ private def commonSequences(ct: CompareTuple[Char]): Array[Array[Char]] = {
val lcs = longestCommonSubsequence(ct)
if (lcs._1 == 0) Array.empty
@@ -45,13 +39,3 @@ class RatcliffObershelpMetric extends StringMetric[DummyImplicit, Double] { this
}
}
}
-
-object RatcliffObershelpMetric {
- private lazy val self = apply()
-
- def apply(): RatcliffObershelpMetric = new RatcliffObershelpMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
-
- def compare(string1: String, string2: String) = self.compare(string1, string2)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
index 976b01a..2564eb7 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
@@ -1,32 +1,18 @@
package com.rockymadden.stringmetric.similarity
-import com.rockymadden.stringmetric.{CompareTuple, StringMetric, StringFilter}
-import scala.math.BigDecimal
+import com.rockymadden.stringmetric.Metric.StringMetricLike
-/** An implementation of a weighted Levenshtein metric. */
-class WeightedLevenshteinMetric
- extends StringMetric[(BigDecimal, BigDecimal, BigDecimal), Double] { this: StringFilter =>
+final case class WeightedLevenshteinMetric(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal)
+ extends StringMetricLike[Double] {
- /** Options order is delete, insert, then substitute weight. */
- final override def compare(charArray1: Array[Char], charArray2: Array[Char])
- (implicit options: (BigDecimal, BigDecimal, BigDecimal)): Option[Double] = {
+ import com.rockymadden.stringmetric.CompareTuple
- if (options._1 < 0 || options._2 < 0 || options._3 < 0)
- throw new IllegalArgumentException("Expected valid weight options.")
+ override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
+ if (a.length == 0 || b.length == 0) None
+ else if (a.sameElements(b)) Some(0d)
+ else Some(weightedLevenshtein((a, b), (delete, insert, substitute)).toDouble)
- val fca1 = filter(charArray1)
- lazy val fca2 = filter(charArray2)
-
- if (fca1.length == 0 || fca2.length == 0) None
- else if (fca1.sameElements(fca2)) Some(0d)
- else Some(weightedLevenshtein((fca1, fca2), options).toDouble)
- }
-
- /** Options order is delete, insert, then substitute weight. */
- final override def compare(string1: String, string2: String)
- (implicit options: (BigDecimal, BigDecimal, BigDecimal)): Option[Double] =
-
- compare(string1.toCharArray, string2.toCharArray)(options)
+ override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
private[this] def weightedLevenshtein(ct: CompareTuple[Char], w: (BigDecimal, BigDecimal, BigDecimal)) = {
val m = Array.ofDim[BigDecimal](ct._1.length + 1, ct._2.length + 1)
@@ -47,15 +33,3 @@ class WeightedLevenshteinMetric
m(ct._1.length)(ct._2.length)
}
}
-
-object WeightedLevenshteinMetric {
- private lazy val self = apply()
-
- def apply(): WeightedLevenshteinMetric = new WeightedLevenshteinMetric with StringFilter
-
- def compare(charArray1: Array[Char], charArray2: Array[Char])(options: (BigDecimal, BigDecimal, BigDecimal)) =
- self.compare(charArray1, charArray2)(options)
-
- def compare(string1: String, string2: String)(options: (BigDecimal, BigDecimal, BigDecimal)) =
- self.compare(string1, string2)(options)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala b/core/source/main/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala
index d858a59..209288f 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala
@@ -1,20 +1,16 @@
package com.rockymadden.stringmetric.tokenization
-import com.rockymadden.stringmetric.{StringFilter, StringTokenizer}
+import com.rockymadden.stringmetric.Tokenizer.StringTokenizerLike
-/** An implementation of the N-Gram tokenizer. */
-class NGramTokenizer extends StringTokenizer[Int, Array[String]] { this: StringFilter =>
- final override def tokenize(charArray: Array[Char])(implicit n: Int): Option[Array[Array[Char]]] = {
- if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+final case class NGramTokenizer(private val n: Int) extends StringTokenizerLike {
+ override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] = {
+ if (n <= 0) return None
- val fca = filter(charArray)
-
- if (fca.length < n) None
- else Some(sequence(fca, Array.empty[Array[Char]], n))
+ if (a.length < n) None
+ else Some(sequence(a, Array.empty[Array[Char]], n))
}
- final override def tokenize(string: String)(implicit n: Int): Option[Array[String]] =
- tokenize(string.toCharArray)(n).map(_.map(_.mkString))
+ override def tokenize(a: String): Option[Array[String]] = tokenize(a.toCharArray).map(_.map(_.mkString))
@annotation.tailrec
private[this] def sequence(i: Array[Char], o: Array[Array[Char]], n: Int): Array[Array[Char]] = {
@@ -24,13 +20,3 @@ class NGramTokenizer extends StringTokenizer[Int, Array[String]] { this: StringF
else sequence(i.tail, o :+ i.take(n), n)
}
}
-
-object NGramTokenizer {
- private lazy val self = apply()
-
- def apply(): NGramTokenizer = new NGramTokenizer with StringFilter
-
- def tokenize(charArray: Array[Char])(n: Int) = self.tokenize(charArray)(n)
-
- def tokenize(string: String)(n: Int) = self.tokenize(string)(n)
-}