summary refs log tree commit diff
path: root/core/source/main
diff options
context:
space:
mode:
author Rocky Madden <git@rockymadden.com> 2014-01-02 13:47:43 -0700
committer Rocky Madden <git@rockymadden.com> 2014-01-02 13:47:43 -0700
commit 49de854bb464f1be37fbb27f942b9b65e52df751 (patch)
tree 6c9a27ac1264648f67eba9c8707fa87d3dc5b3cd /core/source/main
parent 42b990a1523a68717afcbdbc2cc4968c041451ec (diff)
download stringmetric-49de854bb464f1be37fbb27f942b9b65e52df751.tar.gz
stringmetric-49de854bb464f1be37fbb27f942b9b65e52df751.tar.bz2
stringmetric-49de854bb464f1be37fbb27f942b9b65e52df751.zip
Moved from gradle to sbt.
Diffstat (limited to 'core/source/main')
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala46
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala42
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/Metric.scala77
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala33
-rw-r--r--core/source/main/scala/com/rockymadden/stringmetric/Transform.scala71
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/package.scala17
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala105
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala15
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala115
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala24
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala121
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala24
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala59
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala16
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala57
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala16
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala27
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala18
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala20
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala66
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala23
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala40
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala24
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala24
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala43
-rwxr-xr-xcore/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala36
26 files changed, 0 insertions, 1159 deletions
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala
deleted file mode 100755
index 84f136d..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala
+++ /dev/null
@@ -1,46 +0,0 @@
-package com.rockymadden.stringmetric
-
-object Algorithm {
-  import Transform.StringTransform
-
-  /** An algorithm that maps an input of type `A` to an optional result of the same type. */
-  trait Algorithm[A] {
-    def compute(a: A): Option[A]
-  }
-
-  /** An [[Algorithm]] over character arrays that also accepts plain strings. */
-  trait StringAlgorithm extends Algorithm[Array[Char]] {
-    def compute(a: String): Option[String]
-  }
-
-  /** Aliases for the phonetic algorithms, plus one forwarding helper per algorithm. */
-  object StringAlgorithm {
-    final val Metaphone = phonetic.MetaphoneAlgorithm
-    final val Nysiis = phonetic.NysiisAlgorithm
-    final val RefinedNysiis = phonetic.RefinedNysiisAlgorithm
-    final val RefinedSoundex = phonetic.RefinedSoundexAlgorithm
-    final val Soundex = phonetic.SoundexAlgorithm
-
-    def computeWithMetaphone(a: Array[Char]) = Metaphone.compute(a)
-    def computeWithNysiis(a: Array[Char]) = Nysiis.compute(a)
-    def computeWithRefinedNysiis(a: Array[Char]) = RefinedNysiis.compute(a)
-    def computeWithRefinedSoundex(a: Array[Char]) = RefinedSoundex.compute(a)
-    def computeWithSoundex(a: Array[Char]) = Soundex.compute(a)
-  }
-
-  /**
-   * Wraps a [[StringAlgorithm]] so that a transform can be applied to the input
-   * before the wrapped algorithm sees it.
-   */
-  final class StringAlgorithmDecorator(val sa: StringAlgorithm) {
-    /** Builds a new algorithm that runs the given transform first and then delegates to `sa`. */
-    val withTransform: (StringTransform => StringAlgorithm) = { st =>
-      new StringAlgorithm {
-        override def compute(a: Array[Char]): Option[Array[Char]] =
-          sa.compute(st(a))
-
-        // The string overload goes through the char-array path and joins the result back up.
-        override def compute(a: String): Option[String] =
-          sa.compute(st(a.toCharArray)).map(_.mkString)
-      }
-    }
-  }
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala b/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala
deleted file mode 100755
index 5e666d2..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala
+++ /dev/null
@@ -1,42 +0,0 @@
-package com.rockymadden.stringmetric
-
-import scala.collection.immutable.Set
-
-object Alphabet {
-  /**
-   * A fixed set of characters with membership tests for a single character,
-   * a character array and a string.
-   *
-   * @param chars the characters that belong to this alphabet
-   */
-  sealed abstract class AlphabetSet(val chars: Set[Char]) {
-    /** True when `a` belongs to this alphabet. */
-    def isSuperset(a: Char): Boolean = chars.contains(a)
-
-    /**
-     * True when `a` is non-empty and every character belongs to this alphabet.
-     * `forall` stops at the first foreign character and allocates nothing.
-     */
-    def isSuperset(a: Array[Char]): Boolean = a.length > 0 && a.forall(chars.contains)
-
-    /** String variant of the array check; the empty string yields false. */
-    def isSuperset(a: String): Boolean = isSuperset(a.toCharArray)
-  }
-
-  // 'y' is kept out of both the consonant and vowel sets; it has its own sets below.
-  case object LowercaseConsonant extends AlphabetSet("bcdfghjklmnpqrstvwxz".toSet)
-
-  case object UppercaseConsonant extends AlphabetSet("BCDFGHJKLMNPQRSTVWXZ".toSet)
-
-  case object Consonant extends AlphabetSet(LowercaseConsonant.chars ++ UppercaseConsonant.chars)
-
-  case object LowercaseVowel extends AlphabetSet("aeiou".toSet)
-
-  case object UppercaseVowel extends AlphabetSet("AEIOU".toSet)
-
-  case object Vowel extends AlphabetSet(LowercaseVowel.chars ++ UppercaseVowel.chars)
-
-  case object LowercaseY extends AlphabetSet(Set('y'))
-
-  case object UppercaseY extends AlphabetSet(Set('Y'))
-
-  case object Y extends AlphabetSet(LowercaseY.chars ++ UppercaseY.chars)
-
-  case object LowercaseAlpha extends AlphabetSet(
-    LowercaseConsonant.chars ++ LowercaseVowel.chars ++ LowercaseY.chars
-  )
-
-  case object UppercaseAlpha extends AlphabetSet(
-    UppercaseConsonant.chars ++ UppercaseVowel.chars ++ UppercaseY.chars
-  )
-
-  case object Alpha extends AlphabetSet(LowercaseAlpha.chars ++ UppercaseAlpha.chars)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala b/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala
deleted file mode 100755
index f45dd14..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala
+++ /dev/null
@@ -1,77 +0,0 @@
-package com.rockymadden.stringmetric
-
-object Metric {
-  import Transform.StringTransform
-
-  /** A comparison of two values of type `A` that yields an optional result of type `B`. */
-  trait Metric[A, B] {
-    def compare(a: A, b: A): Option[B]
-  }
-
-  /** A [[Metric]] over character arrays that also accepts plain strings. */
-  trait StringMetric[A] extends Metric[Array[Char], A] {
-    def compare(a: String, b: String): Option[A]
-  }
-
-  /** Aliases for the available metrics, plus one forwarding helper per metric. */
-  object StringMetric {
-    final val DiceSorensen = similarity.DiceSorensenMetric
-    final val Hamming = similarity.HammingMetric
-    final val Jaccard = similarity.JaccardMetric
-    final val Jaro = similarity.JaroMetric
-    final val JaroWinkler = similarity.JaroWinklerMetric
-    final val Levenshtein = similarity.LevenshteinMetric
-    final val Metaphone = phonetic.MetaphoneMetric
-    final val NGram = similarity.NGramMetric
-    final val Nysiis = phonetic.NysiisMetric
-    final val Overlap = similarity.OverlapMetric
-    final val RefinedNysiis = phonetic.RefinedNysiisMetric
-    final val RefinedSoundex = phonetic.RefinedSoundexMetric
-    final val Soundex = phonetic.SoundexMetric
-    final val WeightedLevenshtein = similarity.WeightedLevenshteinMetric
-
-    // Metrics that take an `n` parameter receive it in a first parameter list.
-    def compareWithDiceSorensen(n: Int)(a: Array[Char], b: Array[Char]) = DiceSorensen(n).compare(a, b)
-    def compareWithJaccard(n: Int)(a: Array[Char], b: Array[Char]) = Jaccard(n).compare(a, b)
-    def compareWithNGram(n: Int)(a: Array[Char], b: Array[Char]) = NGram(n).compare(a, b)
-    def compareWithOverlap(n: Int)(a: Array[Char], b: Array[Char]) = Overlap(n).compare(a, b)
-
-    // Parameterless metrics.
-    def compareWithHamming(a: Array[Char], b: Array[Char]) = Hamming.compare(a, b)
-    def compareWithJaro(a: Array[Char], b: Array[Char]) = Jaro.compare(a, b)
-    def compareWithJaroWinkler(a: Array[Char], b: Array[Char]) = JaroWinkler.compare(a, b)
-    def compareWithLevenshtein(a: Array[Char], b: Array[Char]) = Levenshtein.compare(a, b)
-    def compareWithMetaphone(a: Array[Char], b: Array[Char]) = Metaphone.compare(a, b)
-    def compareWithNysiis(a: Array[Char], b: Array[Char]) = Nysiis.compare(a, b)
-    def compareWithRefinedNysiis(a: Array[Char], b: Array[Char]) = RefinedNysiis.compare(a, b)
-    def compareWithRefinedSoundex(a: Array[Char], b: Array[Char]) = RefinedSoundex.compare(a, b)
-    def compareWithSoundex(a: Array[Char], b: Array[Char]) = Soundex.compare(a, b)
-
-    // The three edit weights are passed in the first parameter list.
-    def compareWithWeightedLevenshtein(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal)
-      (a: Array[Char], b: Array[Char]) =
-      WeightedLevenshtein(delete, insert, substitute).compare(a, b)
-  }
-
-  /**
-   * Wraps a [[StringMetric]] so that a transform can be applied to both inputs
-   * before the wrapped metric compares them.
-   */
-  final class StringMetricDecorator[A](val sm: StringMetric[A]) {
-    /** Builds a new metric that transforms both arguments and then delegates to `sm`. */
-    val withTransform: (StringTransform => StringMetric[A]) = { st =>
-      new StringMetric[A] {
-        override def compare(a: Array[Char], b: Array[Char]): Option[A] =
-          sm.compare(st(a), st(b))
-
-        override def compare(a: String, b: String): Option[A] =
-          sm.compare(st(a.toCharArray), st(b.toCharArray))
-      }
-    }
-  }
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala b/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala
deleted file mode 100755
index a011c96..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-package com.rockymadden.stringmetric
-
-object Tokenize {
-  /** Splits a value of type `A` into an optional array of tokens of the same type. */
-  sealed trait Tokenizer[A] {
-    def tokenize(a: A): Option[Array[A]]
-  }
-
-  /** A [[Tokenizer]] over character arrays that also accepts plain strings. */
-  sealed trait StringTokenizer extends Tokenizer[Array[Char]] {
-    def tokenize(a: String): Option[Array[String]]
-  }
-
-  /** Alias for the n-gram tokenizer, plus a forwarding helper. */
-  object StringTokenizer {
-    val NGram = NGramTokenizer
-
-    def tokenizeWithNGram(n: Int)(charArray: Array[Char]) = NGram(n).tokenize(charArray)
-  }
-
-  /**
-   * Splits input into its overlapping windows of `n` consecutive characters.
-   *
-   * @param n the window size; values of zero or less never produce tokens
-   */
-  final case class NGramTokenizer(n: Int) extends StringTokenizer {
-    /**
-     * @return `None` when `n` is not positive or the input is shorter than `n`;
-     *         otherwise the `a.length - n + 1` windows in input order
-     */
-    override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] =
-      if (n <= 0 || a.length < n) None
-      // With a.length >= n every window produced by sliding has exactly n characters.
-      else Some(a.sliding(n).toArray)
-
-    /** String variant: tokenizes the characters and joins each window back into a string. */
-    override def tokenize(a: String): Option[Array[String]] =
-      tokenize(a.toCharArray).map(_.map(_.mkString))
-  }
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Transform.scala b/core/source/main/scala/com/rockymadden/stringmetric/Transform.scala
deleted file mode 100644
index c2cdace..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/Transform.scala
+++ /dev/null
@@ -1,71 +0,0 @@
-package com.rockymadden.stringmetric
-
-object Transform {
-  import scala.collection.immutable.NumericRange
-
-  /** A function from a value to a value of the same type. */
-  type Transform[A] = (A => A)
-  type StringTransform = Transform[Array[Char]]
-
-  /** Ready-made character-array transforms: keep/drop filters per character class and case folding. */
-  object StringTransform {
-    // NumericRange.apply excludes its end bound, which would leave out 'z', 'Z', '9',
-    // 0x7F and 0x24F; `inclusive` keeps the last code point of every class.
-    private final val Ascii = NumericRange.inclusive(0x00, 0x7F, 1)
-    // Extended ASCII covers the full 8-bit range, not just the 7-bit one.
-    private final val ExtendedAscii = NumericRange.inclusive(0x00, 0xFF, 1)
-    private final val Latin = NumericRange.inclusive(0x00, 0x24F, 1)
-    private final val LowerCase = NumericRange.inclusive(0x61, 0x7A, 1)
-    private final val Numbers = NumericRange.inclusive(0x30, 0x39, 1)
-    private final val UpperCase = NumericRange.inclusive(0x41, 0x5A, 1)
-
-    // Character-class predicates shared by each keep/drop pair.
-    private final val isLower: (Char => Boolean) = c => LowerCase.contains(c.toInt)
-    private final val isUpper: (Char => Boolean) = c => UpperCase.contains(c.toInt)
-    private final val isNumeric: (Char => Boolean) = c => Numbers.contains(c.toInt)
-    private final val isAlpha: (Char => Boolean) = c => isLower(c) || isUpper(c)
-    private final val isAlphaNumeric: (Char => Boolean) = c => isAlpha(c) || isNumeric(c)
-    private final val isAscii: (Char => Boolean) = c => Ascii.contains(c.toInt)
-    private final val isExtendedAscii: (Char => Boolean) = c => ExtendedAscii.contains(c.toInt)
-    private final val isLatin: (Char => Boolean) = c => Latin.contains(c.toInt)
-
-    // Filtering works on the array directly, so no String round trip and no implicit
-    // String-to-Array conversion is needed.
-    /** Transform that keeps only the characters satisfying `p`. */
-    private def keep(p: Char => Boolean): StringTransform = (ca: Array[Char]) => ca.filter(p)
-
-    /** Transform that removes the characters satisfying `p`. */
-    private def drop(p: Char => Boolean): StringTransform = (ca: Array[Char]) => ca.filterNot(p)
-
-    val filterAlpha: StringTransform = keep(isAlpha)
-
-    val filterNotAlpha: StringTransform = drop(isAlpha)
-
-    val filterAlphaNumeric: StringTransform = keep(isAlphaNumeric)
-
-    val filterNotAlphaNumeric: StringTransform = drop(isAlphaNumeric)
-
-    val filterAscii: StringTransform = keep(isAscii)
-
-    val filterNotAscii: StringTransform = drop(isAscii)
-
-    val filterExtendedAscii: StringTransform = keep(isExtendedAscii)
-
-    val filterNotExtendedAscii: StringTransform = drop(isExtendedAscii)
-
-    val filterLatin: StringTransform = keep(isLatin)
-
-    val filterNotLatin: StringTransform = drop(isLatin)
-
-    val filterLowerCase: StringTransform = keep(isLower)
-
-    val filterNotLowerCase: StringTransform = drop(isLower)
-
-    val filterNumeric: StringTransform = keep(isNumeric)
-
-    val filterNotNumeric: StringTransform = drop(isNumeric)
-
-    val filterUpperCase: StringTransform = keep(isUpper)
-
-    val filterNotUpperCase: StringTransform = drop(isUpper)
-
-    /** Maps 'A'..'Z' (65..90) to 'a'..'z' and leaves every other character untouched. */
-    val ignoreAlphaCase: StringTransform = (ca) => ca.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c)
-  }
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/package.scala b/core/source/main/scala/com/rockymadden/stringmetric/package.scala
deleted file mode 100755
index e5bc19d..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/package.scala
+++ /dev/null
@@ -1,17 +0,0 @@
-package com.rockymadden
-
-package object stringmetric {
-  import scala.language.implicitConversions
-  import Algorithm._
-  import Metric._
-
-  // Both aliases describe a pair of arrays with the same element type.
-  type CompareTuple[T] = (Array[T], Array[T])
-  type MatchTuple[T] = (Array[T], Array[T])
-
-  /** Lets a `String` be passed wherever an `Array[Char]` is expected. */
-  implicit def stringToCharArray(s: String): Array[Char] = s.toCharArray
-
-  /** Wraps a string algorithm in its decorator, which exposes `withTransform`. */
-  implicit def stringAlgorithmToDecoratedStringAlgorithm(sa: StringAlgorithm): StringAlgorithmDecorator =
-    new StringAlgorithmDecorator(sa)
-
-  /** Wraps a string metric in its decorator, which exposes `withTransform`. */
-  implicit def stringMetricToDecoratedStringMetric[A](sa: StringMetric[A]): StringMetricDecorator[A] =
-    new StringMetricDecorator[A](sa)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
deleted file mode 100755
index 3abe7cc..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
+++ /dev/null
@@ -1,105 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Algorithm.StringAlgorithm
-
-/**
- * Metaphone phonetic encoder.
- *
- * The input is lower-cased, special leading letter pairs are rewritten (transcodeHead),
- * adjacent duplicate letters other than 'c' are collapsed (deduplicate), and the result
- * is transcoded one character at a time (transcode).
- *
- * compute returns None when the input is empty, when its first character is not in
- * Alpha, or when transcoding emits nothing.
- */
-case object MetaphoneAlgorithm extends StringAlgorithm {
- import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
-
- // Only the first character is validated; later non-letters fall through to the
- // `case _` rule in transcode and emit nothing.
- override def compute(a: Array[Char]): Option[Array[Char]] =
- if (a.length == 0 || !(Alpha isSuperset a.head)) None
- else {
- val th = (transcodeHead andThen deduplicate)(a.map(_.toLower))
- val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char])
-
- if (t.length == 0) None else Some(t) // Single Y or W would have 0 length.
- }
-
- // String variant: delegates to the char-array overload and joins the code back up.
- override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString)
-
- // Collapses runs of adjacent equal characters, except that a 'c' is always kept.
- // Works on pairs (sliding(2)), keeping the first of each pair, then re-appends the last char.
- private val deduplicate: (Array[Char] => Array[Char]) = (ca) =>
- if (ca.length <= 1) ca
- else ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(_(0)).toArray[Char] :+ ca.last
-
- // Main transcoding loop. Arguments are (l, c, r, o):
- // l = characters already consumed, c = current character ('\0' once the input is
- // exhausted), r = characters still to come, o = output produced so far.
- // NOTE(review): @tailrec is attached to a val holding a function literal rather than
- // to a def; confirm the compiler actually applies tail-call elimination here.
- @annotation.tailrec
- private val transcode: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) =>
- if (c == '\0' && r.length == 0) o
- else {
- // Advances the cursor by d characters: c plus the first d - 1 characters of r move
- // into l, the next character (or '\0' when none is left) becomes current, and ca
- // becomes the new output.
- def shift(d: Int, ca: Array[Char]) = {
- val sca = r.splitAt(d - 1)
-
- (
- if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
- if (sca._2.length > 0) sca._2.head else '\0',
- if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
- ca
- )
- }
-
- val t = {
- (c: @annotation.switch) match {
- // Vowels are emitted only when nothing has been consumed yet.
- case 'a' | 'e' | 'i' | 'o' | 'u' => if (l.length == 0) shift(1, o:+ c) else shift(1, o)
- case 'f' | 'j' | 'l' | 'm' | 'n' | 'r' => shift(1, o :+ c)
- // A final 'b' directly after 'm' is dropped.
- case 'b' => if (l.length >= 1 && l.last == 'm' && r.length == 0) shift(1, o) else shift(1, o :+ 'b')
- case 'c' =>
- // "sch" -> 'k'; "cia" -> 'x'; "ch" -> 'x'; "sci"/"sce"/"scy" -> nothing;
- // "ci"/"ce"/"cy" -> 's'; any other 'c' -> 'k'.
- if (r.length >= 1 && r.head == 'h' && l.length >= 1 && l.last == 's') shift(1, o :+ 'k')
- else if (r.length >= 2 && r.head == 'i' && r(1) == 'a') shift(3, o :+ 'x')
- else if ((r.length >= 1 && r.head == 'h')
- // NOTE(review): this disjunct also requires r.head == 'h', so it is already
- // covered by the one above; it looks redundant.
- || (l.length >= 1 && r.length >= 1 && l.last == 's' && r.head == 'h')) shift(2, o :+ 'x')
- else if (l.length >= 1 && r.length >= 1 && l.last == 's'
- && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o)
- else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o :+ 's')
- else shift(1, o :+ 'k')
- case 'd' =>
- // "dge"/"dgy"/"dgi" -> 'j'; any other 'd' -> 't'.
- if (r.length >= 2 && r.head == 'g'
- && (r(1) == 'e' || r(1) == 'y' || r(1) == 'i')) shift(1, o :+ 'j')
- else shift(1, o :+ 't')
- case 'g' =>
- // Silent in "gh" followed by more input, in a final "gn" and in a final "gned";
- // before 'i'/'e'/'y' it becomes 'j' (consuming both characters); otherwise 'k'.
- if ((r.length > 1 && r.head == 'h')
- || (r.length == 1 && r.head == 'n')
- || (r.length == 3 && r.head == 'n' && r(1) == 'e' && r(2) == 'd')) shift(1, o)
- else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(2, o :+ 'j')
- else shift(1, o :+ 'k')
- case 'h' =>
- // Dropped after a vowel unless another vowel follows, and dropped when the two
- // preceding characters are 'c'/'s'/'p'/'t'/'g' followed by 'h'.
- if ((l.length >= 1 && (LowercaseVowel isSuperset l.last) && (r.length == 0 || !(LowercaseVowel isSuperset r.head)))
- || (l.length >= 2 && l.last == 'h'
- && (l(l.length - 2) == 'c' || l(l.length - 2) == 's' || l(l.length - 2) == 'p'
- || l(l.length - 2) == 't' || l(l.length - 2) == 'g'))) shift(1, o)
- else shift(1, o :+ 'h')
- // 'k' directly after 'c' is dropped.
- case 'k' => if (l.length >= 1 && l.last == 'c') shift(1, o) else shift(1, o :+ 'k')
- case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ 'p')
- case 'q' => shift(1, o :+ 'k')
- case 's' =>
- // "sio"/"sia" -> 'x'; "sh" -> 'x'; any other 's' -> 's'.
- if (r.length >= 2 && r.head == 'i' && (r(1) == 'o' || r(1) == 'a')) shift(3, o :+ 'x')
- else if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'x')
- else shift(1, o :+ 's')
- case 't' =>
- // "tia"/"tio" -> 'x'; "th" is emitted as the character '0'; 't' before "ch" is dropped.
- if (r.length >= 2 && r.head == 'i' && (r(1) == 'a' || r(1) == 'o')) shift(3, o :+ 'x')
- else if (r.length >= 1 && r.head == 'h') shift(2, o :+ '0')
- else if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(1, o)
- else shift(1, o :+ 't')
- case 'v' => shift(1, o :+ 'f')
- // 'w' and 'y' are kept only when a vowel follows.
- case 'w' | 'y' => if (r.length == 0 || !(LowercaseVowel isSuperset r.head)) shift(1, o) else shift(1, o :+ c)
- case 'x' => shift(1, (o :+ 'k') :+ 's')
- case 'z' => shift(1, o :+ 's')
- // Anything else (including the '\0' sentinel) is skipped without output.
- case _ => shift(1, o)
- }
- }
-
- transcode(t._1, t._2, t._3, t._4)
- }
-
- // Rewrites the start of the word before transcoding: a lone "x" becomes "s";
- // "ae", "gn", "kn", "pn" and "wr" lose their first letter; "wh" becomes "w";
- // a leading 'x' becomes 's'.
- private val transcodeHead: (Array[Char] => Array[Char]) = (ca) =>
- (ca.length: @annotation.switch) match {
- case 0 => ca
- case 1 => if (ca.head == 'x') Array('s') else ca
- case _ =>
- (ca.head: @annotation.switch) match {
- case 'a' if ca(1) == 'e' => ca.tail
- case 'g' | 'k' | 'p' if ca(1) == 'n' => ca.tail
- case 'w' if ca(1) == 'r' => ca.tail
- case 'w' if ca(1) == 'h' => 'w' +: ca.drop(2)
- case 'x' => 's' +: ca.tail
- case _ => ca
- }
- }
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
deleted file mode 100755
index d06f774..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-/**
- * Compares two inputs by their Metaphone codes.
- *
- * Yields None when either input is empty, starts with a character outside Alpha,
- * or produces no non-empty code; otherwise Some(true) when both codes are equal.
- */
-case object MetaphoneMetric extends StringMetric[Boolean] {
-  import com.rockymadden.stringmetric.Alphabet.Alpha
-
-  override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = {
-    def startsWithLetter(ca: Array[Char]): Boolean =
-      ca.length > 0 && (Alpha isSuperset ca.head)
-
-    // Only non-empty codes take part in the comparison.
-    def encode(ca: Array[Char]): Option[Array[Char]] =
-      MetaphoneAlgorithm.compute(ca).filter(_.length > 0)
-
-    if (startsWithLetter(a) && startsWithLetter(b))
-      for {
-        left <- encode(a)
-        right <- encode(b)
-      } yield left.sameElements(right)
-    else None
-  }
-
-  override def compare(a: String, b: String): Option[Boolean] =
-    compare(a.toCharArray, b.toCharArray)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
deleted file mode 100755
index 3e46675..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
+++ /dev/null
@@ -1,115 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Algorithm.StringAlgorithm
-
-/**
- * NYSIIS phonetic encoder.
- *
- * The lower-cased input is split into three parts that are rewritten separately:
- * a suffix (transcodeRight), a prefix (transcodeLeft) and the remaining middle
- * (transcodeCenter). The first character of the combined result is kept as is and
- * the rest is cleaned up (cleanLast, cleanTerminal, deduplicate).
- *
- * compute returns None when the input is empty or its first character is not in Alpha.
- */
-case object NysiisAlgorithm extends StringAlgorithm {
- import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
-
- override def compute(a: Array[Char]): Option[Array[Char]] =
- if (a.length == 0 || !(Alpha isSuperset a.head)) None
- else {
- // tr = (input without a rewritten ending, replacement ending)
- val tr = transcodeRight(a.map(_.toLower))
- // tl = (rewritten prefix, remaining middle part)
- val tl = transcodeLeft(tr._1)
- val t =
- if (tl._2.length == 0) tl._1 ++ tr._2
- else tl._1 ++ transcodeCenter(
- Array.empty[Char],
- tl._2.head,
- if (tl._2.length > 1) tl._2.tail else Array.empty[Char],
- Array.empty[Char]
- ) ++ tr._2
-
- // The first character is never touched by the clean-up steps.
- if (t.length == 1) Some(t)
- else Some(t.head +: (cleanLast andThen cleanTerminal andThen deduplicate)(t.tail))
- }
-
- // String variant: delegates to the char-array overload and joins the code back up.
- override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString)
-
- // Removes the trailing run of 'a' and 's' characters, if there is one.
- private val cleanLast: (Array[Char] => Array[Char]) = (ca) =>
- if (ca.length == 0) ca
- else if(ca.last == 'a' || ca.last == 's')
- ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length)
- else ca
-
- // Replaces a trailing "ay" with "y".
- private val cleanTerminal: (Array[Char] => Array[Char]) = (ca) =>
- if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
- else ca
-
- // Collapses runs of adjacent equal characters into a single character.
- private val deduplicate: (Array[Char] => Array[Char]) = (ca) =>
- if (ca.length <= 1) ca
- else ca.sliding(2).withFilter(a => a(0) != a(1)).map(_(0)).toArray[Char] :+ ca.last
-
- // Transcodes the middle part. Arguments are (l, c, r, o):
- // l = characters already consumed, c = current character ('\0' once the input is
- // exhausted), r = characters still to come, o = output produced so far.
- // NOTE(review): @tailrec is attached to a val holding a function literal rather than
- // to a def; confirm the compiler actually applies tail-call elimination here.
- @annotation.tailrec
- private val transcodeCenter: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) =>
- if (c == '\0' && r.length == 0) o
- else {
- // Advances the cursor by d characters: c plus the first d - 1 characters of r move
- // into l, the next character (or '\0' when none is left) becomes current, and ca
- // becomes the new output.
- def shift(d: Int, ca: Array[Char]) = {
- val sca = r.splitAt(d - 1)
-
- (
- if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
- if (sca._2.length > 0) sca._2.head else '\0',
- if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
- ca
- )
- }
-
- val t = {
- (c: @annotation.switch) match {
- // Every vowel is emitted as 'a' ('e' has its own rule for "ev" below).
- case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a')
- case 'b' | 'c' | 'd' | 'f' | 'g' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' | 'y' => shift(1, o :+ c)
- case 'e' =>
- // "ev" -> "af"; any other 'e' -> 'a'.
- if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f'))
- else shift(1, o :+ 'a')
- case 'h' =>
- // Dropped when something precedes it and either the previous character is not a
- // vowel or the next character exists and is not a vowel.
- if (l.length >= 1 && (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head))))
- shift(1, o)
- else shift(1, o :+ c)
- // "kn" -> 'n'; any other 'k' -> 'c'.
- case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c')
- case 'm' => shift(1, o :+ 'n')
- case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c)
- case 'q' => shift(1, o :+ 'g')
- case 's' =>
- // "sch" is consumed as a unit and emitted as a single 's'.
- if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c)
- else shift(1, o :+ c)
- case 'w' =>
- // 'w' directly after a vowel is dropped.
- if (l.length >= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o)
- else shift(1, o :+ c)
- case 'z' => shift(1, o :+ 's')
- // Anything else (including the '\0' sentinel) is skipped without output.
- case _ => shift(1, o)
- }
- }
-
- transcodeCenter(t._1, t._2, t._3, t._4)
- }
-
- // Splits off and rewrites the prefix; returns (rewritten prefix, rest of the input).
- // "kn" -> "nn", other 'k' -> "c", "mac" -> "mc", "ph"/"pf" -> "ff", "sch" -> "ss";
- // otherwise the first character is kept unchanged.
- private val transcodeLeft: (Array[Char] => (Array[Char], Array[Char])) = (ca) =>
- if (ca.length == 0) (Array.empty[Char], ca)
- else {
- // Input without its first two / three characters; lazy so they are only built when used.
- lazy val tr2 = ca.takeRight(ca.length - 2)
- lazy val tr3 = ca.takeRight(ca.length - 3)
-
- (ca.head: @annotation.switch) match {
- case 'k' if ca.length >= 2 && ca(1) == 'n' => (Array('n', 'n'), tr2)
- case 'k' => (Array('c'), ca.tail)
- case 'm' if ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c') => (Array('m', 'c'), tr3)
- case 'p' if ca.length >= 2 && (ca(1) == 'h' || ca(1) == 'f') => (Array('f', 'f'), tr2)
- case 's' if ca.length >= 3 && (ca(1) == 'c' && ca(2) == 'h') => (Array('s', 's'), tr3)
- case _ => (Array(ca.head), ca.tail)
- }
- }
-
- // Splits off and rewrites a two-character ending; returns (input without the ending,
- // replacement). "nd"/"rd" -> "d", "ee"/"ie" -> "y", "dt"/"nt"/"rt" -> "d"; otherwise
- // the input is returned whole with an empty replacement.
- private val transcodeRight: (Array[Char] => (Array[Char], Array[Char])) = (ca) =>
- if (ca.length >= 2) {
- val lc = ca(ca.length - 1)
- val lcm1 = ca(ca.length - 2)
- lazy val t2 = ca.take(ca.length - 2)
-
- (lc: @annotation.switch) match {
- case 'd' if lcm1 == 'n' || lcm1 == 'r' => (t2, Array('d'))
- case 'e' if lcm1 == 'e' || lcm1 == 'i' => (t2, Array('y'))
- case 't' if lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r' => (t2, Array('d'))
- case _ => (ca, Array.empty[Char])
- }
- } else (ca, Array.empty[Char])
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
deleted file mode 100755
index c9a0914..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-/**
- * Compares two inputs by their NYSIIS codes.
- *
- * Yields None when either input is empty or starts with a character outside Alpha.
- * Inputs whose first letters differ (ignoring case and treating 'k' as 'c') are
- * reported as Some(false) without computing any code.
- */
-case object NysiisMetric extends StringMetric[Boolean] {
-  import com.rockymadden.stringmetric.Alphabet.Alpha
-
-  override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = {
-    // Lower-cases a character and folds 'k' into 'c'.
-    def normalize(c: Char): Char = c.toLower match {
-      case 'k' => 'c'
-      case other => other
-    }
-
-    def startsWithLetter(ca: Array[Char]): Boolean =
-      ca.length > 0 && (Alpha isSuperset ca.head)
-
-    // Only non-empty codes take part in the comparison.
-    def encode(ca: Array[Char]): Option[Array[Char]] =
-      NysiisAlgorithm.compute(ca).filter(_.length > 0)
-
-    if (!startsWithLetter(a) || !startsWithLetter(b)) None
-    else if (normalize(a.head) != normalize(b.head)) Some(false)
-    else
-      for {
-        left <- encode(a)
-        right <- encode(b)
-      } yield left.sameElements(right)
-  }
-
-  override def compare(a: String, b: String): Option[Boolean] =
-    compare(a.toCharArray, b.toCharArray)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
deleted file mode 100755
index 9976847..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
+++ /dev/null
@@ -1,121 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Algorithm.StringAlgorithm
-
-/**
- * Refined NYSIIS phonetic encoder.
- *
- * The lower-cased input first loses trailing 's'/'z' characters (the first character
- * is exempt), then has its start and ending rewritten (transcodeHead, transcodeLast),
- * and is finally transcoded one character at a time (transcode). The result is cleaned
- * up with cleanLast, cleanTerminal and deduplicate.
- *
- * compute returns None when the input is empty or its first character is not in Alpha.
- */
-case object RefinedNysiisAlgorithm extends StringAlgorithm {
- import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
-
- override def compute(a: Array[Char]): Option[Array[Char]] =
- if (a.length == 0 || !(Alpha isSuperset a.head)) None
- else {
- val lca = a.map(_.toLower)
- // Strip trailing 's'/'z' from everything after the first character, then rewrite head and ending.
- val tlh = (transcodeHead andThen transcodeLast)(lca.head +: cleanLast(lca.tail, Set('s', 'z')))
- val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char])
-
- // The first character is exempt from the trailing-'a' and "ay" clean-up, but
- // duplicates are collapsed across the whole result.
- if (t.length == 1) Some(t)
- else Some(deduplicate(
- t.head +: (cleanLast.tupled andThen cleanTerminal)(t.tail, Set('a'))
- ))
- }
-
- // String variant: delegates to the char-array overload and joins the code back up.
- override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString)
-
- // Removes the trailing run of characters that belong to the given set, if there is one.
- private val cleanLast: ((Array[Char], Set[Char]) => Array[Char]) = (ca, s) =>
- if (ca.length == 0) ca
- else if(s.contains(ca.last)) ca.dropRight(ca.reverseIterator.takeWhile(c => s.contains(c)).length)
- else ca
-
- // Replaces a trailing "ay" with "y".
- private val cleanTerminal: (Array[Char] => Array[Char]) = (ca) =>
- if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
- else ca
-
- // Collapses runs of adjacent equal characters into a single character.
- private val deduplicate: (Array[Char] => Array[Char]) = (ca) =>
- if (ca.length <= 1) ca
- else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
-
- // Main transcoding loop. Arguments are (l, c, r, o):
- // l = characters already consumed, c = current character ('\0' once the input is
- // exhausted), r = characters still to come, o = output produced so far.
- // Several rules test l.length == 0 to leave the first character of the word unchanged.
- // NOTE(review): @tailrec is attached to a val holding a function literal rather than
- // to a def; confirm the compiler actually applies tail-call elimination here.
- @annotation.tailrec
- private val transcode: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) =>
- if (c == '\0' && r.length == 0) o
- else {
- // Advances the cursor by d characters: c plus the first d - 1 characters of r move
- // into l, the next character (or '\0' when none is left) becomes current, and ca
- // becomes the new output.
- def shift(d: Int, ca: Array[Char]) = {
- val sca = r.splitAt(d - 1)
-
- (
- if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
- if (sca._2.length > 0) sca._2.head else '\0',
- if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
- ca
- )
- }
-
- val t = {
- (c: @annotation.switch) match {
- // A leading vowel is kept; any later vowel is emitted as 'a'.
- case 'a' | 'i' | 'o' | 'u' =>
- if (l.length == 0) shift(1, o :+ c)
- else shift(1, o :+ 'a')
- case 'b' | 'c' | 'f' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' => shift(1, o :+ c)
- case 'd' =>
- // "dg" -> 'g'.
- if (r.length >= 1 && r.head == 'g') shift(2, o :+ 'g') else shift(1, o :+ c)
- case 'e' =>
- // Leading 'e' is kept; "ev" -> "af"; any other 'e' -> 'a'.
- if (l.length == 0) shift(1, o :+ c)
- else if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f'))
- else shift(1, o :+ 'a')
- case 'g' =>
- // "ght" -> "gt".
- if (r.length >= 2 && r.head == 'h' && r(1) == 't') shift(3, o ++ Array('g', 't'))
- else shift(1, o :+ c)
- case 'h' =>
- // Leading 'h' is kept; otherwise it is dropped when the previous character is
- // not a vowel or the next character exists and is not a vowel.
- if (l.length == 0) shift(1, o :+ c)
- else if (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head)))
- shift(1, o)
- else shift(1, o :+ c)
- // "kn" -> 'n'; any other 'k' -> 'c'.
- case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c')
- case 'm' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'n')
- case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c)
- case 'q' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'g')
- case 's' =>
- // "sch" and "sh" are consumed as a unit and emitted as a single 's'.
- if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c)
- else if (r.length >= 1 && r.head == 'h') shift(2, o :+ c)
- else shift(1, o :+ c)
- case 'w' =>
- // 'w' directly after a vowel is dropped; "wr" -> 'r'.
- if (l.length >= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o)
- else if (r.length >= 1 && r.head == 'r') shift(2, o :+ 'r')
- else shift(1, o :+ c)
- case 'y' =>
- // Non-leading "yw" with more input after it -> 'a'; other "yw" -> 'y';
- // a 'y' in the middle of the word -> 'a'; otherwise 'y' is kept.
- if (l.length >= 1 && r.length >= 2 && r.head == 'w') shift(2, o :+ 'a')
- else if (r.length >= 1 && r.head == 'w') shift(2, o :+ c)
- else if (l.length >= 1 && r.length >= 1) shift(1, o :+ 'a')
- else shift(1, o :+ c)
- case 'z' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 's')
- // Anything else (including the '\0' sentinel) is skipped without output.
- case _ => shift(1, o)
- }
- }
-
- transcode(t._1, t._2, t._3, t._4)
- }
-
- // Rewrites the start of the word: "mac" -> "mc", "pf" -> "f"; otherwise unchanged.
- private val transcodeHead: (Array[Char] => Array[Char]) = (ca) =>
- if (ca.length == 0) ca
- else
- (ca.head: @annotation.switch) match {
- case 'm' if ca.length >= 3 && ca(1) == 'a' && ca(2) == 'c' =>
- Array('m', 'c') ++ ca.takeRight(ca.length - 3)
- case 'p' if ca.length >= 2 && ca(1) == 'f' =>'f' +: ca.takeRight(ca.length - 2)
- case _ => ca
- }
-
- // Rewrites a two-character ending: "nd"/"rd" -> "d", "ee"/"ie"/"ye" -> "y",
- // "dt"/"nt"/"rt" -> "d", "ex" -> "ec", "ix" -> "ic"; otherwise unchanged.
- private val transcodeLast: (Array[Char] => Array[Char]) = (ca) =>
- if (ca.length >= 2) {
- val lc = ca(ca.length - 1)
- val lcm1 = ca(ca.length - 2)
- lazy val t2 = ca.take(ca.length - 2)
-
- (lc: @annotation.switch) match {
- case 'd' if lcm1 == 'n' || lcm1 == 'r' => t2 :+ 'd'
- case 'e' if lcm1 == 'e' || lcm1 == 'i' || lcm1 =='y' => t2 :+ 'y'
- case 't' if lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r' => t2 :+ 'd'
- case 'x' if lcm1 == 'e' => t2 ++ Array('e', 'c')
- case 'x' if lcm1 == 'i' => t2 ++ Array('i', 'c')
- case _ => ca
- }
- } else ca
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
deleted file mode 100755
index 488f261..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-case object RefinedNysiisMetric extends StringMetric[Boolean] {
- import com.rockymadden.stringmetric.Alphabet.Alpha
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = {
- val unequal = (c1: Char, c2: Char) => {
- val lc1 = c1.toLower
- val lc2 = c2.toLower
-
- (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2)
- }
-
- if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
- else if (unequal(a.head, b.head)) Some(false)
- else RefinedNysiisAlgorithm.compute(a).filter(_.length > 0).flatMap { rny1 =>
- RefinedNysiisAlgorithm.compute(b).filter(_.length > 0).map(rny1.sameElements(_))
- }
- }
-
- override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
deleted file mode 100755
index e8f3af6..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
+++ /dev/null
@@ -1,59 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Algorithm.StringAlgorithm
-
-case object RefinedSoundexAlgorithm extends StringAlgorithm {
- import com.rockymadden.stringmetric.Alphabet.Alpha
-
- override def compute(a: Array[Char]): Option[Array[Char]] =
- if (a.length == 0 || !(Alpha isSuperset a.head)) None
- else Some(transcode(a, Array(a.head.toLower)))
-
- override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString)
-
- @annotation.tailrec
- private val transcode: ((Array[Char], Array[Char]) => Array[Char]) = (i, o) =>
- if (i.length == 0) o
- else {
- val c = i.head.toLower
- val m2 = (mc: Char) => (mc: @annotation.switch) match {
- case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' => '0'
- case 'b' | 'p' => '1'
- case 'f' | 'v' => '2'
- case 'c' | 'k' | 's' => '3'
- case 'g' | 'j' => '4'
- case 'q' | 'x' | 'z' => '5'
- case 'd' | 't' => '6'
- case 'l' => '7'
- case 'm' | 'n' => '8'
- case 'r' => '9'
- case _ => '\0'
- }
- val m1 = (mc: Char, pc: Char) => (mc: @annotation.switch) match {
- case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' if pc != '0' => '0'
- case 'b' | 'p' if pc != '1' => '1'
- case 'f' | 'v' if pc != '2' => '2'
- case 'c' | 'k' | 's' if pc != '3' => '3'
- case 'g' | 'j' if pc != '4' => '4'
- case 'q' | 'x' | 'z' if pc != '5' => '5'
- case 'd' | 't' if pc != '6' => '6'
- case 'l' if pc != '7' => '7'
- case 'm' | 'n' if pc != '8' => '8'
- case 'r' if pc != '9' => '9'
- case _ => '\0'
- }
- val a =
- // Code twice.
- if (o.length == 1) m2(c)
- // Code once.
- else m1(
- c,
- (o.last: @annotation.switch) match {
- case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => o.last
- case _ => m2(o.last)
- }
- )
-
- transcode(i.tail, if (a != '\0') o :+ a else o)
- }
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
deleted file mode 100755
index 289fe29..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
+++ /dev/null
@@ -1,16 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-case object RefinedSoundexMetric extends StringMetric[Boolean] {
- import com.rockymadden.stringmetric.Alphabet.Alpha
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] =
- if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
- else if (a.head.toLower != b.head.toLower) Some(false)
- else RefinedSoundexAlgorithm.compute(a).filter(_.length > 0).flatMap { rse1 =>
- RefinedSoundexAlgorithm.compute(b).filter(_.length > 0).map(rse1.sameElements(_))
- }
-
- override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
deleted file mode 100755
index b211908..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
+++ /dev/null
@@ -1,57 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Algorithm.StringAlgorithm
-
-case object SoundexAlgorithm extends StringAlgorithm {
- import com.rockymadden.stringmetric.Alphabet.Alpha
-
- override def compute(a: Array[Char]): Option[Array[Char]] =
- if (a.length == 0 || !(Alpha isSuperset a.head)) None
- else {
- val fc = a.head.toLower
-
- Some(transcode(a.tail, fc, Array(fc)).padTo(4, '0'))
- }
-
- override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString)
-
- @annotation.tailrec
- private val transcode: ((Array[Char], Char, Array[Char]) => Array[Char]) = (i, pc, o) =>
- if (i.length == 0) o
- else {
- val c = i.head.toLower
- val m2 = (mc: Char) => (mc: @annotation.switch) match {
- case 'b' | 'f' | 'p' | 'v' => '1'
- case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2'
- case 'd' | 't' => '3'
- case 'l' => '4'
- case 'm' | 'n' => '5'
- case 'r' => '6'
- case _ => '\0'
- }
- val m1 = (mc: Char, pc: Char) => (mc: @annotation.switch) match {
- case 'b' | 'f' | 'p' | 'v' if pc != '1' => '1'
- case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' if pc != '2' => '2'
- case 'd' | 't' if pc != '3' => '3'
- case 'l' if pc != '4' => '4'
- case 'm' | 'n' if pc != '5' => '5'
- case 'r' if pc != '6' => '6'
- case _ => '\0'
- }
- val a = pc match {
- // Code twice.
- case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c)
- // Code once.
- case _ => m1(
- c,
- (o.last: @annotation.switch) match {
- case '1' | '2' | '3' | '4' | '5' | '6' => o.last
- case _ => m2(o.last)
- }
- )
- }
-
- if (o.length == 3 && a != '\0') o :+ a
- else transcode(i.tail, c, if (a != '\0') o :+ a else o)
- }
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
deleted file mode 100755
index eca32db..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
+++ /dev/null
@@ -1,16 +0,0 @@
-package com.rockymadden.stringmetric.phonetic
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-case object SoundexMetric extends StringMetric[Boolean] {
- import com.rockymadden.stringmetric.Alphabet.Alpha
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] =
- if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
- else if (a.head.toLower != b.head.toLower) Some(false)
- else SoundexAlgorithm.compute(a).filter(_.length > 0).flatMap { se1 =>
- SoundexAlgorithm.compute(b).filter(_.length > 0).map(se1.sameElements(_))
- }
-
- final override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
deleted file mode 100755
index 0ad3915..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-/**
- * An implementation of the Dice/Sorensen metric. This implementation differs in that n-gram size is required.
- * Traditionally, the algorithm uses bigrams.
- */
-final case class DiceSorensenMetric(n: Int) extends StringMetric[Double] {
- import com.rockymadden.stringmetric.Tokenize.NGramTokenizer
- import com.rockymadden.stringmetric.MatchTuple
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
- if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
- else if (a.sameElements(b)) Some(1d)
- else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
- NGramTokenizer(n).tokenize(b).map { ca2bg =>
- val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString))
-
- (2d * ms) / (ca1bg.length + ca2bg.length)
- }
- }
-
- override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
-
- private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
deleted file mode 100755
index 4a90f32..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
+++ /dev/null
@@ -1,18 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-case object HammingMetric extends StringMetric[Int] {
- import com.rockymadden.stringmetric.CompareTuple
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Int] =
- if (a.length == 0 || b.length == 0 || a.length != b.length) None
- else if (a.sameElements(b)) Some(0)
- else Some(hamming(a, b))
-
- override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray)
-
- private val hamming: (CompareTuple[Char] => Int) = (ct) =>
- if (ct._1.length == 0) 0
- else ct._1.zip(ct._2).count(t => t._1 != t._2)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
deleted file mode 100755
index 6ec5db4..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
+++ /dev/null
@@ -1,20 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-final case class JaccardMetric(n: Int) extends StringMetric[Double] {
- import com.rockymadden.stringmetric.Tokenize.NGramTokenizer
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
- if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
- else if (a.sameElements(b)) Some(1d)
- else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
- NGramTokenizer(n).tokenize(b).map { ca2bg =>
- val i = (ca1bg.map(_.mkString) intersect ca2bg.map(_.mkString)).length
-
- i.toDouble / (ca1bg.length + ca2bg.length - i)
- }
- }
-
- override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
deleted file mode 100755
index 575d67a..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
+++ /dev/null
@@ -1,66 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-import scala.Some
-
-/**
- * An implementation of the Jaro metric. One differing detail in this implementation is that if a character is matched
- * in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios.
- */
-case object JaroMetric extends StringMetric[Double] {
- import com.rockymadden.stringmetric.{CompareTuple, MatchTuple}
- import scala.collection.mutable.{ArrayBuffer, HashSet}
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
- if (a.length == 0 || b.length == 0) None
- else if (a.sameElements(b)) Some(1d)
- else {
- val mt = `match`(a, b)
- val ms = scoreMatches(mt._1, mt._2)
-
- if (ms == 0) Some(0d)
- else {
- val ts = scoreTranspositions(mt._1, mt._2)
-
- Some(((ms.toDouble / a.length) + (ms.toDouble / b.length) + ((ms.toDouble - ts) / ms)) / 3)
- }
- }
-
- override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
-
- private val `match`: (CompareTuple[Char] => MatchTuple[Char]) = (ct) => {
- lazy val window = math.abs((math.max(ct._1.length, ct._2.length) / 2d).floor.toInt - 1)
- val one = ArrayBuffer.empty[Int]
- val two = HashSet.empty[Int]
- var i = 0
- var bi = false
-
- while (i < ct._1.length && !bi) {
- val start = if (i - window <= 0) 0 else i - window
- val end = if (i + window >= ct._2.length - 1) ct._2.length - 1 else i + window
-
- if (start > ct._2.length - 1) bi = !bi
- else {
- var ii = start
- var bii = false
-
- while (ii <= end && !bii) {
- if (!two.contains(ii) && ct._1(i) == ct._2(ii)) {
- one += i
- two += ii
- bii = !bii
- } else ii += 1
- }
-
- i += 1
- }
- }
-
- (one.toArray.map(ct._1(_)), two.toArray.sortWith(_ < _).map(ct._2(_)))
- }
-
- private val scoreMatches: (MatchTuple[Char] => Int) = (mt) => mt._1.length
-
- private val scoreTranspositions: (MatchTuple[Char] => Int) = (mt) =>
- (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
deleted file mode 100755
index e83f73f..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-/**
- * An implementation of the Jaro-Winkler metric. One differing detail in this implementation is that if a character is
- * matched in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios
- * (e.g. comparing henka and henkan distance is 0.9666 versus the typical 0.9722).
- */
-case object JaroWinklerMetric extends StringMetric[Double] {
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
- JaroMetric.compare(a, b).map {
- case 0d => 0d
- case 1d => 1d
- case jaro => {
- val prefix = a.zip(b).takeWhile(t => t._1 == t._2)
-
- jaro + ((if (prefix.length <= 4) prefix.length else 4) * 0.1d * (1 - jaro))
- }
- }
-
- override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
deleted file mode 100755
index fb90cdc..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
+++ /dev/null
@@ -1,40 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-case object LevenshteinMetric extends StringMetric[Int] {
- import com.rockymadden.stringmetric.CompareTuple
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Int] =
- if (a.length == 0 || b.length == 0) None
- else if (a.sameElements(b)) Some(0)
- else Some(levenshtein(a, b))
-
- override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray)
-
- private val levenshtein: (CompareTuple[Char] => Int) = (ct) => {
- val m = Array.fill[Int](ct._1.length + 1, ct._2.length + 1)(-1)
-
- def distance(t: (Int, Int)): Int = t match {
- case (r, 0) => r
- case (0, c) => c
- case (r, c) if m(r)(c) != -1 => m(r)(c)
- case (r, c) => {
- val min =
- if (ct._1(r - 1) == ct._2(c - 1)) distance(r - 1, c - 1)
- else math.min(
- math.min(
- distance(r - 1, c) + 1, // Delete (left).
- distance(r, c - 1) + 1 // Insert (up).
- ),
- distance(r - 1, c - 1) + 1 // Substitute (left-up).
- )
-
- m(r)(c) = min
- min
- }
- }
-
- distance(ct._1.length, ct._2.length)
- }
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
deleted file mode 100755
index 8c194ce..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-final case class NGramMetric(n: Int) extends StringMetric[Double] {
- import com.rockymadden.stringmetric.MatchTuple
- import com.rockymadden.stringmetric.Tokenize.NGramTokenizer
- import scala.math
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
- if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
- else if (a.sameElements(b)) Some(1d)
- else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
- NGramTokenizer(n).tokenize(b).map { ca2bg =>
- val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString)))
-
- ms.toDouble / math.max(ca1bg.length, ca2bg.length)
- }
- }
-
- override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
-
- private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
deleted file mode 100755
index 8f0418b..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-final case class OverlapMetric(n: Int) extends StringMetric[Double] {
- import com.rockymadden.stringmetric.MatchTuple
- import com.rockymadden.stringmetric.Tokenize.NGramTokenizer
- import scala.math
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
- if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare.
- else if (a.sameElements(b)) Some(1d)
- else NGramTokenizer(n).tokenize(a).flatMap { ca1bg =>
- NGramTokenizer(n).tokenize(b).map { ca2bg =>
- val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString))
-
- ms.toDouble / math.min(ca1bg.length, ca2bg.length)
- }
- }
-
- override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
-
- private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
deleted file mode 100755
index fa113bc..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
+++ /dev/null
@@ -1,43 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-case object RatcliffObershelpMetric extends StringMetric[Double] {
- import com.rockymadden.stringmetric.CompareTuple
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
- if (a.length == 0 || b.length == 0) None
- else if (a.sameElements(b)) Some(1d)
- else Some(2d * commonSequences(a, b).foldLeft(0)(_ + _.length) / (a.length + b.length))
-
- override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
-
- private def longestCommonSubsequence(ct: CompareTuple[Char]) = {
- val m = Array.ofDim[Int](ct._1.length + 1, ct._2.length + 1)
- var lrc = (0, 0, 0) // Length, row, column.
-
- for (r <- 0 to ct._1.length - 1; c <- 0 to ct._2.length - 1) {
- if (ct._1(r) == ct._2(c)) {
- val l = m(r)(c) + 1
- m(r + 1)(c + 1) = l
- if (l > lrc._1) lrc = (l, r + 1, c + 1)
- }
- }
-
- lrc
- }
-
- private val commonSequences: (CompareTuple[Char] => Array[Array[Char]]) = (ct) => {
- val lcs = longestCommonSubsequence(ct)
-
- if (lcs._1 == 0) Array.empty
- else {
- val sct1 = (ct._1.take(lcs._2 - lcs._1), ct._1.takeRight(ct._1.length - lcs._2))
- val sct2 = (ct._2.take(lcs._3 - lcs._1), ct._2.takeRight(ct._2.length - lcs._3))
-
- Array(ct._1.slice(lcs._2 - lcs._1, lcs._2)) ++
- commonSequences(sct1._1, sct2._1) ++
- commonSequences(sct1._2, sct2._2)
- }
- }
-}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
deleted file mode 100755
index ae6f49c..0000000
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
+++ /dev/null
@@ -1,36 +0,0 @@
-package com.rockymadden.stringmetric.similarity
-
-import com.rockymadden.stringmetric.Metric.StringMetric
-
-final case class WeightedLevenshteinMetric(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal)
- extends StringMetric[Double] {
-
- import com.rockymadden.stringmetric.CompareTuple
-
- override def compare(a: Array[Char], b: Array[Char]): Option[Double] =
- if (a.length == 0 || b.length == 0) None
- else if (a.sameElements(b)) Some(0d)
- else Some(weightedLevenshtein((a, b), (delete, insert, substitute)).toDouble)
-
- override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
-
- private val weightedLevenshtein: ((CompareTuple[Char], (BigDecimal, BigDecimal, BigDecimal)) => BigDecimal) =
- (ct, w) => {
- val m = Array.ofDim[BigDecimal](ct._1.length + 1, ct._2.length + 1)
-
- for (r <- 0 to ct._1.length) m(r)(0) = w._1 * r
- for (c <- 0 to ct._2.length) m(0)(c) = w._2 * c
-
- for (r <- 1 to ct._1.length; c <- 1 to ct._2.length) {
- m(r)(c) =
- if (ct._1(r - 1) == ct._2(c - 1)) m(r - 1)(c - 1)
- else (m(r - 1)(c) + w._1).min( // Delete (left).
- (m(r)(c - 1) + w._2).min( // Insert (up).
- m(r - 1)(c - 1) + w._3 // Substitute (left-up).
- )
- )
- }
-
- m(ct._1.length)(ct._2.length)
- }
-}