summary refs log tree commit diff
diff options
context:
space:
mode:
author Rocky Madden <git@rockymadden.com> 2013-12-28 13:55:50 -0700
committer Rocky Madden <git@rockymadden.com> 2013-12-28 13:55:50 -0700
commit dad3399c32f066ebbbd18c9271b29a76dc45e3c0 (patch)
tree e2bc921549e2783f60e727a29a6876f6853420fc
parent a43a304694dbf2b1a02854861cbae2569668d9f8 (diff)
download stringmetric-dad3399c32f066ebbbd18c9271b29a76dc45e3c0.tar.gz
stringmetric-dad3399c32f066ebbbd18c9271b29a76dc45e3c0.tar.bz2
stringmetric-dad3399c32f066ebbbd18c9271b29a76dc45e3c0.zip
Favored functional composition.
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala 19
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala 4
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala 39
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala 6
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala 37
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala 4
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala 2
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala 4
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala 2
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala 4
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala 2
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala 2
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala 8
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala 38
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala 2
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala 2
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala 6
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala 33
-rwxr-xr-x core/source/main/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizer.scala 2
19 files changed, 110 insertions, 106 deletions
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
index 126f170..64c16db 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
@@ -8,7 +8,7 @@ case object MetaphoneAlgorithm extends StringAlgorithmLike {
override def compute(a: Array[Char]): Option[Array[Char]] =
if (a.length == 0 || !(Alpha isSuperset a.head)) None
else {
- val th = deduplicate(transcodeHead(a.map(_.toLower)))
+ val th = (transcodeHead andThen deduplicate)(a.map(_.toLower))
val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char])
if (t.length == 0) None else Some(t) // Single Y or W would have 0 length.
@@ -16,12 +16,12 @@ case object MetaphoneAlgorithm extends StringAlgorithmLike {
override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString)
- private def deduplicate(ca: Array[Char]) =
+ private val deduplicate: (Array[Char] => Array[Char]) = (ca) =>
if (ca.length <= 1) ca
- else ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
+ else ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(_(0)).toArray[Char] :+ ca.last
@annotation.tailrec
- private def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ private val transcode: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) => {
if (c == '\0' && r.length == 0) o
else {
def shift(d: Int, ca: Array[Char]) = {
@@ -89,19 +89,18 @@ case object MetaphoneAlgorithm extends StringAlgorithmLike {
}
}
- private def transcodeHead(ca: Array[Char]) = {
+ private val transcodeHead: (Array[Char] => Array[Char]) = (ca) =>
(ca.length: @annotation.switch) match {
case 0 => ca
case 1 => if (ca.head == 'x') Array('s') else ca
case _ =>
(ca.head: @annotation.switch) match {
- case 'a' if (ca(1) == 'e') => ca.tail
- case 'g' | 'k' | 'p' if (ca(1) == 'n') => ca.tail
- case 'w' if (ca(1) == 'r') => ca.tail
- case 'w' if (ca(1) == 'h') => 'w' +: ca.drop(2)
+ case 'a' if ca(1) == 'e' => ca.tail
+ case 'g' | 'k' | 'p' if ca(1) == 'n' => ca.tail
+ case 'w' if ca(1) == 'r' => ca.tail
+ case 'w' if ca(1) == 'h' => 'w' +: ca.drop(2)
case 'x' => 's' +: ca.tail
case _ => ca
}
}
- }
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
index 083016c..7d9cd1d 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
@@ -7,9 +7,9 @@ case object MetaphoneMetric extends StringMetricLike[Boolean] {
override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] =
if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
- else MetaphoneAlgorithm.compute(a).filter(_.length > 0).flatMap(mp1 =>
+ else MetaphoneAlgorithm.compute(a).filter(_.length > 0).flatMap { mp1 =>
MetaphoneAlgorithm.compute(b).filter(_.length > 0).map(mp1.sameElements(_))
- )
+ }
override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
index 43c2bc2..c9245e0 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
@@ -20,26 +20,27 @@ case object NysiisAlgorithm extends StringAlgorithmLike {
) ++ tr._2
if (t.length == 1) Some(t)
- else Some(t.head +: deduplicate(cleanTerminal(cleanLast(t.tail))))
+ else Some(t.head +: (cleanLast andThen cleanTerminal andThen deduplicate)(t.tail))
}
override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString)
- private def cleanLast(ca: Array[Char]) =
+ private val cleanLast: (Array[Char] => Array[Char]) = (ca) =>
if (ca.length == 0) ca
- else if(ca.last == 'a' || ca.last == 's') ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length)
+ else if(ca.last == 'a' || ca.last == 's')
+ ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length)
else ca
- private def cleanTerminal(ca: Array[Char]) =
+ private val cleanTerminal: (Array[Char] => Array[Char]) = (ca) =>
if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
else ca
- private def deduplicate(ca: Array[Char]) =
+ private val deduplicate: (Array[Char] => Array[Char]) = (ca) =>
if (ca.length <= 1) ca
- else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
+ else ca.sliding(2).withFilter(a => a(0) != a(1)).map(_(0)).toArray[Char] :+ ca.last
@annotation.tailrec
- private def transcodeCenter(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ private val transcodeCenter: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) =>
if (c == '\0' && r.length == 0) o
else {
def shift(d: Int, ca: Array[Char]) = {
@@ -61,7 +62,8 @@ case object NysiisAlgorithm extends StringAlgorithmLike {
if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f'))
else shift(1, o :+ 'a')
case 'h' =>
- if (l.length >= 1 && (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head)))) shift(1, o)
+ if (l.length >= 1 && (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head))))
+ shift(1, o)
else shift(1, o :+ c)
case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c')
case 'm' => shift(1, o :+ 'n')
@@ -80,37 +82,34 @@ case object NysiisAlgorithm extends StringAlgorithmLike {
transcodeCenter(t._1, t._2, t._3, t._4)
}
- }
- private def transcodeLeft(ca: Array[Char]) = {
+ private val transcodeLeft: (Array[Char] => (Array[Char], Array[Char])) = (ca) =>
if (ca.length == 0) (Array.empty[Char], ca)
else {
lazy val tr2 = ca.takeRight(ca.length - 2)
lazy val tr3 = ca.takeRight(ca.length - 3)
(ca.head: @annotation.switch) match {
- case 'k' if (ca.length >= 2 && ca(1) == 'n') => (Array('n', 'n'), tr2)
+ case 'k' if ca.length >= 2 && ca(1) == 'n' => (Array('n', 'n'), tr2)
case 'k' => (Array('c'), ca.tail)
- case 'm' if (ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c')) => (Array('m', 'c'), tr3)
- case 'p' if (ca.length >= 2 && (ca(1) == 'h' || ca(1) == 'f')) => (Array('f', 'f'), tr2)
- case 's' if (ca.length >= 3 && (ca(1) == 'c' && ca(2) == 'h')) => (Array('s', 's'), tr3)
+ case 'm' if ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c') => (Array('m', 'c'), tr3)
+ case 'p' if ca.length >= 2 && (ca(1) == 'h' || ca(1) == 'f') => (Array('f', 'f'), tr2)
+ case 's' if ca.length >= 3 && (ca(1) == 'c' && ca(2) == 'h') => (Array('s', 's'), tr3)
case _ => (Array(ca.head), ca.tail)
}
}
- }
- private def transcodeRight(ca: Array[Char]) = {
+ private val transcodeRight: (Array[Char] => (Array[Char], Array[Char])) = (ca) =>
if (ca.length >= 2) {
val lc = ca(ca.length - 1)
val lcm1 = ca(ca.length - 2)
lazy val t2 = ca.take(ca.length - 2)
(lc: @annotation.switch) match {
- case 'd' if (lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d'))
- case 'e' if (lcm1 == 'e' || lcm1 == 'i') => (t2, Array('y'))
- case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d'))
+ case 'd' if lcm1 == 'n' || lcm1 == 'r' => (t2, Array('d'))
+ case 'e' if lcm1 == 'e' || lcm1 == 'i' => (t2, Array('y'))
+ case 't' if lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r' => (t2, Array('d'))
case _ => (ca, Array.empty[Char])
}
} else (ca, Array.empty[Char])
- }
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
index 6316981..84c2073 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
@@ -6,7 +6,7 @@ case object NysiisMetric extends StringMetricLike[Boolean] {
import com.rockymadden.stringmetric.Alphabet.Alpha
override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = {
- val unequal = (c1: Char, c2: Char) => {
+ val unequal: ((Char, Char) => Boolean) = (c1, c2) => {
val lc1 = c1.toLower
val lc2 = c2.toLower
@@ -15,9 +15,9 @@ case object NysiisMetric extends StringMetricLike[Boolean] {
if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
else if (unequal(a.head, b.head)) Some(false)
- else NysiisAlgorithm.compute(a).filter(_.length > 0).flatMap(ny1 =>
+ else NysiisAlgorithm.compute(a).filter(_.length > 0).flatMap { ny1 =>
NysiisAlgorithm.compute(b).filter(_.length > 0).map(ny1.sameElements(_))
- )
+ }
}
override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
index 72bd84e..c35175d 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
@@ -9,30 +9,32 @@ case object RefinedNysiisAlgorithm extends StringAlgorithmLike {
if (a.length == 0 || !(Alpha isSuperset a.head)) None
else {
val lca = a.map(_.toLower)
- val tlh = transcodeLast(transcodeHead(lca.head +: cleanLast(lca.tail, Set('s', 'z'))))
+ val tlh = (transcodeHead andThen transcodeLast)(lca.head +: cleanLast(lca.tail, Set('s', 'z')))
val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char])
if (t.length == 1) Some(t)
- else Some(deduplicate(t.head +: cleanTerminal(cleanLast(t.tail, Set('a')))))
+ else Some(deduplicate(
+ t.head +: (cleanLast.tupled andThen cleanTerminal)(t.tail, Set('a'))
+ ))
}
override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString)
- private def cleanLast(ca: Array[Char], s: Set[Char]) =
+ private val cleanLast: ((Array[Char], Set[Char]) => Array[Char]) = (ca, s) =>
if (ca.length == 0) ca
else if(s.contains(ca.last)) ca.dropRight(ca.reverseIterator.takeWhile(c => s.contains(c)).length)
else ca
- private def cleanTerminal(ca: Array[Char]) =
+ private val cleanTerminal: (Array[Char] => Array[Char]) = (ca) =>
if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
else ca
- private def deduplicate(ca: Array[Char]) =
+ private val deduplicate: (Array[Char] => Array[Char]) = (ca) =>
if (ca.length <= 1) ca
else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
@annotation.tailrec
- private def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ private val transcode: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) =>
if (c == '\0' && r.length == 0) o
else {
def shift(d: Int, ca: Array[Char]) = {
@@ -63,7 +65,8 @@ case object RefinedNysiisAlgorithm extends StringAlgorithmLike {
else shift(1, o :+ c)
case 'h' =>
if (l.length == 0) shift(1, o :+ c)
- else if (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head))) shift(1, o)
+ else if (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head)))
+ shift(1, o)
else shift(1, o :+ c)
case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c')
case 'm' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'n')
@@ -89,29 +92,29 @@ case object RefinedNysiisAlgorithm extends StringAlgorithmLike {
transcode(t._1, t._2, t._3, t._4)
}
- }
- private def transcodeHead(ca: Array[Char]) =
+ private val transcodeHead: (Array[Char] => Array[Char]) = (ca) =>
if (ca.length == 0) ca
else
(ca.head: @annotation.switch) match {
- case 'm' if (ca.length >= 3 && ca(1) == 'a' && ca(2) == 'c') => Array('m', 'c') ++ ca.takeRight(ca.length - 3)
- case 'p' if (ca.length >= 2 && ca(1) == 'f') => 'f' +: ca.takeRight(ca.length - 2)
+ case 'm' if ca.length >= 3 && ca(1) == 'a' && ca(2) == 'c' =>
+ Array('m', 'c') ++ ca.takeRight(ca.length - 3)
+ case 'p' if ca.length >= 2 && ca(1) == 'f' =>'f' +: ca.takeRight(ca.length - 2)
case _ => ca
}
- private def transcodeLast(ca: Array[Char]) =
+ private val transcodeLast: (Array[Char] => Array[Char]) = (ca) =>
if (ca.length >= 2) {
val lc = ca(ca.length - 1)
val lcm1 = ca(ca.length - 2)
lazy val t2 = ca.take(ca.length - 2)
(lc: @annotation.switch) match {
- case 'd' if (lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd'
- case 'e' if (lcm1 == 'e' || lcm1 == 'i' || lcm1 =='y') => t2 :+ 'y'
- case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd'
- case 'x' if (lcm1 == 'e') => t2 ++ Array('e', 'c')
- case 'x' if (lcm1 == 'i') => t2 ++ Array('i', 'c')
+ case 'd' if lcm1 == 'n' || lcm1 == 'r' => t2 :+ 'd'
+ case 'e' if lcm1 == 'e' || lcm1 == 'i' || lcm1 =='y' => t2 :+ 'y'
+ case 't' if lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r' => t2 :+ 'd'
+ case 'x' if lcm1 == 'e' => t2 ++ Array('e', 'c')
+ case 'x' if lcm1 == 'i' => t2 ++ Array('i', 'c')
case _ => ca
}
} else ca
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
index 73795a0..86111e4 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
@@ -15,9 +15,9 @@ case object RefinedNysiisMetric extends StringMetricLike[Boolean] {
if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
else if (unequal(a.head, b.head)) Some(false)
- else RefinedNysiisAlgorithm.compute(a).filter(_.length > 0).flatMap(rny1 =>
+ else RefinedNysiisAlgorithm.compute(a).filter(_.length > 0).flatMap { rny1 =>
RefinedNysiisAlgorithm.compute(b).filter(_.length > 0).map(rny1.sameElements(_))
- )
+ }
}
override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
index 9f7fce9..7fb6dfc 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
@@ -12,7 +12,7 @@ case object RefinedSoundexAlgorithm extends StringAlgorithmLike {
override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString)
@annotation.tailrec
- private def transcode(i: Array[Char], o: Array[Char]): Array[Char] =
+ private val transcode: ((Array[Char], Array[Char]) => Array[Char]) = (i, o) =>
if (i.length == 0) o
else {
val c = i.head.toLower
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
index 5ad0e30..a7d3984 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
@@ -8,9 +8,9 @@ case object RefinedSoundexMetric extends StringMetricLike[Boolean] {
override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] =
if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
else if (a.head.toLower != b.head.toLower) Some(false)
- else RefinedSoundexAlgorithm.compute(a).filter(_.length > 0).flatMap(rse1 =>
+ else RefinedSoundexAlgorithm.compute(a).filter(_.length > 0).flatMap { rse1 =>
RefinedSoundexAlgorithm.compute(b).filter(_.length > 0).map(rse1.sameElements(_))
- )
+ }
override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
index d615144..98898e9 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
@@ -16,7 +16,7 @@ case object SoundexAlgorithm extends StringAlgorithmLike {
override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString)
@annotation.tailrec
- private def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] =
+ private val transcode: ((Array[Char], Char, Array[Char]) => Array[Char]) = (i, pc, o) =>
if (i.length == 0) o
else {
val c = i.head.toLower
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
index 7e0bf5c..29e2606 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
@@ -8,9 +8,9 @@ case object SoundexMetric extends StringMetricLike[Boolean] {
override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] =
if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None
else if (a.head.toLower != b.head.toLower) Some(false)
- else SoundexAlgorithm.compute(a).filter(_.length > 0).flatMap(se1 =>
+ else SoundexAlgorithm.compute(a).filter(_.length > 0).flatMap { se1 =>
SoundexAlgorithm.compute(b).filter(_.length > 0).map(se1.sameElements(_))
- )
+ }
final override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
index 21c9f16..2c16bba 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
@@ -26,5 +26,5 @@ final case class DiceSorensenMetric(private val n: Int) extends StringMetricLike
override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
- private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
+ private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
index 09b62bc..f1527d2 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
@@ -12,7 +12,7 @@ case object HammingMetric extends StringMetricLike[Int] {
override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray)
- private def hamming(ct: CompareTuple[Char]) =
+ private val hamming: (CompareTuple[Char] => Int) = (ct) =>
if (ct._1.length == 0) 0
else ct._1.zip(ct._2).count(t => t._1 != t._2)
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
index b9d1434..c58b864 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
@@ -1,6 +1,7 @@
package com.rockymadden.stringmetric.similarity
import com.rockymadden.stringmetric.Metric.StringMetricLike
+import scala.Some
/**
* An implementation of the Jaro metric. One differing detail in this implementation is that if a character is matched
@@ -27,7 +28,7 @@ case object JaroMetric extends StringMetricLike[Double] {
override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
- private def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = {
+ private val `match`: (CompareTuple[Char] => MatchTuple[Char]) = (ct) => {
lazy val window = math.abs((math.max(ct._1.length, ct._2.length) / 2d).floor.toInt - 1)
val one = ArrayBuffer.empty[Int]
val two = HashSet.empty[Int]
@@ -58,7 +59,8 @@ case object JaroMetric extends StringMetricLike[Double] {
(one.toArray.map(ct._1(_)), two.toArray.sortWith(_ < _).map(ct._2(_)))
}
- private def scoreMatches(mt: MatchTuple[Char]) = mt._1.length
+ private val scoreMatches: (MatchTuple[Char] => Int) = (mt) => mt._1.length
- private def scoreTranspositions(mt: MatchTuple[Char]) = (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt
+ private val scoreTranspositions: (MatchTuple[Char] => Int) = (mt) =>
+ (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
index 9f78aed..e145e1f 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
@@ -12,28 +12,26 @@ case object LevenshteinMetric extends StringMetricLike[Int] {
override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray)
- private def levenshtein(ct: CompareTuple[Char]) = {
+ private val levenshtein: (CompareTuple[Char] => Int) = (ct) => {
val m = Array.fill[Int](ct._1.length + 1, ct._2.length + 1)(-1)
- def distance(t: (Int, Int)): Int = {
- t match {
- case (r, 0) => r
- case (0, c) => c
- case (r, c) if m(r)(c) != -1 => m(r)(c)
- case (r, c) => {
- val min =
- if (ct._1(r - 1) == ct._2(c - 1)) distance(r - 1, c - 1)
- else math.min(
- math.min(
- distance(r - 1, c) + 1, // Delete (left).
- distance(r, c - 1) + 1 // Insert (up).
- ),
- distance(r - 1, c - 1) + 1 // Substitute (left-up).
- )
-
- m(r)(c) = min
- min
- }
+ def distance(t: (Int, Int)): Int = t match {
+ case (r, 0) => r
+ case (0, c) => c
+ case (r, c) if m(r)(c) != -1 => m(r)(c)
+ case (r, c) => {
+ val min =
+ if (ct._1(r - 1) == ct._2(c - 1)) distance(r - 1, c - 1)
+ else math.min(
+ math.min(
+ distance(r - 1, c) + 1, // Delete (left).
+ distance(r, c - 1) + 1 // Insert (up).
+ ),
+ distance(r - 1, c - 1) + 1 // Substitute (left-up).
+ )
+
+ m(r)(c) = min
+ min
}
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
index f6397f1..2b2fc01 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
@@ -23,5 +23,5 @@ final case class NGramMetric(private val n: Int) extends StringMetricLike[Double
override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
- private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
+ private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
index f4c66b7..b6b3bb9 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
@@ -23,5 +23,5 @@ final case class OverlapMetric(private val n: Int) extends StringMetricLike[Doub
override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
- private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
+ private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
index 197e59a..1562f79 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
@@ -27,7 +27,7 @@ case object RatcliffObershelpMetric extends StringMetricLike[Double] {
lrc
}
- private def commonSequences(ct: CompareTuple[Char]): Array[Array[Char]] = {
+ private val commonSequences: (CompareTuple[Char] => Array[Array[Char]]) = (ct) => {
val lcs = longestCommonSubsequence(ct)
if (lcs._1 == 0) Array.empty
@@ -35,7 +35,9 @@ case object RatcliffObershelpMetric extends StringMetricLike[Double] {
val sct1 = (ct._1.take(lcs._2 - lcs._1), ct._1.takeRight(ct._1.length - lcs._2))
val sct2 = (ct._2.take(lcs._3 - lcs._1), ct._2.takeRight(ct._2.length - lcs._3))
- Array(ct._1.slice(lcs._2 - lcs._1, lcs._2)) ++ commonSequences(sct1._1, sct2._1) ++ commonSequences(sct1._2, sct2._2)
+ Array(ct._1.slice(lcs._2 - lcs._1, lcs._2)) ++
+ commonSequences(sct1._1, sct2._1) ++
+ commonSequences(sct1._2, sct2._2)
}
}
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
index 2564eb7..74fb320 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
@@ -14,22 +14,23 @@ final case class WeightedLevenshteinMetric(delete: BigDecimal, insert: BigDecima
override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray)
- private[this] def weightedLevenshtein(ct: CompareTuple[Char], w: (BigDecimal, BigDecimal, BigDecimal)) = {
- val m = Array.ofDim[BigDecimal](ct._1.length + 1, ct._2.length + 1)
-
- for (r <- 0 to ct._1.length) m(r)(0) = w._1 * r
- for (c <- 0 to ct._2.length) m(0)(c) = w._2 * c
-
- for (r <- 1 to ct._1.length; c <- 1 to ct._2.length) {
- m(r)(c) =
- if (ct._1(r - 1) == ct._2(c - 1)) m(r - 1)(c - 1)
- else (m(r - 1)(c) + w._1).min( // Delete (left).
- (m(r)(c - 1) + w._2).min( // Insert (up).
- m(r - 1)(c - 1) + w._3 // Substitute (left-up).
+ private val weightedLevenshtein: ((CompareTuple[Char], (BigDecimal, BigDecimal, BigDecimal)) => BigDecimal) =
+ (ct, w) => {
+ val m = Array.ofDim[BigDecimal](ct._1.length + 1, ct._2.length + 1)
+
+ for (r <- 0 to ct._1.length) m(r)(0) = w._1 * r
+ for (c <- 0 to ct._2.length) m(0)(c) = w._2 * c
+
+ for (r <- 1 to ct._1.length; c <- 1 to ct._2.length) {
+ m(r)(c) =
+ if (ct._1(r - 1) == ct._2(c - 1)) m(r - 1)(c - 1)
+ else (m(r - 1)(c) + w._1).min( // Delete (left).
+ (m(r)(c - 1) + w._2).min( // Insert (up).
+ m(r - 1)(c - 1) + w._3 // Substitute (left-up).
+ )
)
- )
- }
+ }
- m(ct._1.length)(ct._2.length)
- }
+ m(ct._1.length)(ct._2.length)
+ }
}
diff --git a/core/source/main/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizer.scala b/core/source/main/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizer.scala
index 09d2a42..e2a2731 100755
--- a/core/source/main/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizer.scala
+++ b/core/source/main/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizer.scala
@@ -13,7 +13,7 @@ final case class NGramTokenizer(private val n: Int) extends StringTokenizerLike
override def tokenize(a: String): Option[Array[String]] = tokenize(a.toCharArray).map(_.map(_.mkString))
@annotation.tailrec
- private[this] def sequence(i: Array[Char], o: Array[Array[Char]], n: Int): Array[Array[Char]] =
+ private val sequence: ((Array[Char], Array[Array[Char]], Int) => Array[Array[Char]]) = (i, o, n) =>
if (i.length <= n) o :+ i
else sequence(i.tail, o :+ i.take(n), n)
}