summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala 14
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala 10
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala 27
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala 19
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala 35
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala 19
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala 6
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala 12
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala 12
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala 12
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala 12
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala 10
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala 12
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetric.scala 8
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/similarity/LevenshteinMetric.scala 12
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/similarity/NGramAlgorithm.scala 6
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala 12
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetric.scala 16
18 files changed, 129 insertions, 125 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala
index 944025c..685a952 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala
@@ -9,11 +9,11 @@ object MetaphoneAlgorithm extends StringAlgorithm with FilterableStringAlgorithm
type ComputeReturn = String
override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = {
- val ca = stringFilter.filter(charArray)
+ val fca = stringFilter.filter(charArray)
- if (ca.length == 0 || !Alphabet.is(ca.head)) None
+ if (fca.length == 0 || !Alphabet.is(fca.head)) None
else {
- val th = deduplicate(transcodeHead(ca.map(_.toLower)))
+ val th = deduplicate(transcodeHead(fca.map(_.toLower)))
val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char])
if (t.length == 0) None else Some(t) // Single Y or W would have 0 length.
@@ -32,12 +32,12 @@ object MetaphoneAlgorithm extends StringAlgorithm with FilterableStringAlgorithm
if (c == '\0' && r.length == 0) o
else {
val shift = (d: Int, ca: Array[Char]) => {
- val sa = r.splitAt(d - 1)
+ val sca = r.splitAt(d - 1)
(
- if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c,
- if (sa._2.length > 0) sa._2.head else '\0',
- if (sa._2.length > 1) sa._2.tail else Array.empty[Char],
+ if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
+ if (sca._2.length > 0) sca._2.head else '\0',
+ if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
ca
)
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala
index efd65b8..3afaee9 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala
@@ -10,12 +10,12 @@ object MetaphoneMetric extends StringMetric with FilterableStringMetric {
override def compare(charArray1: Array[Char], charArray2: Array[Char])
(implicit stringFilter: StringFilter): Option[CompareReturn] = {
- val ca1 = stringFilter.filter(charArray1)
- lazy val ca2 = stringFilter.filter(charArray2)
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
- if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
- else MetaphoneAlgorithm.compute(ca1).filter(_.length > 0).flatMap(mp1 =>
- MetaphoneAlgorithm.compute(ca2).filter(_.length > 0).map(mp1.sameElements(_))
+ if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None
+ else MetaphoneAlgorithm.compute(fca1).filter(_.length > 0).flatMap(mp1 =>
+ MetaphoneAlgorithm.compute(fca2).filter(_.length > 0).map(mp1.sameElements(_))
)
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala
index 33abe1d..e2debd0 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala
@@ -9,11 +9,11 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
type ComputeReturn = String
override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = {
- val ca = stringFilter.filter(charArray)
+ val fca = stringFilter.filter(charArray)
- if (ca.length == 0 || !Alphabet.is(ca.head)) None
+ if (fca.length == 0 || !Alphabet.is(fca.head)) None
else {
- val tr = transcodeRight(ca.map(_.toLower))
+ val tr = transcodeRight(fca.map(_.toLower))
val tl = transcodeLeft(tr._1)
val t =
if (tl._2.length == 0) tl._1 ++ tr._2
@@ -46,15 +46,16 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
if (c == '\0' && r.length == 0) o
else {
val shift = (d: Int, ca: Array[Char]) => {
- val sa = r.splitAt(d - 1)
+ val sca = r.splitAt(d - 1)
(
- if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c,
- if (sa._2.length > 0) sa._2.head else '\0',
- if (sa._2.length > 1) sa._2.tail else Array.empty[Char],
+ if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
+ if (sca._2.length > 0) sca._2.head else '\0',
+ if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
ca
)
}
+
val t = {
c match {
case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a')
@@ -103,14 +104,14 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
private[this] def transcodeRight(ca: Array[Char]) = {
if (ca.length >= 2) {
- val l = ca(ca.length - 1)
- val lm1 = ca(ca.length - 2)
+ val lc = ca(ca.length - 1)
+ val lcm1 = ca(ca.length - 2)
lazy val t2 = ca.take(ca.length - 2)
- l match {
- case 'd' if (lm1 == 'n' || lm1 == 'r') => (t2, Array('d'))
- case 'e' if (lm1 == 'e' || lm1 == 'i') => (t2, Array('y'))
- case 't' if (lm1 == 'd' || lm1 == 'n' || lm1 == 'r') => (t2, Array('d'))
+ lc match {
+ case 'd' if (lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d'))
+ case 'e' if (lcm1 == 'e' || lcm1 == 'i') => (t2, Array('y'))
+ case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d'))
case _ => (ca, Array.empty[Char])
}
} else (ca, Array.empty[Char])
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala
index 279ec82..3e1bd57 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala
@@ -10,19 +10,20 @@ object NysiisMetric extends StringMetric with FilterableStringMetric {
override def compare(charArray1: Array[Char], charArray2: Array[Char])
(implicit stringFilter: StringFilter): Option[CompareReturn] = {
- val ca1 = stringFilter.filter(charArray1)
- lazy val ca2 = stringFilter.filter(charArray2)
val unequal = (c1: Char, c2: Char) => {
- val c1l = c1.toLower
- val c2l = c2.toLower
+ val lc1 = c1.toLower
+ val lc2 = c2.toLower
- (if (c1l == 'k') 'c' else c1l) != (if (c2l == 'k') 'c' else c2l)
+ (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2)
}
- if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
- else if (unequal(ca1.head, ca2.head)) Some(false)
- else NysiisAlgorithm.compute(ca1).filter(_.length > 0).flatMap(ny1 =>
- NysiisAlgorithm.compute(ca2).filter(_.length > 0).map(ny1.sameElements(_))
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
+
+ if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None
+ else if (unequal(fca1.head, fca2.head)) Some(false)
+ else NysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(ny1 =>
+ NysiisAlgorithm.compute(fca2).filter(_.length > 0).map(ny1.sameElements(_))
)
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
index d3870b2..b1055e4 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
@@ -9,13 +9,13 @@ object RefinedNysiisAlgorithm extends StringAlgorithm with FilterableStringAlgor
type ComputeReturn = String
override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = {
- val ca = stringFilter.filter(charArray)
+ val fca = stringFilter.filter(charArray)
- if (ca.length == 0 || !Alphabet.is(ca.head)) None
+ if (fca.length == 0 || !Alphabet.is(fca.head)) None
else {
- val cal = ca.map(_.toLower)
- val thl = transcodeLast(transcodeHead(cal.head +: cleanLast(cal.tail, Set('s', 'z'))))
- val t = transcode(Array.empty[Char], thl.head, thl.tail, Array.empty[Char])
+ val lfca = fca.map(_.toLower)
+ val tlh = transcodeLast(transcodeHead(lfca.head +: cleanLast(lfca.tail, Set('s', 'z'))))
+ val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char])
if (t.length == 1) Some(t)
else Some(deduplicate(t.head +: cleanTerminal(cleanLast(t.tail, Set('a')))))
@@ -43,15 +43,16 @@ object RefinedNysiisAlgorithm extends StringAlgorithm with FilterableStringAlgor
if (c == '\0' && r.length == 0) o
else {
val shift = (d: Int, ca: Array[Char]) => {
- val sa = r.splitAt(d - 1)
+ val sca = r.splitAt(d - 1)
(
- if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c,
- if (sa._2.length > 0) sa._2.head else '\0',
- if (sa._2.length > 1) sa._2.tail else Array.empty[Char],
+ if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
+ if (sca._2.length > 0) sca._2.head else '\0',
+ if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
ca
)
}
+
val t = {
c match {
case 'a' | 'i' | 'o' | 'u' =>
@@ -109,16 +110,16 @@ object RefinedNysiisAlgorithm extends StringAlgorithm with FilterableStringAlgor
private[this] def transcodeLast(ca: Array[Char]) = {
if (ca.length >= 2) {
- val l = ca(ca.length - 1)
- val lm1 = ca(ca.length - 2)
+ val lc = ca(ca.length - 1)
+ val lcm1 = ca(ca.length - 2)
lazy val t2 = ca.take(ca.length - 2)
- l match {
- case 'd' if (lm1 == 'n' || lm1 == 'r') => t2 :+ 'd'
- case 'e' if (lm1 == 'e' || lm1 == 'i' || lm1 =='y') => t2 :+ 'y'
- case 't' if (lm1 == 'd' || lm1 == 'n' || lm1 == 'r') => t2 :+ 'd'
- case 'x' if (lm1 == 'e') => t2 ++ Array('e', 'c')
- case 'x' if (lm1 == 'i') => t2 ++ Array('i', 'c')
+ lc match {
+ case 'd' if (lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd'
+ case 'e' if (lcm1 == 'e' || lcm1 == 'i' || lcm1 =='y') => t2 :+ 'y'
+ case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd'
+ case 'x' if (lcm1 == 'e') => t2 ++ Array('e', 'c')
+ case 'x' if (lcm1 == 'i') => t2 ++ Array('i', 'c')
case _ => ca
}
} else ca
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala
index 8d688ce..9fcdc30 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala
@@ -10,19 +10,20 @@ object RefinedNysiisMetric extends StringMetric with FilterableStringMetric {
override def compare(charArray1: Array[Char], charArray2: Array[Char])
(implicit stringFilter: StringFilter): Option[CompareReturn] = {
- val ca1 = stringFilter.filter(charArray1)
- lazy val ca2 = stringFilter.filter(charArray2)
val unequal = (c1: Char, c2: Char) => {
- val c1l = c1.toLower
- val c2l = c2.toLower
+ val lc1 = c1.toLower
+ val lc2 = c2.toLower
- (if (c1l == 'k') 'c' else c1l) != (if (c2l == 'k') 'c' else c2l)
+ (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2)
}
- if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
- else if (unequal(ca1.head, ca2.head)) Some(false)
- else RefinedNysiisAlgorithm.compute(ca1).filter(_.length > 0).flatMap(rny1 =>
- RefinedNysiisAlgorithm.compute(ca2).filter(_.length > 0).map(rny1.sameElements(_))
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
+
+ if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None
+ else if (unequal(fca1.head, fca2.head)) Some(false)
+ else RefinedNysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rny1 =>
+ RefinedNysiisAlgorithm.compute(fca2).filter(_.length > 0).map(rny1.sameElements(_))
)
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
index 7fe6d70..9b53d20 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
@@ -9,10 +9,10 @@ object RefinedSoundexAlgorithm extends StringAlgorithm with FilterableStringAlgo
type ComputeReturn = String
override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = {
- val ca = stringFilter.filter(charArray)
+ val fca = stringFilter.filter(charArray)
- if (ca.length == 0 || !Alphabet.is(ca.head)) None
- else Some(transcode(ca, Array(ca.head.toLower)))
+ if (fca.length == 0 || !Alphabet.is(fca.head)) None
+ else Some(transcode(fca, Array(fca.head.toLower)))
}
override def compute(string: String)(implicit stringFilter: StringFilter): Option[ComputeReturn] =
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
index 89ebb1d..badf5f5 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
@@ -10,13 +10,13 @@ object RefinedSoundexMetric extends StringMetric with FilterableStringMetric {
override def compare(charArray1: Array[Char], charArray2: Array[Char])
(implicit stringFilter: StringFilter): Option[CompareReturn] = {
- val ca1 = stringFilter.filter(charArray1)
- lazy val ca2 = stringFilter.filter(charArray2)
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
- if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
- else if (ca1.head.toLower != ca2.head.toLower) Some(false)
- else RefinedSoundexAlgorithm.compute(ca1).filter(_.length > 0).flatMap(rse1 =>
- RefinedSoundexAlgorithm.compute(ca2).filter(_.length > 0).map(rse1.sameElements(_))
+ if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None
+ else if (fca1.head.toLower != fca2.head.toLower) Some(false)
+ else RefinedSoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rse1 =>
+ RefinedSoundexAlgorithm.compute(fca2).filter(_.length > 0).map(rse1.sameElements(_))
)
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala
index 9f2ed92..8d261d1 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala
@@ -9,13 +9,13 @@ object SoundexAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
type ComputeReturn = String
override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = {
- val ca = stringFilter.filter(charArray)
+ val fca = stringFilter.filter(charArray)
- if (ca.length == 0 || !Alphabet.is(ca.head)) None
+ if (fca.length == 0 || !Alphabet.is(fca.head)) None
else {
- val fc = ca.head.toLower
+ val fc = fca.head.toLower
- Some(transcode(ca.tail, fc, Array(fc)).padTo(4, '0'))
+ Some(transcode(fca.tail, fc, Array(fc)).padTo(4, '0'))
}
}
@@ -23,7 +23,7 @@ object SoundexAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate).map(_.mkString)
@tailrec
- private[this] def transcode(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = {
+ private[this] def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] = {
if (i.length == 0) o
else {
val c = i.head.toLower
@@ -45,7 +45,7 @@ object SoundexAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
case 'r' if pc != '6' => '6'
case _ => '\0'
}
- val a = p match {
+ val a = pc match {
// Code twice.
case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c)
// Code once.
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
index d446730..bff3017 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
@@ -10,13 +10,13 @@ object SoundexMetric extends StringMetric with FilterableStringMetric {
override def compare(charArray1: Array[Char], charArray2: Array[Char])
(implicit stringFilter: StringFilter): Option[CompareReturn] = {
- val ca1 = stringFilter.filter(charArray1)
- lazy val ca2 = stringFilter.filter(charArray2)
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
- if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
- else if (ca1.head.toLower != ca2.head.toLower) Some(false)
- else SoundexAlgorithm.compute(ca1).filter(_.length > 0).flatMap(se1 =>
- SoundexAlgorithm.compute(ca2).filter(_.length > 0).map(se1.sameElements(_))
+ if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None
+ else if (fca1.head.toLower != fca2.head.toLower) Some(false)
+ else SoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(se1 =>
+ SoundexAlgorithm.compute(fca2).filter(_.length > 0).map(se1.sameElements(_))
)
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala
index 513b5ed..98a1275 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala
@@ -15,14 +15,14 @@ object DiceSorensenMetric extends StringMetric with FilterableConfigurableString
if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
- val ca1 = stringFilter.filter(charArray1)
- lazy val ca2 = stringFilter.filter(charArray2)
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
- if (ca1.length < n || ca2.length < n) None // Because length is less than n, it is not possible to compare.
- else if (ca1.sameElements(ca2)) Some(1d)
+ if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare.
+ else if (fca1.sameElements(fca2)) Some(1d)
else
- NGramAlgorithm.compute(ca1)(n).flatMap { ca1bg =>
- NGramAlgorithm.compute(ca2)(n).map { ca2bg =>
+ NGramAlgorithm.compute(fca1)(n).flatMap { ca1bg =>
+ NGramAlgorithm.compute(fca2)(n).map { ca2bg =>
val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString)))
(2d * ms) / (ca1bg.length + ca2bg.length)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala
index 223ca54..1fd5d57 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala
@@ -10,12 +10,12 @@ object HammingMetric extends StringMetric with FilterableStringMetric {
override def compare(charArray1: Array[Char], charArray2: Array[Char])
(implicit stringFilter: StringFilter): Option[CompareReturn] = {
- val ca1 = stringFilter.filter(charArray1)
- lazy val ca2 = stringFilter.filter(charArray2)
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
- if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length) None
- else if (ca1.sameElements(ca2)) Some(0)
- else Some(hamming(ca1, ca2))
+ if (fca1.length == 0 || fca2.length == 0 || fca1.length != fca2.length) None
+ else if (fca1.sameElements(fca2)) Some(0)
+ else Some(hamming(fca1, fca2))
}
override def compare(string1: String, string2: String)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala
index 92a373e..e4c2441 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala
@@ -15,20 +15,20 @@ object JaroMetric extends StringMetric with FilterableStringMetric {
override def compare(charArray1: Array[Char], charArray2: Array[Char])
(implicit stringFilter: StringFilter): Option[CompareReturn] = {
- val ca1 = stringFilter.filter(charArray1)
- lazy val ca2 = stringFilter.filter(charArray2)
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
- if (ca1.length == 0 || ca2.length == 0) None
- else if (ca1.sameElements(ca2)) Some(1d)
+ if (fca1.length == 0 || fca2.length == 0) None
+ else if (fca1.sameElements(fca2)) Some(1d)
else {
- val mt = `match`((ca1, ca2))
+ val mt = `match`((fca1, fca2))
val ms = scoreMatches((mt._1, mt._2))
if (ms == 0) Some(0d)
else {
val ts = scoreTranspositions((mt._1, mt._2))
- Some(((ms.toDouble / ca1.length) + (ms.toDouble / ca2.length) + ((ms.toDouble - ts) / ms)) / 3)
+ Some(((ms.toDouble / fca1.length) + (ms.toDouble / fca2.length) + ((ms.toDouble - ts) / ms)) / 3)
}
}
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetric.scala
index 49ff5cf..c44088c 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetric.scala
@@ -14,14 +14,14 @@ object JaroWinklerMetric extends StringMetric with FilterableStringMetric {
override def compare(charArray1: Array[Char], charArray2: Array[Char])
(implicit stringFilter: StringFilter): Option[CompareReturn] = {
- val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ val fca1 = stringFilter.filter(charArray1)
+ val fca2 = stringFilter.filter(charArray2)
- JaroMetric.compare(ca1, ca2)(new StringFilterDelegate) match {
+ JaroMetric.compare(fca1, fca2)(new StringFilterDelegate) match {
case Some(0d) => Some(0d)
case Some(1d) => Some(1d)
case Some(jaro) => {
- val prefix = ca1.zip(ca2).takeWhile(t => t._1 == t._2)
+ val prefix = fca1.zip(fca2).takeWhile(t => t._1 == t._2)
Some(jaro + ((if (prefix.length <= 4) prefix.length else 4) * 0.1d * (1 - jaro)))
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/LevenshteinMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/LevenshteinMetric.scala
index 71ab895..5c8c8b2 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/LevenshteinMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/LevenshteinMetric.scala
@@ -10,13 +10,13 @@ object LevenshteinMetric extends StringMetric with FilterableStringMetric {
override def compare(charArray1: Array[Char], charArray2: Array[Char])
(implicit stringFilter: StringFilter): Option[CompareReturn] = {
- val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ val fca1 = stringFilter.filter(charArray1)
+ val fca2 = stringFilter.filter(charArray2)
- if (ca1.length == 0 && ca2.length == 0) None
- else if (ca1.length == 0) Some(ca2.length)
- else if (ca2.length == 0) Some(ca1.length)
- else Some(levenshtein(ca1, ca2))
+ if (fca1.length == 0 && fca2.length == 0) None
+ else if (fca1.length == 0) Some(fca2.length)
+ else if (fca2.length == 0) Some(fca1.length)
+ else Some(levenshtein(fca1, fca2))
}
override def compare(string1: String, string2: String)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramAlgorithm.scala
index f3f6b8a..5e6c022 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramAlgorithm.scala
@@ -13,10 +13,10 @@ object NGramAlgorithm extends StringAlgorithm with FilterableConfigurableStringA
if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
- val ca = stringFilter.filter(charArray)
+ val fca = stringFilter.filter(charArray)
- if (ca.length < n) None
- else Some(sequence(ca, Array.empty[Array[Char]], n))
+ if (fca.length < n) None
+ else Some(sequence(fca, Array.empty[Array[Char]], n))
}
override def compute(string: String)(n: Int)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala
index 586552c..6977e84 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala
@@ -13,14 +13,14 @@ object NGramMetric extends StringMetric with FilterableConfigurableStringMetric[
if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
- val ca1 = stringFilter.filter(charArray1)
- lazy val ca2 = stringFilter.filter(charArray2)
+ val fca1 = stringFilter.filter(charArray1)
+ lazy val fca2 = stringFilter.filter(charArray2)
- if (ca1.length < n || ca2.length < n) None // Because length is less than n, it is not possible to compare.
- else if (ca1.sameElements(ca2)) Some(1d)
+ if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare.
+ else if (fca1.sameElements(fca2)) Some(1d)
else
- NGramAlgorithm.compute(ca1)(n).flatMap { ca1bg =>
- NGramAlgorithm.compute(ca2)(n).map { ca2bg =>
+ NGramAlgorithm.compute(fca1)(n).flatMap { ca1bg =>
+ NGramAlgorithm.compute(fca2)(n).map { ca2bg =>
val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString)))
ms.toDouble / math.max(ca1bg.length, ca2bg.length)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetric.scala
index ba57b10..2691c03 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetric.scala
@@ -18,14 +18,14 @@ object WeightedLevenshteinMetric
if (options._1 < 0 || options._2 < 0 || options._3 < 0)
throw new IllegalArgumentException("Expected valid weight options.")
- val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
-
- if (ca1.length == 0 && ca2.length == 0) None
- else if (ca1.length == 0) Some((options._2 * ca2.length).toDouble)
- else if (ca2.length == 0) Some((options._1 * ca1.length).toDouble)
- else if (ca1.sameElements(ca2)) Some(0d)
- else Some(weightedLevenshtein((ca1, ca2), options).toDouble)
+ val fca1 = stringFilter.filter(charArray1)
+ val fca2 = stringFilter.filter(charArray2)
+
+ if (fca1.length == 0 && fca2.length == 0) None
+ else if (fca1.length == 0) Some((options._2 * fca2.length).toDouble)
+ else if (fca2.length == 0) Some((options._1 * fca1.length).toDouble)
+ else if (fca1.sameElements(fca2)) Some(0d)
+ else Some(weightedLevenshtein((fca1, fca2), options).toDouble)
}
/** Options order is delete, insert, then substitute weight. */