diff options
18 files changed, 129 insertions, 125 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala index 944025c..685a952 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala @@ -9,11 +9,11 @@ object MetaphoneAlgorithm extends StringAlgorithm with FilterableStringAlgorithm type ComputeReturn = String override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { - val ca = stringFilter.filter(charArray) + val fca = stringFilter.filter(charArray) - if (ca.length == 0 || !Alphabet.is(ca.head)) None + if (fca.length == 0 || !Alphabet.is(fca.head)) None else { - val th = deduplicate(transcodeHead(ca.map(_.toLower))) + val th = deduplicate(transcodeHead(fca.map(_.toLower))) val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char]) if (t.length == 0) None else Some(t) // Single Y or W would have 0 length. @@ -32,12 +32,12 @@ object MetaphoneAlgorithm extends StringAlgorithm with FilterableStringAlgorithm if (c == '\0' && r.length == 0) o else { val shift = (d: Int, ca: Array[Char]) => { - val sa = r.splitAt(d - 1) + val sca = r.splitAt(d - 1) ( - if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c, - if (sa._2.length > 0) sa._2.head else '\0', - if (sa._2.length > 1) sa._2.tail else Array.empty[Char], + if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, + if (sca._2.length > 0) sca._2.head else '\0', + if (sca._2.length > 1) sca._2.tail else Array.empty[Char], ca ) } diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala index efd65b8..3afaee9 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala @@ -10,12 +10,12 @@ object MetaphoneMetric extends StringMetric with FilterableStringMetric { override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit stringFilter: StringFilter): Option[CompareReturn] = { - val ca1 = stringFilter.filter(charArray1) - lazy val ca2 = stringFilter.filter(charArray2) + val fca1 = stringFilter.filter(charArray1) + lazy val fca2 = stringFilter.filter(charArray2) - if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None - else MetaphoneAlgorithm.compute(ca1).filter(_.length > 0).flatMap(mp1 => - MetaphoneAlgorithm.compute(ca2).filter(_.length > 0).map(mp1.sameElements(_)) + if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None + else MetaphoneAlgorithm.compute(fca1).filter(_.length > 0).flatMap(mp1 => + MetaphoneAlgorithm.compute(fca2).filter(_.length > 0).map(mp1.sameElements(_)) ) } diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala index 33abe1d..e2debd0 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala @@ -9,11 +9,11 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm { type ComputeReturn = String override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { - val ca = stringFilter.filter(charArray) + val fca = stringFilter.filter(charArray) - if (ca.length == 0 || !Alphabet.is(ca.head)) None + if (fca.length == 0 || !Alphabet.is(fca.head)) None else { - val tr = transcodeRight(ca.map(_.toLower)) + val tr = transcodeRight(fca.map(_.toLower)) val tl = transcodeLeft(tr._1) val t = if (tl._2.length == 0) tl._1 ++ tr._2 @@ -46,15 +46,16 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm { if (c == '\0' && r.length == 0) o else { val shift = (d: Int, ca: Array[Char]) => { - val sa = r.splitAt(d - 1) + val sca = r.splitAt(d - 1) ( - if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c, - if (sa._2.length > 0) sa._2.head else '\0', - if (sa._2.length > 1) sa._2.tail else Array.empty[Char], + if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, + if (sca._2.length > 0) sca._2.head else '\0', + if (sca._2.length > 1) sca._2.tail else Array.empty[Char], ca ) } + val t = { c match { case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a') @@ -103,14 +104,14 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm { private[this] def transcodeRight(ca: Array[Char]) = { if (ca.length >= 2) { - val l = ca(ca.length - 1) - val lm1 = ca(ca.length - 2) + val lc = ca(ca.length - 1) + val lcm1 = ca(ca.length - 2) lazy val t2 = ca.take(ca.length - 2) - l match { - case 'd' if (lm1 == 'n' || lm1 == 'r') => (t2, Array('d')) - case 'e' if (lm1 == 'e' || lm1 == 'i') => (t2, Array('y')) - case 't' if (lm1 == 'd' || lm1 == 'n' || lm1 == 'r') => (t2, Array('d')) + lc match { + case 'd' if (lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d')) + case 'e' if (lcm1 == 'e' || lcm1 == 'i') => (t2, Array('y')) + case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d')) case _ => (ca, Array.empty[Char]) } } else (ca, Array.empty[Char]) diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala index 279ec82..3e1bd57 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala @@ -10,19 +10,20 @@ object NysiisMetric extends StringMetric with FilterableStringMetric { override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit stringFilter: StringFilter): Option[CompareReturn] = { - val ca1 = stringFilter.filter(charArray1) - lazy val ca2 = stringFilter.filter(charArray2) val unequal = (c1: Char, c2: Char) => { - val c1l = c1.toLower - val c2l = c2.toLower + val lc1 = c1.toLower + val lc2 = c2.toLower - (if (c1l == 'k') 'c' else c1l) != (if (c2l == 'k') 'c' else c2l) + (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2) } - if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None - else if (unequal(ca1.head, ca2.head)) Some(false) - else NysiisAlgorithm.compute(ca1).filter(_.length > 0).flatMap(ny1 => - NysiisAlgorithm.compute(ca2).filter(_.length > 0).map(ny1.sameElements(_)) + val fca1 = stringFilter.filter(charArray1) + lazy val fca2 = stringFilter.filter(charArray2) + + if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None + else if (unequal(fca1.head, fca2.head)) Some(false) + else NysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(ny1 => + NysiisAlgorithm.compute(fca2).filter(_.length > 0).map(ny1.sameElements(_)) ) } diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala index d3870b2..b1055e4 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala @@ -9,13 +9,13 @@ object RefinedNysiisAlgorithm extends StringAlgorithm with FilterableStringAlgor type ComputeReturn = String override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { - val ca = stringFilter.filter(charArray) + val fca = stringFilter.filter(charArray) - if (ca.length == 0 || !Alphabet.is(ca.head)) None + if (fca.length == 0 || !Alphabet.is(fca.head)) None else { - val cal = ca.map(_.toLower) - val thl = transcodeLast(transcodeHead(cal.head +: cleanLast(cal.tail, Set('s', 'z')))) - val t = transcode(Array.empty[Char], thl.head, thl.tail, Array.empty[Char]) + val lfca = fca.map(_.toLower) + val tlh = transcodeLast(transcodeHead(lfca.head +: cleanLast(lfca.tail, Set('s', 'z')))) + val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char]) if (t.length == 1) Some(t) else Some(deduplicate(t.head +: cleanTerminal(cleanLast(t.tail, Set('a'))))) @@ -43,15 +43,16 @@ object RefinedNysiisAlgorithm extends StringAlgorithm with FilterableStringAlgor if (c == '\0' && r.length == 0) o else { val shift = (d: Int, ca: Array[Char]) => { - val sa = r.splitAt(d - 1) + val sca = r.splitAt(d - 1) ( - if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c, - if (sa._2.length > 0) sa._2.head else '\0', - if (sa._2.length > 1) sa._2.tail else Array.empty[Char], + if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, + if (sca._2.length > 0) sca._2.head else '\0', + if (sca._2.length > 1) sca._2.tail else Array.empty[Char], ca ) } + val t = { c match { case 'a' | 'i' | 'o' | 'u' => @@ -109,16 +110,16 @@ object RefinedNysiisAlgorithm extends StringAlgorithm with FilterableStringAlgor private[this] def transcodeLast(ca: Array[Char]) = { if (ca.length >= 2) { - val l = ca(ca.length - 1) - val lm1 = ca(ca.length - 2) + val lc = ca(ca.length - 1) + val lcm1 = ca(ca.length - 2) lazy val t2 = ca.take(ca.length - 2) - l match { - case 'd' if (lm1 == 'n' || lm1 == 'r') => t2 :+ 'd' - case 'e' if (lm1 == 'e' || lm1 == 'i' || lm1 =='y') => t2 :+ 'y' - case 't' if (lm1 == 'd' || lm1 == 'n' || lm1 == 'r') => t2 :+ 'd' - case 'x' if (lm1 == 'e') => t2 ++ Array('e', 'c') - case 'x' if (lm1 == 'i') => t2 ++ Array('i', 'c') + lc match { + case 'd' if (lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd' + case 'e' if (lcm1 == 'e' || lcm1 == 'i' || lcm1 =='y') => t2 :+ 'y' + case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd' + case 'x' if (lcm1 == 'e') => t2 ++ Array('e', 'c') + case 'x' if (lcm1 == 'i') => t2 ++ Array('i', 'c') case _ => ca } } else ca diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala index 8d688ce..9fcdc30 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala @@ -10,19 +10,20 @@ object RefinedNysiisMetric extends StringMetric with FilterableStringMetric { override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit stringFilter: StringFilter): Option[CompareReturn] = { - val ca1 = stringFilter.filter(charArray1) - lazy val ca2 = stringFilter.filter(charArray2) val unequal = (c1: Char, c2: Char) => { - val c1l = c1.toLower - val c2l = c2.toLower + val lc1 = c1.toLower + val lc2 = c2.toLower - (if (c1l == 'k') 'c' else c1l) != (if (c2l == 'k') 'c' else c2l) + (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2) } - if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None - else if (unequal(ca1.head, ca2.head)) Some(false) - else RefinedNysiisAlgorithm.compute(ca1).filter(_.length > 0).flatMap(rny1 => - RefinedNysiisAlgorithm.compute(ca2).filter(_.length > 0).map(rny1.sameElements(_)) + val fca1 = stringFilter.filter(charArray1) + lazy val fca2 = stringFilter.filter(charArray2) + + if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None + else if (unequal(fca1.head, fca2.head)) Some(false) + else RefinedNysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rny1 => + RefinedNysiisAlgorithm.compute(fca2).filter(_.length > 0).map(rny1.sameElements(_)) ) } diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala index 7fe6d70..9b53d20 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala @@ -9,10 +9,10 @@ object RefinedSoundexAlgorithm extends StringAlgorithm with FilterableStringAlgo type ComputeReturn = String override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { - val ca = stringFilter.filter(charArray) + val fca = stringFilter.filter(charArray) - if (ca.length == 0 || !Alphabet.is(ca.head)) None - else Some(transcode(ca, Array(ca.head.toLower))) + if (fca.length == 0 || !Alphabet.is(fca.head)) None + else Some(transcode(fca, Array(fca.head.toLower))) } override def compute(string: String)(implicit stringFilter: StringFilter): Option[ComputeReturn] = diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala index 89ebb1d..badf5f5 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala @@ -10,13 +10,13 @@ object RefinedSoundexMetric extends StringMetric with FilterableStringMetric { override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit stringFilter: StringFilter): Option[CompareReturn] = { - val ca1 = stringFilter.filter(charArray1) - lazy val ca2 = stringFilter.filter(charArray2) + val fca1 = stringFilter.filter(charArray1) + lazy val fca2 = stringFilter.filter(charArray2) - if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None - else if (ca1.head.toLower != ca2.head.toLower) Some(false) - else RefinedSoundexAlgorithm.compute(ca1).filter(_.length > 0).flatMap(rse1 => - RefinedSoundexAlgorithm.compute(ca2).filter(_.length > 0).map(rse1.sameElements(_)) + if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None + else if (fca1.head.toLower != fca2.head.toLower) Some(false) + else RefinedSoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rse1 => + RefinedSoundexAlgorithm.compute(fca2).filter(_.length > 0).map(rse1.sameElements(_)) ) } diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala index 9f2ed92..8d261d1 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala @@ -9,13 +9,13 @@ object SoundexAlgorithm extends StringAlgorithm with FilterableStringAlgorithm { type ComputeReturn = String override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { - val ca = stringFilter.filter(charArray) + val fca = stringFilter.filter(charArray) - if (ca.length == 0 || !Alphabet.is(ca.head)) None + if (fca.length == 0 || !Alphabet.is(fca.head)) None else { - val fc = ca.head.toLower + val fc = fca.head.toLower - Some(transcode(ca.tail, fc, Array(fc)).padTo(4, '0')) + Some(transcode(fca.tail, fc, Array(fc)).padTo(4, '0')) } } @@ -23,7 +23,7 @@ object SoundexAlgorithm extends StringAlgorithm with FilterableStringAlgorithm { compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate).map(_.mkString) @tailrec - private[this] def transcode(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = { + private[this] def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] = { if (i.length == 0) o else { val c = i.head.toLower @@ -45,7 +45,7 @@ object SoundexAlgorithm extends StringAlgorithm with FilterableStringAlgorithm { case 'r' if pc != '6' => '6' case _ => '\0' } - val a = p match { + val a = pc match { // Code twice. case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c) // Code once. diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala index d446730..bff3017 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala @@ -10,13 +10,13 @@ object SoundexMetric extends StringMetric with FilterableStringMetric { override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit stringFilter: StringFilter): Option[CompareReturn] = { - val ca1 = stringFilter.filter(charArray1) - lazy val ca2 = stringFilter.filter(charArray2) + val fca1 = stringFilter.filter(charArray1) + lazy val fca2 = stringFilter.filter(charArray2) - if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None - else if (ca1.head.toLower != ca2.head.toLower) Some(false) - else SoundexAlgorithm.compute(ca1).filter(_.length > 0).flatMap(se1 => - SoundexAlgorithm.compute(ca2).filter(_.length > 0).map(se1.sameElements(_)) + if (fca1.length == 0 || !Alphabet.is(fca1.head) || fca2.length == 0 || !Alphabet.is(fca2.head)) None + else if (fca1.head.toLower != fca2.head.toLower) Some(false) + else SoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(se1 => + SoundexAlgorithm.compute(fca2).filter(_.length > 0).map(se1.sameElements(_)) ) } diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala index 513b5ed..98a1275 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala @@ -15,14 +15,14 @@ object DiceSorensenMetric extends StringMetric with FilterableConfigurableString if (n <= 0) throw new IllegalArgumentException("Expected valid n.") - val ca1 = stringFilter.filter(charArray1) - lazy val ca2 = stringFilter.filter(charArray2) + val fca1 = stringFilter.filter(charArray1) + lazy val fca2 = stringFilter.filter(charArray2) - if (ca1.length < n || ca2.length < n) None // Because length is less than n, it is not possible to compare. - else if (ca1.sameElements(ca2)) Some(1d) + if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. + else if (fca1.sameElements(fca2)) Some(1d) else - NGramAlgorithm.compute(ca1)(n).flatMap { ca1bg => - NGramAlgorithm.compute(ca2)(n).map { ca2bg => + NGramAlgorithm.compute(fca1)(n).flatMap { ca1bg => + NGramAlgorithm.compute(fca2)(n).map { ca2bg => val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString))) (2d * ms) / (ca1bg.length + ca2bg.length) diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala index 223ca54..1fd5d57 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala @@ -10,12 +10,12 @@ object HammingMetric extends StringMetric with FilterableStringMetric { override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit stringFilter: StringFilter): Option[CompareReturn] = { - val ca1 = stringFilter.filter(charArray1) - lazy val ca2 = stringFilter.filter(charArray2) + val fca1 = stringFilter.filter(charArray1) + lazy val fca2 = stringFilter.filter(charArray2) - if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length) None - else if (ca1.sameElements(ca2)) Some(0) - else Some(hamming(ca1, ca2)) + if (fca1.length == 0 || fca2.length == 0 || fca1.length != fca2.length) None + else if (fca1.sameElements(fca2)) Some(0) + else Some(hamming(fca1, fca2)) } override def compare(string1: String, string2: String) diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala index 92a373e..e4c2441 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala @@ -15,20 +15,20 @@ object JaroMetric extends StringMetric with FilterableStringMetric { override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit stringFilter: StringFilter): Option[CompareReturn] = { - val ca1 = stringFilter.filter(charArray1) - lazy val ca2 = stringFilter.filter(charArray2) + val fca1 = stringFilter.filter(charArray1) + lazy val fca2 = stringFilter.filter(charArray2) - if (ca1.length == 0 || ca2.length == 0) None - else if (ca1.sameElements(ca2)) Some(1d) + if (fca1.length == 0 || fca2.length == 0) None + else if (fca1.sameElements(fca2)) Some(1d) else { - val mt = `match`((ca1, ca2)) + val mt = `match`((fca1, fca2)) val ms = scoreMatches((mt._1, mt._2)) if (ms == 0) Some(0d) else { val ts = scoreTranspositions((mt._1, mt._2)) - Some(((ms.toDouble / ca1.length) + (ms.toDouble / ca2.length) + ((ms.toDouble - ts) / ms)) / 3) + Some(((ms.toDouble / fca1.length) + (ms.toDouble / fca2.length) + ((ms.toDouble - ts) / ms)) / 3) } } } diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetric.scala index 49ff5cf..c44088c 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetric.scala @@ -14,14 +14,14 @@ object JaroWinklerMetric extends StringMetric with FilterableStringMetric { override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit stringFilter: StringFilter): Option[CompareReturn] = { - val ca1 = stringFilter.filter(charArray1) - val ca2 = stringFilter.filter(charArray2) + val fca1 = stringFilter.filter(charArray1) + val fca2 = stringFilter.filter(charArray2) - JaroMetric.compare(ca1, ca2)(new StringFilterDelegate) match { + JaroMetric.compare(fca1, fca2)(new StringFilterDelegate) match { case Some(0d) => Some(0d) case Some(1d) => Some(1d) case Some(jaro) => { - val prefix = ca1.zip(ca2).takeWhile(t => t._1 == t._2) + val prefix = fca1.zip(fca2).takeWhile(t => t._1 == t._2) Some(jaro + ((if (prefix.length <= 4) prefix.length else 4) * 0.1d * (1 - jaro))) } diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/LevenshteinMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/LevenshteinMetric.scala index 71ab895..5c8c8b2 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/similarity/LevenshteinMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/LevenshteinMetric.scala @@ -10,13 +10,13 @@ object LevenshteinMetric extends StringMetric with FilterableStringMetric { override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit stringFilter: StringFilter): Option[CompareReturn] = { - val ca1 = stringFilter.filter(charArray1) - val ca2 = stringFilter.filter(charArray2) + val fca1 = stringFilter.filter(charArray1) + val fca2 = stringFilter.filter(charArray2) - if (ca1.length == 0 && ca2.length == 0) None - else if (ca1.length == 0) Some(ca2.length) - else if (ca2.length == 0) Some(ca1.length) - else Some(levenshtein(ca1, ca2)) + if (fca1.length == 0 && fca2.length == 0) None + else if (fca1.length == 0) Some(fca2.length) + else if (fca2.length == 0) Some(fca1.length) + else Some(levenshtein(fca1, fca2)) } override def compare(string1: String, string2: String) diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramAlgorithm.scala index f3f6b8a..5e6c022 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramAlgorithm.scala @@ -13,10 +13,10 @@ object NGramAlgorithm extends StringAlgorithm with FilterableConfigurableStringA if (n <= 0) throw new IllegalArgumentException("Expected valid n.") - val ca = stringFilter.filter(charArray) + val fca = stringFilter.filter(charArray) - if (ca.length < n) None - else Some(sequence(ca, Array.empty[Array[Char]], n)) + if (fca.length < n) None + else Some(sequence(fca, Array.empty[Array[Char]], n)) } override def compute(string: String)(n: Int) diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala index 586552c..6977e84 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala @@ -13,14 +13,14 @@ object NGramMetric extends StringMetric with FilterableConfigurableStringMetric[ if (n <= 0) throw new IllegalArgumentException("Expected valid n.") - val ca1 = stringFilter.filter(charArray1) - lazy val ca2 = stringFilter.filter(charArray2) + val fca1 = stringFilter.filter(charArray1) + lazy val fca2 = stringFilter.filter(charArray2) - if (ca1.length < n || ca2.length < n) None // Because length is less than n, it is not possible to compare. - else if (ca1.sameElements(ca2)) Some(1d) + if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. + else if (fca1.sameElements(fca2)) Some(1d) else - NGramAlgorithm.compute(ca1)(n).flatMap { ca1bg => - NGramAlgorithm.compute(ca2)(n).map { ca2bg => + NGramAlgorithm.compute(fca1)(n).flatMap { ca1bg => + NGramAlgorithm.compute(fca2)(n).map { ca2bg => val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString))) ms.toDouble / math.max(ca1bg.length, ca2bg.length) diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetric.scala index ba57b10..2691c03 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetric.scala @@ -18,14 +18,14 @@ object WeightedLevenshteinMetric if (options._1 < 0 || options._2 < 0 || options._3 < 0) throw new IllegalArgumentException("Expected valid weight options.") - val ca1 = stringFilter.filter(charArray1) - val ca2 = stringFilter.filter(charArray2) - - if (ca1.length == 0 && ca2.length == 0) None - else if (ca1.length == 0) Some((options._2 * ca2.length).toDouble) - else if (ca2.length == 0) Some((options._1 * ca1.length).toDouble) - else if (ca1.sameElements(ca2)) Some(0d) - else Some(weightedLevenshtein((ca1, ca2), options).toDouble) + val fca1 = stringFilter.filter(charArray1) + val fca2 = stringFilter.filter(charArray2) + + if (fca1.length == 0 && fca2.length == 0) None + else if (fca1.length == 0) Some((options._2 * fca2.length).toDouble) + else if (fca2.length == 0) Some((options._1 * fca1.length).toDouble) + else if (fca1.sameElements(fca2)) Some(0d) + else Some(weightedLevenshtein((fca1, fca2), options).toDouble) } /** Options order is delete, insert, then substitute weight. */ |