diff options
author | Rocky Madden <git@rockymadden.com> | 2012-11-19 10:39:08 -0700 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2012-11-19 10:39:08 -0700 |
commit | c99fc5ce6e40772af5c5bc592166fa56fc45c3bb (patch) | |
tree | e75d70c226366b80449783ac9064c2805519f96e | |
parent | dfe88eb56a95a0bc88ece493a3d6590f07f07590 (diff) | |
download | stringmetric-c99fc5ce6e40772af5c5bc592166fa56fc45c3bb.tar.gz stringmetric-c99fc5ce6e40772af5c5bc592166fa56fc45c3bb.tar.bz2 stringmetric-c99fc5ce6e40772af5c5bc592166fa56fc45c3bb.zip |
Performance enhancements.
11 files changed, 52 insertions, 37 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala index 15aa071..172c7b5 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala @@ -4,7 +4,20 @@ object Alphabet { final val SometimesVowels: Set[Char] = Set('a', 'e', 'i', 'o', 'u', 'y') final val Vowels: Set[Char] = Set('a', 'e', 'i', 'o', 'u') - def isSometimesVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c =='u' || c == 'y') + def is(char: Char) = ((char >= 65 && char <= 90) || (char >= 97 && char <= 122)) - def isVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c =='u') + def isSometimesVowel(char: Char): Boolean = (char == 'y' || char == 'Y' || isVowel(char)) + + def isVowel(char: Char): Boolean = ( + char == 'a' || char == 'e' || char == 'i' || char == 'o' || char =='u' + || char == 'A' || char == 'E' || char == 'I' || char == 'O' || char =='U' + ) + + def startsWith(charArray: Array[Char]): Boolean = + if (charArray.length == 0) false + else is(charArray.head) + + def startsWith(string: String): Boolean = + if (string.length == 0) false + else is(string.charAt(0)) }
\ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala index cc836a0..e69cb22 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala @@ -13,10 +13,9 @@ object MetaphoneAlgorithm extends StringAlgorithm with FilterableStringAlgorithm if (ca.length == 0) None else { - val th = deduplicate(transcodeHead(ca.map(_.toLower))) - - if (th.head < 97 || th.head > 122) None + if (!Alphabet.is(ca.head)) None else { + val th = deduplicate(transcodeHead(ca.map(_.toLower))) val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char]) if (t.length == 0) None else Some(t) // Single Y or W would have 0 length. diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala index 7cfc23a..6652e36 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala @@ -13,7 +13,7 @@ object MetaphoneMetric extends StringMetric with FilterableStringMetric { val ca1 = stringFilter.filter(charArray1) lazy val ca2 = stringFilter.filter(charArray2) - if (ca1.length == 0 || ca2.length == 0) None + if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None else { val mp1 = MetaphoneAlgorithm.compute(ca1) lazy val mp2 = MetaphoneAlgorithm.compute(ca2) diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala index eaf3872..e009fee 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala @@ -13,11 +13,9 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm { if (ca.length == 0) None else { - val cal = ca.map(_.toLower) - - if (cal.head < 97 || cal.head > 122) None + if (!Alphabet.is(ca.head)) None else { - val tr = transcodeRight(cal) + val tr = transcodeRight(ca.map(_.toLower)) val tl = transcodeLeft(tr._1) val t = if (tl._2.length == 0) tl._1 ++ tr._2 diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala index a7c84e0..46f53d1 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala @@ -12,8 +12,15 @@ object NysiisMetric extends StringMetric with FilterableStringMetric { val ca1 = stringFilter.filter(charArray1) lazy val ca2 = stringFilter.filter(charArray2) + val unequal = (c1: Char, c2: Char) => { + val c1l = c1.toLower + val c2l = c2.toLower - if (ca1.length == 0 || ca2.length == 0) None + (if (c1l == 'k') 'c' else c1l) != (if (c2l == 'k') 'c' else c2l) + } + + if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None + else if (unequal(ca1.head, ca2.head)) Some(false) else { val ny1 = NysiisAlgorithm.compute(ca1) lazy val ny2 = NysiisAlgorithm.compute(ca2) diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala index 98cf043..6a48991 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala @@ -13,10 +13,9 @@ object RefinedNysiisAlgorithm extends StringAlgorithm with FilterableStringAlgor if (ca.length == 0) None else { - val cal = ca.map(_.toLower) - - if (cal.head < 97 || cal.head > 122) None + if (!Alphabet.is(ca.head)) None else { + val cal = ca.map(_.toLower) val thl = transcodeLast(transcodeHead(cal.head +: cleanLast(cal.tail, Set('s', 'z')))) val t = transcode(Array.empty[Char], thl.head, thl.tail, Array.empty[Char]) diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala index 732386d..10d7bc0 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala @@ -12,8 +12,15 @@ object RefinedNysiisMetric extends StringMetric with FilterableStringMetric { val ca1 = stringFilter.filter(charArray1) lazy val ca2 = stringFilter.filter(charArray2) + val unequal = (c1: Char, c2: Char) => { + val c1l = c1.toLower + val c2l = c2.toLower - if (ca1.length == 0 || ca2.length == 0) None + (if (c1l == 'k') 'c' else c1l) != (if (c2l == 'k') 'c' else c2l) + } + + if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None + else if (unequal(ca1.head, ca2.head)) Some(false) else { val rny1 = RefinedNysiisAlgorithm.compute(ca1) lazy val rny2 = RefinedNysiisAlgorithm.compute(ca2) diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala index 7f2191a..a0ea389 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala @@ -13,17 +13,12 @@ object RefinedSoundexAlgorithm extends StringAlgorithm with FilterableStringAlgo if (ca.length == 0) None else { - val fc = ca.head.toLower + if (!Alphabet.is(ca.head)) None + else { + val fc = ca.head.toLower - if (fc < 97 || fc > 122) None - else - Some( - transcode( - ca, - fc, // Pass first letter. - Array(fc) // Pass array with first letter. - ) - ) + Some(transcode(ca, fc, Array(fc))) + } } } diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala index 326b654..778c9d2 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala @@ -13,7 +13,8 @@ object RefinedSoundexMetric extends StringMetric with FilterableStringMetric { val ca1 = stringFilter.filter(charArray1) lazy val ca2 = stringFilter.filter(charArray2) - if (ca1.length == 0 || ca2.length == 0) None + if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None + else if (ca1.head.toLower != ca2.head.toLower) Some(false) else { val rse1 = RefinedSoundexAlgorithm.compute(ca1) lazy val rse2 = RefinedSoundexAlgorithm.compute(ca2) diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala index 648edc7..c5d099f 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala @@ -13,17 +13,12 @@ object SoundexAlgorithm extends StringAlgorithm with FilterableStringAlgorithm { if (ca.length == 0) None else { - val fc = ca.head.toLower + if (!Alphabet.is(ca.head)) None + else { + val fc = ca.head.toLower - if (fc < 97 || fc > 122) None - else - Some( - transcode( - ca.tail, - fc, // Pass first letter. - Array(fc) // Pass array with first letter. - ).padTo(4, '0') - ) + Some(transcode(ca.tail, fc, Array(fc)).padTo(4, '0')) + } } } diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala index a111df4..e405688 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala @@ -13,7 +13,8 @@ object SoundexMetric extends StringMetric with FilterableStringMetric { val ca1 = stringFilter.filter(charArray1) lazy val ca2 = stringFilter.filter(charArray2) - if (ca1.length == 0 || ca2.length == 0) None + if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None + else if (ca1.head.toLower != ca2.head.toLower) Some(false) else { val se1 = SoundexAlgorithm.compute(ca1) lazy val se2 = SoundexAlgorithm.compute(ca2) |