summary refs log tree commit diff
diff options
context:
space:
mode:
author Rocky Madden <git@rockymadden.com> 2012-11-19 10:39:08 -0700
committer Rocky Madden <git@rockymadden.com> 2012-11-19 10:39:08 -0700
commit c99fc5ce6e40772af5c5bc592166fa56fc45c3bb (patch)
tree e75d70c226366b80449783ac9064c2805519f96e
parent dfe88eb56a95a0bc88ece493a3d6590f07f07590 (diff)
download stringmetric-c99fc5ce6e40772af5c5bc592166fa56fc45c3bb.tar.gz
stringmetric-c99fc5ce6e40772af5c5bc592166fa56fc45c3bb.tar.bz2
stringmetric-c99fc5ce6e40772af5c5bc592166fa56fc45c3bb.zip
Performance enhancements.
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala 17
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala 5
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala 2
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala 6
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala 9
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala 5
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala 9
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala 15
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala 3
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala 15
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala 3
11 files changed, 52 insertions, 37 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala
index 15aa071..172c7b5 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala
@@ -4,7 +4,20 @@ object Alphabet {
final val SometimesVowels: Set[Char] = Set('a', 'e', 'i', 'o', 'u', 'y')
final val Vowels: Set[Char] = Set('a', 'e', 'i', 'o', 'u')
- def isSometimesVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c =='u' || c == 'y')
+ def is(char: Char) = ((char >= 65 && char <= 90) || (char >= 97 && char <= 122))
- def isVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c =='u')
+ def isSometimesVowel(char: Char): Boolean = (char == 'y' || char == 'Y' || isVowel(char))
+
+ def isVowel(char: Char): Boolean = (
+ char == 'a' || char == 'e' || char == 'i' || char == 'o' || char =='u'
+ || char == 'A' || char == 'E' || char == 'I' || char == 'O' || char =='U'
+ )
+
+ def startsWith(charArray: Array[Char]): Boolean =
+ if (charArray.length == 0) false
+ else is(charArray.head)
+
+ def startsWith(string: String): Boolean =
+ if (string.length == 0) false
+ else is(string.charAt(0))
} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala
index cc836a0..e69cb22 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala
@@ -13,10 +13,9 @@ object MetaphoneAlgorithm extends StringAlgorithm with FilterableStringAlgorithm
if (ca.length == 0) None
else {
- val th = deduplicate(transcodeHead(ca.map(_.toLower)))
-
- if (th.head < 97 || th.head > 122) None
+ if (!Alphabet.is(ca.head)) None
else {
+ val th = deduplicate(transcodeHead(ca.map(_.toLower)))
val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char])
if (t.length == 0) None else Some(t) // Single Y or W would have 0 length.
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala
index 7cfc23a..6652e36 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala
@@ -13,7 +13,7 @@ object MetaphoneMetric extends StringMetric with FilterableStringMetric {
val ca1 = stringFilter.filter(charArray1)
lazy val ca2 = stringFilter.filter(charArray2)
- if (ca1.length == 0 || ca2.length == 0) None
+ if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
else {
val mp1 = MetaphoneAlgorithm.compute(ca1)
lazy val mp2 = MetaphoneAlgorithm.compute(ca2)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala
index eaf3872..e009fee 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala
@@ -13,11 +13,9 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
if (ca.length == 0) None
else {
- val cal = ca.map(_.toLower)
-
- if (cal.head < 97 || cal.head > 122) None
+ if (!Alphabet.is(ca.head)) None
else {
- val tr = transcodeRight(cal)
+ val tr = transcodeRight(ca.map(_.toLower))
val tl = transcodeLeft(tr._1)
val t =
if (tl._2.length == 0) tl._1 ++ tr._2
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala
index a7c84e0..46f53d1 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala
@@ -12,8 +12,15 @@ object NysiisMetric extends StringMetric with FilterableStringMetric {
val ca1 = stringFilter.filter(charArray1)
lazy val ca2 = stringFilter.filter(charArray2)
+ val unequal = (c1: Char, c2: Char) => {
+ val c1l = c1.toLower
+ val c2l = c2.toLower
- if (ca1.length == 0 || ca2.length == 0) None
+ (if (c1l == 'k') 'c' else c1l) != (if (c2l == 'k') 'c' else c2l)
+ }
+
+ if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
+ else if (unequal(ca1.head, ca2.head)) Some(false)
else {
val ny1 = NysiisAlgorithm.compute(ca1)
lazy val ny2 = NysiisAlgorithm.compute(ca2)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
index 98cf043..6a48991 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
@@ -13,10 +13,9 @@ object RefinedNysiisAlgorithm extends StringAlgorithm with FilterableStringAlgor
if (ca.length == 0) None
else {
- val cal = ca.map(_.toLower)
-
- if (cal.head < 97 || cal.head > 122) None
+ if (!Alphabet.is(ca.head)) None
else {
+ val cal = ca.map(_.toLower)
val thl = transcodeLast(transcodeHead(cal.head +: cleanLast(cal.tail, Set('s', 'z'))))
val t = transcode(Array.empty[Char], thl.head, thl.tail, Array.empty[Char])
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala
index 732386d..10d7bc0 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala
@@ -12,8 +12,15 @@ object RefinedNysiisMetric extends StringMetric with FilterableStringMetric {
val ca1 = stringFilter.filter(charArray1)
lazy val ca2 = stringFilter.filter(charArray2)
+ val unequal = (c1: Char, c2: Char) => {
+ val c1l = c1.toLower
+ val c2l = c2.toLower
- if (ca1.length == 0 || ca2.length == 0) None
+ (if (c1l == 'k') 'c' else c1l) != (if (c2l == 'k') 'c' else c2l)
+ }
+
+ if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
+ else if (unequal(ca1.head, ca2.head)) Some(false)
else {
val rny1 = RefinedNysiisAlgorithm.compute(ca1)
lazy val rny2 = RefinedNysiisAlgorithm.compute(ca2)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
index 7f2191a..a0ea389 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
@@ -13,17 +13,12 @@ object RefinedSoundexAlgorithm extends StringAlgorithm with FilterableStringAlgo
if (ca.length == 0) None
else {
- val fc = ca.head.toLower
+ if (!Alphabet.is(ca.head)) None
+ else {
+ val fc = ca.head.toLower
- if (fc < 97 || fc > 122) None
- else
- Some(
- transcode(
- ca,
- fc, // Pass first letter.
- Array(fc) // Pass array with first letter.
- )
- )
+ Some(transcode(ca, fc, Array(fc)))
+ }
}
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
index 326b654..778c9d2 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
@@ -13,7 +13,8 @@ object RefinedSoundexMetric extends StringMetric with FilterableStringMetric {
val ca1 = stringFilter.filter(charArray1)
lazy val ca2 = stringFilter.filter(charArray2)
- if (ca1.length == 0 || ca2.length == 0) None
+ if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
+ else if (ca1.head.toLower != ca2.head.toLower) Some(false)
else {
val rse1 = RefinedSoundexAlgorithm.compute(ca1)
lazy val rse2 = RefinedSoundexAlgorithm.compute(ca2)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala
index 648edc7..c5d099f 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala
@@ -13,17 +13,12 @@ object SoundexAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
if (ca.length == 0) None
else {
- val fc = ca.head.toLower
+ if (!Alphabet.is(ca.head)) None
+ else {
+ val fc = ca.head.toLower
- if (fc < 97 || fc > 122) None
- else
- Some(
- transcode(
- ca.tail,
- fc, // Pass first letter.
- Array(fc) // Pass array with first letter.
- ).padTo(4, '0')
- )
+ Some(transcode(ca.tail, fc, Array(fc)).padTo(4, '0'))
+ }
}
}
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
index a111df4..e405688 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
@@ -13,7 +13,8 @@ object SoundexMetric extends StringMetric with FilterableStringMetric {
val ca1 = stringFilter.filter(charArray1)
lazy val ca2 = stringFilter.filter(charArray2)
- if (ca1.length == 0 || ca2.length == 0) None
+ if (ca1.length == 0 || !Alphabet.is(ca1.head) || ca2.length == 0 || !Alphabet.is(ca2.head)) None
+ else if (ca1.head.toLower != ca2.head.toLower) Some(false)
else {
val se1 = SoundexAlgorithm.compute(ca1)
lazy val se2 = SoundexAlgorithm.compute(ca2)