summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorRocky Madden <git@rockymadden.com>2012-11-18 19:11:46 -0700
committerRocky Madden <git@rockymadden.com>2012-11-18 19:11:46 -0700
commita40947a8554332e97d05db8850ab1b6cd0540d9a (patch)
tree5ffbb750da26c894906bcf3ea72fb8469f0d2aff /core
parent7f67b1e0831b475932b1a590c78603914d865691 (diff)
downloadstringmetric-a40947a8554332e97d05db8850ab1b6cd0540d9a.tar.gz
stringmetric-a40947a8554332e97d05db8850ab1b6cd0540d9a.tar.bz2
stringmetric-a40947a8554332e97d05db8850ab1b6cd0540d9a.zip
Created constant object. Fixed bug where terminal might not be applied in certain circumstances.
Diffstat (limited to 'core')
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala10
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala6
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala64
3 files changed, 42 insertions, 38 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala
new file mode 100755
index 0000000..15aa071
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Alphabet.scala
@@ -0,0 +1,10 @@
+package org.hashtree.stringmetric.phonetic
+
+object Alphabet {
+ final val SometimesVowels: Set[Char] = Set('a', 'e', 'i', 'o', 'u', 'y')
+ final val Vowels: Set[Char] = Set('a', 'e', 'i', 'o', 'u')
+
+ def isSometimesVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c =='u' || c == 'y')
+
+ def isVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c =='u')
+} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala
index 4f1adda..cc836a0 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala
@@ -35,8 +35,6 @@ object MetaphoneAlgorithm extends StringAlgorithm with FilterableStringAlgorithm
else
ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
- private[this] def isVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
-
@tailrec
private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
if (c == '\0' && r.length == 0) o
@@ -92,7 +90,7 @@ object MetaphoneAlgorithm extends StringAlgorithm with FilterableStringAlgorithm
else
shift(1, o :+ 'k')
case 'h' =>
- if ((l.length >= 1 && isVowel(l.last) && (r.length == 0 || !isVowel(r.head)))
+ if ((l.length >= 1 && Alphabet.isVowel(l.last) && (r.length == 0 || !Alphabet.isVowel(r.head)))
|| (l.length >= 2 && l.last == 'h'
&& (l(l.length - 2) == 'c' || l(l.length - 2) == 's' || l(l.length - 2) == 'p'
|| l(l.length - 2) == 't' || l(l.length - 2) == 'g'
@@ -122,7 +120,7 @@ object MetaphoneAlgorithm extends StringAlgorithm with FilterableStringAlgorithm
else
shift(1, o :+ 't')
case 'v' => shift(1, o :+ 'f')
- case 'w' | 'y' => if (r.length == 0 || !isVowel(r.head)) shift(1, o) else shift(1, o :+ c)
+ case 'w' | 'y' => if (r.length == 0 || !Alphabet.isVowel(r.head)) shift(1, o) else shift(1, o :+ c)
case 'x' => shift(1, (o :+ 'k') :+ 's')
case 'z' => shift(1, o :+ 's')
case _ => shift(1, o)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala
index 7d18b45..eb2b2f5 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala
@@ -20,15 +20,12 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
val tr = transcodeRight(cal)
val tl = transcodeLeft(tr._1)
val t =
- tl._2.length match {
- case 0 => tl._1 ++ tr._2
- case 1 =>
- tl._1 ++ transcodeCenter(Array.empty[Char], tl._2.head, Array.empty[Char], Array.empty[Char]) ++ tr._2
- case _ =>
- tl._1 ++ transcodeCenter(Array.empty[Char], tl._2.head, tl._2.tail, Array.empty[Char]) ++ tr._2
- }
-
- Some(t.head +: deduplicate(cleanRight(t.tail)))
+ if (tl._2.length == 0) tl._1 ++ tr._2
+ else
+ tl._1 ++ transcodeCenter(Array.empty[Char], tl._2.head, if (tl._2.length > 1) tl._2.tail else Array.empty[Char], Array.empty[Char]) ++ tr._2
+
+ if (t.length == 1) Some(t)
+ else Some(t.head +: deduplicate(cleanTerminal(cleanLast(t.tail))))
}
}
}
@@ -39,24 +36,20 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
case None => None
}
- private[this] def cleanRight(ca: Array[Char]) =
+ private[this] def cleanLast(ca: Array[Char]) =
if (ca.length == 0) ca
- else
- ca.last match {
- // All vowels will be encoded as 'a' at the point this is called. All 'z' will be encoded as 's' too.
- case 'a' | 's' =>
- ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length)
- case 'y' if (ca.length >= 2 && ca(ca.length - 2) == 'a') => ca.dropRight(2) :+ 'y'
- case _ => ca
- }
+ else if(ca.last == 'a' || ca.last == 's') ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length)
+ else ca
+
+ private[this] def cleanTerminal(ca: Array[Char]) =
+ if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
+ else ca
private[this] def deduplicate(ca: Array[Char]) =
if (ca.length <= 1) ca
else
ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
- private[this] def isVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
-
@tailrec
private[this] def transcodeCenter(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
if (c == '\0' && r.length == 0) o
@@ -71,7 +64,6 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
ca
)
}
-
val t = {
c match {
case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a')
@@ -80,17 +72,17 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f'))
else shift(1, o :+ 'a')
case 'h' =>
- if (l.length >= 1 && (!isVowel(l.last) || (r.length >= 1 && !isVowel(r.head)))) shift(1, o)
+ if (l.length >= 1 && (!Alphabet.isVowel(l.last) || (r.length >= 1 && !Alphabet.isVowel(r.head)))) shift(1, o)
else shift(1, o :+ c)
case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c')
case 'm' => shift(1, o :+ 'n')
- case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ 'p')
+ case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c)
case 'q' => shift(1, o :+ 'g')
case 's' =>
- if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ 's')
+ if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c)
else shift(1, o :+ c)
case 'w' =>
- if (l.length >= 1 && isVowel(l.last)) shift(1, o)
+ if (l.length >= 1 && Alphabet.isVowel(l.last)) shift(1, o)
else shift(1, o :+ c)
case 'z' => shift(1, o :+ 's')
case _ => shift(1, o)
@@ -103,27 +95,31 @@ object NysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
private[this] def transcodeLeft(ca: Array[Char]) = {
if (ca.length == 0) (Array.empty[Char], ca)
- else
+ else {
+ lazy val takeRight2 = ca.takeRight(ca.length - 2)
+ lazy val takeRight3 = ca.takeRight(ca.length - 3)
+
ca.head match {
- case 'k' if (ca.length >= 2 && ca(1) == 'n') => (Array('n', 'n'), ca.takeRight(ca.length - 2))
+ case 'k' if (ca.length >= 2 && ca(1) == 'n') => (Array('n', 'n'), takeRight2)
case 'k' => (Array('c'), ca.tail)
- case 'm' if (ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c')) => (Array('m', 'c'), ca.takeRight(ca.length - 3))
- case 'p' if (ca.length >= 2 && (ca(1) == 'h' || ca(1) == 'f')) => (Array('f', 'f'), ca.takeRight(ca.length - 2))
- case 's' if (ca.length >= 3 && (ca(1) == 'c' && ca(2) == 'h')) => (Array('s', 's'), ca.takeRight(ca.length - 3))
+ case 'm' if (ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c')) => (Array('m', 'c'), takeRight3)
+ case 'p' if (ca.length >= 2 && (ca(1) == 'h' || ca(1) == 'f')) => (Array('f', 'f'), takeRight2)
+ case 's' if (ca.length >= 3 && (ca(1) == 'c' && ca(2) == 'h')) => (Array('s', 's'), takeRight3)
case _ => (Array(ca.head), ca.tail)
}
+ }
}
private[this] def transcodeRight(ca: Array[Char]) = {
if (ca.length >= 2) {
val l = ca(ca.length - 1)
val lm1 = ca(ca.length - 2)
- lazy val take = ca.take(ca.length - 2)
+ lazy val take2 = ca.take(ca.length - 2)
l match {
- case 'd' if (lm1 == 'n' || lm1 == 'r') => (take, Array('d'))
- case 'e' if (lm1 == 'e' || lm1 == 'i') => (take, Array('y'))
- case 't' if (lm1 == 'd' || lm1 == 'n' || lm1 == 'r') => (take, Array('d'))
+ case 'd' if (lm1 == 'n' || lm1 == 'r') => (take2, Array('d'))
+ case 'e' if (lm1 == 'e' || lm1 == 'i') => (take2, Array('y'))
+ case 't' if (lm1 == 'd' || lm1 == 'n' || lm1 == 'r') => (take2, Array('d'))
case _ => (ca, Array.empty[Char])
}
} else (ca, Array.empty[Char])