From 9e8964fbe63e412f4327543126be9105e1fc45a9 Mon Sep 17 00:00:00 2001 From: Rocky Madden Date: Mon, 29 Oct 2012 17:19:43 -0600 Subject: Appended algorithm to names to help differentiate between metrics and phonetic algorithms. --- .../hashtree/stringmetric/phonetic/Metaphone.scala | 156 --------------- .../stringmetric/phonetic/MetaphoneAlgorithm.scala | 156 +++++++++++++++ .../stringmetric/phonetic/MetaphoneMetric.scala | 4 +- .../hashtree/stringmetric/phonetic/Nysiis.scala | 131 ------------- .../stringmetric/phonetic/NysiisAlgorithm.scala | 131 +++++++++++++ .../stringmetric/phonetic/NysiisMetric.scala | 4 +- .../stringmetric/phonetic/RefinedSoundex.scala | 84 -------- .../phonetic/RefinedSoundexAlgorithm.scala | 84 ++++++++ .../phonetic/RefinedSoundexMetric.scala | 4 +- .../hashtree/stringmetric/phonetic/Soundex.scala | 79 -------- .../stringmetric/phonetic/SoundexAlgorithm.scala | 79 ++++++++ .../stringmetric/phonetic/SoundexMetric.scala | 4 +- .../phonetic/MetaphoneAlgorithmSpec.scala | 213 +++++++++++++++++++++ .../stringmetric/phonetic/MetaphoneSpec.scala | 213 --------------------- .../phonetic/NysiisAlgorithmSpec.scala | 163 ++++++++++++++++ .../stringmetric/phonetic/NysiisSpec.scala | 163 ---------------- .../phonetic/RefinedSoundexAlgorithmSpec.scala | 54 ++++++ .../stringmetric/phonetic/RefinedSoundexSpec.scala | 54 ------ .../phonetic/SoundexAlgorithmSpec.scala | 54 ++++++ .../stringmetric/phonetic/SoundexSpec.scala | 54 ------ 20 files changed, 942 insertions(+), 942 deletions(-) delete mode 100755 core/source/core/scala/org/hashtree/stringmetric/phonetic/Metaphone.scala create mode 100755 core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala delete mode 100755 core/source/core/scala/org/hashtree/stringmetric/phonetic/Nysiis.scala create mode 100755 core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala delete mode 100755 
core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundex.scala create mode 100755 core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala delete mode 100755 core/source/core/scala/org/hashtree/stringmetric/phonetic/Soundex.scala create mode 100755 core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala create mode 100755 core/source/test/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala delete mode 100755 core/source/test/scala/org/hashtree/stringmetric/phonetic/MetaphoneSpec.scala create mode 100755 core/source/test/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithmSpec.scala delete mode 100755 core/source/test/scala/org/hashtree/stringmetric/phonetic/NysiisSpec.scala create mode 100755 core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala delete mode 100755 core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexSpec.scala create mode 100755 core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithmSpec.scala delete mode 100755 core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala (limited to 'core') diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Metaphone.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Metaphone.scala deleted file mode 100755 index 557829f..0000000 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Metaphone.scala +++ /dev/null @@ -1,156 +0,0 @@ -package org.hashtree.stringmetric.phonetic - -import org.hashtree.stringmetric.{ StringAlgorithm, StringFilter, StringFilterDelegate } -import scala.annotation.tailrec - -/** An implementation of the Metaphone [[org.hashtree.stringmetric.StringAlgorithm]]. 
*/ -object Metaphone extends StringAlgorithm { - override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { - val ca = stringFilter.filter(charArray) - - if (ca.length == 0) None - else { - val th = deduplicate(transcodeHead(ca.map(_.toLower))) - - if (th.head < 97 || th.head > 122) None - else { - val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char]) - - if (t.length == 0) None else Some(t) // Single Y or W would have 0 length. - } - } - } - - override def compute(string: String)(implicit stringFilter: StringFilter): Option[String] = - compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate) match { - case Some(mp) => Some(mp.mkString) - case None => None - } - - private[this] def deduplicate(ca: Array[Char]) = - if (ca.length <= 1) ca - else - ca.sliding(2).filter(a => a(0) == 'c' || a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last - - private[this] def isVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u') - - @tailrec - private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { - if (c == '\0' && r.length == 0) o - else { - val shift = (d: Int, ca: Array[Char]) => { - val sa = r.splitAt(d - 1) - - ( - if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c, - if (sa._2.length > 0) sa._2.head else '\0', - if (sa._2.length > 1) sa._2.tail else Array.empty[Char], - ca - ) - } - - val t = { - c match { - case 'a' | 'e' | 'i' | 'o' | 'u' => if (l.length == 0) shift(1, o:+ c) else shift(1, o) - case 'f' | 'j' | 'l' | 'm' | 'n' | 'r' => shift(1, o :+ c) - case 'b' => if (l.length >= 1 && l.last == 'm' && r.length == 0) shift(1, o) else shift(1, o :+ 'b') - case 'c' => { - if (r.length >= 1 && r.head == 'h' && l.length >= 1 && l.last == 's') - shift(1, o :+ 'k') - else if (r.length >= 2 && r.head == 'i' && r(1) == 'a') - shift(3, o :+ 'x') - else if ( - (r.length >= 1 && r.head == 'h') || - (l.length >= 1 && r.length 
>= 1 && l.last == 's' && r.head == 'h') - ) - shift(2, o :+ 'x') - else if (l.length >= 1 && r.length >= 1 && l.last == 's' && ( - r.head == 'i' || r.head == 'e' || r.head == 'y' - ) - ) - shift(1, o) - else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) - shift(1, o :+ 's') - else - shift(1, o :+ 'k') - } - case 'd' => { - if (r.length >= 2 && r.head == 'g' && ( - r(1) == 'e' || r(1) == 'y' || r(1) == 'i' - ) - ) - shift(1, o :+ 'j') - else - shift(1, o :+ 't') - } - case 'g' => { - if ((r.length > 1 && r.head == 'h') || - (r.length == 1 && r.head == 'n') || - (r.length == 3 && r.head == 'n' && r(1) == 'e' && r(2) == 'd') - ) - shift(1, o) - else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) - shift(2, o :+ 'j') - else - shift(1, o :+ 'k') - } - case 'h' => { - if ((l.length >= 1 && isVowel(l.last) && (r.length == 0 || !isVowel(r.head))) || - (l.length >= 2 && l.last == 'h' && ( - l(l.length - 2) == 'c' || l(l.length - 2) == 's' || l(l.length - 2) == 'p' || - l(l.length - 2) == 't' || l(l.length - 2) == 'g' - ) - ) - ) - shift(1, o) - else - shift(1, o :+ 'h') - } - case 'k' => if (l.length >= 1 && l.last == 'c') shift(1, o) else shift(1, o :+ 'k') - case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ 'p') - case 'q' => shift(1, o :+ 'k') - case 's' => { - if (r.length >= 2 && r.head == 'i' && (r(1) == 'o' || r(1) == 'a')) - shift(3, o :+ 'x') - else if (r.length >= 1 && r.head == 'h') - shift(2, o :+ 'x') - else - shift(1, o :+ 's') - } - case 't' => { - if (r.length >= 2 && r.head == 'i' && (r(1) == 'a' || r(1) == 'o')) - shift(3, o :+ 'x') - else if (r.length >= 1 && r.head == 'h') - shift(2, o :+ '0') - else if (r.length >= 2 && r.head == 'c' && r(1) == 'h') - shift(1, o) - else - shift(1, o :+ 't') - } - case 'v' => shift(1, o :+ 'f') - case 'w' | 'y' => if (r.length == 0 || !isVowel(r.head)) shift(1, o) else shift(1, o :+ c) - case 'x' => shift(1, (o :+ 'k') :+ 's') - case 
'z' => shift(1, o :+ 's') - case _ => shift(1, o) - } - } - - transcode(t._1, t._2, t._3, t._4) - } - } - - private[this] def transcodeHead(ca: Array[Char]) = { - val h = ca.take(2).padTo(2, '\0') - - if ((h.head == 'a' && h.last == 'e') || - (h.last == 'n' && (h.head == 'g' || h.head == 'k' || h.head == 'p')) || - (h.head == 'w' && h.last == 'r') - ) - ca.tail - else if (h.head == 'w' && h.last == 'h') - 'w' +: ca.drop(2) - else if (h.head == 'x') - 's' +: ca.tail - else ca - } -} \ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala new file mode 100755 index 0000000..b82a8c7 --- /dev/null +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithm.scala @@ -0,0 +1,156 @@ +package org.hashtree.stringmetric.phonetic + +import org.hashtree.stringmetric.{ StringAlgorithm, StringFilter, StringFilterDelegate } +import scala.annotation.tailrec + +/** An implementation of the Metaphone [[org.hashtree.stringmetric.StringAlgorithm]]. */ +object MetaphoneAlgorithm extends StringAlgorithm { + override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { + val ca = stringFilter.filter(charArray) + + if (ca.length == 0) None + else { + val th = deduplicate(transcodeHead(ca.map(_.toLower))) + + if (th.head < 97 || th.head > 122) None + else { + val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char]) + + if (t.length == 0) None else Some(t) // Single Y or W would have 0 length. 
+ } + } + } + + override def compute(string: String)(implicit stringFilter: StringFilter): Option[String] = + compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate) match { + case Some(mp) => Some(mp.mkString) + case None => None + } + + private[this] def deduplicate(ca: Array[Char]) = + if (ca.length <= 1) ca + else + ca.sliding(2).filter(a => a(0) == 'c' || a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last + + private[this] def isVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u') + + @tailrec + private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { + if (c == '\0' && r.length == 0) o + else { + val shift = (d: Int, ca: Array[Char]) => { + val sa = r.splitAt(d - 1) + + ( + if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c, + if (sa._2.length > 0) sa._2.head else '\0', + if (sa._2.length > 1) sa._2.tail else Array.empty[Char], + ca + ) + } + + val t = { + c match { + case 'a' | 'e' | 'i' | 'o' | 'u' => if (l.length == 0) shift(1, o:+ c) else shift(1, o) + case 'f' | 'j' | 'l' | 'm' | 'n' | 'r' => shift(1, o :+ c) + case 'b' => if (l.length >= 1 && l.last == 'm' && r.length == 0) shift(1, o) else shift(1, o :+ 'b') + case 'c' => { + if (r.length >= 1 && r.head == 'h' && l.length >= 1 && l.last == 's') + shift(1, o :+ 'k') + else if (r.length >= 2 && r.head == 'i' && r(1) == 'a') + shift(3, o :+ 'x') + else if ( + (r.length >= 1 && r.head == 'h') || + (l.length >= 1 && r.length >= 1 && l.last == 's' && r.head == 'h') + ) + shift(2, o :+ 'x') + else if (l.length >= 1 && r.length >= 1 && l.last == 's' && ( + r.head == 'i' || r.head == 'e' || r.head == 'y' + ) + ) + shift(1, o) + else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) + shift(1, o :+ 's') + else + shift(1, o :+ 'k') + } + case 'd' => { + if (r.length >= 2 && r.head == 'g' && ( + r(1) == 'e' || r(1) == 'y' || r(1) == 'i' + ) + ) + shift(1, o :+ 'j') + else + shift(1, o :+ 't') + } + case 
'g' => { + if ((r.length > 1 && r.head == 'h') || + (r.length == 1 && r.head == 'n') || + (r.length == 3 && r.head == 'n' && r(1) == 'e' && r(2) == 'd') + ) + shift(1, o) + else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) + shift(2, o :+ 'j') + else + shift(1, o :+ 'k') + } + case 'h' => { + if ((l.length >= 1 && isVowel(l.last) && (r.length == 0 || !isVowel(r.head))) || + (l.length >= 2 && l.last == 'h' && ( + l(l.length - 2) == 'c' || l(l.length - 2) == 's' || l(l.length - 2) == 'p' || + l(l.length - 2) == 't' || l(l.length - 2) == 'g' + ) + ) + ) + shift(1, o) + else + shift(1, o :+ 'h') + } + case 'k' => if (l.length >= 1 && l.last == 'c') shift(1, o) else shift(1, o :+ 'k') + case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ 'p') + case 'q' => shift(1, o :+ 'k') + case 's' => { + if (r.length >= 2 && r.head == 'i' && (r(1) == 'o' || r(1) == 'a')) + shift(3, o :+ 'x') + else if (r.length >= 1 && r.head == 'h') + shift(2, o :+ 'x') + else + shift(1, o :+ 's') + } + case 't' => { + if (r.length >= 2 && r.head == 'i' && (r(1) == 'a' || r(1) == 'o')) + shift(3, o :+ 'x') + else if (r.length >= 1 && r.head == 'h') + shift(2, o :+ '0') + else if (r.length >= 2 && r.head == 'c' && r(1) == 'h') + shift(1, o) + else + shift(1, o :+ 't') + } + case 'v' => shift(1, o :+ 'f') + case 'w' | 'y' => if (r.length == 0 || !isVowel(r.head)) shift(1, o) else shift(1, o :+ c) + case 'x' => shift(1, (o :+ 'k') :+ 's') + case 'z' => shift(1, o :+ 's') + case _ => shift(1, o) + } + } + + transcode(t._1, t._2, t._3, t._4) + } + } + + private[this] def transcodeHead(ca: Array[Char]) = { + val h = ca.take(2).padTo(2, '\0') + + if ((h.head == 'a' && h.last == 'e') || + (h.last == 'n' && (h.head == 'g' || h.head == 'k' || h.head == 'p')) || + (h.head == 'w' && h.last == 'r') + ) + ca.tail + else if (h.head == 'w' && h.last == 'h') + 'w' +: ca.drop(2) + else if (h.head == 'x') + 's' +: ca.tail + else ca + } +} \ No newline at 
end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala index 73f48af..6bdc130 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala @@ -10,8 +10,8 @@ object MetaphoneMetric extends StringMetric { if (ca1.length == 0 || ca2.length == 0) None else { - val mp1 = Metaphone.compute(ca1) - val mp2 = Metaphone.compute(ca2) + val mp1 = MetaphoneAlgorithm.compute(ca1) + val mp2 = MetaphoneAlgorithm.compute(ca2) if (!mp1.isDefined || !mp2.isDefined || (mp1.get.length == 0 && mp2.get.length == 0)) None else Some(mp1.get.sameElements(mp2.get)) diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Nysiis.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Nysiis.scala deleted file mode 100755 index 6718fb2..0000000 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Nysiis.scala +++ /dev/null @@ -1,131 +0,0 @@ -package org.hashtree.stringmetric.phonetic - -import org.hashtree.stringmetric.{ StringAlgorithm, StringFilter, StringFilterDelegate } -import scala.annotation.tailrec - -/** An implementation of the NYSIIS [[org.hashtree.stringmetric.StringAlgorithm]]. 
*/ -object Nysiis extends StringAlgorithm { - override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { - val ca = stringFilter.filter(charArray) - - if (ca.length == 0) None - else { - val thl = transcodeLast(transcodeHead(ca.map(_.toLower))) - - if (thl.head < 97 || thl.head > 122) None - else { - if (thl.length == 1) Some(thl) - else { - val ts = thl.splitAt(1) - val t = transcode(ts._1, ts._2.head, ts._2.tail, ts._1) - - Some(t.head +: deduplicate(transcodeClean(t.tail))) - } - } - } - } - - override def compute(string: String)(implicit stringFilter: StringFilter): Option[String] = - compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate) match { - case Some(se) => Some(se.mkString) - case None => None - } - - private[this] def deduplicate(ca: Array[Char]) = - if (ca.length <= 1) ca - else - ca.sliding(2).filter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last - - private[this] def isVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u') - - @tailrec - private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { - if (c == '\0' && r.length == 0) o - else { - val shift = (d: Int, ca: Array[Char]) => { - val sa = r.splitAt(d - 1) - - ( - if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c, - if (sa._2.length > 0) sa._2.head else '\0', - if (sa._2.length > 1) sa._2.tail else Array.empty[Char], - ca - ) - } - - val t = { - c match { - case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a') - case 'b' | 'c' | 'd' | 'f' | 'g' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' | 'y' => shift(1, o :+ c) - case 'e' => { - if (r.length >= 1 && r.head == 'v') - shift(2, o ++ Array('a', 'f')) - else - shift(1, o :+ 'a') - } - case 'h' => { - if (l.length >= 1 && (!isVowel(l.last) || (r.length >= 1 && !isVowel(r.head)))) - shift(1, o :+ l.last) - else - shift(1, o :+ c) - } - case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else 
shift(1, o :+ 'c') - case 'm' => shift(1, o :+ 'n') - case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o ++ Array('f', 'f')) else shift(1, o :+ 'p') - case 'q' => shift(1, o :+ 'g') - case 's' => { - if (r.length >= 2 && r.head == 'c' && r(1) == 'h') - shift(3, o ++ Array('s', 's', 's')) - else - shift(1, o :+ c) - } - case 'w' => { - if (l.length >= 1 && !isVowel(l.last)) - shift(1, o :+ l.last) - else - shift(1, o :+ c) - } - case 'z' => shift(1, o :+ 's') - case _ => shift(1, o) - } - } - - transcode(t._1, t._2, t._3, t._4) - } - } - - private[this] def transcodeClean(ca: Array[Char]) = - if (ca.length >= 1 && (ca.last == 'a' || ca.last == 's')) - ca.reverse.dropWhile(c => c == 'a' || c == 's').reverse - else if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') - ca.dropRight(2) :+ 'y' - else ca - - private[this] def transcodeHead(ca: Array[Char]) = { - val h = ca.take(3).padTo(3, '\0') - - if (h.head == 'm' && h(1) == 'a' && h.last == 'c') - Array('m', 'c', 'c') ++ ca.takeRight(ca.length - 3) - else if (h.head == 's' && h(1) == 'c' && h.last == 'h') - Array('s', 's', 's') ++ ca.takeRight(ca.length - 3) - else if (h.head == 'p' && (h(1) == 'h' || h(1) == 'f')) - Array('f', 'f') ++ ca.takeRight(ca.length - 2) - else if (h.head == 'k' && h(1) == 'n') - Array('n', 'n') ++ ca.takeRight(ca.length - 2) - else if (h.head == 'k') - Array('c') ++ ca.takeRight(ca.length - 1) - else ca - } - - private[this] def transcodeLast(ca: Array[Char]) = { - val h = ca.take(2).padTo(2, '\0') - - if ((h.last == 't' && (h.head == 'd' || h.head == 'r' || h.head == 'n')) || - (h.last == 'd' && (h.head == 'r' || h.head == 'n')) - ) - Array('d') ++ ca.takeRight(ca.length - 2) - else if (h.last == 'e' && (h.head == 'i' || h.head == 'e')) - Array('y') ++ ca.takeRight(ca.length - 2) - else ca - } -} \ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala 
b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala new file mode 100755 index 0000000..67b9552 --- /dev/null +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithm.scala @@ -0,0 +1,131 @@ +package org.hashtree.stringmetric.phonetic + +import org.hashtree.stringmetric.{ StringAlgorithm, StringFilter, StringFilterDelegate } +import scala.annotation.tailrec + +/** An implementation of the NYSIIS [[org.hashtree.stringmetric.StringAlgorithm]]. */ +object NysiisAlgorithm extends StringAlgorithm { + override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { + val ca = stringFilter.filter(charArray) + + if (ca.length == 0) None + else { + val thl = transcodeLast(transcodeHead(ca.map(_.toLower))) + + if (thl.head < 97 || thl.head > 122) None + else { + if (thl.length == 1) Some(thl) + else { + val ts = thl.splitAt(1) + val t = transcode(ts._1, ts._2.head, ts._2.tail, ts._1) + + Some(t.head +: deduplicate(transcodeClean(t.tail))) + } + } + } + } + + override def compute(string: String)(implicit stringFilter: StringFilter): Option[String] = + compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate) match { + case Some(se) => Some(se.mkString) + case None => None + } + + private[this] def deduplicate(ca: Array[Char]) = + if (ca.length <= 1) ca + else + ca.sliding(2).filter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last + + private[this] def isVowel(c: Char) = (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u') + + @tailrec + private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { + if (c == '\0' && r.length == 0) o + else { + val shift = (d: Int, ca: Array[Char]) => { + val sa = r.splitAt(d - 1) + + ( + if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c, + if (sa._2.length > 0) sa._2.head else '\0', + if (sa._2.length > 1) sa._2.tail else Array.empty[Char], + ca + ) + } + + val t = { + c 
match { + case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a') + case 'b' | 'c' | 'd' | 'f' | 'g' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' | 'y' => shift(1, o :+ c) + case 'e' => { + if (r.length >= 1 && r.head == 'v') + shift(2, o ++ Array('a', 'f')) + else + shift(1, o :+ 'a') + } + case 'h' => { + if (l.length >= 1 && (!isVowel(l.last) || (r.length >= 1 && !isVowel(r.head)))) + shift(1, o :+ l.last) + else + shift(1, o :+ c) + } + case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c') + case 'm' => shift(1, o :+ 'n') + case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o ++ Array('f', 'f')) else shift(1, o :+ 'p') + case 'q' => shift(1, o :+ 'g') + case 's' => { + if (r.length >= 2 && r.head == 'c' && r(1) == 'h') + shift(3, o ++ Array('s', 's', 's')) + else + shift(1, o :+ c) + } + case 'w' => { + if (l.length >= 1 && !isVowel(l.last)) + shift(1, o :+ l.last) + else + shift(1, o :+ c) + } + case 'z' => shift(1, o :+ 's') + case _ => shift(1, o) + } + } + + transcode(t._1, t._2, t._3, t._4) + } + } + + private[this] def transcodeClean(ca: Array[Char]) = + if (ca.length >= 1 && (ca.last == 'a' || ca.last == 's')) + ca.reverse.dropWhile(c => c == 'a' || c == 's').reverse + else if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') + ca.dropRight(2) :+ 'y' + else ca + + private[this] def transcodeHead(ca: Array[Char]) = { + val h = ca.take(3).padTo(3, '\0') + + if (h.head == 'm' && h(1) == 'a' && h.last == 'c') + Array('m', 'c', 'c') ++ ca.takeRight(ca.length - 3) + else if (h.head == 's' && h(1) == 'c' && h.last == 'h') + Array('s', 's', 's') ++ ca.takeRight(ca.length - 3) + else if (h.head == 'p' && (h(1) == 'h' || h(1) == 'f')) + Array('f', 'f') ++ ca.takeRight(ca.length - 2) + else if (h.head == 'k' && h(1) == 'n') + Array('n', 'n') ++ ca.takeRight(ca.length - 2) + else if (h.head == 'k') + Array('c') ++ ca.takeRight(ca.length - 1) + else ca + } + + private[this] def transcodeLast(ca: Array[Char]) = { + val h = 
ca.take(2).padTo(2, '\0') + + if ((h.last == 't' && (h.head == 'd' || h.head == 'r' || h.head == 'n')) || + (h.last == 'd' && (h.head == 'r' || h.head == 'n')) + ) + Array('d') ++ ca.takeRight(ca.length - 2) + else if (h.last == 'e' && (h.head == 'i' || h.head == 'e')) + Array('y') ++ ca.takeRight(ca.length - 2) + else ca + } +} \ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala index 6d4cec1..e8f48d2 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala @@ -10,8 +10,8 @@ object NysiisMetric extends StringMetric { if (ca1.length == 0 || ca2.length == 0) None else { - val ny1 = Nysiis.compute(ca1) - val ny2 = Nysiis.compute(ca2) + val ny1 = NysiisAlgorithm.compute(ca1) + val ny2 = NysiisAlgorithm.compute(ca2) if (!ny1.isDefined || !ny2.isDefined || (ny1.get.length == 0 && ny2.get.length == 0)) None else Some(ny1.get.sameElements(ny2.get)) diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundex.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundex.scala deleted file mode 100755 index 8443a5f..0000000 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundex.scala +++ /dev/null @@ -1,84 +0,0 @@ -package org.hashtree.stringmetric.phonetic - -import org.hashtree.stringmetric.{ StringAlgorithm, StringFilter, StringFilterDelegate } -import scala.annotation.tailrec - -/** An implementation of the refined Soundex [[org.hashtree.stringmetric.StringAlgorithm]]. 
*/ -object RefinedSoundex extends StringAlgorithm { - override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { - val ca = stringFilter.filter(charArray) - - if (ca.length == 0) None - else { - val fc = ca.head.toLower - - if (fc < 97 || fc > 122) None - else { - Some( - transcode( - ca, - fc, // Pass first letter. - Array(fc) // Pass array with first letter. - ) - ) - } - } - } - - override def compute(string: String)(implicit stringFilter: StringFilter): Option[String] = - compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate) match { - case Some(se) => Some(se.mkString) - case None => None - } - - @tailrec - private[this] def transcode(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = { - require(o.length > 0) - - if (i.length == 0) o - else { - val c = i.head.toLower - val m2 = (mc: Char) => mc match { - case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' => '0' - case 'b' | 'p' => '1' - case 'f' | 'v' => '2' - case 'c' | 'k' | 's' => '3' - case 'g' | 'j' => '4' - case 'q' | 'x' | 'z' => '5' - case 'd' | 't' => '6' - case 'l' => '7' - case 'm' | 'n' => '8' - case 'r' => '9' - case _ => '\0' - } - val m1 = (mc: Char, pc: Char) => mc match { - case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' if pc != '0' => '0' - case 'b' | 'p' if pc != '1' => '1' - case 'f' | 'v' if pc != '2' => '2' - case 'c' | 'k' | 's' if pc != '3' => '3' - case 'g' | 'j' if pc != '4' => '4' - case 'q' | 'x' | 'z' if pc != '5' => '5' - case 'd' | 't' if pc != '6' => '6' - case 'l' if pc != '7' => '7' - case 'm' | 'n' if pc != '8' => '8' - case 'r' if pc != '9' => '9' - case _ => '\0' - } - val a = - // Code twice. - if (o.length == 1) - m2(c) - // Code once. 
- else - m1( - c, - o.last match { - case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => o.last - case _ => m2(o.last) - } - ) - - transcode(i.tail, c, if (a != '\0') o :+ a else o) - } - } -} \ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala new file mode 100755 index 0000000..df03712 --- /dev/null +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithm.scala @@ -0,0 +1,84 @@ +package org.hashtree.stringmetric.phonetic + +import org.hashtree.stringmetric.{ StringAlgorithm, StringFilter, StringFilterDelegate } +import scala.annotation.tailrec + +/** An implementation of the refined Soundex [[org.hashtree.stringmetric.StringAlgorithm]]. */ +object RefinedSoundexAlgorithm extends StringAlgorithm { + override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { + val ca = stringFilter.filter(charArray) + + if (ca.length == 0) None + else { + val fc = ca.head.toLower + + if (fc < 97 || fc > 122) None + else { + Some( + transcode( + ca, + fc, // Pass first letter. + Array(fc) // Pass array with first letter. 
+ ) + ) + } + } + } + + override def compute(string: String)(implicit stringFilter: StringFilter): Option[String] = + compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate) match { + case Some(se) => Some(se.mkString) + case None => None + } + + @tailrec + private[this] def transcode(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = { + require(o.length > 0) + + if (i.length == 0) o + else { + val c = i.head.toLower + val m2 = (mc: Char) => mc match { + case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' => '0' + case 'b' | 'p' => '1' + case 'f' | 'v' => '2' + case 'c' | 'k' | 's' => '3' + case 'g' | 'j' => '4' + case 'q' | 'x' | 'z' => '5' + case 'd' | 't' => '6' + case 'l' => '7' + case 'm' | 'n' => '8' + case 'r' => '9' + case _ => '\0' + } + val m1 = (mc: Char, pc: Char) => mc match { + case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' if pc != '0' => '0' + case 'b' | 'p' if pc != '1' => '1' + case 'f' | 'v' if pc != '2' => '2' + case 'c' | 'k' | 's' if pc != '3' => '3' + case 'g' | 'j' if pc != '4' => '4' + case 'q' | 'x' | 'z' if pc != '5' => '5' + case 'd' | 't' if pc != '6' => '6' + case 'l' if pc != '7' => '7' + case 'm' | 'n' if pc != '8' => '8' + case 'r' if pc != '9' => '9' + case _ => '\0' + } + val a = + // Code twice. + if (o.length == 1) + m2(c) + // Code once. 
+ else + m1( + c, + o.last match { + case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => o.last + case _ => m2(o.last) + } + ) + + transcode(i.tail, c, if (a != '\0') o :+ a else o) + } + } +} \ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala index 0c233b0..7c5f012 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala @@ -10,8 +10,8 @@ object RefinedSoundexMetric extends StringMetric { if (ca1.length == 0 || ca2.length == 0) None else { - val rse1 = RefinedSoundex.compute(ca1) - val rse2 = RefinedSoundex.compute(ca2) + val rse1 = RefinedSoundexAlgorithm.compute(ca1) + val rse2 = RefinedSoundexAlgorithm.compute(ca2) if (!rse1.isDefined || !rse2.isDefined || (rse1.get.length == 0 && rse2.get.length == 0)) None else Some(rse1.get.sameElements(rse2.get)) diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Soundex.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Soundex.scala deleted file mode 100755 index 8d1a783..0000000 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Soundex.scala +++ /dev/null @@ -1,79 +0,0 @@ -package org.hashtree.stringmetric.phonetic - -import org.hashtree.stringmetric.{ StringAlgorithm, StringFilter, StringFilterDelegate } -import scala.annotation.tailrec - -/** An implementation of the Soundex [[org.hashtree.stringmetric.StringAlgorithm]]. 
*/ -object Soundex extends StringAlgorithm { - override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { - val ca = stringFilter.filter(charArray) - - if (ca.length == 0) None - else { - val fc = ca.head.toLower - - if (fc < 97 || fc > 122) None - else { - Some( - transcode( - ca.tail, - fc, // Pass first letter. - Array(fc) // Pass array with first letter. - ).padTo(4, '0') - ) - } - } - } - - override def compute(string: String)(implicit stringFilter: StringFilter): Option[String] = - compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate) match { - case Some(se) => Some(se.mkString) - case None => None - } - - @tailrec - private[this] def transcode(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = { - require(o.length > 0) - - if (i.length == 0) o - else { - val c = i.head.toLower - val m2 = (mc: Char) => mc match { - case 'b' | 'f' | 'p' | 'v' => '1' - case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2' - case 'd' | 't' => '3' - case 'l' => '4' - case 'm' | 'n' => '5' - case 'r' => '6' - case _ => '\0' - } - val m1 = (mc: Char, pc: Char) => mc match { - case 'b' | 'f' | 'p' | 'v' if pc != '1' => '1' - case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' if pc != '2' => '2' - case 'd' | 't' if pc != '3' => '3' - case 'l' if pc != '4' => '4' - case 'm' | 'n' if pc != '5' => '5' - case 'r' if pc != '6' => '6' - case _ => '\0' - } - val a = p match { - // Code twice. - case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c) - // Code once. 
- case _ => { - m1( - c, - o.last match { - case '1' | '2' | '3' | '4' | '5' | '6' => o.last - case _ => m2(o.last) - } - ) - } - } - - if (o.length == 3 && a != '\0') o :+ a - else - transcode(i.tail, c, if (a != '\0') o :+ a else o) - } - } -} \ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala new file mode 100755 index 0000000..579e677 --- /dev/null +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithm.scala @@ -0,0 +1,79 @@ +package org.hashtree.stringmetric.phonetic + +import org.hashtree.stringmetric.{ StringAlgorithm, StringFilter, StringFilterDelegate } +import scala.annotation.tailrec + +/** An implementation of the Soundex [[org.hashtree.stringmetric.StringAlgorithm]]. */ +object SoundexAlgorithm extends StringAlgorithm { + override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = { + val ca = stringFilter.filter(charArray) + + if (ca.length == 0) None + else { + val fc = ca.head.toLower + + if (fc < 97 || fc > 122) None + else { + Some( + transcode( + ca.tail, + fc, // Pass first letter. + Array(fc) // Pass array with first letter. 
+ ).padTo(4, '0') + ) + } + } + } + + override def compute(string: String)(implicit stringFilter: StringFilter): Option[String] = + compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate) match { + case Some(se) => Some(se.mkString) + case None => None + } + + @tailrec + private[this] def transcode(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = { + require(o.length > 0) + + if (i.length == 0) o + else { + val c = i.head.toLower + val m2 = (mc: Char) => mc match { + case 'b' | 'f' | 'p' | 'v' => '1' + case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2' + case 'd' | 't' => '3' + case 'l' => '4' + case 'm' | 'n' => '5' + case 'r' => '6' + case _ => '\0' + } + val m1 = (mc: Char, pc: Char) => mc match { + case 'b' | 'f' | 'p' | 'v' if pc != '1' => '1' + case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' if pc != '2' => '2' + case 'd' | 't' if pc != '3' => '3' + case 'l' if pc != '4' => '4' + case 'm' | 'n' if pc != '5' => '5' + case 'r' if pc != '6' => '6' + case _ => '\0' + } + val a = p match { + // Code twice. + case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c) + // Code once. 
+ case _ => { + m1( + c, + o.last match { + case '1' | '2' | '3' | '4' | '5' | '6' => o.last + case _ => m2(o.last) + } + ) + } + } + + if (o.length == 3 && a != '\0') o :+ a + else + transcode(i.tail, c, if (a != '\0') o :+ a else o) + } + } +} \ No newline at end of file diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala index 439cf14..7cd9939 100755 --- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala +++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala @@ -10,8 +10,8 @@ object SoundexMetric extends StringMetric { if (ca1.length == 0 || ca2.length == 0) None else { - val se1 = Soundex.compute(ca1) - val se2 = Soundex.compute(ca2) + val se1 = SoundexAlgorithm.compute(ca1) + val se2 = SoundexAlgorithm.compute(ca2) if (!se1.isDefined || !se2.isDefined || (se1.get.length == 0 && se2.get.length == 0)) None else Some(se1.get.sameElements(se2.get)) diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala new file mode 100755 index 0000000..1df8ff3 --- /dev/null +++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala @@ -0,0 +1,213 @@ +package org.hashtree.stringmetric.phonetic + +import org.hashtree.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class MetaphoneAlgorithmSpec extends ScalaTest { + "MetaphoneAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + MetaphoneAlgorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + MetaphoneAlgorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should 
returns { + "Some" in { + // z + MetaphoneAlgorithm.compute("z").get should equal ("s") + MetaphoneAlgorithm.compute("zz").get should equal ("s") + + // y + MetaphoneAlgorithm.compute("y").isDefined should be (false) + MetaphoneAlgorithm.compute("zy").get should equal ("s") + MetaphoneAlgorithm.compute("zyz").get should equal ("ss") + MetaphoneAlgorithm.compute("zya").get should equal ("sy") + + // x + MetaphoneAlgorithm.compute("x").get should equal ("s") + MetaphoneAlgorithm.compute("zx").get should equal ("sks") + MetaphoneAlgorithm.compute("zxz").get should equal ("skss") + + // w + MetaphoneAlgorithm.compute("w").isDefined should be (false) + MetaphoneAlgorithm.compute("zw").get should equal ("s") + MetaphoneAlgorithm.compute("zwz").get should equal ("ss") + MetaphoneAlgorithm.compute("zwa").get should equal ("sw") + + // v + MetaphoneAlgorithm.compute("v").get should equal ("f") + MetaphoneAlgorithm.compute("zv").get should equal ("sf") + MetaphoneAlgorithm.compute("zvz").get should equal ("sfs") + + // u + MetaphoneAlgorithm.compute("u").get should equal ("u") + MetaphoneAlgorithm.compute("zu").get should equal ("s") + + // t + MetaphoneAlgorithm.compute("t").get should equal ("t") + MetaphoneAlgorithm.compute("ztiaz").get should equal ("sxs") + MetaphoneAlgorithm.compute("ztioz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zthz").get should equal ("s0s") + MetaphoneAlgorithm.compute("ztchz").get should equal ("sxs") + MetaphoneAlgorithm.compute("ztz").get should equal ("sts") + + // s + MetaphoneAlgorithm.compute("s").get should equal ("s") + MetaphoneAlgorithm.compute("zshz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zsioz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zsiaz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zs").get should equal ("ss") + MetaphoneAlgorithm.compute("zsz").get should equal ("sss") + + // r + MetaphoneAlgorithm.compute("r").get should equal ("r") + MetaphoneAlgorithm.compute("zr").get 
should equal ("sr") + MetaphoneAlgorithm.compute("zrz").get should equal ("srs") + + // q + MetaphoneAlgorithm.compute("q").get should equal ("k") + MetaphoneAlgorithm.compute("zq").get should equal ("sk") + MetaphoneAlgorithm.compute("zqz").get should equal ("sks") + + // p + MetaphoneAlgorithm.compute("p").get should equal ("p") + MetaphoneAlgorithm.compute("zp").get should equal ("sp") + MetaphoneAlgorithm.compute("zph").get should equal ("sf") + MetaphoneAlgorithm.compute("zpz").get should equal ("sps") + + // o + MetaphoneAlgorithm.compute("o").get should equal ("o") + MetaphoneAlgorithm.compute("zo").get should equal ("s") + + // n + MetaphoneAlgorithm.compute("n").get should equal ("n") + MetaphoneAlgorithm.compute("zn").get should equal ("sn") + MetaphoneAlgorithm.compute("znz").get should equal ("sns") + + // m + MetaphoneAlgorithm.compute("m").get should equal ("m") + MetaphoneAlgorithm.compute("zm").get should equal ("sm") + MetaphoneAlgorithm.compute("zmz").get should equal ("sms") + + // l + MetaphoneAlgorithm.compute("l").get should equal ("l") + MetaphoneAlgorithm.compute("zl").get should equal ("sl") + MetaphoneAlgorithm.compute("zlz").get should equal ("sls") + + // k + MetaphoneAlgorithm.compute("k").get should equal ("k") + MetaphoneAlgorithm.compute("zk").get should equal ("sk") + MetaphoneAlgorithm.compute("zck").get should equal ("sk") + + // j + MetaphoneAlgorithm.compute("j").get should equal ("j") + MetaphoneAlgorithm.compute("zj").get should equal ("sj") + MetaphoneAlgorithm.compute("zjz").get should equal ("sjs") + + // i + MetaphoneAlgorithm.compute("i").get should equal ("i") + MetaphoneAlgorithm.compute("zi").get should equal ("s") + + // h + MetaphoneAlgorithm.compute("h").get should equal ("h") // php wrongly says nothing + MetaphoneAlgorithm.compute("zh").get should equal ("sh") // php wrongly says s + MetaphoneAlgorithm.compute("zah").get should equal ("s") + MetaphoneAlgorithm.compute("zchh").get should equal ("sx") + 
MetaphoneAlgorithm.compute("ha").get should equal ("h") + + // g + MetaphoneAlgorithm.compute("g").get should equal ("k") + MetaphoneAlgorithm.compute("zg").get should equal ("sk") + MetaphoneAlgorithm.compute("zgh").get should equal ("skh") // php wrongly says sf + MetaphoneAlgorithm.compute("zghz").get should equal ("shs") // php wrongly says sfs + MetaphoneAlgorithm.compute("zgha").get should equal ("sh") // php wrongly says sf others wrongly say skh + MetaphoneAlgorithm.compute("zgn").get should equal ("sn") + MetaphoneAlgorithm.compute("zgns").get should equal ("skns") + MetaphoneAlgorithm.compute("zgned").get should equal ("snt") // others wrongly says sknt + MetaphoneAlgorithm.compute("zgneds").get should equal ("sknts") // php wrongly says snts + MetaphoneAlgorithm.compute("zgi").get should equal ("sj") + MetaphoneAlgorithm.compute("zgiz").get should equal ("sjs") + MetaphoneAlgorithm.compute("zge").get should equal ("sj") + MetaphoneAlgorithm.compute("zgez").get should equal ("sjs") + MetaphoneAlgorithm.compute("zgy").get should equal ("sj") + MetaphoneAlgorithm.compute("zgyz").get should equal ("sjs") + MetaphoneAlgorithm.compute("zgz").get should equal ("sks") + + // f + MetaphoneAlgorithm.compute("f").get should equal ("f") + MetaphoneAlgorithm.compute("zf").get should equal ("sf") + MetaphoneAlgorithm.compute("zfz").get should equal ("sfs") + + // e + MetaphoneAlgorithm.compute("e").get should equal ("e") + MetaphoneAlgorithm.compute("ze").get should equal ("s") + + // d + MetaphoneAlgorithm.compute("d").get should equal ("t") + MetaphoneAlgorithm.compute("fudge").get should equal ("fjj") // php wrongly says fj + MetaphoneAlgorithm.compute("dodgy").get should equal ("tjj") // php wrongly says tj others wrongly say tjjy + MetaphoneAlgorithm.compute("dodgi").get should equal ("tjj") // php wrongly says tj + MetaphoneAlgorithm.compute("zd").get should equal ("st") + MetaphoneAlgorithm.compute("zdz").get should equal ("sts") + + // c + 
MetaphoneAlgorithm.compute("c").get should equal ("k") + MetaphoneAlgorithm.compute("zcia").get should equal ("sx") + MetaphoneAlgorithm.compute("zciaz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zch").get should equal ("sx") + MetaphoneAlgorithm.compute("zchz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zci").get should equal ("ss") + MetaphoneAlgorithm.compute("zciz").get should equal ("sss") + MetaphoneAlgorithm.compute("zce").get should equal ("ss") + MetaphoneAlgorithm.compute("zcez").get should equal ("sss") + MetaphoneAlgorithm.compute("zcy").get should equal ("ss") + MetaphoneAlgorithm.compute("zcyz").get should equal ("sss") + MetaphoneAlgorithm.compute("zsci").get should equal ("ss") + MetaphoneAlgorithm.compute("zsciz").get should equal ("sss") + MetaphoneAlgorithm.compute("zsce").get should equal ("ss") + MetaphoneAlgorithm.compute("zscez").get should equal ("sss") + MetaphoneAlgorithm.compute("zscy").get should equal ("ss") + MetaphoneAlgorithm.compute("zscyz").get should equal ("sss") + MetaphoneAlgorithm.compute("zsch").get should equal ("sskh") // php wrongly says ssx + MetaphoneAlgorithm.compute("zc").get should equal ("sk") + MetaphoneAlgorithm.compute("zcz").get should equal ("sks") + + // b + MetaphoneAlgorithm.compute("b").get should equal ("b") + MetaphoneAlgorithm.compute("zb").get should equal ("sb") + MetaphoneAlgorithm.compute("zbz").get should equal ("sbs") + MetaphoneAlgorithm.compute("zmb").get should equal ("sm") + + // a + MetaphoneAlgorithm.compute("a").get should equal ("a") + MetaphoneAlgorithm.compute("za").get should equal ("s") + + // Miscellaneous. 
+ MetaphoneAlgorithm.compute("dumb").get should equal ("tm") + MetaphoneAlgorithm.compute("smith").get should equal ("sm0") + MetaphoneAlgorithm.compute("school").get should equal ("skhl") // php wrongly says sxl + MetaphoneAlgorithm.compute("merci").get should equal ("mrs") + MetaphoneAlgorithm.compute("cool").get should equal ("kl") + MetaphoneAlgorithm.compute("aebersold").get should equal ("ebrslt") + MetaphoneAlgorithm.compute("gnagy").get should equal ("nj") + MetaphoneAlgorithm.compute("knuth").get should equal ("n0") + MetaphoneAlgorithm.compute("pniewski").get should equal ("nsk") + MetaphoneAlgorithm.compute("wright").get should equal ("rht") // php wrongly says rft + MetaphoneAlgorithm.compute("phone").get should equal ("fn") + MetaphoneAlgorithm.compute("aggregate").get should equal ("akrkt") + MetaphoneAlgorithm.compute("accuracy").get should equal ("akkrs") + MetaphoneAlgorithm.compute("encyclopedia").get should equal ("ensklpt") + MetaphoneAlgorithm.compute("honorificabilitudinitatibus").get should equal ("hnrfkblttnttbs") + MetaphoneAlgorithm.compute("antidisestablishmentarianism").get should equal ("anttsstblxmntrnsm") + } + } + } + } +} \ No newline at end of file diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/MetaphoneSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/MetaphoneSpec.scala deleted file mode 100755 index 2cfcc36..0000000 --- a/core/source/test/scala/org/hashtree/stringmetric/phonetic/MetaphoneSpec.scala +++ /dev/null @@ -1,213 +0,0 @@ -package org.hashtree.stringmetric.phonetic - -import org.hashtree.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class MetaphoneSpec extends ScalaTest { - "Metaphone" should provide { - "compute method" when passed { - "empty argument" should returns { - "None" in { - Metaphone.compute("").isDefined should be (false) - } - } - "non-phonetic argument" should returns { - 
"None" in { - Metaphone.compute("123").isDefined should be (false) - } - } - "phonetic argument" should returns { - "Some" in { - // z - Metaphone.compute("z").get should equal ("s") - Metaphone.compute("zz").get should equal ("s") - - // y - Metaphone.compute("y").isDefined should be (false) - Metaphone.compute("zy").get should equal ("s") - Metaphone.compute("zyz").get should equal ("ss") - Metaphone.compute("zya").get should equal ("sy") - - // x - Metaphone.compute("x").get should equal ("s") - Metaphone.compute("zx").get should equal ("sks") - Metaphone.compute("zxz").get should equal ("skss") - - // w - Metaphone.compute("w").isDefined should be (false) - Metaphone.compute("zw").get should equal ("s") - Metaphone.compute("zwz").get should equal ("ss") - Metaphone.compute("zwa").get should equal ("sw") - - // v - Metaphone.compute("v").get should equal ("f") - Metaphone.compute("zv").get should equal ("sf") - Metaphone.compute("zvz").get should equal ("sfs") - - // u - Metaphone.compute("u").get should equal ("u") - Metaphone.compute("zu").get should equal ("s") - - // t - Metaphone.compute("t").get should equal ("t") - Metaphone.compute("ztiaz").get should equal ("sxs") - Metaphone.compute("ztioz").get should equal ("sxs") - Metaphone.compute("zthz").get should equal ("s0s") - Metaphone.compute("ztchz").get should equal ("sxs") - Metaphone.compute("ztz").get should equal ("sts") - - // s - Metaphone.compute("s").get should equal ("s") - Metaphone.compute("zshz").get should equal ("sxs") - Metaphone.compute("zsioz").get should equal ("sxs") - Metaphone.compute("zsiaz").get should equal ("sxs") - Metaphone.compute("zs").get should equal ("ss") - Metaphone.compute("zsz").get should equal ("sss") - - // r - Metaphone.compute("r").get should equal ("r") - Metaphone.compute("zr").get should equal ("sr") - Metaphone.compute("zrz").get should equal ("srs") - - // q - Metaphone.compute("q").get should equal ("k") - Metaphone.compute("zq").get should equal ("sk") - 
Metaphone.compute("zqz").get should equal ("sks") - - // p - Metaphone.compute("p").get should equal ("p") - Metaphone.compute("zp").get should equal ("sp") - Metaphone.compute("zph").get should equal ("sf") - Metaphone.compute("zpz").get should equal ("sps") - - // o - Metaphone.compute("o").get should equal ("o") - Metaphone.compute("zo").get should equal ("s") - - // n - Metaphone.compute("n").get should equal ("n") - Metaphone.compute("zn").get should equal ("sn") - Metaphone.compute("znz").get should equal ("sns") - - // m - Metaphone.compute("m").get should equal ("m") - Metaphone.compute("zm").get should equal ("sm") - Metaphone.compute("zmz").get should equal ("sms") - - // l - Metaphone.compute("l").get should equal ("l") - Metaphone.compute("zl").get should equal ("sl") - Metaphone.compute("zlz").get should equal ("sls") - - // k - Metaphone.compute("k").get should equal ("k") - Metaphone.compute("zk").get should equal ("sk") - Metaphone.compute("zck").get should equal ("sk") - - // j - Metaphone.compute("j").get should equal ("j") - Metaphone.compute("zj").get should equal ("sj") - Metaphone.compute("zjz").get should equal ("sjs") - - // i - Metaphone.compute("i").get should equal ("i") - Metaphone.compute("zi").get should equal ("s") - - // h - Metaphone.compute("h").get should equal ("h") // php wrongly says nothing - Metaphone.compute("zh").get should equal ("sh") // php wrongly says s - Metaphone.compute("zah").get should equal ("s") - Metaphone.compute("zchh").get should equal ("sx") - Metaphone.compute("ha").get should equal ("h") - - // g - Metaphone.compute("g").get should equal ("k") - Metaphone.compute("zg").get should equal ("sk") - Metaphone.compute("zgh").get should equal ("skh") // php wrongly says sf - Metaphone.compute("zghz").get should equal ("shs") // php wrongly says sfs - Metaphone.compute("zgha").get should equal ("sh") // php wrongly says sf others wrongly say skh - Metaphone.compute("zgn").get should equal ("sn") - 
Metaphone.compute("zgns").get should equal ("skns") - Metaphone.compute("zgned").get should equal ("snt") // others wrongly says sknt - Metaphone.compute("zgneds").get should equal ("sknts") // php wrongly says snts - Metaphone.compute("zgi").get should equal ("sj") - Metaphone.compute("zgiz").get should equal ("sjs") - Metaphone.compute("zge").get should equal ("sj") - Metaphone.compute("zgez").get should equal ("sjs") - Metaphone.compute("zgy").get should equal ("sj") - Metaphone.compute("zgyz").get should equal ("sjs") - Metaphone.compute("zgz").get should equal ("sks") - - // f - Metaphone.compute("f").get should equal ("f") - Metaphone.compute("zf").get should equal ("sf") - Metaphone.compute("zfz").get should equal ("sfs") - - // e - Metaphone.compute("e").get should equal ("e") - Metaphone.compute("ze").get should equal ("s") - - // d - Metaphone.compute("d").get should equal ("t") - Metaphone.compute("fudge").get should equal ("fjj") // php wrongly says fj - Metaphone.compute("dodgy").get should equal ("tjj") // php wrongly says tj others wrongly say tjjy - Metaphone.compute("dodgi").get should equal ("tjj") // php wrongly says tj - Metaphone.compute("zd").get should equal ("st") - Metaphone.compute("zdz").get should equal ("sts") - - // c - Metaphone.compute("c").get should equal ("k") - Metaphone.compute("zcia").get should equal ("sx") - Metaphone.compute("zciaz").get should equal ("sxs") - Metaphone.compute("zch").get should equal ("sx") - Metaphone.compute("zchz").get should equal ("sxs") - Metaphone.compute("zci").get should equal ("ss") - Metaphone.compute("zciz").get should equal ("sss") - Metaphone.compute("zce").get should equal ("ss") - Metaphone.compute("zcez").get should equal ("sss") - Metaphone.compute("zcy").get should equal ("ss") - Metaphone.compute("zcyz").get should equal ("sss") - Metaphone.compute("zsci").get should equal ("ss") - Metaphone.compute("zsciz").get should equal ("sss") - Metaphone.compute("zsce").get should equal ("ss") - 
Metaphone.compute("zscez").get should equal ("sss") - Metaphone.compute("zscy").get should equal ("ss") - Metaphone.compute("zscyz").get should equal ("sss") - Metaphone.compute("zsch").get should equal ("sskh") // php wrongly says ssx - Metaphone.compute("zc").get should equal ("sk") - Metaphone.compute("zcz").get should equal ("sks") - - // b - Metaphone.compute("b").get should equal ("b") - Metaphone.compute("zb").get should equal ("sb") - Metaphone.compute("zbz").get should equal ("sbs") - Metaphone.compute("zmb").get should equal ("sm") - - // a - Metaphone.compute("a").get should equal ("a") - Metaphone.compute("za").get should equal ("s") - - // Miscellaneous. - Metaphone.compute("dumb").get should equal ("tm") - Metaphone.compute("smith").get should equal ("sm0") - Metaphone.compute("school").get should equal ("skhl") // php wrongly says sxl - Metaphone.compute("merci").get should equal ("mrs") - Metaphone.compute("cool").get should equal ("kl") - Metaphone.compute("aebersold").get should equal ("ebrslt") - Metaphone.compute("gnagy").get should equal ("nj") - Metaphone.compute("knuth").get should equal ("n0") - Metaphone.compute("pniewski").get should equal ("nsk") - Metaphone.compute("wright").get should equal ("rht") // php wrongly says rft - Metaphone.compute("phone").get should equal ("fn") - Metaphone.compute("aggregate").get should equal ("akrkt") - Metaphone.compute("accuracy").get should equal ("akkrs") - Metaphone.compute("encyclopedia").get should equal ("ensklpt") - Metaphone.compute("honorificabilitudinitatibus").get should equal ("hnrfkblttnttbs") - Metaphone.compute("antidisestablishmentarianism").get should equal ("anttsstblxmntrnsm") - } - } - } - } -} \ No newline at end of file diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithmSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithmSpec.scala new file mode 100755 index 0000000..d1a209f --- /dev/null +++ 
b/core/source/test/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithmSpec.scala @@ -0,0 +1,163 @@ +package org.hashtree.stringmetric.phonetic + +import org.hashtree.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class NysiisAlgorithmSpec extends ScalaTest { + "NysiisAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + NysiisAlgorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + NysiisAlgorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // a + NysiisAlgorithm.compute("a").get should equal ("a") + NysiisAlgorithm.compute("aa").get should equal ("a") + + // b + NysiisAlgorithm.compute("b").get should equal ("b") + NysiisAlgorithm.compute("bb").get should equal ("bb") + + // c + NysiisAlgorithm.compute("c").get should equal ("c") + NysiisAlgorithm.compute("cc").get should equal ("cc") + + // d + NysiisAlgorithm.compute("d").get should equal ("d") + NysiisAlgorithm.compute("dd").get should equal ("dd") + + // e + NysiisAlgorithm.compute("e").get should equal ("e") + NysiisAlgorithm.compute("ee").get should equal ("y") + + // f + NysiisAlgorithm.compute("f").get should equal ("f") + NysiisAlgorithm.compute("ff").get should equal ("ff") + + // g + NysiisAlgorithm.compute("g").get should equal ("g") + NysiisAlgorithm.compute("gg").get should equal ("gg") + + // h + NysiisAlgorithm.compute("h").get should equal ("h") + NysiisAlgorithm.compute("hh").get should equal ("hh") + + // i + NysiisAlgorithm.compute("i").get should equal ("i") + NysiisAlgorithm.compute("ii").get should equal ("i") + + // j + NysiisAlgorithm.compute("j").get should equal ("j") + NysiisAlgorithm.compute("jj").get should equal ("jj") + + // k + NysiisAlgorithm.compute("k").get should equal ("c") + 
NysiisAlgorithm.compute("kk").get should equal ("cc") + + // l + NysiisAlgorithm.compute("l").get should equal ("l") + NysiisAlgorithm.compute("ll").get should equal ("ll") + + // m + NysiisAlgorithm.compute("m").get should equal ("m") + NysiisAlgorithm.compute("mm").get should equal ("mn") + + // n + NysiisAlgorithm.compute("n").get should equal ("n") + NysiisAlgorithm.compute("nn").get should equal ("nn") + + // o + NysiisAlgorithm.compute("o").get should equal ("o") + NysiisAlgorithm.compute("oo").get should equal ("o") + + // p + NysiisAlgorithm.compute("p").get should equal ("p") + NysiisAlgorithm.compute("pp").get should equal ("pp") + + // q + NysiisAlgorithm.compute("q").get should equal ("q") + NysiisAlgorithm.compute("qq").get should equal ("qg") + + // r + NysiisAlgorithm.compute("r").get should equal ("r") + NysiisAlgorithm.compute("rr").get should equal ("rr") + + // s + NysiisAlgorithm.compute("s").get should equal ("s") + NysiisAlgorithm.compute("ss").get should equal ("s") + + // t + NysiisAlgorithm.compute("t").get should equal ("t") + NysiisAlgorithm.compute("tt").get should equal ("tt") + + // u + NysiisAlgorithm.compute("u").get should equal ("u") + NysiisAlgorithm.compute("uu").get should equal ("u") + + // v + NysiisAlgorithm.compute("v").get should equal ("v") + NysiisAlgorithm.compute("vv").get should equal ("vv") + + // w + NysiisAlgorithm.compute("w").get should equal ("w") + NysiisAlgorithm.compute("ww").get should equal ("ww") + + // x + NysiisAlgorithm.compute("x").get should equal ("x") + NysiisAlgorithm.compute("xx").get should equal ("xx") + + // y + NysiisAlgorithm.compute("y").get should equal ("y") + NysiisAlgorithm.compute("yy").get should equal ("yy") + + // z + NysiisAlgorithm.compute("z").get should equal ("z") + NysiisAlgorithm.compute("zz").get should equal ("z") + + // Head cases. 
+ NysiisAlgorithm.compute("mac").get should equal ("mc") + NysiisAlgorithm.compute("kn").get should equal ("nn") + NysiisAlgorithm.compute("k").get should equal ("c") + NysiisAlgorithm.compute("ph").get should equal ("ff") + NysiisAlgorithm.compute("pf").get should equal ("ff") + NysiisAlgorithm.compute("sch").get should equal ("s") // dropby wrongly says ss + + // Last cases. + NysiisAlgorithm.compute("ee").get should equal ("y") + NysiisAlgorithm.compute("ie").get should equal ("y") + NysiisAlgorithm.compute("dt").get should equal ("d") + NysiisAlgorithm.compute("rt").get should equal ("d") + NysiisAlgorithm.compute("rd").get should equal ("d") + NysiisAlgorithm.compute("nt").get should equal ("d") + NysiisAlgorithm.compute("nd").get should equal ("d") + + // Core cases. + NysiisAlgorithm.compute("eev").get should equal ("yv") //dropby wrongly says eaf + NysiisAlgorithm.compute("zev").get should equal ("zaf") + NysiisAlgorithm.compute("kkn").get should equal ("cn") + NysiisAlgorithm.compute("sschn").get should equal ("ssn") + NysiisAlgorithm.compute("pph").get should equal ("pf") + + // Miscellaneous. 
+ NysiisAlgorithm.compute("macdonald").get should equal ("mcdanald") + NysiisAlgorithm.compute("phone").get should equal ("ffan") + NysiisAlgorithm.compute("aggregate").get should equal ("agragat") + NysiisAlgorithm.compute("accuracy").get should equal ("acaracy") + NysiisAlgorithm.compute("encyclopedia").get should equal ("encyclapad") + NysiisAlgorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab") + NysiisAlgorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn") + } + } + } + } +} \ No newline at end of file diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/NysiisSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/NysiisSpec.scala deleted file mode 100755 index 7186238..0000000 --- a/core/source/test/scala/org/hashtree/stringmetric/phonetic/NysiisSpec.scala +++ /dev/null @@ -1,163 +0,0 @@ -package org.hashtree.stringmetric.phonetic - -import org.hashtree.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class NysiisSpec extends ScalaTest { - "Nysiis" should provide { - "compute method" when passed { - "empty argument" should returns { - "None" in { - Nysiis.compute("").isDefined should be (false) - } - } - "non-phonetic argument" should returns { - "None" in { - Nysiis.compute("123").isDefined should be (false) - } - } - "phonetic argument" should returns { - "Some" in { - // a - Nysiis.compute("a").get should equal ("a") - Nysiis.compute("aa").get should equal ("a") - - // b - Nysiis.compute("b").get should equal ("b") - Nysiis.compute("bb").get should equal ("bb") - - // c - Nysiis.compute("c").get should equal ("c") - Nysiis.compute("cc").get should equal ("cc") - - // d - Nysiis.compute("d").get should equal ("d") - Nysiis.compute("dd").get should equal ("dd") - - // e - Nysiis.compute("e").get should equal ("e") - Nysiis.compute("ee").get should equal 
("y") - - // f - Nysiis.compute("f").get should equal ("f") - Nysiis.compute("ff").get should equal ("ff") - - // g - Nysiis.compute("g").get should equal ("g") - Nysiis.compute("gg").get should equal ("gg") - - // h - Nysiis.compute("h").get should equal ("h") - Nysiis.compute("hh").get should equal ("hh") - - // i - Nysiis.compute("i").get should equal ("i") - Nysiis.compute("ii").get should equal ("i") - - // j - Nysiis.compute("j").get should equal ("j") - Nysiis.compute("jj").get should equal ("jj") - - // k - Nysiis.compute("k").get should equal ("c") - Nysiis.compute("kk").get should equal ("cc") - - // l - Nysiis.compute("l").get should equal ("l") - Nysiis.compute("ll").get should equal ("ll") - - // m - Nysiis.compute("m").get should equal ("m") - Nysiis.compute("mm").get should equal ("mn") - - // n - Nysiis.compute("n").get should equal ("n") - Nysiis.compute("nn").get should equal ("nn") - - // o - Nysiis.compute("o").get should equal ("o") - Nysiis.compute("oo").get should equal ("o") - - // p - Nysiis.compute("p").get should equal ("p") - Nysiis.compute("pp").get should equal ("pp") - - // q - Nysiis.compute("q").get should equal ("q") - Nysiis.compute("qq").get should equal ("qg") - - // r - Nysiis.compute("r").get should equal ("r") - Nysiis.compute("rr").get should equal ("rr") - - // s - Nysiis.compute("s").get should equal ("s") - Nysiis.compute("ss").get should equal ("s") - - // t - Nysiis.compute("t").get should equal ("t") - Nysiis.compute("tt").get should equal ("tt") - - // u - Nysiis.compute("u").get should equal ("u") - Nysiis.compute("uu").get should equal ("u") - - // v - Nysiis.compute("v").get should equal ("v") - Nysiis.compute("vv").get should equal ("vv") - - // w - Nysiis.compute("w").get should equal ("w") - Nysiis.compute("ww").get should equal ("ww") - - // x - Nysiis.compute("x").get should equal ("x") - Nysiis.compute("xx").get should equal ("xx") - - // y - Nysiis.compute("y").get should equal ("y") - 
Nysiis.compute("yy").get should equal ("yy") - - // z - Nysiis.compute("z").get should equal ("z") - Nysiis.compute("zz").get should equal ("z") - - // Head cases. - Nysiis.compute("mac").get should equal ("mc") - Nysiis.compute("kn").get should equal ("nn") - Nysiis.compute("k").get should equal ("c") - Nysiis.compute("ph").get should equal ("ff") - Nysiis.compute("pf").get should equal ("ff") - Nysiis.compute("sch").get should equal ("s") // dropby wrongly says ss - - // Last cases. - Nysiis.compute("ee").get should equal ("y") - Nysiis.compute("ie").get should equal ("y") - Nysiis.compute("dt").get should equal ("d") - Nysiis.compute("rt").get should equal ("d") - Nysiis.compute("rd").get should equal ("d") - Nysiis.compute("nt").get should equal ("d") - Nysiis.compute("nd").get should equal ("d") - - // Core cases. - Nysiis.compute("eev").get should equal ("yv") //dropby wrongly says eaf - Nysiis.compute("zev").get should equal ("zaf") - Nysiis.compute("kkn").get should equal ("cn") - Nysiis.compute("sschn").get should equal ("ssn") - Nysiis.compute("pph").get should equal ("pf") - - // Miscellaneous. 
- Nysiis.compute("macdonald").get should equal ("mcdanald") - Nysiis.compute("phone").get should equal ("ffan") - Nysiis.compute("aggregate").get should equal ("agragat") - Nysiis.compute("accuracy").get should equal ("acaracy") - Nysiis.compute("encyclopedia").get should equal ("encyclapad") - Nysiis.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab") - Nysiis.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn") - } - } - } - } -} \ No newline at end of file diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala new file mode 100755 index 0000000..7d7d76b --- /dev/null +++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala @@ -0,0 +1,54 @@ +package org.hashtree.stringmetric.phonetic + +import org.hashtree.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RefinedSoundexAlgorithmSpec extends ScalaTest { + "RefinedSoundexAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + RefinedSoundexAlgorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + RefinedSoundexAlgorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + RefinedSoundexAlgorithm.compute("x123").get should equal ("x5") + + RefinedSoundexAlgorithm.compute("braz").get should equal ("b1905") + RefinedSoundexAlgorithm.compute("broz").get should equal ("b1905") + RefinedSoundexAlgorithm.compute("caren").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("carren").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("coram").get should equal ("c30908") + 
RefinedSoundexAlgorithm.compute("corran").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("curreen").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("curwen").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("hairs").get should equal ("h093") + RefinedSoundexAlgorithm.compute("hark").get should equal ("h093") + RefinedSoundexAlgorithm.compute("hars").get should equal ("h093") + RefinedSoundexAlgorithm.compute("hayers").get should equal ("h093") + RefinedSoundexAlgorithm.compute("heers").get should equal ("h093") + RefinedSoundexAlgorithm.compute("hiers").get should equal ("h093") + RefinedSoundexAlgorithm.compute("lambard").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lambart").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lambert").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lambird").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lampaert").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lampart").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lamport").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("limbert").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lombard").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("nolton").get should equal ("n807608") + RefinedSoundexAlgorithm.compute("noulton").get should equal ("n807608") + } + } + } + } +} \ No newline at end of file diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexSpec.scala deleted file mode 100755 index 1531eb0..0000000 --- a/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexSpec.scala +++ /dev/null @@ -1,54 +0,0 @@ -package org.hashtree.stringmetric.phonetic - -import org.hashtree.stringmetric.ScalaTest -import org.junit.runner.RunWith -import 
org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class RefinedSoundexSpec extends ScalaTest { - "RefinedSoundex" should provide { - "compute method" when passed { - "empty argument" should returns { - "None" in { - RefinedSoundex.compute("").isDefined should be (false) - } - } - "non-phonetic argument" should returns { - "None" in { - RefinedSoundex.compute("123").isDefined should be (false) - } - } - "phonetic argument" should returns { - "Some" in { - RefinedSoundex.compute("x123").get should equal ("x5") - - RefinedSoundex.compute("braz").get should equal ("b1905") - RefinedSoundex.compute("broz").get should equal ("b1905") - RefinedSoundex.compute("caren").get should equal ("c30908") - RefinedSoundex.compute("carren").get should equal ("c30908") - RefinedSoundex.compute("coram").get should equal ("c30908") - RefinedSoundex.compute("corran").get should equal ("c30908") - RefinedSoundex.compute("curreen").get should equal ("c30908") - RefinedSoundex.compute("curwen").get should equal ("c30908") - RefinedSoundex.compute("hairs").get should equal ("h093") - RefinedSoundex.compute("hark").get should equal ("h093") - RefinedSoundex.compute("hars").get should equal ("h093") - RefinedSoundex.compute("hayers").get should equal ("h093") - RefinedSoundex.compute("heers").get should equal ("h093") - RefinedSoundex.compute("hiers").get should equal ("h093") - RefinedSoundex.compute("lambard").get should equal ("l7081096") - RefinedSoundex.compute("lambart").get should equal ("l7081096") - RefinedSoundex.compute("lambert").get should equal ("l7081096") - RefinedSoundex.compute("lambird").get should equal ("l7081096") - RefinedSoundex.compute("lampaert").get should equal ("l7081096") - RefinedSoundex.compute("lampart").get should equal ("l7081096") - RefinedSoundex.compute("lamport").get should equal ("l7081096") - RefinedSoundex.compute("limbert").get should equal ("l7081096") - RefinedSoundex.compute("lombard").get should equal ("l7081096") - 
RefinedSoundex.compute("nolton").get should equal ("n807608") - RefinedSoundex.compute("noulton").get should equal ("n807608") - } - } - } - } -} \ No newline at end of file diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithmSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithmSpec.scala new file mode 100755 index 0000000..8a8c21b --- /dev/null +++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithmSpec.scala @@ -0,0 +1,54 @@ +package org.hashtree.stringmetric.phonetic + +import org.hashtree.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class SoundexAlgorithmSpec extends ScalaTest { + "SoundexAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + SoundexAlgorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + SoundexAlgorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + SoundexAlgorithm.compute("x123").get should equal ("x000") + + SoundexAlgorithm.compute("abc").get should equal ("a120") + SoundexAlgorithm.compute("xyz").get should equal ("x200") + + SoundexAlgorithm.compute("robert").get should equal ("r163") + SoundexAlgorithm.compute("rupert").get should equal ("r163") + SoundexAlgorithm.compute("rubin").get should equal ("r150") + SoundexAlgorithm.compute("ashcraft").get should equal ("a261") + SoundexAlgorithm.compute("tymczak").get should equal ("t522") + SoundexAlgorithm.compute("pfister").get should equal ("p236") + SoundexAlgorithm.compute("euler").get should equal ("e460") + SoundexAlgorithm.compute("gauss").get should equal ("g200") + SoundexAlgorithm.compute("hilbert").get should equal ("h416") + SoundexAlgorithm.compute("knuth").get should equal ("k530") + SoundexAlgorithm.compute("lloyd").get 
should equal ("l300") + SoundexAlgorithm.compute("lukasiewicz").get should equal ("l222") + SoundexAlgorithm.compute("ashcroft").get should equal ("a261") + SoundexAlgorithm.compute("tymczak").get should equal ("t522") + SoundexAlgorithm.compute("pfister").get should equal ("p236") + SoundexAlgorithm.compute("ellery").get should equal ("e460") + SoundexAlgorithm.compute("ghosh").get should equal ("g200") + SoundexAlgorithm.compute("heilbronn").get should equal ("h416") + SoundexAlgorithm.compute("kant").get should equal ("k530") + SoundexAlgorithm.compute("ladd").get should equal ("l300") + SoundexAlgorithm.compute("lissajous").get should equal ("l222") + SoundexAlgorithm.compute("fusedale").get should equal ("f234") + } + } + } + } +} \ No newline at end of file diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala deleted file mode 100755 index 9f18bc9..0000000 --- a/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala +++ /dev/null @@ -1,54 +0,0 @@ -package org.hashtree.stringmetric.phonetic - -import org.hashtree.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class SoundexSpec extends ScalaTest { - "Soundex" should provide { - "compute method" when passed { - "empty argument" should returns { - "None" in { - Soundex.compute("").isDefined should be (false) - } - } - "non-phonetic argument" should returns { - "None" in { - Soundex.compute("123").isDefined should be (false) - } - } - "phonetic argument" should returns { - "Some" in { - Soundex.compute("x123").get should equal ("x000") - - Soundex.compute("abc").get should equal ("a120") - Soundex.compute("xyz").get should equal ("x200") - - Soundex.compute("robert").get should equal ("r163") - Soundex.compute("rupert").get should equal ("r163") - Soundex.compute("rubin").get should equal 
("r150") - Soundex.compute("ashcraft").get should equal ("a261") - Soundex.compute("tymczak").get should equal ("t522") - Soundex.compute("pfister").get should equal ("p236") - Soundex.compute("euler").get should equal ("e460") - Soundex.compute("gauss").get should equal ("g200") - Soundex.compute("hilbert").get should equal ("h416") - Soundex.compute("knuth").get should equal ("k530") - Soundex.compute("lloyd").get should equal ("l300") - Soundex.compute("lukasiewicz").get should equal ("l222") - Soundex.compute("ashcroft").get should equal ("a261") - Soundex.compute("tymczak").get should equal ("t522") - Soundex.compute("pfister").get should equal ("p236") - Soundex.compute("ellery").get should equal ("e460") - Soundex.compute("ghosh").get should equal ("g200") - Soundex.compute("heilbronn").get should equal ("h416") - Soundex.compute("kant").get should equal ("k530") - Soundex.compute("ladd").get should equal ("l300") - Soundex.compute("lissajous").get should equal ("l222") - Soundex.compute("fusedale").get should equal ("f234") - } - } - } - } -} \ No newline at end of file -- cgit v1.2.3