summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRocky Madden <git@rockymadden.com>2012-11-18 19:31:32 -0700
committerRocky Madden <git@rockymadden.com>2012-11-18 19:31:32 -0700
commit72dfa5ebe75b707f192cbe65c23393d08dbd8f8a (patch)
treed14db05d2a671f9e6cf9c0cb4c139003f36a8e50
parenta40947a8554332e97d05db8850ab1b6cd0540d9a (diff)
downloadstringmetric-72dfa5ebe75b707f192cbe65c23393d08dbd8f8a.tar.gz
stringmetric-72dfa5ebe75b707f192cbe65c23393d08dbd8f8a.tar.bz2
stringmetric-72dfa5ebe75b707f192cbe65c23393d08dbd8f8a.zip
Created refined NYSIIS algorithm, metric, commands, and specs.
-rwxr-xr-xcli/source/core/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisAlgorithm.scala50
-rwxr-xr-xcli/source/core/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisMetric.scala54
-rwxr-xr-xcli/source/test/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisAlgorithmSpec.scala39
-rwxr-xr-xcli/source/test/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisMetricSpec.scala46
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala135
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala33
-rwxr-xr-xcore/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala208
-rwxr-xr-xcore/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetricSpec.scala37
-rwxr-xr-xreadme.md2
9 files changed, 603 insertions, 1 deletions
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisAlgorithm.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisAlgorithm.scala
new file mode 100755
index 0000000..9953116
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisAlgorithm.scala
@@ -0,0 +1,50 @@
+package org.hashtree.stringmetric.cli.phonetic
+
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.phonetic.RefinedNysiisAlgorithm
+
+/**
+ * The refinedNysiisAlgorithm [[org.hashtree.stringmetric.cli.Command]]. Returns the phonetic representation of the
+ * passed string, per the refined NYSIIS algorithm.
+ */
+object refinedNysiisAlgorithm extends Command {
+ override def main(args: Array[String]): Unit = {
+ val options = OptionMapUtility.toOptionMap(args)
+
+ try {
+ // Help.
+ if (options.contains('h) || options.contains('help)) {
+ help()
+ exit(options)
+ // Execute.
+ } else if (options.contains('dashless) && options('dashless).count(_ == ' ') == 0) {
+ execute(options)
+ exit(options)
+ // Invalid syntax.
+ } else throw new IllegalArgumentException("Expected valid syntax. See --help.")
+ } catch {
+ case e => error(e, options)
+ }
+ }
+
+ override def help(): Unit = {
+ val ls = sys.props("line.separator")
+ val tab = " "
+
+ println(
+ "Returns the phonetic representation of the passed string, per the refined NYSIIS algorithm." + ls + ls +
+ "Syntax:" + ls +
+ tab + "refinedNysiisAlgorithm [Options] string..." + ls + ls +
+ "Options:" + ls +
+ tab + "-h, --help" + ls +
+ tab + tab + "Outputs description, syntax, and options."
+ )
+ }
+
+ override def execute(options: OptionMap): Unit =
+ println(
+ RefinedNysiisAlgorithm.compute(
+ options('dashless)
+ ).getOrElse("not computable").toString
+ )
+} \ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisMetric.scala
new file mode 100755
index 0000000..b09075a
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisMetric.scala
@@ -0,0 +1,54 @@
+package org.hashtree.stringmetric.cli.phonetic
+
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.phonetic.RefinedNysiisMetric
+
+/**
+ * The refinedNysiisMetric [[org.hashtree.stringmetric.cli.Command]]. Compares two strings to determine if they are
+ * phonetically similarly, per the refined NYSIIS algorithm.
+ */
+object refinedNysiisMetric extends Command {
+ override def main(args: Array[String]): Unit = {
+ val options = OptionMapUtility.toOptionMap(args)
+
+ try {
+ // Help.
+ if (options.contains('h) || options.contains('help)) {
+ help()
+ exit(options)
+ // Execute.
+ } else if (options.contains('dashless) && options('dashless).count(_ == ' ') == 1) {
+ execute(options)
+ exit(options)
+ // Invalid syntax.
+ } else throw new IllegalArgumentException("Expected valid syntax. See --help.")
+ } catch {
+ case e => error(e, options)
+ }
+ }
+
+ override def help(): Unit = {
+ val ls = sys.props("line.separator")
+ val tab = " "
+
+ println(
+ "Compares two strings to determine if they are phonetically similarly, per the refined NYSIIS algorithm." + ls + ls +
+ "Syntax:" + ls +
+ tab + "refinedNysiisMetric [Options] string1 string2..." + ls + ls +
+ "Options:" + ls +
+ tab + "-h, --help" + ls +
+ tab + tab + "Outputs description, syntax, and options."
+ )
+ }
+
+ override def execute(options: OptionMap): Unit = {
+ val strings = options('dashless).split(" ")
+
+ println(
+ RefinedNysiisMetric.compare(
+ strings(0),
+ strings(1)
+ ).getOrElse("not comparable").toString
+ )
+ }
+} \ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisAlgorithmSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisAlgorithmSpec.scala
new file mode 100755
index 0000000..2567221
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisAlgorithmSpec.scala
@@ -0,0 +1,39 @@
+package org.hashtree.stringmetric.cli.phonetic
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class refinedNysiisAlgorithmSpec extends ScalaTest {
+ "refinedNysiisAlgorithm" should provide {
+ "main method" when passed {
+ "valid dashless argument" should executes {
+ "print phonetic representation" in {
+ val out = new java.io.ByteArrayOutputStream()
+
+ Console.withOut(out)(
+ refinedNysiisAlgorithm.main(Array("--unitTest", "--debug", "abc"))
+ )
+
+ out.toString should equal ("abc\n")
+ out.reset()
+
+ Console.withOut(out)(
+ refinedNysiisAlgorithm.main(Array("--unitTest", "--debug", "1"))
+ )
+
+ out.toString should equal ("not computable\n")
+ out.reset()
+ }
+ }
+ "no dashless argument" should throws {
+ "IllegalArgumentException" in {
+ evaluating {
+ refinedNysiisAlgorithm.main(Array("--unitTest", "--debug"))
+ } should produce [IllegalArgumentException]
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisMetricSpec.scala
new file mode 100755
index 0000000..eddee0a
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/phonetic/refinedNysiisMetricSpec.scala
@@ -0,0 +1,46 @@
+package org.hashtree.stringmetric.cli.phonetic
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class refinedNysiisMetricSpec extends ScalaTest {
+ "refinedNysiisMetric" should provide {
+ "main method" when passed {
+ "valid dashless arguments" should executes {
+ "print if they are a match" in {
+ val out = new java.io.ByteArrayOutputStream()
+
+ Console.withOut(out)(
+ refinedNysiisMetric.main(Array("--unitTest", "--debug", "abc", "abc"))
+ )
+
+ out.toString should equal ("true\n")
+ out.reset()
+
+ Console.withOut(out)(
+ refinedNysiisMetric.main(Array("--unitTest", "--debug", "abc", "xyz"))
+ )
+
+ out.toString should equal ("false\n")
+ out.reset()
+
+ Console.withOut(out)(
+ refinedNysiisMetric.main(Array("--unitTest", "--debug", "1", "1"))
+ )
+
+ out.toString should equal ("not comparable\n")
+ out.reset()
+ }
+ }
+ "no dashless arguments" should throws {
+ "IllegalArgumentException" in {
+ evaluating {
+ refinedNysiisMetric.main(Array("--unitTest", "--debug"))
+ } should produce [IllegalArgumentException]
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
new file mode 100755
index 0000000..61393ec
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
@@ -0,0 +1,135 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.{ FilterableStringAlgorithm, StringAlgorithm, StringFilter }
+import org.hashtree.stringmetric.filter.StringFilterDelegate
+import scala.annotation.tailrec
+
+/** An implementation of the refined NYSIIS [[org.hashtree.stringmetric.StringAlgorithm]]. */
+object RefinedNysiisAlgorithm extends StringAlgorithm with FilterableStringAlgorithm {
+ type ComputeReturn = String
+
+ override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = {
+ val ca = stringFilter.filter(charArray)
+
+ if (ca.length == 0) None
+ else {
+ val cal = ca.map(_.toLower)
+
+ if (cal.head < 97 || cal.head > 122) None
+ else {
+ val thl = transcodeLast(transcodeHead(cal.head +: cleanLast(cal.tail, Set('s', 'z'))))
+ val t = transcode(Array.empty[Char], thl.head, thl.tail, Array.empty[Char])
+
+ if (t.length == 1) Some(t)
+ else Some(deduplicate(t.head +: cleanTerminal(cleanLast(t.tail, Set('a')))))
+ }
+ }
+ }
+
+ override def compute(string: String)(implicit stringFilter: StringFilter): Option[ComputeReturn] =
+ compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate) match {
+ case Some(ny) => Some(ny.mkString)
+ case None => None
+ }
+
+ private[this] def cleanLast(ca: Array[Char], s: Set[Char]) =
+ if (ca.length == 0) ca
+ else if(s.contains(ca.last)) ca.dropRight(ca.reverseIterator.takeWhile(c => s.contains(c)).length)
+ else ca
+
+ private[this] def cleanTerminal(ca: Array[Char]) =
+ if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
+ else ca
+
+ private[this] def deduplicate(ca: Array[Char]) =
+ if (ca.length <= 1) ca
+ else
+ ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
+
+ @tailrec
+ private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ if (c == '\0' && r.length == 0) o
+ else {
+ val shift = (d: Int, ca: Array[Char]) => {
+ val sa = r.splitAt(d - 1)
+
+ (
+ if (sa._1.length > 0) (l :+ c) ++ sa._1 else l :+ c,
+ if (sa._2.length > 0) sa._2.head else '\0',
+ if (sa._2.length > 1) sa._2.tail else Array.empty[Char],
+ ca
+ )
+ }
+
+ val t = {
+ c match {
+ case 'a' | 'i' | 'o' | 'u' =>
+ if (l.length == 0) shift(1, o :+ c)
+ else shift(1, o :+ 'a')
+ case 'b' | 'c' | 'f' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' => shift(1, o :+ c)
+ case 'd' =>
+ if (r.length >= 1 && r.head == 'g') shift(2, o :+ 'g') else shift(1, o :+ c)
+ case 'e' =>
+ if (l.length == 0) shift(1, o :+ c)
+ else if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f'))
+ else shift(1, o :+ 'a')
+ case 'g' =>
+ if (r.length >= 2 && r.head == 'h' && r(1) == 't') shift(3, o ++ Array('g', 't'))
+ else shift(1, o :+ c)
+ case 'h' =>
+ if (l.length == 0) shift(1, o :+ c)
+ else if (!Alphabet.isVowel(l.last) || (r.length >= 1 && !Alphabet.isVowel(r.head))) shift(1, o)
+ else shift(1, o :+ c)
+ case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c')
+ case 'm' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'n')
+ case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c)
+ case 'q' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'g')
+ case 's' =>
+ if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c)
+ else if (r.length >= 1 && r.head == 'h') shift(2, o :+ c)
+ else shift(1, o :+ c)
+ case 'w' =>
+ if (l.length >= 1 && Alphabet.isVowel(l.last)) shift(1, o)
+ else if (r.length >= 1 && r.head == 'r') shift(2, o :+ 'r')
+ else shift(1, o :+ c)
+ case 'y' =>
+ if (l.length >= 1 && r.length >= 2 && r.head == 'w') shift(2, o :+ 'a')
+ else if (r.length >= 1 && r.head == 'w') shift(2, o :+ c)
+ else if (l.length >= 1 && r.length >= 1) shift(1, o :+ 'a')
+ else shift(1, o :+ c)
+ case 'z' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 's')
+ case _ => shift(1, o)
+ }
+ }
+
+ transcode(t._1, t._2, t._3, t._4)
+ }
+ }
+
+ private[this] def transcodeHead(ca: Array[Char]) = {
+ if (ca.length == 0) ca
+ else
+ ca.head match {
+ case 'm' if (ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c')) => Array('m', 'c') ++ ca.takeRight(ca.length - 3)
+ case 'p' if (ca.length >= 2 && ca(1) == 'f') => 'f' +: ca.takeRight(ca.length - 2)
+ case _ => ca
+ }
+ }
+
+ private[this] def transcodeLast(ca: Array[Char]) = {
+ if (ca.length >= 2) {
+ val l = ca(ca.length - 1)
+ val lm1 = ca(ca.length - 2)
+ lazy val take2 = ca.take(ca.length - 2)
+
+ l match {
+ case 'd' if (lm1 == 'n' || lm1 == 'r') => take2 :+ 'd'
+ case 'e' if (lm1 == 'e' || lm1 == 'i' || lm1 =='y') => take2 :+ 'y'
+ case 't' if (lm1 == 'd' || lm1 == 'n' || lm1 == 'r') => take2 :+ 'd'
+ case 'x' if (lm1 == 'e') => take2 ++ Array('e', 'c')
+ case 'x' if (lm1 == 'i') => take2 ++ Array('i', 'c')
+ case _ => ca
+ }
+ } else ca
+ }
+} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala
new file mode 100755
index 0000000..732386d
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetric.scala
@@ -0,0 +1,33 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.{ FilterableStringMetric, StringFilter, StringMetric }
+import org.hashtree.stringmetric.filter.StringFilterDelegate
+
+/** An implementation of the refined NYSIIS [[org.hashtree.stringmetric.StringMetric]]. */
+object RefinedNysiisMetric extends StringMetric with FilterableStringMetric {
+ type CompareReturn = Boolean
+
+ override def compare(charArray1: Array[Char], charArray2: Array[Char])
+ (implicit stringFilter: StringFilter): Option[CompareReturn] = {
+
+ val ca1 = stringFilter.filter(charArray1)
+ lazy val ca2 = stringFilter.filter(charArray2)
+
+ if (ca1.length == 0 || ca2.length == 0) None
+ else {
+ val rny1 = RefinedNysiisAlgorithm.compute(ca1)
+ lazy val rny2 = RefinedNysiisAlgorithm.compute(ca2)
+
+ if (!rny1.isDefined || rny1.get.length == 0 || !rny2.isDefined || rny2.get.length == 0) None
+ else Some(rny1.get.sameElements(rny2.get))
+ }
+ }
+
+ override def compare(string1: String, string2: String)
+ (implicit stringFilter: StringFilter): Option[CompareReturn] =
+
+ compare(
+ stringFilter.filter(string1.toCharArray),
+ stringFilter.filter(string2.toCharArray)
+ )(new StringFilterDelegate)
+} \ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala
new file mode 100755
index 0000000..8b47456
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala
@@ -0,0 +1,208 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class RefinedNysiisAlgorithmSpec extends ScalaTest {
+ "RefinedNysiisAlgorithm" should provide {
+ "compute method" when passed {
+ "empty argument" should returns {
+ "None" in {
+ RefinedNysiisAlgorithm.compute("").isDefined should be (false)
+ }
+ }
+ "non-phonetic argument" should returns {
+ "None" in {
+ RefinedNysiisAlgorithm.compute("123").isDefined should be (false)
+ }
+ }
+ "phonetic argument" should returns {
+ "Some" in {
+ // a
+ RefinedNysiisAlgorithm.compute("a").get should equal ("a")
+ RefinedNysiisAlgorithm.compute("aa").get should equal ("a")
+
+ // b
+ RefinedNysiisAlgorithm.compute("b").get should equal ("b")
+ RefinedNysiisAlgorithm.compute("bb").get should equal ("b")
+
+ // c
+ RefinedNysiisAlgorithm.compute("c").get should equal ("c")
+ RefinedNysiisAlgorithm.compute("cc").get should equal ("c")
+
+ // d
+ RefinedNysiisAlgorithm.compute("d").get should equal ("d")
+ RefinedNysiisAlgorithm.compute("dd").get should equal ("d")
+
+ // e
+ RefinedNysiisAlgorithm.compute("e").get should equal ("e")
+ RefinedNysiisAlgorithm.compute("ee").get should equal ("y")
+
+ // f
+ RefinedNysiisAlgorithm.compute("f").get should equal ("f")
+ RefinedNysiisAlgorithm.compute("ff").get should equal ("f")
+
+ // g
+ RefinedNysiisAlgorithm.compute("g").get should equal ("g")
+ RefinedNysiisAlgorithm.compute("gg").get should equal ("g")
+
+ // h
+ RefinedNysiisAlgorithm.compute("h").get should equal ("h")
+ RefinedNysiisAlgorithm.compute("hh").get should equal ("h")
+
+ // i
+ RefinedNysiisAlgorithm.compute("i").get should equal ("i")
+ RefinedNysiisAlgorithm.compute("ii").get should equal ("i")
+
+ // j
+ RefinedNysiisAlgorithm.compute("j").get should equal ("j")
+ RefinedNysiisAlgorithm.compute("jj").get should equal ("j")
+
+ // k
+ RefinedNysiisAlgorithm.compute("k").get should equal ("c")
+ RefinedNysiisAlgorithm.compute("kk").get should equal ("c")
+
+ // l
+ RefinedNysiisAlgorithm.compute("l").get should equal ("l")
+ RefinedNysiisAlgorithm.compute("ll").get should equal ("l")
+
+ // m
+ RefinedNysiisAlgorithm.compute("m").get should equal ("m")
+ RefinedNysiisAlgorithm.compute("mm").get should equal ("mn")
+
+ // n
+ RefinedNysiisAlgorithm.compute("n").get should equal ("n")
+ RefinedNysiisAlgorithm.compute("nn").get should equal ("n")
+
+ // o
+ RefinedNysiisAlgorithm.compute("o").get should equal ("o")
+ RefinedNysiisAlgorithm.compute("oo").get should equal ("o")
+
+ // p
+ RefinedNysiisAlgorithm.compute("p").get should equal ("p")
+ RefinedNysiisAlgorithm.compute("pp").get should equal ("p")
+
+ // q
+ RefinedNysiisAlgorithm.compute("q").get should equal ("q")
+ RefinedNysiisAlgorithm.compute("qq").get should equal ("qg")
+
+ // r
+ RefinedNysiisAlgorithm.compute("r").get should equal ("r")
+ RefinedNysiisAlgorithm.compute("rr").get should equal ("r")
+
+ // s
+ RefinedNysiisAlgorithm.compute("s").get should equal ("s")
+ RefinedNysiisAlgorithm.compute("ss").get should equal ("s")
+
+ // t
+ RefinedNysiisAlgorithm.compute("t").get should equal ("t")
+ RefinedNysiisAlgorithm.compute("tt").get should equal ("t")
+
+ // u
+ RefinedNysiisAlgorithm.compute("u").get should equal ("u")
+ RefinedNysiisAlgorithm.compute("uu").get should equal ("u")
+
+ // v
+ RefinedNysiisAlgorithm.compute("v").get should equal ("v")
+ RefinedNysiisAlgorithm.compute("vv").get should equal ("v")
+
+ // w
+ RefinedNysiisAlgorithm.compute("w").get should equal ("w")
+ RefinedNysiisAlgorithm.compute("ww").get should equal ("w")
+
+ // x
+ RefinedNysiisAlgorithm.compute("x").get should equal ("x")
+ RefinedNysiisAlgorithm.compute("xx").get should equal ("x")
+
+ // y
+ RefinedNysiisAlgorithm.compute("y").get should equal ("y")
+ RefinedNysiisAlgorithm.compute("yy").get should equal ("y")
+ RefinedNysiisAlgorithm.compute("ybyb").get should equal ("ybab")
+
+ // z
+ RefinedNysiisAlgorithm.compute("z").get should equal ("z")
+ RefinedNysiisAlgorithm.compute("zz").get should equal ("z")
+
+ // Head cases.
+ RefinedNysiisAlgorithm.compute("mac").get should equal ("mc")
+ RefinedNysiisAlgorithm.compute("pf").get should equal ("f")
+
+ // Last cases.
+ RefinedNysiisAlgorithm.compute("ix").get should equal ("ic")
+ RefinedNysiisAlgorithm.compute("ex").get should equal ("ec")
+ RefinedNysiisAlgorithm.compute("ye").get should equal ("y")
+ RefinedNysiisAlgorithm.compute("ee").get should equal ("y")
+ RefinedNysiisAlgorithm.compute("ie").get should equal ("y")
+ RefinedNysiisAlgorithm.compute("dt").get should equal ("d")
+ RefinedNysiisAlgorithm.compute("rt").get should equal ("d")
+ RefinedNysiisAlgorithm.compute("rd").get should equal ("d")
+ RefinedNysiisAlgorithm.compute("nt").get should equal ("d")
+ RefinedNysiisAlgorithm.compute("nd").get should equal ("d")
+
+ // Core cases.
+ RefinedNysiisAlgorithm.compute("bevb").get should equal ("bafb")
+ RefinedNysiisAlgorithm.compute("bghtb").get should equal ("bgtb")
+ RefinedNysiisAlgorithm.compute("bdgb").get should equal ("bgb")
+ RefinedNysiisAlgorithm.compute("bphb").get should equal ("bfb")
+ RefinedNysiisAlgorithm.compute("bknb").get should equal ("bnb")
+ RefinedNysiisAlgorithm.compute("bshb").get should equal ("bsb")
+ RefinedNysiisAlgorithm.compute("bschb").get should equal ("bsb")
+ RefinedNysiisAlgorithm.compute("bywb").get should equal ("bab")
+ RefinedNysiisAlgorithm.compute("byw").get should equal ("by")
+ RefinedNysiisAlgorithm.compute("ywb").get should equal ("yb")
+ RefinedNysiisAlgorithm.compute("bwrb").get should equal ("brb")
+
+ // Transcode cases.
+ RefinedNysiisAlgorithm.compute("bay").get should equal ("by")
+
+ // Miscellaneous.
+ RefinedNysiisAlgorithm.compute("macdonald").get should equal ("mcdanald")
+ RefinedNysiisAlgorithm.compute("phone").get should equal ("fan")
+ RefinedNysiisAlgorithm.compute("aggregate").get should equal ("agragat")
+ RefinedNysiisAlgorithm.compute("accuracy").get should equal ("acaracy")
+ RefinedNysiisAlgorithm.compute("encyclopedia").get should equal ("encaclapad")
+ RefinedNysiisAlgorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab")
+ RefinedNysiisAlgorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn")
+
+ // Dropby.
+ RefinedNysiisAlgorithm.compute("edwards").get should equal ("edwad")
+ RefinedNysiisAlgorithm.compute("parez").get should equal ("par")
+ RefinedNysiisAlgorithm.compute("macintosh").get should equal ("mcantas")
+ RefinedNysiisAlgorithm.compute("phillipson").get should equal ("falapsan")
+ RefinedNysiisAlgorithm.compute("haddix").get should equal ("hadac")
+ RefinedNysiisAlgorithm.compute("essex").get should equal ("esac")
+ RefinedNysiisAlgorithm.compute("moye").get should equal ("my")
+ RefinedNysiisAlgorithm.compute("mckee").get should equal ("mcy")
+ RefinedNysiisAlgorithm.compute("mackie").get should equal ("mcy")
+ RefinedNysiisAlgorithm.compute("heitschmidt").get should equal ("hatsnad")
+ RefinedNysiisAlgorithm.compute("bart").get should equal ("bad")
+ RefinedNysiisAlgorithm.compute("hurd").get should equal ("had")
+ RefinedNysiisAlgorithm.compute("hunt").get should equal ("had")
+ RefinedNysiisAlgorithm.compute("westerlund").get should equal ("wastarlad")
+ RefinedNysiisAlgorithm.compute("evers").get should equal ("evar")
+ RefinedNysiisAlgorithm.compute("devito").get should equal ("dafat")
+ RefinedNysiisAlgorithm.compute("rawson").get should equal ("rasan")
+ RefinedNysiisAlgorithm.compute("shoulders").get should equal ("saldar")
+ RefinedNysiisAlgorithm.compute("leighton").get should equal ("lagtan")
+ RefinedNysiisAlgorithm.compute("wooldridge").get should equal ("waldrag")
+ RefinedNysiisAlgorithm.compute("oliphant").get should equal ("olafad")
+ RefinedNysiisAlgorithm.compute("hatchett").get should equal ("hatcat")
+ RefinedNysiisAlgorithm.compute("mcknight").get should equal ("mcnagt")
+ RefinedNysiisAlgorithm.compute("rickert").get should equal ("racad")
+ RefinedNysiisAlgorithm.compute("bowman").get should equal ("banan")
+ RefinedNysiisAlgorithm.compute("vasquez").get should equal ("vasg")
+ RefinedNysiisAlgorithm.compute("bashaw").get should equal ("bas")
+ RefinedNysiisAlgorithm.compute("schoenhoeft").get should equal ("sanaft") // dropby wrongly says scanaft
+ RefinedNysiisAlgorithm.compute("heywood").get should equal ("had")
+ RefinedNysiisAlgorithm.compute("hayman").get should equal ("hanan")
+ RefinedNysiisAlgorithm.compute("seawright").get should equal ("saragt")
+ RefinedNysiisAlgorithm.compute("kratzer").get should equal ("cratsar")
+ RefinedNysiisAlgorithm.compute("canaday").get should equal ("canady")
+ RefinedNysiisAlgorithm.compute("crepeau").get should equal ("crap")
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetricSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetricSpec.scala
new file mode 100755
index 0000000..940c0c2
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedNysiisMetricSpec.scala
@@ -0,0 +1,37 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class RefinedNysiisMetricSpec extends ScalaTest {
+ "RefinedNysiisMetric" should provide {
+ "compare method" when passed {
+ "empty arguments" should returns {
+ "None" in {
+ RefinedNysiisMetric.compare("", "").isDefined should be (false)
+ RefinedNysiisMetric.compare("abc", "").isDefined should be (false)
+ RefinedNysiisMetric.compare("", "xyz").isDefined should be (false)
+ }
+ }
+ "non-phonetic arguments" should returns {
+ "None" in {
+ RefinedNysiisMetric.compare("123", "123").isDefined should be (false)
+ RefinedNysiisMetric.compare("123", "").isDefined should be (false)
+ RefinedNysiisMetric.compare("", "123").isDefined should be (false)
+ }
+ }
+ "phonetically similar arguments" should returns {
+ "Boolean indicating true" in {
+ RefinedNysiisMetric.compare("ham", "hum").get should be (true)
+ }
+ }
+ "phonetically dissimilar arguments" should returns {
+ "Boolean indicating false" in {
+ RefinedNysiisMetric.compare("dumb", "gum").get should be (false)
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/readme.md b/readme.md
index 3207e32..2be2205 100755
--- a/readme.md
+++ b/readme.md
@@ -15,6 +15,7 @@ A small library of string metrics and phonetic algorithms. Each has a command li
* __[Metaphone](http://en.wikipedia.org/wiki/Metaphone)__ (Phonetic metric and algorithm)
* __[N-Gram](http://en.wikipedia.org/wiki/N-gram)__ (Similarity metric and algorithm)
* __[NYSIIS](http://en.wikipedia.org/wiki/New_York_State_Identification_and_Intelligence_System)__ (Phonetic metric and algorithm)
+* __[Refined NYSIIS](http://www.markcrocker.com/rexxtipsntricks/rxtt28.2.0482.html)__ (Phonetic metric and algorithm)
* __[Refined Soundex](http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html)__ (Phonetic metric and algorithm)
* __[Soundex](http://en.wikipedia.org/wiki/Soundex)__ (Phonetic metric and algorithm)
* __Weighted Levenshtein__ (Similarity metric)
@@ -106,7 +107,6 @@ metaphoneAlgorithm abc
* MongeElkan
* NeedlemanWunch
* Jaccard
-* Refined NYSIIS
* Double Metaphone
* Memoization decorator