diff options
author | Rocky Madden <git@rockymadden.com> | 2012-10-15 16:24:41 -0600 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2012-10-15 16:24:41 -0600 |
commit | 2085318fcd0f785630c3f2baad09f0d70b481cc6 (patch) | |
tree | a40a554397ea674d659d202d7188fa84c907630e | |
parent | 488de0cd595a7a034d706f319ec7cdcacea6eaab (diff) | |
download | stringmetric-2085318fcd0f785630c3f2baad09f0d70b481cc6.tar.gz stringmetric-2085318fcd0f785630c3f2baad09f0d70b481cc6.tar.bz2 stringmetric-2085318fcd0f785630c3f2baad09f0d70b481cc6.zip |
Created SoundexMetric, spec, and command.
5 files changed, 226 insertions, 0 deletions
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala
new file mode 100755
index 0000000..6f6f9ec
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala
@@ -0,0 +1,62 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.{ SoundexMetric, StringCleanerDelegate }
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.cli.command._
+
+/**
+ * The soundexMetric [[org.hashtree.stringmetric.cli.command.Command]]. Compares two strings to determine if they are
+ * pronounced similarly, per the Soundex phonetic algorithm.
+ */
+object soundexMetric extends Command {
+  /**
+   * Entry point: prints the help text when `-h`/`--help` is present, otherwise runs the comparison when the
+   * dashless value contains exactly one space. Anything else is reported as a syntax error through error().
+   */
+  override def main(args: Array[String]): Unit = {
+    val options = OptionMapUtility.toOptionMap(args)
+
+    // Declared as defs so that they are evaluated inside the try block below.
+    def helpRequested = options.contains('h) || options.contains('help)
+    def hasOperandPair = options.contains('dashless) && options('dashless).count(_ == ' ') == 1
+
+    try {
+      if (helpRequested) help()
+      else if (hasOperandPair) execute(options)
+      else throw new IllegalArgumentException("Expected valid syntax. See --help.")
+
+      // Reached only when help() or execute() returned normally.
+      exit(options)
+    } catch {
+      case e: Throwable => error(e)(options)
+    }
+  }
+
+  /** Prints the description, syntax, and options of this command. */
+  override def help(): Unit = {
+    val ls = sys.props("line.separator")
+    // NOTE(review): shown as a single space in this view; presumably a tab in the committed file - confirm.
+    val tab = " "
+
+    println(
+      Seq(
+        "Compares two strings to determine if they are pronounced similarly, per the Soundex phonetic algorithm.",
+        "",
+        "Syntax:",
+        tab + "soundexMetric [Options] string1 string2...",
+        "",
+        "Options:",
+        tab + "-h, --help",
+        tab + tab + "Outputs description, syntax, and options."
+      ).mkString(ls)
+    )
+  }
+
+  /** Splits the dashless value on spaces, compares the first two pieces, and prints the Boolean result. */
+  override def execute(options: OptionMap): Unit = {
+    val operands = options('dashless).split(" ")
+    val isMatch = SoundexMetric.compare(operands(0), operands(1))(new StringCleanerDelegate)
+
+    println(isMatch.toString)
+  }
+}
\ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
new file mode 100755
index 0000000..c71b1ea
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
@@ -0,0 +1,42 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Exercises the soundexMetric command through its main method, capturing what it prints. */
+@RunWith(classOf[JUnitRunner])
+final class soundexMetricSpec extends ScalaTest {
+  "soundexMetric" should provide {
+    "main method" when passed {
+      "valid dashless arguments" should executes {
+        "print if they are a match" in {
+          val out = new java.io.ByteArrayOutputStream()
+          // println ends lines with the platform separator, which is not "\n" on every system.
+          val ls = sys.props("line.separator")
+
+          Console.withOut(out)(
+            soundexMetric.main(Array("--unitTest", "--debug", "aBc", "abc"))
+          )
+
+          out.toString should equal ("true" + ls)
+          out.reset()
+
+          Console.withOut(out)(
+            soundexMetric.main(Array("--unitTest", "--debug", "aBc", "xyz"))
+          )
+
+          out.toString should equal ("false" + ls)
+          out.reset()
+        }
+      }
+      "no dashless arguments" should throws {
+        "IllegalArgumentException" in {
+          evaluating {
+            soundexMetric.main(Array("--unitTest", "--debug"))
+          } should produce [IllegalArgumentException]
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala
new file mode 100755
index 0000000..b3033de
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala
@@ -0,0 +1,95 @@
+package org.hashtree.stringmetric
+
+import scala.annotation.tailrec
+
+/**
+ * An implementation of the Soundex [[org.hashtree.stringmetric.StringMetric]]. Two inputs match when each contains
+ * a letter and both yield the same four character code (first letter followed by three digits).
+ */
+object SoundexMetric extends StringMetric {
+  /**
+   * Compares the Soundex codes of two character arrays.
+   *
+   * @param stringCleaner part of the inherited signature; not applied by this implementation
+   * @return true only when both inputs produce a code and the codes are equal
+   */
+  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringCleaner: StringCleaner): Boolean = {
+    val se1 = if (charArray1.length > 0) soundex(charArray1) else None
+    val se2 = if (charArray2.length > 0) soundex(charArray2) else None
+
+    // Two missing codes must not count as a match, hence the explicit isDefined.
+    se1.isDefined && se1 == se2
+  }
+
+  /** String overload; delegates to the character array comparison. */
+  override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Boolean = {
+    compare(string1.toCharArray, string2.toCharArray)
+  }
+
+  /**
+   * Computes the lower case Soundex code of the input, starting at its first letter.
+   *
+   * Characters whose lower case form is outside a-z carry no code and are skipped without becoming the
+   * "previous letter", so punctuation, digits, and white space inside a word no longer trip an internal
+   * `require` (e.g. "O'Brien" is coded like "OBrien").
+   *
+   * @return the code, or None when the input holds no such letter
+   */
+  private[this] def soundex(charArray: Array[Char]): Option[String] = {
+    require(charArray.length > 0)
+
+    def isLetter(c: Char): Boolean = c >= 'a' && c <= 'z'
+
+    // Digit assigned to a letter, or '\0' for anything without a code.
+    def digit(c: Char): Char = c match {
+      case 'b' | 'f' | 'p' | 'v' => '1'
+      case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2'
+      case 'd' | 't' => '3'
+      case 'l' => '4'
+      case 'm' | 'n' => '5'
+      case 'r' => '6'
+      case _ => '\0'
+    }
+
+    // i: remaining input (never empty); p: previous letter, lower case; o: code built so far.
+    @tailrec
+    def code(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = {
+      require(i.length > 0)
+      require(isLetter(p))
+      require(o.length > 0)
+
+      val c = i.head.toLower
+      val d = digit(c)
+      val a =
+        p match {
+          // After a vowel the same code may be emitted again.
+          case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => d
+          // Otherwise a code equal to the most recent one is suppressed.
+          case _ =>
+            val last = if (o.last >= '1' && o.last <= '6') o.last else digit(o.last)
+            if (d != last) d else '\0'
+        }
+      val next = if (a != '\0') o :+ a else o
+
+      if (i.length == 1 || (o.length == 3 && a != '\0')) {
+        next
+      } else {
+        // A non-letter leaves p untouched so the following letter is still judged against a real letter.
+        code(i.tail, if (isLetter(c)) c else p, next)
+      }
+    }
+
+    charArray.indexWhere(c => isLetter(c.toLower)) match {
+      case -1 => None
+      case at =>
+        val first = charArray(at).toLower
+        val rest = charArray.drop(at + 1)
+
+        if (rest.isEmpty) {
+          Some(first + "000")
+        } else {
+          Some(code(rest, first, Array(first)).mkString.padTo(4, '0'))
+        }
+    }
+  }
+}
\ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/SoundexMetricSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/SoundexMetricSpec.scala
new file mode 100755
index 0000000..c688f9d
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/SoundexMetricSpec.scala
@@ -0,0 +1,41 @@
+package org.hashtree.stringmetric
+
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Checks SoundexMetric.compare against a table of string pairs and expected outcomes. */
+@RunWith(classOf[JUnitRunner])
+final class SoundexMetricSpec extends ScalaTest {
+  "SoundexMetric" should provide {
+    "compare method" when passed {
+      "valid arguments" should returns {
+        "Boolean indicating matches" in {
+          // (string1, string2, expected result); the trailing comments list the codes being compared.
+          val expectations = Seq(
+            ("abc", "abc", true), // a120 vs. a120
+            ("a", "a", true), // a000 vs. a000
+            ("abc", "xyz", false), // a120 vs. x200
+            ("", "", false),
+            ("123", "123", false),
+            ("1", "1", false),
+            ("Robert", "Rupert", true), // r163 vs. r163
+            ("Robert", "Rubin", false), // r163 vs. r150
+            ("Ashcraft", "Ashcroft", true), // a261 vs. a261
+            ("Tymczak", "Tymczak", true), // t522 vs. t522
+            ("Pfister", "Pfister", true), // p236 vs. p236
+            ("Euler", "Ellery", true), // e460 vs. e460
+            ("Gauss", "Ghosh", true), // g200 vs. g200
+            ("Hilbert", "Heilbronn", true), // h416 vs. h416
+            ("Knuth", "Kant", true), // k530 vs. k530
+            ("Lloyd", "Ladd", true), // l300 vs. l300
+            ("Lukasiewicz", "Lissajous", true) // l222 vs. l222
+          )
+
+          expectations.foreach { case (string1, string2, expected) =>
+            SoundexMetric.compare(string1, string2) should be (expected)
+          }
+        }
+      }
+    }
+  }
+}
\ No newline at end of file @@ -3,6 +3,7 @@ A collection of string metrics implemented in Scala. Includes a light-weight cor * Jaro * Jaro-Winkler +* Soundex ## Building the API gradle jar |