summary refs log tree commit diff
diff options
context:
space:
mode:
author: Rocky Madden <git@rockymadden.com> 2012-10-15 16:24:41 -0600
committer: Rocky Madden <git@rockymadden.com> 2012-10-15 16:24:41 -0600
commit: 2085318fcd0f785630c3f2baad09f0d70b481cc6 (patch)
tree: a40a554397ea674d659d202d7188fa84c907630e
parent: 488de0cd595a7a034d706f319ec7cdcacea6eaab (diff)
download: stringmetric-2085318fcd0f785630c3f2baad09f0d70b481cc6.tar.gz
stringmetric-2085318fcd0f785630c3f2baad09f0d70b481cc6.tar.bz2
stringmetric-2085318fcd0f785630c3f2baad09f0d70b481cc6.zip
Created SoundexMetric, spec, and command.
-rwxr-xr-x cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala 52
-rwxr-xr-x cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala 39
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala 99
-rwxr-xr-x core/source/test/scala/org/hashtree/stringmetric/SoundexMetricSpec.scala 35
-rwxr-xr-x readme.md 1
5 files changed, 226 insertions, 0 deletions
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala
new file mode 100755
index 0000000..6f6f9ec
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/soundexMetric.scala
@@ -0,0 +1,52 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.{ SoundexMetric, StringCleanerDelegate }
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.cli.command._
+
+/**
+ * The soundexMetric [[org.hashtree.stringmetric.cli.command.Command]]. Compares two strings to determine if they are
+ * pronounced similarly, per the Soundex phonetic algorithm.
+ */
+object soundexMetric extends Command {
+ override def main(args: Array[String]): Unit = { // Entry point: dispatches to help, execute, or a syntax error.
+ val options = OptionMapUtility.toOptionMap(args)
+
+ try {
+ // Help.
+ if (options.contains('h) || options.contains('help)) {
+ help()
+ exit(options)
+ // Execute.
+ } else if (options.contains('dashless) && options('dashless).count(_ == ' ') == 1) { // Requires exactly one space in the dashless value; execute splits on it.
+ execute(options)
+ exit(options)
+ // Invalid syntax.
+ } else {
+ throw new IllegalArgumentException("Expected valid syntax. See --help.")
+ }
+ } catch {
+ case e => error(e)(options) // Untyped pattern: every Throwable raised above is handed to error.
+ }
+ }
+
+ override def help(): Unit = { // Prints the description, syntax, and options to standard output.
+ val ls = sys.props("line.separator")
+ val tab = " "
+
+ println(
+ "Compares two strings to determine if they are pronounced similarly, per the Soundex phonetic algorithm." + ls + ls +
+ "Syntax:" + ls +
+ tab + "soundexMetric [Options] string1 string2..." + ls + ls +
+ "Options:" + ls +
+ tab + "-h, --help" + ls +
+ tab + tab + "Outputs description, syntax, and options."
+ )
+ }
+
+ override def execute(options: OptionMap): Unit = { // Compares the two dashless strings and prints the Boolean result.
+ val strings = options('dashless).split(" ") // main only calls this when the value holds exactly one space.
+
+ println(SoundexMetric.compare(strings(0), strings(1))(new StringCleanerDelegate).toString)
+ }
+} \ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
new file mode 100755
index 0000000..c71b1ea
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/soundexMetricSpec.scala
@@ -0,0 +1,39 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class soundexMetricSpec extends ScalaTest { // Exercises soundexMetric.main with and without dashless arguments.
+ "soundexMetric" should provide {
+ "main method" when passed {
+ "valid dashless arguments" should executes {
+ "print if they are a match" in {
+ val out = new java.io.ByteArrayOutputStream() // Buffer that receives whatever main prints.
+
+ Console.withOut(out)(
+ soundexMetric.main(Array("--unitTest", "--debug", "aBc", "abc")) // Inputs differ only in letter case.
+ )
+
+ out.toString should equal ("true\n") // NOTE(review): hard-codes "\n"; confirm println's line separator on every target platform.
+ out.reset() // Clear the buffer before the second run.
+
+ Console.withOut(out)(
+ soundexMetric.main(Array("--unitTest", "--debug", "aBc", "xyz"))
+ )
+
+ out.toString should equal ("false\n")
+ out.reset()
+ }
+ }
+ "no dashless arguments" should throws {
+ "IllegalArgumentException" in {
+ evaluating {
+ soundexMetric.main(Array("--unitTest", "--debug")) // NOTE(review): presumably --debug makes error rethrow instead of exiting; confirm in Command.
+ } should produce [IllegalArgumentException]
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala
new file mode 100755
index 0000000..b3033de
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/SoundexMetric.scala
@@ -0,0 +1,99 @@
+package org.hashtree.stringmetric
+
+import scala.annotation.tailrec
+
+/** Soundex [[org.hashtree.stringmetric.StringMetric]]: two inputs match when both yield the same four-character code. */
+object SoundexMetric extends StringMetric {
+ override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringCleaner: StringCleaner): Boolean = { // NOTE(review): stringCleaner is never applied in this object; confirm that is intended.
+ val se1 = if (charArray1.length > 0) soundex(charArray1) else None // Empty input has no code.
+ val se2 = if (charArray2.length > 0) soundex(charArray2) else None
+
+ se1.isDefined && se1 == se2 // A missing code never matches, not even another missing code.
+ }
+
+ override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Boolean = { // String overload; delegates to the Array[Char] version.
+ compare(string1.toCharArray, string2.toCharArray)
+ }
+
+ private[this] def soundex(charArray: Array[Char]): Option[String] = { // Lower-cased first letter plus up to three digits, zero padded; None when there is no a-z letter.
+ require(charArray.length > 0)
+
+ @tailrec
+ def letter(ca: Array[Char], i: Int): Option[Tuple2[Char, Int]] = { // First a-z character (lower-cased) paired with its index, if any.
+ require(ca.length > 0)
+
+ val c = ca.head.toLower
+
+ if (c >= 97 && c <= 122) { // 'a' to 'z'.
+ Some((c, i))
+ } else if (ca.length == 1) {
+ None
+ } else {
+ letter(ca.tail, i + 1)
+ }
+ }
+
+ @tailrec
+ def code(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = { // i: remaining input, p: previous letter, o: output built so far.
+ require(i.length > 0)
+ require(p >= 97 && p <= 122) // p is always an a-z letter because non-letters never replace it (see the recursive call).
+ require(o.length > 0)
+
+ val c = i.head.toLower
+ val m2 = (mc: Char) => mc match { // Unconditional letter-to-digit mapping; '\0' means "not coded".
+ case 'b' | 'f' | 'p' | 'v' => '1'
+ case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2'
+ case 'd' | 't' => '3'
+ case 'l' => '4'
+ case 'm' | 'n' => '5'
+ case 'r' => '6'
+ case _ => '\0'
+ }
+ val m1 = (mc: Char, pc: Char) => mc match { // Same mapping, suppressed when it would repeat the previous code pc.
+ case 'b' | 'f' | 'p' | 'v' if pc != '1' => '1'
+ case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' if pc != '2' => '2'
+ case 'd' | 't' if pc != '3' => '3'
+ case 'l' if pc != '4' => '4'
+ case 'm' | 'n' if pc != '5' => '5'
+ case 'r' if pc != '6' => '6'
+ case _ => '\0'
+ }
+
+ val a =
+ p match {
+ // Code twice.
+ case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c)
+ // Code once.
+ case _ => m1(
+ c,
+ o.last match { // Previous code: the last digit emitted, or the code of the first letter.
+ case '1' | '2' | '3' | '4' | '5' | '6' => o.last
+ case _ => m2(o.last)
+ }
+ )
+ }
+
+ if (i.length == 1 || (o.length == 3 && a != '\0')) { // Stop at end of input or once the fourth character is known.
+ if (a != '\0') o :+ a else o
+ } else {
+ code(i.tail, if (c >= 97 && c <= 122) c else p, if (a != '\0') o :+ a else o) // Non-letters are skipped: they must not become p, or the require above would throw.
+ }
+ }
+
+ letter(charArray, 0) match {
+ case Some(l) =>
+ if (charArray.length - 1 == l._2) { // First letter is the last character: nothing left to code.
+ Some(l._1 + "000")
+ } else {
+ Some(
+ code(
+ charArray.takeRight(charArray.length - (l._2 + 1)),
+ l._1, // Pass first letter.
+ Array(l._1) // Pass array with first letter.
+ ).mkString.padTo(4, '0')
+ )
+ }
+ case None => None
+ }
+ }
+} \ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/SoundexMetricSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/SoundexMetricSpec.scala
new file mode 100755
index 0000000..c688f9d
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/SoundexMetricSpec.scala
@@ -0,0 +1,35 @@
+package org.hashtree.stringmetric
+
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class SoundexMetricSpec extends ScalaTest { // Checks SoundexMetric.compare on matching, non-matching, and uncodable inputs.
+ "SoundexMetric" should provide {
+ "compare method" when passed {
+ "valid arguments" should returns {
+ "Boolean indicating matches" in {
+ SoundexMetric.compare("abc", "abc") should be (true) // a120 vs. a120
+ SoundexMetric.compare("a", "a") should be (true) // a000 vs. a000
+ SoundexMetric.compare("abc", "xyz") should be (false) // a120 vs. x200
+ SoundexMetric.compare("", "") should be (false) // empty input yields no code, so it never matches
+ SoundexMetric.compare("123", "123") should be (false) // no a-z letter, so no code
+ SoundexMetric.compare("1", "1") should be (false) // no a-z letter, so no code
+
+ SoundexMetric.compare("Robert", "Rupert") should be (true) // r163 vs. r163
+ SoundexMetric.compare("Robert", "Rubin") should be (false) // r163 vs. r150
+
+ SoundexMetric.compare("Ashcraft", "Ashcroft") should be (true) // a261 vs. a261
+ SoundexMetric.compare("Tymczak", "Tymczak") should be (true) // t522 vs. t522
+ SoundexMetric.compare("Pfister", "Pfister") should be (true) // p236 vs. p236
+ SoundexMetric.compare("Euler", "Ellery") should be (true) // e460 vs. e460
+ SoundexMetric.compare("Gauss", "Ghosh") should be (true) // g200 vs. g200
+ SoundexMetric.compare("Hilbert", "Heilbronn") should be (true) // h416 vs. h416
+ SoundexMetric.compare("Knuth", "Kant") should be (true) // k530 vs. k530
+ SoundexMetric.compare("Lloyd", "Ladd") should be (true) // l300 vs. l300
+ SoundexMetric.compare("Lukasiewicz", "Lissajous") should be (true) // l222 vs. l222
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/readme.md b/readme.md
index 56b6010..13eb075 100755
--- a/readme.md
+++ b/readme.md
@@ -3,6 +3,7 @@ A collection of string metrics implemented in Scala. Includes a light-weight cor
* Jaro
* Jaro-Winkler
+* Soundex
## Building the API
gradle jar