summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRocky Madden <git@rockymadden.com>2012-10-20 15:25:49 -0600
committerRocky Madden <git@rockymadden.com>2012-10-20 15:25:49 -0600
commitf95a07d2481a45ddd8d088651f9669900da51edd (patch)
tree278a7169ffa25b979aca45c488b4bc6d71e0298e
parent153741654bc2186f91d59261715cb9b58340f09a (diff)
downloadstringmetric-f95a07d2481a45ddd8d088651f9669900da51edd.tar.gz
stringmetric-f95a07d2481a45ddd8d088651f9669900da51edd.tar.bz2
stringmetric-f95a07d2481a45ddd8d088651f9669900da51edd.zip
Split out the Soundex algorithm from the metric.
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/Algorithm.scala6
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/StringAlgorithm.scala6
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/phonetic/Soundex.scala94
-rwxr-xr-xcore/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala92
-rwxr-xr-xcore/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexMetricSpec.scala39
-rwxr-xr-xcore/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala50
6 files changed, 187 insertions, 100 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/Algorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/Algorithm.scala
new file mode 100755
index 0000000..e334d20
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/Algorithm.scala
@@ -0,0 +1,6 @@
+package org.hashtree.stringmetric
+
+/** Marks those which leverage traits of a stand alone algorithm. */
+trait Algorithm[T, C] {
+ def compute(t: T)(implicit c: C): Option[T]
+} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/StringAlgorithm.scala b/core/source/core/scala/org/hashtree/stringmetric/StringAlgorithm.scala
new file mode 100755
index 0000000..2901010
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/StringAlgorithm.scala
@@ -0,0 +1,6 @@
+package org.hashtree.stringmetric
+
+/** Marks those which leverage traits of a string based [[org.hashtree.stringmetric.Algorithm]]. */
+trait StringAlgorithm extends Algorithm[String, StringCleaner] {
+ def compute(ca: Array[Char])(implicit stringCleaner: StringCleaner): Option[Array[Char]]
+} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/Soundex.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Soundex.scala
new file mode 100755
index 0000000..75762ca
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/Soundex.scala
@@ -0,0 +1,94 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.{ StringAlgorithm, StringCleaner, StringCleanerDelegate }
+import scala.annotation.tailrec
+
+/** An implementation of the Soundex [[org.hashtree.stringmetric.StringAlgorithm]]. */
+object Soundex extends StringAlgorithm {
+ override def compute(charArray: Array[Char])(implicit stringCleaner: StringCleaner): Option[Array[Char]] = {
+ val ca = stringCleaner.clean(charArray)
+
+ if (ca.length == 0) None
+ else {
+ letter(ca, 0) match {
+ case Some(l) => {
+ if (ca.length - 1 == l._2) Some(Array(l._1, '0', '0', '0'))
+ else {
+ Some(
+ code(
+ ca.takeRight(ca.length - (l._2 + 1)),
+ l._1, // Pass first letter.
+ Array(l._1) // Pass array with first letter.
+ ).padTo(4, '0')
+ )
+ }
+ }
+ case None => None
+ }
+ }
+ }
+
+ override def compute(string: String)(implicit stringCleaner: StringCleaner): Option[String] = {
+ compute(stringCleaner.clean(string.toCharArray))(new StringCleanerDelegate) match {
+ case Some(se) => Some(se.mkString)
+ case None => None
+ }
+ }
+
+ @tailrec
+ private[this] def letter(i: Array[Char], ind: Int): Option[Tuple2[Char, Int]] = {
+ if (i.length == 0) None
+ else {
+ val c = i.head.toLower
+
+ if (c >= 97 && c <= 122) Some((c, ind)) else letter(i.tail, ind + 1)
+ }
+ }
+
+ @tailrec
+ private[this] def code(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = {
+ require(p >= 97 && p <= 122)
+ require(o.length > 0)
+
+ if (i.length == 0) o
+ else {
+ val c = i.head.toLower
+ val m2 = (mc: Char) => mc match {
+ case 'b' | 'f' | 'p' | 'v' => '1'
+ case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2'
+ case 'd' | 't' => '3'
+ case 'l' => '4'
+ case 'm' | 'n' => '5'
+ case 'r' => '6'
+ case _ => '\0'
+ }
+ val m1 = (mc: Char, pc: Char) => mc match {
+ case 'b' | 'f' | 'p' | 'v' if pc != '1' => '1'
+ case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' if pc != '2' => '2'
+ case 'd' | 't' if pc != '3' => '3'
+ case 'l' if pc != '4' => '4'
+ case 'm' | 'n' if pc != '5' => '5'
+ case 'r' if pc != '6' => '6'
+ case _ => '\0'
+ }
+ val a = p match {
+ // Code twice.
+ case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c)
+ // Code once.
+ case _ => {
+ m1(
+ c,
+ o.last match {
+ case '1' | '2' | '3' | '4' | '5' | '6' => o.last
+ case _ => m2(o.last)
+ }
+ )
+ }
+ }
+
+ if (o.length == 3 && a != '\0') o :+ a
+ else
+ code(i.tail, c, if (a != '\0') o :+ a else o)
+ }
+ }
+} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
index c8f2d13..cc891ab 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
@@ -1,100 +1,30 @@
package org.hashtree.stringmetric.phonetic
import org.hashtree.stringmetric.{ StringCleaner, StringCleanerDelegate, StringMetric }
-import scala.annotation.tailrec
/** An implementation of the Soundex [[org.hashtree.stringmetric.StringMetric]]. */
object SoundexMetric extends StringMetric {
- implicit val stringCleaner = new StringCleanerDelegate
-
override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringCleaner: StringCleaner): Option[Boolean] = {
val ca1 = stringCleaner.clean(charArray1)
val ca2 = stringCleaner.clean(charArray2)
- val se1 = if (ca1.length > 0) soundex(ca1) else None
- val se2 = if (ca2.length > 0) soundex(ca2) else None
- if (!se1.isDefined || !se2.isDefined) None else Some(se1.get == se2.get)
+ if (ca1.length == 0 || ca2.length == 0) None
+ else {
+ val se1 = Soundex.compute(ca1)
+ val se2 = Soundex.compute(ca2)
+
+ if (!se1.isDefined || !se2.isDefined || (se1.get.length == 0 && se2.get.length == 0))
+ None
+ else
+ Some(se1.get.sameElements(se2.get))
+ }
}
override def compare(string1: String, string2: String)(implicit stringCleaner: StringCleaner): Option[Boolean] = {
+ // Unable to perform simple equality check, due to situations where no letters are passed.
compare(
stringCleaner.clean(string1.toCharArray),
stringCleaner.clean(string2.toCharArray)
)(new StringCleanerDelegate)
}
-
- private[this] def soundex(ca: Array[Char]) = {
- require(ca.length > 0)
-
- @tailrec
- def letter(ca: Array[Char], i: Int): Option[Tuple2[Char, Int]] = {
- require(ca.length > 0)
-
- val c = ca.head.toLower
-
- if (c >= 97 && c <= 122) Some((c, i)) else if (ca.length == 1) None else letter(ca.tail, i + 1)
- }
-
- @tailrec
- def code(i: Array[Char], p: Char, o: Array[Char]): Array[Char] = {
- require(i.length > 0)
- require(p >= 97 && p <= 122)
- require(o.length > 0)
-
- val c = i.head.toLower
- val m2 = (mc: Char) => mc match {
- case 'b' | 'f' | 'p' | 'v' => '1'
- case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2'
- case 'd' | 't' => '3'
- case 'l' => '4'
- case 'm' | 'n' => '5'
- case 'r' => '6'
- case _ => '\0'
- }
- val m1 = (mc: Char, pc: Char) => mc match {
- case 'b' | 'f' | 'p' | 'v' if pc != '1' => '1'
- case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' if pc != '2' => '2'
- case 'd' | 't' if pc != '3' => '3'
- case 'l' if pc != '4' => '4'
- case 'm' | 'n' if pc != '5' => '5'
- case 'r' if pc != '6' => '6'
- case _ => '\0'
- }
- val a = p match {
- // Code twice.
- case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c)
- // Code once.
- case _ => {
- m1(
- c,
- o.last match {
- case '1' | '2' | '3' | '4' | '5' | '6' => o.last
- case _ => m2(o.last)
- }
- )
- }
- }
-
- if (i.length == 1 || (o.length == 3 && a != '\0'))
- if (a != '\0') o :+ a else o
- else
- code(i.tail, c, if (a != '\0') o :+ a else o)
- }
-
- letter(ca, 0) match {
- case Some(l) => {
- if (ca.length - 1 == l._2) Some(l._1 + "000")
- else {
- Some(
- code(
- ca.takeRight(ca.length - (l._2 + 1)),
- l._1, // Pass first letter.
- Array(l._1) // Pass array with first letter.
- ).mkString.padTo(4, '0')
- )
- }
- }
- case None => None
- }
- }
} \ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexMetricSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexMetricSpec.scala
index 6664696..f92f789 100755
--- a/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexMetricSpec.scala
+++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexMetricSpec.scala
@@ -8,27 +8,28 @@ import org.scalatest.junit.JUnitRunner
final class SoundexMetricSpec extends ScalaTest {
"SoundexMetric" should provide {
"compare method" when passed {
- "valid arguments" should returns {
- "Boolean indicating matches" in {
- SoundexMetric.compare("abc", "abc").get should be (true) // a120 vs. a120
- SoundexMetric.compare("a", "a").get should be (true) // a000 vs. a000
- SoundexMetric.compare("abc", "xyz").get should be (false) // a120 vs. x200
+ "empty arguments" should returns {
+ "None" in {
SoundexMetric.compare("", "").isDefined should be (false)
+ SoundexMetric.compare("abc", "").isDefined should be (false)
+ SoundexMetric.compare("", "xyz").isDefined should be (false)
+ }
+ }
+ "non-phonetic arguments" should returns {
+ "None" in {
SoundexMetric.compare("123", "123").isDefined should be (false)
- SoundexMetric.compare("1", "1").isDefined should be (false)
-
- SoundexMetric.compare("Robert", "Rupert").get should be (true) // r163 vs. r163
- SoundexMetric.compare("Robert", "Rubin").get should be (false) // r163 vs. r150
-
- SoundexMetric.compare("Ashcraft", "Ashcroft").get should be (true) // a261 vs. a261
- SoundexMetric.compare("Tymczak", "Tymczak").get should be (true) // t522 vs. t522
- SoundexMetric.compare("Pfister", "Pfister").get should be (true) // p236 vs. p236
- SoundexMetric.compare("Euler", "Ellery").get should be (true) // e460 vs. e460
- SoundexMetric.compare("Gauss", "Ghosh").get should be (true) // g200 vs. g200
- SoundexMetric.compare("Hilbert", "Heilbronn").get should be (true) // h416 vs. h416
- SoundexMetric.compare("Knuth", "Kant").get should be (true) // k530 vs. k530
- SoundexMetric.compare("Lloyd", "Ladd").get should be (true) // l300 vs. l300
- SoundexMetric.compare("Lukasiewicz", "Lissajous").get should be (true) // l222 vs. l222
+ SoundexMetric.compare("123", "").isDefined should be (false)
+ SoundexMetric.compare("", "123").isDefined should be (false)
+ }
+ }
+ "phonetically similar arguments" should returns {
+ "Boolean indicating true" in {
+ SoundexMetric.compare("robert", "rupert").get should be (true)
+ }
+ }
+ "phonetically dissimilar arguments" should returns {
+ "Boolean indicating false" in {
+ SoundexMetric.compare("robert", "rubin").get should be (false)
}
}
}
diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala
new file mode 100755
index 0000000..13c6bdd
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala
@@ -0,0 +1,50 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class SoundexSpec extends ScalaTest {
+ "Soundex" should provide {
+ "compute method" when passed {
+ "empty argument" should returns {
+ "None" in {
+ Soundex.compute("").isDefined should be (false)
+ }
+ }
+ "non-phonetic argument" should returns {
+ "None" in {
+ Soundex.compute("123").isDefined should be (false)
+ }
+ }
+ "phonetic argument" should returns {
+ "Some" in {
+ Soundex.compute("abc").get should equal ("a120")
+ Soundex.compute("xyz").get should equal ("x200")
+ Soundex.compute("robert").get should equal ("r163")
+ Soundex.compute("rupert").get should equal ("r163")
+ Soundex.compute("rubin").get should equal ("r150")
+ Soundex.compute("ashcraft").get should equal ("a261")
+ Soundex.compute("tymczak").get should equal ("t522")
+ Soundex.compute("pfister").get should equal ("p236")
+ Soundex.compute("euler").get should equal ("e460")
+ Soundex.compute("gauss").get should equal ("g200")
+ Soundex.compute("hilbert").get should equal ("h416")
+ Soundex.compute("knuth").get should equal ("k530")
+ Soundex.compute("lloyd").get should equal ("l300")
+ Soundex.compute("lukasiewicz").get should equal ("l222")
+ Soundex.compute("ashcroft").get should equal ("a261")
+ Soundex.compute("tymczak").get should equal ("t522")
+ Soundex.compute("pfister").get should equal ("p236")
+ Soundex.compute("ellery").get should equal ("e460")
+ Soundex.compute("ghosh").get should equal ("g200")
+ Soundex.compute("heilbronn").get should equal ("h416")
+ Soundex.compute("kant").get should equal ("k530")
+ Soundex.compute("ladd").get should equal ("l300")
+ Soundex.compute("lissajous").get should equal ("l222")
+ }
+ }
+ }
+ }
+} \ No newline at end of file