summary refs log tree commit diff
path: root/core
diff options
context:
space:
mode:
author Rocky Madden <git@rockymadden.com> 2012-10-29 13:31:47 -0600
committer Rocky Madden <git@rockymadden.com> 2012-10-29 13:31:47 -0600
commit 400973bb472918b6046cd7fc1ef65f040225d1b3 (patch)
tree d02ca9b6eb539fd5475b5c82c3e6030b10fac0f6 /core
parent 26d85c948eb5359e62f9ad54469a3730fb0499f5 (diff)
download stringmetric-400973bb472918b6046cd7fc1ef65f040225d1b3.tar.gz
stringmetric-400973bb472918b6046cd7fc1ef65f040225d1b3.tar.bz2
stringmetric-400973bb472918b6046cd7fc1ef65f040225d1b3.zip
Created refined soundex algorithm, metric, command, and specs.
Diffstat (limited to 'core')
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundex.scala 84
-rwxr-xr-x core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala 26
-rwxr-xr-x core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetricSpec.scala 37
-rwxr-xr-x core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexSpec.scala 54
-rwxr-xr-x core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala 5
5 files changed, 205 insertions, 1 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundex.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundex.scala
new file mode 100755
index 0000000..8443a5f
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundex.scala
@@ -0,0 +1,84 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.{ StringAlgorithm, StringFilter, StringFilterDelegate }
+import scala.annotation.tailrec
+
+/**
+ * An implementation of the refined Soundex [[org.hashtree.stringmetric.StringAlgorithm]].
+ *
+ * An encoding is the lower-cased first letter followed by one digit for each letter of the
+ * filtered input (the first letter included). A digit is dropped when it equals the digit
+ * emitted immediately before it, and characters without a digit (anything outside 'a' to 'z'
+ * after lower-casing) are skipped without resetting that comparison. For example, "braz"
+ * encodes to "b1905" and "x123" encodes to "x5".
+ */
+object RefinedSoundex extends StringAlgorithm {
+  /**
+   * Encodes a character array.
+   *
+   * @param charArray the characters to encode
+   * @param stringFilter the filter applied to `charArray` before encoding
+   * @return `None` when the filtered input is empty or its lower-cased first character is not in
+   *         'a' to 'z'; otherwise `Some` of the encoding
+   */
+  override def compute(charArray: Array[Char])(implicit stringFilter: StringFilter): Option[Array[Char]] = {
+    val ca = stringFilter.filter(charArray)
+
+    ca.headOption.map(_.toLower) match {
+      case Some(fc) if fc >= 'a' && fc <= 'z' => Some(transcode(ca, fc))
+      case _ => None
+    }
+  }
+
+  /**
+   * Encodes a string.
+   *
+   * The string's characters are passed through `stringFilter` here, and the array overload is
+   * then invoked with a fresh `StringFilterDelegate` in place of `stringFilter`.
+   *
+   * @param string the text to encode
+   * @param stringFilter the filter applied to the characters of `string`
+   * @return `None` under the same conditions as the array overload; otherwise `Some` of the
+   *         encoding as a string
+   */
+  override def compute(string: String)(implicit stringFilter: StringFilter): Option[String] =
+    compute(stringFilter.filter(string.toCharArray))(new StringFilterDelegate).map(_.mkString)
+
+  /** Returns the refined Soundex digit of an already lower-cased character, if it has one. */
+  private[this] def digit(c: Char): Option[Char] = c match {
+    case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' => Some('0')
+    case 'b' | 'p' => Some('1')
+    case 'f' | 'v' => Some('2')
+    case 'c' | 'k' | 's' => Some('3')
+    case 'g' | 'j' => Some('4')
+    case 'q' | 'x' | 'z' => Some('5')
+    case 'd' | 't' => Some('6')
+    case 'l' => Some('7')
+    case 'm' | 'n' => Some('8')
+    case 'r' => Some('9')
+    case _ => None
+  }
+
+  /**
+   * Builds the encoding of `ca`, whose lower-cased first character `fc` has already been
+   * validated by the caller.
+   */
+  private[this] def transcode(ca: Array[Char], fc: Char): Array[Char] = {
+    // At most one digit per input character, plus the leading letter.
+    val out = new StringBuilder(ca.length + 1).append(fc)
+
+    @tailrec
+    def encode(idx: Int, prev: Option[Char]): Unit =
+      if (idx < ca.length) {
+        // Keep the digit only when it differs from the one emitted last. `prev` starts as None,
+        // so the first letter's own digit is always emitted.
+        val next = digit(ca(idx).toLower).filter(d => !prev.exists(_ == d))
+
+        next.foreach(d => out.append(d))
+        // A skipped character leaves the previously emitted digit in force.
+        encode(idx + 1, next.orElse(prev))
+      }
+
+    encode(0, None)
+    out.toString.toCharArray
+  }
+} \ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
new file mode 100755
index 0000000..0c233b0
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
@@ -0,0 +1,26 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.{ StringFilter, StringFilterDelegate, StringMetric }
+
+/**
+ * An implementation of the refined Soundex [[org.hashtree.stringmetric.StringMetric]].
+ *
+ * Two inputs compare as `true` when their [[RefinedSoundex]] encodings have the same elements.
+ */
+object RefinedSoundexMetric extends StringMetric {
+  /**
+   * Compares the refined Soundex encodings of two character arrays.
+   *
+   * @param charArray1 the first input
+   * @param charArray2 the second input
+   * @param stringFilter the filter applied to both inputs; it is also the implicit filter in
+   *                     scope for the `RefinedSoundex.compute` calls made here
+   * @return `None` when either filtered input is empty or cannot be encoded; otherwise `Some`
+   *         of whether the two encodings are element-wise equal
+   */
+  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringFilter: StringFilter): Option[Boolean] = {
+    val filtered1 = stringFilter.filter(charArray1)
+    val filtered2 = stringFilter.filter(charArray2)
+
+    if (filtered1.isEmpty || filtered2.isEmpty) None
+    else
+      (RefinedSoundex.compute(filtered1), RefinedSoundex.compute(filtered2)) match {
+        // Two empty encodings are treated as "not comparable" rather than as equal.
+        case (Some(code1), Some(code2)) if code1.nonEmpty || code2.nonEmpty =>
+          Some(code1.sameElements(code2))
+        case _ => None
+      }
+  }
+
+  /**
+   * Compares the refined Soundex encodings of two strings.
+   *
+   * Both strings are filtered here, and the array overload is then invoked with a fresh
+   * `StringFilterDelegate` in place of `stringFilter`.
+   * NOTE(review): presumably the delegate is a pass-through so the input is not filtered twice;
+   * confirm against `StringFilterDelegate`.
+   *
+   * @param string1 the first input
+   * @param string2 the second input
+   * @param stringFilter the filter applied to the characters of both inputs
+   * @return the result of the array overload for the filtered characters
+   */
+  override def compare(string1: String, string2: String)(implicit stringFilter: StringFilter): Option[Boolean] = {
+    val filtered1 = stringFilter.filter(string1.toCharArray)
+    val filtered2 = stringFilter.filter(string2.toCharArray)
+
+    compare(filtered1, filtered2)(new StringFilterDelegate)
+  }
+} \ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetricSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetricSpec.scala
new file mode 100755
index 0000000..9524835
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetricSpec.scala
@@ -0,0 +1,37 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Behavioural specification for the String overload of `RefinedSoundexMetric.compare`. */
+@RunWith(classOf[JUnitRunner])
+final class RefinedSoundexMetricSpec extends ScalaTest {
+  "RefinedSoundexMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          // At least one side is empty in every pair.
+          val pairs = Seq(("", ""), ("abc", ""), ("", "xyz"))
+
+          pairs.foreach { case (left, right) =>
+            RefinedSoundexMetric.compare(left, right).isDefined should be (false)
+          }
+        }
+      }
+      "non-phonetic arguments" should returns {
+        "None" in {
+          // Digits-only input has no leading letter to encode.
+          val pairs = Seq(("123", "123"), ("123", ""), ("", "123"))
+
+          pairs.foreach { case (left, right) =>
+            RefinedSoundexMetric.compare(left, right).isDefined should be (false)
+          }
+        }
+      }
+      "phonetically similar arguments" should returns {
+        "Boolean indicating true" in {
+          val result = RefinedSoundexMetric.compare("robert", "rupert")
+
+          result.get should be (true)
+        }
+      }
+      "phonetically dissimilar arguments" should returns {
+        "Boolean indicating false" in {
+          val result = RefinedSoundexMetric.compare("robert", "rubin")
+
+          result.get should be (false)
+        }
+      }
+    }
+  }
+} \ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexSpec.scala
new file mode 100755
index 0000000..1531eb0
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexSpec.scala
@@ -0,0 +1,54 @@
+package org.hashtree.stringmetric.phonetic
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Behavioural specification for the String overload of `RefinedSoundex.compute`. */
+@RunWith(classOf[JUnitRunner])
+final class RefinedSoundexSpec extends ScalaTest {
+  "RefinedSoundex" should provide {
+    "compute method" when passed {
+      "empty argument" should returns {
+        "None" in {
+          val result = RefinedSoundex.compute("")
+
+          result.isDefined should be (false)
+        }
+      }
+      "non-phonetic argument" should returns {
+        "None" in {
+          val result = RefinedSoundex.compute("123")
+
+          result.isDefined should be (false)
+        }
+      }
+      "phonetic argument" should returns {
+        "Some" in {
+          // Input paired with its expected encoding, checked in the listed order.
+          val expectations = Seq(
+            "x123" -> "x5",
+            "braz" -> "b1905",
+            "broz" -> "b1905",
+            "caren" -> "c30908",
+            "carren" -> "c30908",
+            "coram" -> "c30908",
+            "corran" -> "c30908",
+            "curreen" -> "c30908",
+            "curwen" -> "c30908",
+            "hairs" -> "h093",
+            "hark" -> "h093",
+            "hars" -> "h093",
+            "hayers" -> "h093",
+            "heers" -> "h093",
+            "hiers" -> "h093",
+            "lambard" -> "l7081096",
+            "lambart" -> "l7081096",
+            "lambert" -> "l7081096",
+            "lambird" -> "l7081096",
+            "lampaert" -> "l7081096",
+            "lampart" -> "l7081096",
+            "lamport" -> "l7081096",
+            "limbert" -> "l7081096",
+            "lombard" -> "l7081096",
+            "nolton" -> "n807608",
+            "noulton" -> "n807608"
+          )
+
+          expectations.foreach { case (input, expected) =>
+            RefinedSoundex.compute(input).get should equal (expected)
+          }
+        }
+      }
+    }
+  }
+} \ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala
index dbefc51..9f18bc9 100755
--- a/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala
+++ b/core/source/test/scala/org/hashtree/stringmetric/phonetic/SoundexSpec.scala
@@ -20,8 +20,11 @@ final class SoundexSpec extends ScalaTest {
}
"phonetic argument" should returns {
"Some" in {
+ Soundex.compute("x123").get should equal ("x000")
+
Soundex.compute("abc").get should equal ("a120")
Soundex.compute("xyz").get should equal ("x200")
+
Soundex.compute("robert").get should equal ("r163")
Soundex.compute("rupert").get should equal ("r163")
Soundex.compute("rubin").get should equal ("r150")
@@ -43,7 +46,7 @@ final class SoundexSpec extends ScalaTest {
Soundex.compute("kant").get should equal ("k530")
Soundex.compute("ladd").get should equal ("l300")
Soundex.compute("lissajous").get should equal ("l222")
- Soundex.compute("x123").get should equal ("x000")
+ Soundex.compute("fusedale").get should equal ("f234")
}
}
}