summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
author Rocky Madden <git@rockymadden.com> 2013-11-27 08:03:12 -0700
committer Rocky Madden <git@rockymadden.com> 2013-11-27 08:03:12 -0700
commit ab32aed4dfa68df86c00dd0c75a41932d16a659c (patch)
tree 100bade5d27c8e8e9c193a0818175887786e01ee /core
parent b75574d2c9d5152e378eded51d5634c5d6ec4ab1 (diff)
download stringmetric-ab32aed4dfa68df86c00dd0c75a41932d16a659c.tar.gz
stringmetric-ab32aed4dfa68df86c00dd0c75a41932d16a659c.tar.bz2
stringmetric-ab32aed4dfa68df86c00dd0c75a41932d16a659c.zip
Preferred minimal repetition naming.
Diffstat (limited to 'core')
-rwxr-xr-xcore/build.gradle27
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala7
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala7
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala32
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala32
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala32
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala32
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala32
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala58
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala55
-rwxr-xr-xcore/source/benchmark/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerBenchmark.scala35
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/Algorithm.scala5
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/Alphabet.scala55
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/Filter.scala5
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/Filterable.scala5
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/Metric.scala5
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/StringAlgorithm.scala42
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/StringFilter.scala45
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/StringFilterable.scala5
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/StringMetric.scala120
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/StringTokenizer.scala14
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/Tokenizer.scala5
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala11
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala11
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala11
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala15
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala15
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala11
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala11
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala11
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala10
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala15
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala15
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala11
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala9
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/package.scala7
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala122
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala32
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala131
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala40
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala135
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala40
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala75
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala33
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala73
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala33
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala42
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala37
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala37
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala87
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala40
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala58
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala40
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala40
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala57
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala61
-rwxr-xr-xcore/source/core/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala37
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala96
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/FilterDecoratedSpec.scala38
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala18
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala59
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala141
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala23
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlFilterSpec.scala33
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilterSpec.scala33
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilterSpec.scala29
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilterSpec.scala33
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilterSpec.scala35
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilterSpec.scala33
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilterSpec.scala33
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilterSpec.scala33
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilterSpec.scala37
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilterSpec.scala31
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilterSpec.scala33
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilterSpec.scala41
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/filter/StringFilterDelegateSpec.scala31
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala226
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala54
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala204
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala50
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala221
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala50
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala175
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala50
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala174
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala50
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala75
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala52
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala77
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala66
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala66
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala65
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala75
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala77
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala56
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala64
-rwxr-xr-xcore/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala69
110 files changed, 5589 insertions, 0 deletions
diff --git a/core/build.gradle b/core/build.gradle
new file mode 100755
index 0000000..5c92253
--- /dev/null
+++ b/core/build.gradle
@@ -0,0 +1,27 @@
+// Applies the script at ../deploy.gradle (its contents are not shown in this file).
+apply from: '../deploy.gradle'
+
+dependencies {
+  // NOTE(review): Caliper is declared for the test configuration only, while the sources that
+  // import it sit under source/benchmark — confirm ../deploy.gradle makes it visible there.
+  testCompile 'com.google.caliper:caliper:0.5-rc1'
+}
+
+// Each source set adds source/<dir>/{java,resource,scala} as source directories and sends its
+// processed resources to build/classes/<name>. The main source set reads from source/core.
+sourceSets {
+  benchmark {
+    output.resourcesDir "${project.buildDir}/classes/benchmark"
+
+    java.srcDir 'source/benchmark/java'
+    resources.srcDir 'source/benchmark/resource'
+    scala.srcDir 'source/benchmark/scala'
+  }
+  main {
+    output.resourcesDir "${project.buildDir}/classes/main"
+
+    java.srcDir 'source/core/java'
+    resources.srcDir 'source/core/resource'
+    scala.srcDir 'source/core/scala'
+  }
+  test {
+    output.resourcesDir "${project.buildDir}/classes/test"
+
+    java.srcDir 'source/test/java'
+    resources.srcDir 'source/test/resource'
+    scala.srcDir 'source/test/scala'
+  }
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala
new file mode 100755
index 0000000..55a6238
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala
@@ -0,0 +1,7 @@
+package com.rockymadden.stringmetric
+
+import com.google.caliper.SimpleBenchmark
+
+/** Base trait for this project's Caliper benchmarks; adds a helper that repeats a block. */
+trait CaliperBenchmark extends SimpleBenchmark {
+  /**
+   * Evaluates `code` once per repetition.
+   *
+   * @param reps number of evaluations; nothing is evaluated when it is zero or negative
+   * @param code by-name block, re-evaluated on every repetition
+   */
+  def run(reps: Int)(code: => Unit): Unit = (0 until reps).foreach(_ => code)
+}
\ No newline at end of file
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala
new file mode 100755
index 0000000..4474a8d
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala
@@ -0,0 +1,7 @@
+package com.rockymadden.stringmetric
+
+import com.google.caliper.{Benchmark, Runner}
+
+/**
+ * Supplies a `main` method that hands a fixed benchmark class to Caliper's `Runner`.
+ *
+ * @param suite benchmark class passed to `Runner.main` together with the command-line arguments
+ */
+abstract class CaliperRunner(private[this] val suite: Class[_ <: Benchmark]) {
+  /** Forwards `args` unchanged to `Runner.main` for the configured `suite`. */
+  def main(args: Array[String]): Unit = {
+    Runner.main(suite, args)
+  }
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala
new file mode 100755
index 0000000..bfc17f2
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala
@@ -0,0 +1,32 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `MetaphoneAlgorithm.compute`, exercised with both `String` and
+ * `Array[Char]` input of the parameterized `length`.
+ */
+final class MetaphoneAlgorithmBenchmark extends CaliperBenchmark {
+  import MetaphoneAlgorithmBenchmark.Algorithm
+
+  // Caliper parameter: number of characters in the generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): the same random text as a String and as a char array.
+  var string: String = _
+  var charArray: Array[Char] = _
+
+  /** Builds the fixtures from `length` random ASCII letters. */
+  override protected def setUp(): Unit = {
+    // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+    string = Random.alphanumeric.filter(_ > '9').take(length).mkString
+    charArray = string.toCharArray
+  }
+
+  /** Calls `compute` on the character-array fixture `reps` times. */
+  def timeComputeWithCharArray(reps: Int): Unit = run(reps) {
+    Algorithm.compute(charArray)
+  }
+
+  /** Calls `compute` on the string fixture `reps` times. */
+  def timeComputeWithString(reps: Int): Unit = run(reps) {
+    Algorithm.compute(string)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the algorithm instance under test. */
+object MetaphoneAlgorithmBenchmark extends CaliperRunner(classOf[MetaphoneAlgorithmBenchmark]) {
+  private final val Algorithm = MetaphoneAlgorithm()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala
new file mode 100755
index 0000000..147ab54
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `MetaphoneMetric.compare`, covering identical and differing inputs
+ * in both `String` and `Array[Char]` form.
+ */
+final class MetaphoneMetricBenchmark extends CaliperBenchmark {
+  import MetaphoneMetricBenchmark.Metric
+
+  // Caliper parameter: number of characters in each generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): two random texts, each as a String and as a char array.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  /** Generates two random letter-only inputs of `length` characters each. */
+  override protected def setUp(): Unit = {
+    // Redraws until the result differs from `avoid`; `None` accepts the first draw.
+    // A zero length always yields "", so both inputs are equal in that one case.
+    @tailrec
+    def random(l: Int, avoid: Option[String] = None): String =
+      if (l == 0) ""
+      else {
+        // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+        val candidate = Random.alphanumeric.filter(_ > '9').take(l).mkString
+
+        if (avoid.exists(_ == candidate)) random(l, avoid)
+        else candidate
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+
+  /** Compares the first character array with the second `reps` times. */
+  def timeCompareWithDifferentCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  /** Compares the first string with the second `reps` times. */
+  def timeCompareWithDifferentStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  /** Compares the first character array with itself `reps` times. */
+  def timeCompareWithIdenticalCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  /** Compares the first string with itself `reps` times. */
+  def timeCompareWithIdenticalStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the metric instance under test. */
+object MetaphoneMetricBenchmark extends CaliperRunner(classOf[MetaphoneMetricBenchmark]) {
+  private final val Metric = MetaphoneMetric()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala
new file mode 100755
index 0000000..79f9e16
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala
@@ -0,0 +1,32 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `NysiisAlgorithm.compute`, exercised with both `String` and
+ * `Array[Char]` input of the parameterized `length`.
+ */
+final class NysiisAlgorithmBenchmark extends CaliperBenchmark {
+  import NysiisAlgorithmBenchmark.Algorithm
+
+  // Caliper parameter: number of characters in the generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): the same random text as a String and as a char array.
+  var string: String = _
+  var charArray: Array[Char] = _
+
+  /** Builds the fixtures from `length` random ASCII letters. */
+  override protected def setUp(): Unit = {
+    // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+    string = Random.alphanumeric.filter(_ > '9').take(length).mkString
+    charArray = string.toCharArray
+  }
+
+  /** Calls `compute` on the character-array fixture `reps` times. */
+  def timeComputeWithCharArray(reps: Int): Unit = run(reps) {
+    Algorithm.compute(charArray)
+  }
+
+  /** Calls `compute` on the string fixture `reps` times. */
+  def timeComputeWithString(reps: Int): Unit = run(reps) {
+    Algorithm.compute(string)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the algorithm instance under test. */
+object NysiisAlgorithmBenchmark extends CaliperRunner(classOf[NysiisAlgorithmBenchmark]) {
+  private final val Algorithm = NysiisAlgorithm()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala
new file mode 100755
index 0000000..620d054
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `NysiisMetric.compare`, covering identical and differing inputs
+ * in both `String` and `Array[Char]` form.
+ */
+final class NysiisMetricBenchmark extends CaliperBenchmark {
+  import NysiisMetricBenchmark.Metric
+
+  // Caliper parameter: number of characters in each generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): two random texts, each as a String and as a char array.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  /** Generates two random letter-only inputs of `length` characters each. */
+  override protected def setUp(): Unit = {
+    // Redraws until the result differs from `avoid`; `None` accepts the first draw.
+    // A zero length always yields "", so both inputs are equal in that one case.
+    @tailrec
+    def random(l: Int, avoid: Option[String] = None): String =
+      if (l == 0) ""
+      else {
+        // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+        val candidate = Random.alphanumeric.filter(_ > '9').take(l).mkString
+
+        if (avoid.exists(_ == candidate)) random(l, avoid)
+        else candidate
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+
+  /** Compares the first character array with the second `reps` times. */
+  def timeCompareWithDifferentCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  /** Compares the first string with the second `reps` times. */
+  def timeCompareWithDifferentStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  /** Compares the first character array with itself `reps` times. */
+  def timeCompareWithIdenticalCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  /** Compares the first string with itself `reps` times. */
+  def timeCompareWithIdenticalStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the metric instance under test. */
+object NysiisMetricBenchmark extends CaliperRunner(classOf[NysiisMetricBenchmark]) {
+  private final val Metric = NysiisMetric()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala
new file mode 100755
index 0000000..885bc3a
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala
@@ -0,0 +1,32 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `RefinedNysiisAlgorithm.compute`, exercised with both `String` and
+ * `Array[Char]` input of the parameterized `length`.
+ */
+final class RefinedNysiisAlgorithmBenchmark extends CaliperBenchmark {
+  import RefinedNysiisAlgorithmBenchmark.Algorithm
+
+  // Caliper parameter: number of characters in the generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): the same random text as a String and as a char array.
+  var string: String = _
+  var charArray: Array[Char] = _
+
+  /** Builds the fixtures from `length` random ASCII letters. */
+  override protected def setUp(): Unit = {
+    // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+    string = Random.alphanumeric.filter(_ > '9').take(length).mkString
+    charArray = string.toCharArray
+  }
+
+  /** Calls `compute` on the character-array fixture `reps` times. */
+  def timeComputeWithCharArray(reps: Int): Unit = run(reps) {
+    Algorithm.compute(charArray)
+  }
+
+  /** Calls `compute` on the string fixture `reps` times. */
+  def timeComputeWithString(reps: Int): Unit = run(reps) {
+    Algorithm.compute(string)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the algorithm instance under test. */
+object RefinedNysiisAlgorithmBenchmark extends CaliperRunner(classOf[RefinedNysiisAlgorithmBenchmark]) {
+  private final val Algorithm = RefinedNysiisAlgorithm()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala
new file mode 100755
index 0000000..d0da4e5
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `RefinedNysiisMetric.compare`, covering identical and differing inputs
+ * in both `String` and `Array[Char]` form.
+ */
+final class RefinedNysiisMetricBenchmark extends CaliperBenchmark {
+  import RefinedNysiisMetricBenchmark.Metric
+
+  // Caliper parameter: number of characters in each generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): two random texts, each as a String and as a char array.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  /** Generates two random letter-only inputs of `length` characters each. */
+  override protected def setUp(): Unit = {
+    // Redraws until the result differs from `avoid`; `None` accepts the first draw.
+    // A zero length always yields "", so both inputs are equal in that one case.
+    @tailrec
+    def random(l: Int, avoid: Option[String] = None): String =
+      if (l == 0) ""
+      else {
+        // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+        val candidate = Random.alphanumeric.filter(_ > '9').take(l).mkString
+
+        if (avoid.exists(_ == candidate)) random(l, avoid)
+        else candidate
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+
+  /** Compares the first character array with the second `reps` times. */
+  def timeCompareWithDifferentCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  /** Compares the first string with the second `reps` times. */
+  def timeCompareWithDifferentStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  /** Compares the first character array with itself `reps` times. */
+  def timeCompareWithIdenticalCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  /** Compares the first string with itself `reps` times. */
+  def timeCompareWithIdenticalStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the metric instance under test. */
+object RefinedNysiisMetricBenchmark extends CaliperRunner(classOf[RefinedNysiisMetricBenchmark]) {
+  private final val Metric = RefinedNysiisMetric()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala
new file mode 100755
index 0000000..99aa3df
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala
@@ -0,0 +1,32 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `RefinedSoundexAlgorithm.compute`, exercised with both `String` and
+ * `Array[Char]` input of the parameterized `length`.
+ */
+final class RefinedSoundexAlgorithmBenchmark extends CaliperBenchmark {
+  import RefinedSoundexAlgorithmBenchmark.Algorithm
+
+  // Caliper parameter: number of characters in the generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): the same random text as a String and as a char array.
+  var string: String = _
+  var charArray: Array[Char] = _
+
+  /** Builds the fixtures from `length` random ASCII letters. */
+  override protected def setUp(): Unit = {
+    // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+    string = Random.alphanumeric.filter(_ > '9').take(length).mkString
+    charArray = string.toCharArray
+  }
+
+  /** Calls `compute` on the character-array fixture `reps` times. */
+  def timeComputeWithCharArray(reps: Int): Unit = run(reps) {
+    Algorithm.compute(charArray)
+  }
+
+  /** Calls `compute` on the string fixture `reps` times. */
+  def timeComputeWithString(reps: Int): Unit = run(reps) {
+    Algorithm.compute(string)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the algorithm instance under test. */
+object RefinedSoundexAlgorithmBenchmark extends CaliperRunner(classOf[RefinedSoundexAlgorithmBenchmark]) {
+  private final val Algorithm = RefinedSoundexAlgorithm()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala
new file mode 100755
index 0000000..8e24650
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `RefinedSoundexMetric.compare`, covering identical and differing inputs
+ * in both `String` and `Array[Char]` form.
+ */
+final class RefinedSoundexMetricBenchmark extends CaliperBenchmark {
+  import RefinedSoundexMetricBenchmark.Metric
+
+  // Caliper parameter: number of characters in each generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): two random texts, each as a String and as a char array.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  /** Generates two random letter-only inputs of `length` characters each. */
+  override protected def setUp(): Unit = {
+    // Redraws until the result differs from `avoid`; `None` accepts the first draw.
+    // A zero length always yields "", so both inputs are equal in that one case.
+    @tailrec
+    def random(l: Int, avoid: Option[String] = None): String =
+      if (l == 0) ""
+      else {
+        // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+        val candidate = Random.alphanumeric.filter(_ > '9').take(l).mkString
+
+        if (avoid.exists(_ == candidate)) random(l, avoid)
+        else candidate
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+
+  /** Compares the first character array with the second `reps` times. */
+  def timeCompareWithDifferentCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  /** Compares the first string with the second `reps` times. */
+  def timeCompareWithDifferentStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  /** Compares the first character array with itself `reps` times. */
+  def timeCompareWithIdenticalCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  /** Compares the first string with itself `reps` times. */
+  def timeCompareWithIdenticalStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the metric instance under test. */
+object RefinedSoundexMetricBenchmark extends CaliperRunner(classOf[RefinedSoundexMetricBenchmark]) {
+  private final val Metric = RefinedSoundexMetric()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala
new file mode 100755
index 0000000..5c195a6
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala
@@ -0,0 +1,32 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `SoundexAlgorithm.compute`, exercised with both `String` and
+ * `Array[Char]` input of the parameterized `length`.
+ */
+final class SoundexAlgorithmBenchmark extends CaliperBenchmark {
+  import SoundexAlgorithmBenchmark.Algorithm
+
+  // Caliper parameter: number of characters in the generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): the same random text as a String and as a char array.
+  var string: String = _
+  var charArray: Array[Char] = _
+
+  /** Builds the fixtures from `length` random ASCII letters. */
+  override protected def setUp(): Unit = {
+    // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+    string = Random.alphanumeric.filter(_ > '9').take(length).mkString
+    charArray = string.toCharArray
+  }
+
+  /** Calls `compute` on the character-array fixture `reps` times. */
+  def timeComputeWithCharArray(reps: Int): Unit = run(reps) {
+    Algorithm.compute(charArray)
+  }
+
+  /** Calls `compute` on the string fixture `reps` times. */
+  def timeComputeWithString(reps: Int): Unit = run(reps) {
+    Algorithm.compute(string)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the algorithm instance under test. */
+object SoundexAlgorithmBenchmark extends CaliperRunner(classOf[SoundexAlgorithmBenchmark]) {
+  private final val Algorithm = SoundexAlgorithm()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala
new file mode 100755
index 0000000..6534d72
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `SoundexMetric.compare`, covering identical and differing inputs
+ * in both `String` and `Array[Char]` form.
+ */
+final class SoundexMetricBenchmark extends CaliperBenchmark {
+  import SoundexMetricBenchmark.Metric
+
+  // Caliper parameter: number of characters in each generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): two random texts, each as a String and as a char array.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  /** Generates two random letter-only inputs of `length` characters each. */
+  override protected def setUp(): Unit = {
+    // Redraws until the result differs from `avoid`; `None` accepts the first draw.
+    // A zero length always yields "", so both inputs are equal in that one case.
+    @tailrec
+    def random(l: Int, avoid: Option[String] = None): String =
+      if (l == 0) ""
+      else {
+        // `_ > '9'` discards the digits that `Random.alphanumeric` also emits.
+        val candidate = Random.alphanumeric.filter(_ > '9').take(l).mkString
+
+        if (avoid.exists(_ == candidate)) random(l, avoid)
+        else candidate
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+
+  /** Compares the first character array with the second `reps` times. */
+  def timeCompareWithDifferentCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  /** Compares the first string with the second `reps` times. */
+  def timeCompareWithDifferentStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  /** Compares the first character array with itself `reps` times. */
+  def timeCompareWithIdenticalCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  /** Compares the first string with itself `reps` times. */
+  def timeCompareWithIdenticalStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the metric instance under test. */
+object SoundexMetricBenchmark extends CaliperRunner(classOf[SoundexMetricBenchmark]) {
+  private final val Metric = SoundexMetric()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala
new file mode 100755
index 0000000..88f5d4c
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+/**
+ * Caliper benchmark for `DiceSorensenMetric.compare`, covering identical and differing inputs
+ * in both `String` and `Array[Char]` form.
+ *
+ * Every call passes `2` in the second argument list of `compare`.
+ * NOTE(review): presumably the n-gram size — confirm against `DiceSorensenMetric`.
+ */
+final class DiceSorensenMetricBenchmark extends CaliperBenchmark {
+  import DiceSorensenMetricBenchmark.Metric
+
+  // Caliper parameter: number of characters in each generated input.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+
+  // Fixtures assigned by setUp(): two random texts, each as a String and as a char array.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  /** Generates two random alphanumeric inputs of `length` characters each. */
+  override protected def setUp(): Unit = {
+    // Redraws until the result differs from `avoid`; `None` accepts the first draw.
+    // A zero length always yields "", so both inputs are equal in that one case.
+    @tailrec
+    def random(l: Int, avoid: Option[String] = None): String =
+      if (l == 0) ""
+      else {
+        val candidate = Random.alphanumeric.take(l).mkString
+
+        if (avoid.exists(_ == candidate)) random(l, avoid)
+        else candidate
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+
+  /** Compares the first character array with the second `reps` times. */
+  def timeCompareWithDifferentCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray2)(2)
+  }
+
+  /** Compares the first string with the second `reps` times. */
+  def timeCompareWithDifferentStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string2)(2)
+  }
+
+  /** Compares the first character array with itself `reps` times. */
+  def timeCompareWithIdenticalCharArrays(reps: Int): Unit = run(reps) {
+    Metric.compare(charArray1, charArray1)(2)
+  }
+
+  /** Compares the first string with itself `reps` times. */
+  def timeCompareWithIdenticalStrings(reps: Int): Unit = run(reps) {
+    Metric.compare(string1, string1)(2)
+  }
+}
+
+/** Companion: inherits `main` from `CaliperRunner` and holds the metric instance under test. */
+object DiceSorensenMetricBenchmark extends CaliperRunner(classOf[DiceSorensenMetricBenchmark]) {
+  private final val Metric = DiceSorensenMetric()
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala
new file mode 100755
index 0000000..60ef7d4
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+final class HammingMetricBenchmark extends CaliperBenchmark {
+  import HammingMetricBenchmark.Metric
+  // Length of each generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Inputs assigned by setUp(): two strings of that length and their char-array forms.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    // Draws `l` random alphanumeric characters, redrawing while the result equals `ps`.
+    @tailrec
+    def random(l: Int, ps: Option[String] = None): String =
+      if (l == 0) "" // Nothing to draw, so for length 0 both inputs end up as "".
+      else {
+        val s = Random.alphanumeric.take(l).mkString
+
+        if (ps.exists(_ == s)) random(l, ps) else s
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+  // Each time* method below passes one compare call to the inherited run(reps).
+  def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  def timeCompareWithDifferentStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  def timeCompareWithIdenticalStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+object HammingMetricBenchmark extends CaliperRunner(classOf[HammingMetricBenchmark]) {
+  private final val Metric = HammingMetric() // One metric instance shared by all time* methods.
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala
new file mode 100755
index 0000000..3f6a59f
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+final class JaccardMetricBenchmark extends CaliperBenchmark {
+  import JaccardMetricBenchmark.Metric
+  // Length of each generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Inputs assigned by setUp(): two strings of that length and their char-array forms.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    // Draws `l` random alphanumeric characters, redrawing while the result equals `ps`.
+    @tailrec
+    def random(l: Int, ps: Option[String] = None): String =
+      if (l == 0) "" // Nothing to draw, so for length 0 both inputs end up as "".
+      else {
+        val s = Random.alphanumeric.take(l).mkString
+
+        if (ps.exists(_ == s)) random(l, ps) else s
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+  // Each time* method below passes one compare call (with n = 2) to the inherited run(reps).
+  def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray2)(2)
+  }
+
+  def timeCompareWithDifferentStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string2)(2)
+  }
+
+  def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray1)(2)
+  }
+
+  def timeCompareWithIdenticalStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string1)(2)
+  }
+}
+
+object JaccardMetricBenchmark extends CaliperRunner(classOf[JaccardMetricBenchmark]) {
+  private final val Metric = JaccardMetric() // One metric instance shared by all time* methods.
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala
new file mode 100755
index 0000000..c9c9b2c
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+final class JaroMetricBenchmark extends CaliperBenchmark {
+  import JaroMetricBenchmark.Metric
+  // Length of each generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Inputs assigned by setUp(): two strings of that length and their char-array forms.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    // Draws `l` random alphanumeric characters, redrawing while the result equals `ps`.
+    @tailrec
+    def random(l: Int, ps: Option[String] = None): String =
+      if (l == 0) "" // Nothing to draw, so for length 0 both inputs end up as "".
+      else {
+        val s = Random.alphanumeric.take(l).mkString
+
+        if (ps.exists(_ == s)) random(l, ps) else s
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+  // Each time* method below passes one compare call to the inherited run(reps).
+  def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  def timeCompareWithDifferentStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  def timeCompareWithIdenticalStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+object JaroMetricBenchmark extends CaliperRunner(classOf[JaroMetricBenchmark]) {
+  private final val Metric = JaroMetric() // One metric instance shared by all time* methods.
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala
new file mode 100755
index 0000000..b3da154
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+final class JaroWinklerMetricBenchmark extends CaliperBenchmark {
+  import JaroWinklerMetricBenchmark.Metric
+  // Length of each generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Inputs assigned by setUp(): two strings of that length and their char-array forms.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    // Draws `l` random alphanumeric characters, redrawing while the result equals `ps`.
+    @tailrec
+    def random(l: Int, ps: Option[String] = None): String =
+      if (l == 0) "" // Nothing to draw, so for length 0 both inputs end up as "".
+      else {
+        val s = Random.alphanumeric.take(l).mkString
+
+        if (ps.exists(_ == s)) random(l, ps) else s
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+  // Each time* method below passes one compare call to the inherited run(reps).
+  def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  def timeCompareWithDifferentStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  def timeCompareWithIdenticalStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+object JaroWinklerMetricBenchmark extends CaliperRunner(classOf[JaroWinklerMetricBenchmark]) {
+  private final val Metric = JaroWinklerMetric() // One metric instance shared by all time* methods.
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala
new file mode 100755
index 0000000..d47e138
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+final class LevenshteinMetricBenchmark extends CaliperBenchmark {
+  import LevenshteinMetricBenchmark.Metric
+  // Length of each generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Inputs assigned by setUp(): two strings of that length and their char-array forms.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    // Draws `l` random alphanumeric characters, redrawing while the result equals `ps`.
+    @tailrec
+    def random(l: Int, ps: Option[String] = None): String =
+      if (l == 0) "" // Nothing to draw, so for length 0 both inputs end up as "".
+      else {
+        val s = Random.alphanumeric.take(l).mkString
+
+        if (ps.exists(_ == s)) random(l, ps) else s
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+  // Each time* method below passes one compare call to the inherited run(reps).
+  def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  def timeCompareWithDifferentStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  def timeCompareWithIdenticalStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+object LevenshteinMetricBenchmark extends CaliperRunner(classOf[LevenshteinMetricBenchmark]) {
+  private final val Metric = LevenshteinMetric() // One metric instance shared by all time* methods.
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala
new file mode 100755
index 0000000..4250e66
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala
@@ -0,0 +1,58 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+final class NGramMetricBenchmark extends CaliperBenchmark {
+  import NGramMetricBenchmark.Metric
+  // Length of each generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Value passed as the second argument list of every compare call below.
+  @Param(Array("2", "3"))
+  var n: Int = _
+  // Inputs assigned by setUp(): two strings of that length and their char-array forms.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    // Draws `l` random alphanumeric characters, redrawing while the result equals `ps`.
+    @tailrec
+    def random(l: Int, ps: Option[String] = None): String =
+      if (l == 0) "" // Nothing to draw, so for length 0 both inputs end up as "".
+      else {
+        val s = Random.alphanumeric.take(l).mkString
+
+        if (ps.exists(_ == s)) random(l, ps) else s
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+  // Each time* method below passes one compare call to the inherited run(reps).
+  def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray2)(n)
+  }
+
+  def timeCompareWithDifferentStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string2)(n)
+  }
+
+  def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray1)(n)
+  }
+
+  def timeCompareWithIdenticalStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string1)(n)
+  }
+}
+
+object NGramMetricBenchmark extends CaliperRunner(classOf[NGramMetricBenchmark]) {
+  private final val Metric = NGramMetric() // One metric instance shared by all time* methods.
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala
new file mode 100755
index 0000000..4d7ce0a
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+final class OverlapMetricBenchmark extends CaliperBenchmark {
+  import OverlapMetricBenchmark.Metric
+  // Length of each generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Inputs assigned by setUp(): two strings of that length and their char-array forms.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    // Draws `l` random alphanumeric characters, redrawing while the result equals `ps`.
+    @tailrec
+    def random(l: Int, ps: Option[String] = None): String =
+      if (l == 0) "" // Nothing to draw, so for length 0 both inputs end up as "".
+      else {
+        val s = Random.alphanumeric.take(l).mkString
+
+        if (ps.exists(_ == s)) random(l, ps) else s
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+  // Each time* method below passes one compare call (with n = 2) to the inherited run(reps).
+  def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray2)(2)
+  }
+
+  def timeCompareWithDifferentStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string2)(2)
+  }
+
+  def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray1)(2)
+  }
+
+  def timeCompareWithIdenticalStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string1)(2)
+  }
+}
+
+object OverlapMetricBenchmark extends CaliperRunner(classOf[OverlapMetricBenchmark]) {
+  private final val Metric = OverlapMetric() // One metric instance shared by all time* methods.
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala
new file mode 100755
index 0000000..edc1527
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+final class RatcliffObershelpMetricBenchmark extends CaliperBenchmark {
+  import RatcliffObershelpMetricBenchmark.Metric
+  // Length of each generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Inputs assigned by setUp(): two strings of that length and their char-array forms.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    // Draws `l` random alphanumeric characters, redrawing while the result equals `ps`.
+    @tailrec
+    def random(l: Int, ps: Option[String] = None): String =
+      if (l == 0) "" // Nothing to draw, so for length 0 both inputs end up as "".
+      else {
+        val s = Random.alphanumeric.take(l).mkString
+
+        if (ps.exists(_ == s)) random(l, ps) else s
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+  // Each time* method below passes one compare call to the inherited run(reps).
+  def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray2)
+  }
+
+  def timeCompareWithDifferentStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string2)
+  }
+
+  def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray1)
+  }
+
+  def timeCompareWithIdenticalStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string1)
+  }
+}
+
+object RatcliffObershelpMetricBenchmark extends CaliperRunner(classOf[RatcliffObershelpMetricBenchmark]) {
+  private final val Metric = RatcliffObershelpMetric() // One metric instance shared by all time* methods.
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala
new file mode 100755
index 0000000..b511654
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.annotation.tailrec
+import scala.util.Random
+
+final class WeightedLevenshteinMetricBenchmark extends CaliperBenchmark {
+  import WeightedLevenshteinMetricBenchmark.Metric
+  // Length of each generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Inputs assigned by setUp(): two strings of that length and their char-array forms.
+  var string1: String = _
+  var charArray1: Array[Char] = _
+  var string2: String = _
+  var charArray2: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    // Draws `l` random alphanumeric characters, redrawing while the result equals `ps`.
+    @tailrec
+    def random(l: Int, ps: Option[String] = None): String =
+      if (l == 0) "" // Nothing to draw, so for length 0 both inputs end up as "".
+      else {
+        val s = Random.alphanumeric.take(l).mkString
+
+        if (ps.exists(_ == s)) random(l, ps) else s
+      }
+
+    string1 = random(length)
+    string2 = random(length, Some(string1))
+    charArray1 = string1.toCharArray
+    charArray2 = string2.toCharArray
+  }
+  // Each time* method below passes one compare call (options 1, 1, 1) to the inherited run(reps).
+  def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray2)(1, 1, 1)
+  }
+
+  def timeCompareWithDifferentStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string2)(1, 1, 1)
+  }
+
+  def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) {
+    Metric.compare(charArray1, charArray1)(1, 1, 1)
+  }
+
+  def timeCompareWithIdenticalStrings(reps: Int) = run(reps) {
+    Metric.compare(string1, string1)(1, 1, 1)
+  }
+}
+
+object WeightedLevenshteinMetricBenchmark extends CaliperRunner(classOf[WeightedLevenshteinMetricBenchmark]) {
+  private final val Metric = WeightedLevenshteinMetric() // One metric instance shared by all time* methods.
+}
diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerBenchmark.scala
new file mode 100755
index 0000000..7e62662
--- /dev/null
+++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerBenchmark.scala
@@ -0,0 +1,35 @@
+package com.rockymadden.stringmetric.tokenization
+
+import com.google.caliper.Param
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner}
+import scala.util.Random
+
+final class NGramTokenizerBenchmark extends CaliperBenchmark {
+  import NGramTokenizerBenchmark.Tokenizer
+  // Length of the generated input string.
+  @Param(Array("0", "1", "2", "4", "8", "16"))
+  var length: Int = _
+  // Value passed as the second argument list of every tokenize call below.
+  @Param(Array("2", "3"))
+  var n: Int = _
+  // Input assigned by setUp(), held both as a String and as a char array.
+  var string: String = _
+  var charArray: Array[Char] = _
+
+  override protected def setUp(): Unit = {
+    val generated = Random.alphanumeric.take(length).mkString
+    string = generated; charArray = generated.toCharArray
+  }
+  // Each time* method below passes one tokenize call to the inherited run(reps).
+  def timeComputeWithCharArray(reps: Int) = run(reps) {
+    Tokenizer.tokenize(charArray)(n)
+  }
+
+  def timeComputeWithString(reps: Int) = run(reps) {
+    Tokenizer.tokenize(string)(n)
+  }
+}
+
+object NGramTokenizerBenchmark extends CaliperRunner(classOf[NGramTokenizerBenchmark]) {
+  private final val Tokenizer = NGramTokenizer() // One tokenizer instance shared by both time* methods.
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Algorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/Algorithm.scala
new file mode 100755
index 0000000..10bc2cd
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/Algorithm.scala
@@ -0,0 +1,5 @@
+package com.rockymadden.stringmetric
+
+trait Algorithm[A, B, C] { // Contract: derive an optional C from one input A, given an implicit B.
+ def compute(a: A)(implicit b: B): Option[C] // Abstract; implementers decide when the result is None.
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Alphabet.scala b/core/source/core/scala/com/rockymadden/stringmetric/Alphabet.scala
new file mode 100755
index 0000000..d2ede81
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/Alphabet.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric
+
+import scala.collection.immutable.Set
+
+object Alphabet { // Named sets of ASCII letters (by case, vowel/consonant, Y) with membership tests.
+  protected sealed abstract class AlphabetSet {
+    protected[Alphabet] val Chars: Set[Char] // Members of this alphabet set.
+    /** True when `char` is a member of this set. */
+    def isSuperset(char: Char): Boolean = Chars.contains(char)
+    /** True when `charArray` is non-empty and all of its elements are members of this set. */
+    def isSuperset(charArray: Array[Char]): Boolean =
+      charArray.length > 0 && charArray.forall(Chars.contains(_)) // Stops at the first non-member.
+    /** True when `string` is non-empty and all of its characters are members of this set. */
+    def isSuperset(string: String): Boolean = string.length > 0 && string.forall(Chars.contains(_))
+  }
+
+  case object LowercaseConsonant extends AlphabetSet { // 'y' is not listed here; see LowercaseY.
+    override protected[Alphabet] final val Chars =
+      Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z')
+  }
+  case object UppercaseConsonant extends AlphabetSet { // 'Y' is not listed here; see UppercaseY.
+    override protected[Alphabet] final val Chars =
+      Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z')
+  }
+  case object Consonant extends AlphabetSet {
+    override protected[Alphabet] final val Chars = LowercaseConsonant.Chars ++ UppercaseConsonant.Chars
+  }
+  case object LowercaseVowel extends AlphabetSet {
+    override protected[Alphabet] final val Chars = Set('a', 'e', 'i', 'o', 'u')
+  }
+  case object UppercaseVowel extends AlphabetSet {
+    override protected[Alphabet] final val Chars = Set('A', 'E', 'I', 'O', 'U')
+  }
+  case object Vowel extends AlphabetSet {
+    override protected[Alphabet] final val Chars = LowercaseVowel.Chars ++ UppercaseVowel.Chars
+  }
+  case object LowercaseY extends AlphabetSet {
+    override protected[Alphabet] final val Chars = Set('y')
+  }
+  case object UppercaseY extends AlphabetSet {
+    override protected[Alphabet] final val Chars = Set('Y')
+  }
+  case object Y extends AlphabetSet {
+    override protected[Alphabet] final val Chars = LowercaseY.Chars ++ UppercaseY.Chars
+  }
+  case object LowercaseAlpha extends AlphabetSet { // Union of the lowercase consonant, vowel and y sets.
+    override protected[Alphabet] final val Chars = LowercaseConsonant.Chars ++ LowercaseVowel.Chars ++ LowercaseY.Chars
+  }
+  case object UppercaseAlpha extends AlphabetSet { // Union of the uppercase consonant, vowel and Y sets.
+    override protected[Alphabet] final val Chars = UppercaseConsonant.Chars ++ UppercaseVowel.Chars ++ UppercaseY.Chars
+  }
+  case object Alpha extends AlphabetSet {
+    override protected[Alphabet] final val Chars = LowercaseAlpha.Chars ++ UppercaseAlpha.Chars
+  }
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Filter.scala b/core/source/core/scala/com/rockymadden/stringmetric/Filter.scala
new file mode 100755
index 0000000..2a02f6b
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/Filter.scala
@@ -0,0 +1,5 @@
+package com.rockymadden.stringmetric
+
+trait Filter[A] extends Filterable[A] { // Base Filterable whose filter is the identity function.
+ override def filter(a: A): A = a // Returns the argument unchanged.
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Filterable.scala b/core/source/core/scala/com/rockymadden/stringmetric/Filterable.scala
new file mode 100755
index 0000000..77dc0bf
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/Filterable.scala
@@ -0,0 +1,5 @@
+package com.rockymadden.stringmetric
+
+trait Filterable[A] { // Contract for anything that can map a value of type A to a filtered A.
+ def filter(a: A): A // Abstract; input and output share the same type.
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Metric.scala b/core/source/core/scala/com/rockymadden/stringmetric/Metric.scala
new file mode 100755
index 0000000..6862321
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/Metric.scala
@@ -0,0 +1,5 @@
+package com.rockymadden.stringmetric
+
+trait Metric[A, B, C] { // Contract: compare two values of type A, given an implicit B, yielding an optional C.
+ def compare(a1: A, a2: A)(implicit b: B): Option[C] // Abstract; implementers decide when the result is None.
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/StringAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringAlgorithm.scala
new file mode 100755
index 0000000..0d194da
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/StringAlgorithm.scala
@@ -0,0 +1,42 @@
+package com.rockymadden.stringmetric
+
+trait StringAlgorithm[A, B] extends Algorithm[String, A, B] { // Algorithm over String with an added Array[Char] overload.
+ def compute(charArray: Array[Char])(implicit a: A): Option[Array[Char]] // Char-array variant; result is an optional char array.
+}
+
+object StringAlgorithm { // Short names for the phonetic algorithms: each is both a type alias and a val for the same-named term.
+ type Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm
+ val Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm
+
+ type Nysiis = com.rockymadden.stringmetric.phonetic.NysiisAlgorithm
+ val Nysiis = com.rockymadden.stringmetric.phonetic.NysiisAlgorithm
+
+ type RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm
+ val RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm
+
+ type RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm
+ val RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm
+
+ type Soundex = com.rockymadden.stringmetric.phonetic.SoundexAlgorithm
+ val Soundex = com.rockymadden.stringmetric.phonetic.SoundexAlgorithm
+ // computeWith* helpers: each forwards its single argument to compute on the matching val above.
+ def computeWithMetaphone(charArray: Array[Char]) = Metaphone.compute(charArray)
+
+ def computeWithMetaphone(string: String) = Metaphone.compute(string)
+
+ def computeWithNysiis(charArray: Array[Char]) = Nysiis.compute(charArray)
+
+ def computeWithNysiis(string: String) = Nysiis.compute(string)
+
+ def computeWithRefinedNysiis(charArray: Array[Char]) = RefinedNysiis.compute(charArray)
+
+ def computeWithRefinedNysiis(string: String) = RefinedNysiis.compute(string)
+
+ def computeWithRefinedSoundex(charArray: Array[Char]) = RefinedSoundex.compute(charArray)
+
+ def computeWithRefinedSoundex(string: String) = RefinedSoundex.compute(string)
+
+ def computeWithSoundex(charArray: Array[Char]) = Soundex.compute(charArray)
+
+ def computeWithSoundex(string: String) = Soundex.compute(string)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/StringFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringFilter.scala
new file mode 100755
index 0000000..1430d34
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/StringFilter.scala
@@ -0,0 +1,45 @@
+package com.rockymadden.stringmetric
+
+import com.rockymadden.stringmetric.filter.StringFilterDelegate
+
+trait StringFilter extends Filter[String] with StringFilterable { // Identity filter for both Strings and char arrays.
+ override def filter(charArray: Array[Char]): Array[Char] = charArray // Returns the same array instance, not a copy.
+}
+
+object StringFilter { // Per filter trait: a short type alias plus a lazily created StringFilterDelegate with that trait mixed in.
+ type AsciiControl = com.rockymadden.stringmetric.filter.AsciiControlFilter
+ lazy val asciiControl = new StringFilterDelegate with AsciiControl
+
+ type AsciiControlOnly = com.rockymadden.stringmetric.filter.AsciiControlOnlyFilter
+ lazy val asciiControlOnly = new StringFilterDelegate with AsciiControlOnly
+
+ type AsciiLetterNumber = com.rockymadden.stringmetric.filter.AsciiLetterNumberFilter
+ lazy val asciiLetterNumber = new StringFilterDelegate with AsciiLetterNumber
+
+ type AsciiLetterNumberOnly = com.rockymadden.stringmetric.filter.AsciiLetterNumberOnlyFilter
+ lazy val asciiLetterNumberOnly = new StringFilterDelegate with AsciiLetterNumberOnly
+
+ type AsciiLetter = com.rockymadden.stringmetric.filter.AsciiLetterFilter
+ lazy val asciiLetter = new StringFilterDelegate with AsciiLetter
+
+ type AsciiLetterOnly = com.rockymadden.stringmetric.filter.AsciiLetterOnlyFilter
+ lazy val asciiLetterOnly = new StringFilterDelegate with AsciiLetterOnly
+
+ type AsciiNumber = com.rockymadden.stringmetric.filter.AsciiNumberFilter
+ lazy val asciiNumber = new StringFilterDelegate with AsciiNumber
+
+ type AsciiNumberOnly = com.rockymadden.stringmetric.filter.AsciiNumberOnlyFilter
+ lazy val asciiNumberOnly = new StringFilterDelegate with AsciiNumberOnly
+
+ type AsciiSpace = com.rockymadden.stringmetric.filter.AsciiSpaceFilter
+ lazy val asciiSpace = new StringFilterDelegate with AsciiSpace
+
+ type AsciiSymbol = com.rockymadden.stringmetric.filter.AsciiSymbolFilter
+ lazy val asciiSymbol = new StringFilterDelegate with AsciiSymbol
+
+ type AsciiSymbolOnly = com.rockymadden.stringmetric.filter.AsciiSymbolOnlyFilter
+ lazy val asciiSymbolOnly = new StringFilterDelegate with AsciiSymbolOnly
+
+ type IgnoreAsciiLetterCase = com.rockymadden.stringmetric.filter.IgnoreAsciiLetterCaseFilter
+ lazy val ignoreAsciiLetterCase = new StringFilterDelegate with IgnoreAsciiLetterCase
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/StringFilterable.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringFilterable.scala
new file mode 100755
index 0000000..d639dfb
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/StringFilterable.scala
@@ -0,0 +1,5 @@
+package com.rockymadden.stringmetric
+
+trait StringFilterable extends Filterable[String] { // Filterable over String with an added Array[Char] overload.
+ def filter(charArray: Array[Char]): Array[Char] // Abstract char-array variant of filter.
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/StringMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringMetric.scala
new file mode 100755
index 0000000..212f76d
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/StringMetric.scala
@@ -0,0 +1,120 @@
+package com.rockymadden.stringmetric
+
+trait StringMetric[A, B] extends Metric[String, A, B] { // Metric over String with an added Array[Char] overload.
+ def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit a: A): Option[B] // Abstract char-array variant of compare.
+}
+
+object StringMetric { // Short names for the similarity/phonetic metrics: each is both a type alias and a val for the same-named term.
+ type DiceSorensen = com.rockymadden.stringmetric.similarity.DiceSorensenMetric
+ val DiceSorensen = com.rockymadden.stringmetric.similarity.DiceSorensenMetric
+
+ type Hamming = com.rockymadden.stringmetric.similarity.HammingMetric
+ val Hamming = com.rockymadden.stringmetric.similarity.HammingMetric
+
+ type Jaccard = com.rockymadden.stringmetric.similarity.JaccardMetric
+ val Jaccard = com.rockymadden.stringmetric.similarity.JaccardMetric
+
+ type Jaro = com.rockymadden.stringmetric.similarity.JaroMetric
+ val Jaro = com.rockymadden.stringmetric.similarity.JaroMetric
+
+ type JaroWinkler = com.rockymadden.stringmetric.similarity.JaroWinklerMetric
+ val JaroWinkler = com.rockymadden.stringmetric.similarity.JaroWinklerMetric
+
+ type Levenshtein = com.rockymadden.stringmetric.similarity.LevenshteinMetric
+ val Levenshtein = com.rockymadden.stringmetric.similarity.LevenshteinMetric
+
+ type Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneMetric
+ val Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneMetric
+
+ type NGram = com.rockymadden.stringmetric.similarity.NGramMetric
+ val NGram = com.rockymadden.stringmetric.similarity.NGramMetric
+
+ type Nysiis = com.rockymadden.stringmetric.phonetic.NysiisMetric
+ val Nysiis = com.rockymadden.stringmetric.phonetic.NysiisMetric
+
+ type Overlap = com.rockymadden.stringmetric.similarity.OverlapMetric
+ val Overlap = com.rockymadden.stringmetric.similarity.OverlapMetric
+
+ type RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric
+ val RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric
+
+ type RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric
+ val RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric
+
+ type Soundex = com.rockymadden.stringmetric.phonetic.SoundexMetric
+ val Soundex = com.rockymadden.stringmetric.phonetic.SoundexMetric
+
+ type WeightedLevenshtein = com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric
+ val WeightedLevenshtein = com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric
+ // compareWith* helpers: each forwards its arguments unchanged to compare on the matching val above.
+ def compareWithDiceSorensen(charArray1: Array[Char], charArray2: Array[Char])(n: Int) =
+ DiceSorensen.compare(charArray1, charArray2)(n)
+
+ def compareWithDiceSorensen(string1: String, string2: String)(n: Int) = DiceSorensen.compare(string1, string2)(n)
+
+ def compareWithHamming(charArray1: Array[Char], charArray2: Array[Char]) = Hamming.compare(charArray1, charArray2)
+
+ def compareWithHamming(string1: String, string2: String)= Hamming.compare(string1, string2)
+
+ def compareWithJaccard(charArray1: Array[Char], charArray2: Array[Char])(n: Int) =
+ Jaccard.compare(charArray1, charArray2)(n)
+
+ def compareWithJaccard(string1: String, string2: String)(n: Int) = Jaccard.compare(string1, string2)(n)
+
+ def compareWithJaro(charArray1: Array[Char], charArray2: Array[Char]) = Jaro.compare(charArray1, charArray2)
+
+ def compareWithJaro(string1: String, string2: String) = Jaro.compare(string1, string2)
+
+ def compareWithJaroWinkler(charArray1: Array[Char], charArray2: Array[Char]) =
+ JaroWinkler.compare(charArray1, charArray2)
+
+ def compareWithJaroWinkler(string1: String, string2: String) = JaroWinkler.compare(string1, string2)
+
+ def compareWithLevenshtein(charArray1: Array[Char], charArray2: Array[Char]) =
+ Levenshtein.compare(charArray1, charArray2)
+
+ def compareWithLevenshtein(string1: String, string2: String) = Levenshtein.compare(string1, string2)
+
+ def compareWithMetaphone(charArray1: Array[Char], charArray2: Array[Char]) =
+ Metaphone.compare(charArray1, charArray2)
+
+ def compareWithMetaphone(string1: String, string2: String) = Metaphone.compare(string1, string2)
+
+ def compareWithNGram(charArray1: Array[Char], charArray2: Array[Char])(n: Int) =
+ NGram.compare(charArray1, charArray2)(n)
+
+ def compareWithNGram(string1: String, string2: String)(n: Int) = NGram.compare(string1, string2)(n)
+
+ def compareWithNysiis(charArray1: Array[Char], charArray2: Array[Char]) = Nysiis.compare(charArray1, charArray2)
+
+ def compareWithNysiis(string1: String, string2: String) = Nysiis.compare(string1, string2)
+
+ def compareWithOverlap(charArray1: Array[Char], charArray2: Array[Char])(n: Int) =
+ Overlap.compare(charArray1, charArray2)(n)
+
+ def compareWithOverlap(string1: String, string2: String)(n: Int) = Overlap.compare(string1, string2)(n)
+
+ def compareWithRefinedNysiis(charArray1: Array[Char], charArray2: Array[Char]) =
+ RefinedNysiis.compare(charArray1, charArray2)
+
+ def compareWithRefinedNysiis(string1: String, string2: String) = RefinedNysiis.compare(string1, string2)
+
+ def compareWithRefinedSoundex(charArray1: Array[Char], charArray2: Array[Char]) =
+ RefinedSoundex.compare(charArray1, charArray2)
+
+ def compareWithRefinedSoundex(string1: String, string2: String) = RefinedSoundex.compare(string1, string2)
+
+ def compareWithSoundex(charArray1: Array[Char], charArray2: Array[Char]) = Soundex.compare(charArray1, charArray2)
+
+ def compareWithSoundex(string1: String, string2: String) = Soundex.compare(string1, string2)
+ // The weighted variant takes its three BigDecimal options as one tuple and passes it through as-is.
+ def compareWithWeightedLevenshtein(charArray1: Array[Char], charArray2: Array[Char])
+ (options: (BigDecimal, BigDecimal, BigDecimal)) =
+
+ WeightedLevenshtein.compare(charArray1, charArray2)(options)
+
+ def compareWithWeightedLevenshtein(string1: String, string2: String)
+ (options: (BigDecimal, BigDecimal, BigDecimal)) =
+
+ WeightedLevenshtein.compare(string1, string2)(options)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/StringTokenizer.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringTokenizer.scala
new file mode 100755
index 0000000..bef56d9
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/StringTokenizer.scala
@@ -0,0 +1,14 @@
+package com.rockymadden.stringmetric
+
+/**
+ * A `Tokenizer` whose input type is fixed to `String`, extended with an overload that
+ * accepts a character array and yields character-array tokens.
+ */
+trait StringTokenizer[A, B] extends Tokenizer[String, A, B] {
+ /** Tokenizes `charArray` using the implicit argument `a`; the result is an optional array of character arrays. */
+ def tokenize(charArray: Array[Char])(implicit a: A): Option[Array[Array[Char]]]
+}
+
+/** Short aliases for the n-gram tokenizer plus convenience methods that delegate to it. */
+object StringTokenizer {
+ // Type and value aliases so callers can refer to the tokenizer simply as `NGram`.
+ type NGram = com.rockymadden.stringmetric.tokenization.NGramTokenizer
+ val NGram = com.rockymadden.stringmetric.tokenization.NGramTokenizer
+
+ /** Tokenizes a character array by delegating to `NGram.tokenize`, forwarding `n` unchanged. */
+ def tokenizeWithNGram(charArray: Array[Char])(n: Int) = NGram.tokenize(charArray)(n)
+
+ /** Tokenizes a string by delegating to `NGram.tokenize`, forwarding `n` unchanged. */
+ def tokenizeWithNGram(string: String)(n: Int) = NGram.tokenize(string)(n)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Tokenizer.scala b/core/source/core/scala/com/rockymadden/stringmetric/Tokenizer.scala
new file mode 100755
index 0000000..c9edae5
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/Tokenizer.scala
@@ -0,0 +1,5 @@
+package com.rockymadden.stringmetric
+
+/**
+ * Generic tokenizer contract.
+ *
+ * @tparam A type of the value being tokenized
+ * @tparam B type of the implicitly supplied argument
+ * @tparam C type of the tokenized result
+ */
+trait Tokenizer[A, B, C] {
+ /** Tokenizes `a` using the implicit argument `b`; the result is optional. */
+ def tokenize(a: A)(implicit b: B): Option[C]
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala
new file mode 100755
index 0000000..bd45ecf
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala
@@ -0,0 +1,11 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that removes ASCII control characters (codes 0 through 31, and 127)
+ * and passes whatever remains on to `super.filter`.
+ */
+trait AsciiControlFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val withoutControls = charArray.filterNot(ch => ch < 32 || ch == 127)
+
+    super.filter(withoutControls)
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala
new file mode 100755
index 0000000..c08b686
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala
@@ -0,0 +1,11 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that keeps only ASCII control characters (codes 0 through 31, and 127)
+ * and passes them on to `super.filter`.
+ */
+trait AsciiControlOnlyFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val controlsOnly = charArray.filter(ch => ch == 127 || ch < 32)
+
+    super.filter(controlsOnly)
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala
new file mode 100755
index 0000000..24509cb
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala
@@ -0,0 +1,11 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that removes ASCII letters (`A`-`Z` and `a`-`z`) and passes whatever
+ * remains on to `super.filter`.
+ */
+trait AsciiLetterFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val isLetter = (ch: Char) => (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
+
+    super.filter(charArray.filterNot(isLetter))
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala
new file mode 100755
index 0000000..e17c715
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala
@@ -0,0 +1,15 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that removes ASCII letters (`A`-`Z`, `a`-`z`) and digits (`0`-`9`)
+ * and passes whatever remains on to `super.filter`.
+ */
+trait AsciiLetterNumberFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val isLetterOrDigit = (ch: Char) =>
+      (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
+
+    super.filter(charArray.filterNot(isLetterOrDigit))
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala
new file mode 100755
index 0000000..7cf97ba
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala
@@ -0,0 +1,15 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that keeps only ASCII letters (`A`-`Z`, `a`-`z`) and digits (`0`-`9`)
+ * and passes them on to `super.filter`.
+ */
+trait AsciiLetterNumberOnlyFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val isLetterOrDigit = (ch: Char) =>
+      (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
+
+    super.filter(charArray.filter(isLetterOrDigit))
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala
new file mode 100755
index 0000000..70032d9
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala
@@ -0,0 +1,11 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that keeps only ASCII letters (`A`-`Z` and `a`-`z`) and passes them
+ * on to `super.filter`.
+ */
+trait AsciiLetterOnlyFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val isLetter = (ch: Char) => (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
+
+    super.filter(charArray.filter(isLetter))
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala
new file mode 100755
index 0000000..42fe77e
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala
@@ -0,0 +1,11 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that removes ASCII digits (`0`-`9`) and passes whatever remains on
+ * to `super.filter`.
+ */
+trait AsciiNumberFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val withoutDigits = charArray.filterNot(ch => ch >= '0' && ch <= '9')
+
+    super.filter(withoutDigits)
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala
new file mode 100755
index 0000000..3f17099
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala
@@ -0,0 +1,11 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that keeps only ASCII digits (`0`-`9`) and passes them on to
+ * `super.filter`.
+ */
+trait AsciiNumberOnlyFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val digitsOnly = charArray.filter(ch => ch >= '0' && ch <= '9')
+
+    super.filter(digitsOnly)
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala
new file mode 100755
index 0000000..538107d
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala
@@ -0,0 +1,10 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/** Stackable filter that removes the ASCII space character and passes whatever remains on to `super.filter`. */
+trait AsciiSpaceFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val withoutSpaces = charArray.filterNot(_ == ' ')
+
+    super.filter(withoutSpaces)
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala
new file mode 100755
index 0000000..7b0c810
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala
@@ -0,0 +1,15 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that removes ASCII symbols - the printable, non-alphanumeric codes
+ * 32-47, 58-64, 91-96 and 123-126 (space included) - and passes whatever remains on to
+ * `super.filter`.
+ */
+trait AsciiSymbolFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val isSymbol = (ch: Char) =>
+      (ch >= ' ' && ch <= '/') || (ch >= ':' && ch <= '@') || (ch >= '[' && ch <= '`') || (ch >= '{' && ch <= '~')
+
+    super.filter(charArray.filterNot(isSymbol))
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala
new file mode 100755
index 0000000..5cb5e94
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala
@@ -0,0 +1,15 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that keeps only ASCII symbols - the printable, non-alphanumeric codes
+ * 32-47, 58-64, 91-96 and 123-126 (space included) - and passes them on to `super.filter`.
+ */
+trait AsciiSymbolOnlyFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val isSymbol = (ch: Char) =>
+      (ch >= ' ' && ch <= '/') || (ch >= ':' && ch <= '@') || (ch >= '[' && ch <= '`') || (ch >= '{' && ch <= '~')
+
+    super.filter(charArray.filter(isSymbol))
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala
new file mode 100755
index 0000000..54fe66f
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala
@@ -0,0 +1,11 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/**
+ * Stackable filter that maps ASCII upper-case letters (`A`-`Z`) to their lower-case
+ * counterparts, leaves every other character untouched, and passes the result on to
+ * `super.filter`.
+ */
+trait IgnoreAsciiLetterCaseFilter extends StringFilter {
+  abstract override def filter(charArray: Array[Char]): Array[Char] = {
+    val lowered = charArray.map {
+      case upper if upper >= 'A' && upper <= 'Z' => (upper + ('a' - 'A')).toChar
+      case other => other
+    }
+
+    super.filter(lowered)
+  }
+
+  /** String variant; routes through the character-array overload. */
+  abstract override def filter(string: String): String = new String(filter(string.toCharArray))
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala
new file mode 100755
index 0000000..8ece42d
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala
@@ -0,0 +1,9 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.StringFilter
+
+/** Concrete `StringFilter` whose two `filter` overloads return their argument unchanged. */
+class StringFilterDelegate extends StringFilter {
+ /** Returns `charArray` as-is (the same array instance, no copy). */
+ override def filter(charArray: Array[Char]): Array[Char] = charArray
+
+ /** Returns `string` as-is. */
+ override def filter(string: String): String = string
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/package.scala b/core/source/core/scala/com/rockymadden/stringmetric/package.scala
new file mode 100755
index 0000000..6752f4d
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/package.scala
@@ -0,0 +1,7 @@
+package com.rockymadden
+
+/** Package-level type aliases. Both aliases denote a pair of arrays with the same element type `T`. */
+package object stringmetric {
+ // Pair of arrays; presumably the two inputs of a comparison - confirm against usages.
+ type CompareTuple[T] = (Array[T], Array[T])
+
+ // Structurally identical to CompareTuple; presumably carries matched elements - confirm against usages.
+ type MatchTuple[T] = (Array[T], Array[T])
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
new file mode 100755
index 0000000..c580fd3
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
@@ -0,0 +1,122 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
+import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
+import scala.annotation.{switch, tailrec}
+
+/**
+ * An implementation of the Metaphone algorithm.
+ *
+ * Input is first passed through the mixed-in `StringFilter`. `compute` yields `None` when the filtered
+ * input is empty, when its first character is not in `Alpha`, or when transcoding produces an empty code.
+ */
+class MetaphoneAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
+ /** Computes the code for `charArray`, working on a lower-cased copy of the filtered input. */
+ final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
+ val fca = filter(charArray)
+
+ if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+ else {
+ // Lower-case, rewrite the leading letters, collapse repeats, then run the main pass.
+ val th = deduplicate(transcodeHead(fca.map(_.toLower)))
+ val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char])
+
+ if (t.length == 0) None else Some(t) // Single Y or W would have 0 length.
+ }
+ }
+
+ /** String variant; converts to a character array, delegates, and joins the result back into a string. */
+ final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
+ compute(string.toCharArray).map(_.mkString)
+
+ // Drops a character when it equals its successor, except for 'c', which is always kept.
+ // The final character is always kept.
+ private[this] def deduplicate(ca: Array[Char]) =
+ if (ca.length <= 1) ca
+ else ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
+
+ // Tail-recursive cursor walk: `l` holds the characters already passed, `c` is the current character,
+ // `r` the characters still ahead and `o` the code built so far. A NUL current character together with
+ // an empty `r` marks the end of input, at which point `o` is returned.
+ // NOTE(review): '\0' is an octal escape; later Scala versions deprecate octal escapes - consider the
+ // Unicode escape for U+0000 if the target compiler version is raised.
+ @tailrec
+ private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ if (c == '\0' && r.length == 0) o
+ else {
+ // Advances the cursor by `d` positions: the current character and the first d - 1 characters of `r`
+ // are appended to `l`, the following character (or NUL when none is left) becomes current, and `ca`
+ // becomes the new output.
+ def shift(d: Int, ca: Array[Char]) = {
+ val sca = r.splitAt(d - 1)
+
+ (
+ if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
+ if (sca._2.length > 0) sca._2.head else '\0',
+ if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
+ ca
+ )
+ }
+
+ val t = {
+ (c: @switch) match {
+ // Vowels are emitted only when nothing precedes them.
+ case 'a' | 'e' | 'i' | 'o' | 'u' => if (l.length == 0) shift(1, o:+ c) else shift(1, o)
+ case 'f' | 'j' | 'l' | 'm' | 'n' | 'r' => shift(1, o :+ c)
+ // A final 'b' directly after 'm' emits nothing.
+ case 'b' => if (l.length >= 1 && l.last == 'm' && r.length == 0) shift(1, o) else shift(1, o :+ 'b')
+ case 'c' =>
+ if (r.length >= 1 && r.head == 'h' && l.length >= 1 && l.last == 's') shift(1, o :+ 'k')
+ else if (r.length >= 2 && r.head == 'i' && r(1) == 'a') shift(3, o :+ 'x')
+ // NOTE(review): the second disjunct below implies the first (both require r.head == 'h'), and the
+ // "s-c-h" case is already taken by the first branch above, so it looks redundant.
+ else if ((r.length >= 1 && r.head == 'h')
+ || (l.length >= 1 && r.length >= 1 && l.last == 's' && r.head == 'h')) shift(2, o :+ 'x')
+ else if (l.length >= 1 && r.length >= 1 && l.last == 's'
+ && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o)
+ else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o :+ 's')
+ else shift(1, o :+ 'k')
+ case 'd' =>
+ if (r.length >= 2 && r.head == 'g'
+ && (r(1) == 'e' || r(1) == 'y' || r(1) == 'i')) shift(1, o :+ 'j')
+ else shift(1, o :+ 't')
+ case 'g' =>
+ if ((r.length > 1 && r.head == 'h')
+ || (r.length == 1 && r.head == 'n')
+ || (r.length == 3 && r.head == 'n' && r(1) == 'e' && r(2) == 'd')) shift(1, o)
+ else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(2, o :+ 'j')
+ else shift(1, o :+ 'k')
+ case 'h' =>
+ if ((l.length >= 1 && (LowercaseVowel isSuperset l.last) && (r.length == 0 || !(LowercaseVowel isSuperset r.head)))
+ || (l.length >= 2 && l.last == 'h'
+ && (l(l.length - 2) == 'c' || l(l.length - 2) == 's' || l(l.length - 2) == 'p'
+ || l(l.length - 2) == 't' || l(l.length - 2) == 'g'))) shift(1, o)
+ else shift(1, o :+ 'h')
+ case 'k' => if (l.length >= 1 && l.last == 'c') shift(1, o) else shift(1, o :+ 'k')
+ case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ 'p')
+ case 'q' => shift(1, o :+ 'k')
+ case 's' =>
+ if (r.length >= 2 && r.head == 'i' && (r(1) == 'o' || r(1) == 'a')) shift(3, o :+ 'x')
+ else if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'x')
+ else shift(1, o :+ 's')
+ case 't' =>
+ if (r.length >= 2 && r.head == 'i' && (r(1) == 'a' || r(1) == 'o')) shift(3, o :+ 'x')
+ else if (r.length >= 1 && r.head == 'h') shift(2, o :+ '0')
+ else if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(1, o)
+ else shift(1, o :+ 't')
+ case 'v' => shift(1, o :+ 'f')
+ // 'w' and 'y' are emitted only when a vowel follows.
+ case 'w' | 'y' => if (r.length == 0 || !(LowercaseVowel isSuperset r.head)) shift(1, o) else shift(1, o :+ c)
+ case 'x' => shift(1, (o :+ 'k') :+ 's')
+ case 'z' => shift(1, o :+ 's')
+ // Any other character contributes nothing to the code.
+ case _ => shift(1, o)
+ }
+ }
+
+ transcode(t._1, t._2, t._3, t._4)
+ }
+ }
+
+ // Rewrites special leading letters before the main pass: a lone 'x' becomes 's'; leading "ae", "gn",
+ // "kn", "pn" and "wr" lose their first letter; leading "wh" becomes "w"; a leading 'x' becomes 's'.
+ private[this] def transcodeHead(ca: Array[Char]) = {
+ (ca.length: @switch) match {
+ case 0 => ca
+ case 1 => if (ca.head == 'x') Array('s') else ca
+ case _ =>
+ (ca.head: @switch) match {
+ case 'a' if (ca(1) == 'e') => ca.tail
+ case 'g' | 'k' | 'p' if (ca(1) == 'n') => ca.tail
+ case 'w' if (ca(1) == 'r') => ca.tail
+ case 'w' if (ca(1) == 'h') => 'w' +: ca.drop(2)
+ case 'x' => 's' +: ca.tail
+ case _ => ca
+ }
+ }
+ }
+}
+
+/** Companion offering a factory plus convenience `compute` methods backed by one shared, lazily created instance. */
+object MetaphoneAlgorithm {
+  // Created on first use and reused by the convenience methods below.
+  private lazy val self = apply()
+
+  /** Creates a new algorithm instance with `StringFilter` mixed in. */
+  def apply(): MetaphoneAlgorithm = new MetaphoneAlgorithm with StringFilter
+
+  /** Runs `compute` on the shared instance; the result type matches the instance method. */
+  def compute(charArray: Array[Char]): Option[Array[Char]] = self.compute(charArray)
+
+  /** String variant of `compute`, run on the shared instance. */
+  def compute(string: String): Option[String] = self.compute(string)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
new file mode 100755
index 0000000..2975ad3
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
@@ -0,0 +1,32 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+
+/**
+ * Metaphone-based metric: two inputs are considered equal when their Metaphone codes contain
+ * the same elements.
+ *
+ * `compare` yields `None` when either filtered input is empty or does not start with a character
+ * in `Alpha`, or when either code cannot be computed.
+ */
+class MetaphoneMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Boolean] = {
+
+    val first = filter(charArray1)
+
+    if (first.length == 0 || !(Alpha isSuperset first.head)) None
+    else {
+      // The second input is only filtered once the first one has passed validation.
+      val second = filter(charArray2)
+
+      if (second.length == 0 || !(Alpha isSuperset second.head)) None
+      else
+        for {
+          code1 <- MetaphoneAlgorithm.compute(first) if code1.length > 0
+          code2 <- MetaphoneAlgorithm.compute(second) if code2.length > 0
+        } yield code1.sameElements(code2)
+    }
+  }
+
+  /** String variant; converts both inputs to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Companion offering a factory plus convenience `compare` methods backed by one shared, lazily created instance. */
+object MetaphoneMetric {
+  // Created on first use and reused by the convenience methods below.
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with `StringFilter` mixed in. */
+  def apply(): MetaphoneMetric = new MetaphoneMetric with StringFilter
+
+  /** Runs `compare` on the shared instance; the result type matches the instance method. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]): Option[Boolean] =
+    self.compare(charArray1, charArray2)
+
+  /** String variant of `compare`, run on the shared instance. */
+  def compare(string1: String, string2: String): Option[Boolean] = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
new file mode 100755
index 0000000..ff0b3d6
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
@@ -0,0 +1,131 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
+import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
+import scala.annotation.{switch, tailrec}
+
+/**
+ * An implementation of the NYSIIS algorithm.
+ *
+ * Input is first passed through the mixed-in `StringFilter`. `compute` yields `None` when the filtered
+ * input is empty or its first character is not in `Alpha`.
+ */
+class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
+ /** Computes the code for `charArray`, working on a lower-cased copy of the filtered input. */
+ final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
+ val fca = filter(charArray)
+
+ if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+ else {
+ // tr = (input without a rewritten ending, replacement ending); tl = (replacement prefix, remainder).
+ val tr = transcodeRight(fca.map(_.toLower))
+ val tl = transcodeLeft(tr._1)
+ // Result is prefix ++ transcoded middle (when there is one) ++ ending.
+ val t =
+ if (tl._2.length == 0) tl._1 ++ tr._2
+ else tl._1 ++ transcodeCenter(
+ Array.empty[Char],
+ tl._2.head,
+ if (tl._2.length > 1) tl._2.tail else Array.empty[Char],
+ Array.empty[Char]
+ ) ++ tr._2
+
+ // The first character is exempt from the clean-up and de-duplication steps.
+ if (t.length == 1) Some(t)
+ else Some(t.head +: deduplicate(cleanTerminal(cleanLast(t.tail))))
+ }
+ }
+
+ /** String variant; converts to a character array, delegates, and joins the result back into a string. */
+ final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
+ compute(string.toCharArray).map(_.mkString)
+
+ // Removes the trailing run of 'a' and 's' characters, if any.
+ private[this] def cleanLast(ca: Array[Char]) =
+ if (ca.length == 0) ca
+ else if(ca.last == 'a' || ca.last == 's') ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length)
+ else ca
+
+ // Replaces a trailing "ay" with "y".
+ private[this] def cleanTerminal(ca: Array[Char]) =
+ if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
+ else ca
+
+ // Collapses runs of identical adjacent characters into a single character.
+ private[this] def deduplicate(ca: Array[Char]) =
+ if (ca.length <= 1) ca
+ else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
+
+ // Tail-recursive cursor walk: `l` holds the characters already passed, `c` is the current character,
+ // `r` the characters still ahead and `o` the code built so far. A NUL current character together with
+ // an empty `r` marks the end of input, at which point `o` is returned.
+ // NOTE(review): '\0' is an octal escape; later Scala versions deprecate octal escapes - consider the
+ // Unicode escape for U+0000 if the target compiler version is raised.
+ @tailrec
+ private[this] def transcodeCenter(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ if (c == '\0' && r.length == 0) o
+ else {
+ // Advances the cursor by `d` positions: the current character and the first d - 1 characters of `r`
+ // are appended to `l`, the following character (or NUL when none is left) becomes current, and `ca`
+ // becomes the new output.
+ def shift(d: Int, ca: Array[Char]) = {
+ val sca = r.splitAt(d - 1)
+
+ (
+ if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
+ if (sca._2.length > 0) sca._2.head else '\0',
+ if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
+ ca
+ )
+ }
+
+ val t = {
+ (c: @switch) match {
+ case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a')
+ case 'b' | 'c' | 'd' | 'f' | 'g' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' | 'y' => shift(1, o :+ c)
+ case 'e' =>
+ if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f'))
+ else shift(1, o :+ 'a')
+ // 'h' is dropped when preceded by a non-vowel or followed by a non-vowel.
+ case 'h' =>
+ if (l.length >= 1 && (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head)))) shift(1, o)
+ else shift(1, o :+ c)
+ case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c')
+ case 'm' => shift(1, o :+ 'n')
+ case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c)
+ case 'q' => shift(1, o :+ 'g')
+ case 's' =>
+ if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c)
+ else shift(1, o :+ c)
+ // 'w' is dropped when preceded by a vowel.
+ case 'w' =>
+ if (l.length >= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o)
+ else shift(1, o :+ c)
+ case 'z' => shift(1, o :+ 's')
+ // Any other character contributes nothing to the code.
+ case _ => shift(1, o)
+ }
+ }
+
+ transcodeCenter(t._1, t._2, t._3, t._4)
+ }
+ }
+
+ // Splits off and rewrites the leading letters. Returns (replacement prefix, remaining characters).
+ private[this] def transcodeLeft(ca: Array[Char]) = {
+ if (ca.length == 0) (Array.empty[Char], ca)
+ else {
+ // Remainders after consuming two / three leading characters; lazy so they are only built when used.
+ lazy val tr2 = ca.takeRight(ca.length - 2)
+ lazy val tr3 = ca.takeRight(ca.length - 3)
+
+ (ca.head: @switch) match {
+ case 'k' if (ca.length >= 2 && ca(1) == 'n') => (Array('n', 'n'), tr2)
+ case 'k' => (Array('c'), ca.tail)
+ case 'm' if (ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c')) => (Array('m', 'c'), tr3)
+ case 'p' if (ca.length >= 2 && (ca(1) == 'h' || ca(1) == 'f')) => (Array('f', 'f'), tr2)
+ case 's' if (ca.length >= 3 && (ca(1) == 'c' && ca(2) == 'h')) => (Array('s', 's'), tr3)
+ case _ => (Array(ca.head), ca.tail)
+ }
+ }
+ }
+
+ // Splits off and rewrites a two-letter ending. Returns (characters before the ending, replacement ending);
+ // the replacement is empty when no ending rule applies.
+ private[this] def transcodeRight(ca: Array[Char]) = {
+ if (ca.length >= 2) {
+ val lc = ca(ca.length - 1)
+ val lcm1 = ca(ca.length - 2)
+ lazy val t2 = ca.take(ca.length - 2)
+
+ (lc: @switch) match {
+ case 'd' if (lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d'))
+ case 'e' if (lcm1 == 'e' || lcm1 == 'i') => (t2, Array('y'))
+ case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d'))
+ case _ => (ca, Array.empty[Char])
+ }
+ } else (ca, Array.empty[Char])
+ }
+}
+
+/** Companion offering a factory plus convenience `compute` methods backed by one shared, lazily created instance. */
+object NysiisAlgorithm {
+  // Created on first use and reused by the convenience methods below.
+  private lazy val self = apply()
+
+  /** Creates a new algorithm instance with `StringFilter` mixed in. */
+  def apply(): NysiisAlgorithm = new NysiisAlgorithm with StringFilter
+
+  /** Runs `compute` on the shared instance; the result type matches the instance method. */
+  def compute(charArray: Array[Char]): Option[Array[Char]] = self.compute(charArray)
+
+  /** String variant of `compute`, run on the shared instance. */
+  def compute(string: String): Option[String] = self.compute(string)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
new file mode 100755
index 0000000..6d1c22c
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala
@@ -0,0 +1,40 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+
+/**
+ * NYSIIS-based metric: two inputs are considered equal when their NYSIIS codes contain the
+ * same elements.
+ *
+ * `compare` yields `None` when either filtered input is empty or does not start with a character
+ * in `Alpha`, and `Some(false)` straight away when the first characters differ.
+ */
+class NysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Boolean] = {
+
+    // First letters are compared case-insensitively, with 'k' treated as 'c'.
+    def canonical(ch: Char): Char = {
+      val lower = ch.toLower
+
+      if (lower == 'k') 'c' else lower
+    }
+
+    val first = filter(charArray1)
+
+    if (first.length == 0 || !(Alpha isSuperset first.head)) None
+    else {
+      // The second input is only filtered once the first one has passed validation.
+      val second = filter(charArray2)
+
+      if (second.length == 0 || !(Alpha isSuperset second.head)) None
+      else if (canonical(first.head) != canonical(second.head)) Some(false)
+      else
+        for {
+          code1 <- NysiisAlgorithm.compute(first) if code1.length > 0
+          code2 <- NysiisAlgorithm.compute(second) if code2.length > 0
+        } yield code1.sameElements(code2)
+    }
+  }
+
+  /** String variant; converts both inputs to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Companion offering a factory plus convenience `compare` methods backed by one shared, lazily created instance. */
+object NysiisMetric {
+  // Created on first use and reused by the convenience methods below.
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with `StringFilter` mixed in. */
+  def apply(): NysiisMetric = new NysiisMetric with StringFilter
+
+  /** Runs `compare` on the shared instance; the result type matches the instance method. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]): Option[Boolean] =
+    self.compare(charArray1, charArray2)
+
+  /** String variant of `compare`, run on the shared instance. */
+  def compare(string1: String, string2: String): Option[Boolean] = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
new file mode 100755
index 0000000..334e9e3
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala
@@ -0,0 +1,135 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
+import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
+import scala.annotation.{switch, tailrec}
+
+/**
+ * An implementation of the refined NYSIIS algorithm.
+ *
+ * Input is first passed through the mixed-in `StringFilter`. `compute` yields `None` when the filtered
+ * input is empty or its first character is not in `Alpha`.
+ */
+class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
+ /** Computes the code for `charArray`, working on a lower-cased copy of the filtered input. */
+ final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
+ val fca = filter(charArray)
+
+ if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+ else {
+ val lfca = fca.map(_.toLower)
+ // Strip trailing 's'/'z' (never the first character), then rewrite the leading and ending letters.
+ val tlh = transcodeLast(transcodeHead(lfca.head +: cleanLast(lfca.tail, Set('s', 'z'))))
+ val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char])
+
+ // The first character is exempt from the trailing clean-up, but takes part in de-duplication.
+ if (t.length == 1) Some(t)
+ else Some(deduplicate(t.head +: cleanTerminal(cleanLast(t.tail, Set('a')))))
+ }
+ }
+
+ /** String variant; converts to a character array, delegates, and joins the result back into a string. */
+ final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
+ compute(string.toCharArray).map(_.mkString)
+
+ // Removes the trailing run of characters contained in `s`, if any.
+ private[this] def cleanLast(ca: Array[Char], s: Set[Char]) =
+ if (ca.length == 0) ca
+ else if(s.contains(ca.last)) ca.dropRight(ca.reverseIterator.takeWhile(c => s.contains(c)).length)
+ else ca
+
+ // Replaces a trailing "ay" with "y".
+ private[this] def cleanTerminal(ca: Array[Char]) =
+ if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
+ else ca
+
+ // Collapses runs of identical adjacent characters into a single character.
+ private[this] def deduplicate(ca: Array[Char]) =
+ if (ca.length <= 1) ca
+ else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
+
+ // Tail-recursive cursor walk: `l` holds the characters already passed, `c` is the current character,
+ // `r` the characters still ahead and `o` the code built so far. A NUL current character together with
+ // an empty `r` marks the end of input, at which point `o` is returned.
+ // NOTE(review): '\0' is an octal escape; later Scala versions deprecate octal escapes - consider the
+ // Unicode escape for U+0000 if the target compiler version is raised.
+ @tailrec
+ private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+ if (c == '\0' && r.length == 0) o
+ else {
+ // Advances the cursor by `d` positions: the current character and the first d - 1 characters of `r`
+ // are appended to `l`, the following character (or NUL when none is left) becomes current, and `ca`
+ // becomes the new output.
+ def shift(d: Int, ca: Array[Char]) = {
+ val sca = r.splitAt(d - 1)
+
+ (
+ if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
+ if (sca._2.length > 0) sca._2.head else '\0',
+ if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
+ ca
+ )
+ }
+
+ // Several branches test `l.length == 0` so that the very first character is emitted unchanged.
+ val t = {
+ (c: @switch) match {
+ case 'a' | 'i' | 'o' | 'u' =>
+ if (l.length == 0) shift(1, o :+ c)
+ else shift(1, o :+ 'a')
+ case 'b' | 'c' | 'f' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' => shift(1, o :+ c)
+ case 'd' =>
+ if (r.length >= 1 && r.head == 'g') shift(2, o :+ 'g') else shift(1, o :+ c)
+ case 'e' =>
+ if (l.length == 0) shift(1, o :+ c)
+ else if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f'))
+ else shift(1, o :+ 'a')
+ case 'g' =>
+ if (r.length >= 2 && r.head == 'h' && r(1) == 't') shift(3, o ++ Array('g', 't'))
+ else shift(1, o :+ c)
+ // A non-leading 'h' is dropped when preceded by a non-vowel or followed by a non-vowel.
+ case 'h' =>
+ if (l.length == 0) shift(1, o :+ c)
+ else if (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head))) shift(1, o)
+ else shift(1, o :+ c)
+ case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c')
+ case 'm' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'n')
+ case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c)
+ case 'q' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'g')
+ case 's' =>
+ if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c)
+ else if (r.length >= 1 && r.head == 'h') shift(2, o :+ c)
+ else shift(1, o :+ c)
+ // 'w' is dropped when preceded by a vowel.
+ case 'w' =>
+ if (l.length >= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o)
+ else if (r.length >= 1 && r.head == 'r') shift(2, o :+ 'r')
+ else shift(1, o :+ c)
+ case 'y' =>
+ if (l.length >= 1 && r.length >= 2 && r.head == 'w') shift(2, o :+ 'a')
+ else if (r.length >= 1 && r.head == 'w') shift(2, o :+ c)
+ else if (l.length >= 1 && r.length >= 1) shift(1, o :+ 'a')
+ else shift(1, o :+ c)
+ case 'z' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 's')
+ // Any other character contributes nothing to the code.
+ case _ => shift(1, o)
+ }
+ }
+
+ transcode(t._1, t._2, t._3, t._4)
+ }
+ }
+
+ // Rewrites special leading letters: "mac" becomes "mc" and "pf" becomes "f".
+ private[this] def transcodeHead(ca: Array[Char]) = {
+ if (ca.length == 0) ca
+ else
+ (ca.head: @switch) match {
+ case 'm' if (ca.length >= 3 && ca(1) == 'a' && ca(2) == 'c') => Array('m', 'c') ++ ca.takeRight(ca.length - 3)
+ case 'p' if (ca.length >= 2 && ca(1) == 'f') => 'f' +: ca.takeRight(ca.length - 2)
+ case _ => ca
+ }
+ }
+
+ // Rewrites special two-letter endings in place; input shorter than two characters is returned unchanged.
+ private[this] def transcodeLast(ca: Array[Char]) = {
+ if (ca.length >= 2) {
+ val lc = ca(ca.length - 1)
+ val lcm1 = ca(ca.length - 2)
+ lazy val t2 = ca.take(ca.length - 2)
+
+ (lc: @switch) match {
+ case 'd' if (lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd'
+ case 'e' if (lcm1 == 'e' || lcm1 == 'i' || lcm1 =='y') => t2 :+ 'y'
+ case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd'
+ case 'x' if (lcm1 == 'e') => t2 ++ Array('e', 'c')
+ case 'x' if (lcm1 == 'i') => t2 ++ Array('i', 'c')
+ case _ => ca
+ }
+ } else ca
+ }
+}
+
+/** Companion offering a factory plus convenience `compute` methods backed by one shared, lazily created instance. */
+object RefinedNysiisAlgorithm {
+  // Created on first use and reused by the convenience methods below.
+  private lazy val self = apply()
+
+  /** Creates a new algorithm instance with `StringFilter` mixed in. */
+  def apply(): RefinedNysiisAlgorithm = new RefinedNysiisAlgorithm with StringFilter
+
+  /** Runs `compute` on the shared instance; the result type matches the instance method. */
+  def compute(charArray: Array[Char]): Option[Array[Char]] = self.compute(charArray)
+
+  /** String variant of `compute`, run on the shared instance. */
+  def compute(string: String): Option[String] = self.compute(string)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
new file mode 100755
index 0000000..c96cc52
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
@@ -0,0 +1,40 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+
+/**
+ * Refined-NYSIIS-based metric: two inputs are considered equal when their refined NYSIIS codes
+ * contain the same elements.
+ *
+ * `compare` yields `None` when either filtered input is empty or does not start with a character
+ * in `Alpha`, and `Some(false)` straight away when the first characters differ.
+ */
+class RefinedNysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Boolean] = {
+
+    // First letters are compared case-insensitively, with 'k' treated as 'c'.
+    def canonical(ch: Char): Char = {
+      val lower = ch.toLower
+
+      if (lower == 'k') 'c' else lower
+    }
+
+    val first = filter(charArray1)
+
+    if (first.length == 0 || !(Alpha isSuperset first.head)) None
+    else {
+      // The second input is only filtered once the first one has passed validation.
+      val second = filter(charArray2)
+
+      if (second.length == 0 || !(Alpha isSuperset second.head)) None
+      else if (canonical(first.head) != canonical(second.head)) Some(false)
+      else
+        for {
+          code1 <- RefinedNysiisAlgorithm.compute(first) if code1.length > 0
+          code2 <- RefinedNysiisAlgorithm.compute(second) if code2.length > 0
+        } yield code1.sameElements(code2)
+    }
+  }
+
+  /** String variant; converts both inputs to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Companion offering a factory plus convenience `compare` methods backed by one shared, lazily created instance. */
+object RefinedNysiisMetric {
+  // Created on first use and reused by the convenience methods below.
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with `StringFilter` mixed in. */
+  def apply(): RefinedNysiisMetric = new RefinedNysiisMetric with StringFilter
+
+  /** Runs `compare` on the shared instance; the result type matches the instance method. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]): Option[Boolean] =
+    self.compare(charArray1, charArray2)
+
+  /** String variant of `compare`, run on the shared instance. */
+  def compare(string1: String, string2: String): Option[Boolean] = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
new file mode 100755
index 0000000..f22bde1
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
@@ -0,0 +1,80 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+import scala.annotation.{switch, tailrec}
+
+/** An implementation of the refined Soundex algorithm. */
+class RefinedSoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
+  /** Marker returned by the code table when a character contributes no digit (the NUL character). */
+  private[this] final val NoCode = '\u0000'
+
+  /**
+   * Computes the refined Soundex code of the filtered input.
+   *
+   * @return None when the filtered input is empty or does not start with an Alpha character; otherwise the lower
+   *         cased first character followed by the digits produced for every character, the first one included.
+   */
+  final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
+    val fca = filter(charArray)
+
+    if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+    else Some(transcode(fca, Array(fca.head.toLower)))
+  }
+
+  /** String overload; the resulting code is rendered as a String. */
+  final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
+    compute(string.toCharArray).map(_.mkString)
+
+  /** Refined Soundex digit of a lower cased character, or NoCode when the character is not coded. */
+  private[this] def code(c: Char): Char = (c: @switch) match {
+    case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' => '0'
+    case 'b' | 'p' => '1'
+    case 'f' | 'v' => '2'
+    case 'c' | 'k' | 's' => '3'
+    case 'g' | 'j' => '4'
+    case 'q' | 'x' | 'z' => '5'
+    case 'd' | 't' => '6'
+    case 'l' => '7'
+    case 'm' | 'n' => '8'
+    case 'r' => '9'
+    case _ => NoCode
+  }
+
+  /**
+   * Appends the digit of each character in i to o. The digit written directly after the leading letter is always
+   * kept; from then on a digit equal to the one written last is dropped.
+   */
+  @tailrec
+  private[this] def transcode(i: Array[Char], o: Array[Char]): Array[Char] = {
+    if (i.length == 0) o
+    else {
+      val candidate = code(i.head.toLower)
+      val a =
+        // Code twice.
+        if (o.length == 1) candidate
+        // Code once.
+        else {
+          val previous = (o.last: @switch) match {
+            case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => o.last
+            case _ => code(o.last)
+          }
+
+          if (candidate != previous) candidate else NoCode
+        }
+
+      transcode(i.tail, if (a != NoCode) o :+ a else o)
+    }
+  }
+}
+
+/** Exposes compute on a lazily created instance that mixes in the plain StringFilter. */
+object RefinedSoundexAlgorithm {
+  private lazy val self = apply()
+
+  def apply(): RefinedSoundexAlgorithm = new RefinedSoundexAlgorithm with StringFilter
+
+  def compute(charArray: Array[Char]) = self.compute(charArray)
+
+  def compute(string: String) = self.compute(string)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
new file mode 100755
index 0000000..eb2f01e
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
@@ -0,0 +1,46 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+
+/**
+ * An implementation of the refined Soundex metric: two inputs match when their refined Soundex codes are identical.
+ * Inputs are passed through the mixed-in StringFilter before any comparison takes place.
+ */
+class RefinedSoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
+  /**
+   * @return None when either filtered input is empty or does not start with an Alpha character; Some(false) when the
+   *         leading characters differ ignoring case; otherwise whether both refined Soundex codes are equal.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Boolean] = {
+
+    def usable(ca: Array[Char]): Boolean = ca.length != 0 && (Alpha isSuperset ca.head)
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2) // Not filtered unless the first input passes validation.
+
+    if (!usable(left) || !usable(right)) None
+    else if (left.head.toLower != right.head.toLower) Some(false)
+    else
+      for {
+        code1 <- RefinedSoundexAlgorithm.compute(left) if code1.length > 0
+        code2 <- RefinedSoundexAlgorithm.compute(right) if code2.length > 0
+      } yield code1.sameElements(code2)
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object RefinedSoundexMetric {
+  private lazy val self = apply()
+
+  def apply(): RefinedSoundexMetric = new RefinedSoundexMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
new file mode 100755
index 0000000..361047d
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
@@ -0,0 +1,83 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+import scala.annotation.{switch, tailrec}
+
+/** An implementation of the Soundex algorithm. */
+class SoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
+  /** Marker returned by the code table when a character contributes no digit (the NUL character). */
+  private[this] final val NoCode = '\u0000'
+
+  /**
+   * Computes the Soundex code of the filtered input.
+   *
+   * @return None when the filtered input is empty or does not start with an Alpha character; otherwise the lower
+   *         cased first character followed by up to three digits, right padded with 0 to a length of four.
+   */
+  final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
+    val fca = filter(charArray)
+
+    if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+    else {
+      val fc = fca.head.toLower
+
+      Some(transcode(fca.tail, fc, Array(fc)).padTo(4, '0'))
+    }
+  }
+
+  /** String overload; the resulting code is rendered as a String. */
+  final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
+    compute(string.toCharArray).map(_.mkString)
+
+  /** Soundex digit of a lower cased character, or NoCode when the character is not coded. */
+  private[this] def code(c: Char): Char = (c: @switch) match {
+    case 'b' | 'f' | 'p' | 'v' => '1'
+    case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2'
+    case 'd' | 't' => '3'
+    case 'l' => '4'
+    case 'm' | 'n' => '5'
+    case 'r' => '6'
+    case _ => NoCode
+  }
+
+  /**
+   * Appends digits for the characters in i to o until o holds four characters or i is exhausted. pc is the lower
+   * cased character that preceded i.head: after a, e, i, o, u or y a digit is always kept, otherwise a digit equal
+   * to the previous code (the last digit written, or the code of the leading letter) is dropped.
+   */
+  @tailrec
+  private[this] def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] = {
+    if (i.length == 0) o
+    else {
+      val c = i.head.toLower
+      val candidate = code(c)
+      val a = pc match {
+        // Code twice.
+        case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => candidate
+        // Code once.
+        case _ =>
+          val previous = (o.last: @switch) match {
+            case '1' | '2' | '3' | '4' | '5' | '6' => o.last
+            case _ => code(o.last)
+          }
+
+          if (candidate != previous) candidate else NoCode
+      }
+
+      if (o.length == 3 && a != NoCode) o :+ a
+      else transcode(i.tail, c, if (a != NoCode) o :+ a else o)
+    }
+  }
+}
+
+/** Exposes compute on a lazily created instance that mixes in the plain StringFilter. */
+object SoundexAlgorithm {
+  private lazy val self = apply()
+
+  def apply(): SoundexAlgorithm = new SoundexAlgorithm with StringFilter
+
+  def compute(charArray: Array[Char]) = self.compute(charArray)
+
+  def compute(string: String) = self.compute(string)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
new file mode 100755
index 0000000..e4daa17
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
@@ -0,0 +1,46 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+
+/**
+ * An implementation of the Soundex metric: two inputs match when their Soundex codes are identical. Inputs are
+ * passed through the mixed-in StringFilter before any comparison takes place.
+ */
+class SoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
+  /**
+   * @return None when either filtered input is empty or does not start with an Alpha character; Some(false) when the
+   *         leading characters differ ignoring case; otherwise whether both Soundex codes are equal.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Boolean] = {
+
+    def usable(ca: Array[Char]): Boolean = ca.length != 0 && (Alpha isSuperset ca.head)
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2) // Not filtered unless the first input passes validation.
+
+    if (!usable(left) || !usable(right)) None
+    else if (left.head.toLower != right.head.toLower) Some(false)
+    else
+      for {
+        code1 <- SoundexAlgorithm.compute(left) if code1.length > 0
+        code2 <- SoundexAlgorithm.compute(right) if code2.length > 0
+      } yield code1.sameElements(code2)
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object SoundexMetric {
+  private lazy val self = apply()
+
+  def apply(): SoundexMetric = new SoundexMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
new file mode 100755
index 0000000..5e01bb1
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
@@ -0,0 +1,54 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
+import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+
+/**
+ * An implementation of the Dice/Sorensen metric. This implementation differs in that n-gram size is required.
+ * Traditionally, the algorithm uses bigrams.
+ */
+class DiceSorensenMetric extends StringMetric[Int, Double] { this: StringFilter =>
+  /**
+   * Scores the filtered inputs as twice the number of shared n-grams divided by the total number of n-grams.
+   *
+   * @return None when either filtered input is shorter than n, Some(1d) when both are identical.
+   * @throws IllegalArgumentException when n is not positive.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2)
+
+    // An input shorter than n has no n-gram, so it is not possible to compare.
+    if (left.length < n || right.length < n) None
+    else if (left.sameElements(right)) Some(1d)
+    else
+      for {
+        grams1 <- NGramTokenizer.tokenize(left)(n)
+        grams2 <- NGramTokenizer.tokenize(right)(n)
+      } yield {
+        val matches = scoreMatches((grams1.map(_.mkString), grams2.map(_.mkString)))
+
+        (2d * matches) / (grams1.length + grams2.length)
+      }
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)(n)
+
+  /** Number of n-grams the two token arrays have in common, as counted by intersect. */
+  private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object DiceSorensenMetric {
+  private lazy val self = apply()
+
+  def apply(): DiceSorensenMetric = new DiceSorensenMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
+
+  def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
new file mode 100755
index 0000000..95ff203
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
@@ -0,0 +1,40 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+
+/** An implementation of the Hamming metric: the number of positions at which two equally long inputs differ. */
+class HammingMetric extends StringMetric[DummyImplicit, Int] { this: StringFilter =>
+  /** @return None when either filtered input is empty or their lengths differ; otherwise the mismatch count. */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Int] = {
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2)
+
+    if (left.length == 0 || right.length == 0 || left.length != right.length) None
+    else if (left.sameElements(right)) Some(0)
+    else Some(hamming((left, right)))
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Int] =
+    compare(string1.toCharArray, string2.toCharArray)
+
+  /** Counts the positions at which the two equally long arrays hold different characters. */
+  private[this] def hamming(ct: CompareTuple[Char]) = {
+    require(ct._1.length == ct._2.length)
+
+    ct._1.zip(ct._2).count { case (a, b) => a != b }
+  }
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object HammingMetric {
+  private lazy val self = apply()
+
+  def apply(): HammingMetric = new HammingMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
new file mode 100755
index 0000000..e32c926
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
@@ -0,0 +1,48 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringMetric, StringFilter}
+import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+
+/** An implementation of the Jaccard metric. */
+class JaccardMetric extends StringMetric[Int, Double] { this: StringFilter =>
+  /**
+   * Scores the filtered inputs as the shared n-gram count divided by the combined n-gram count minus the shared one.
+   *
+   * @return None when either filtered input is shorter than n, Some(1d) when both are identical.
+   * @throws IllegalArgumentException when n is not positive.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2)
+
+    // An input shorter than n has no n-gram, so it is not possible to compare.
+    if (left.length < n || right.length < n) None
+    else if (left.sameElements(right)) Some(1d)
+    else
+      for {
+        grams1 <- NGramTokenizer.tokenize(left)(n)
+        grams2 <- NGramTokenizer.tokenize(right)(n)
+      } yield {
+        val common = grams1.map(_.mkString).intersect(grams2.map(_.mkString)).length
+
+        common.toDouble / (grams1.length + grams2.length - common)
+      }
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)(n)
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object JaccardMetric {
+  private lazy val self = apply()
+
+  def apply(): JaccardMetric = new JaccardMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
+
+  def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
new file mode 100755
index 0000000..b7ce2c5
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
@@ -0,0 +1,96 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, MatchTuple, StringFilter, StringMetric}
+import scala.collection.mutable.{ArrayBuffer, HashSet}
+
+/**
+ * An implementation of the Jaro metric. One differing detail in this implementation is that if a character is matched
+ * in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios.
+ */
+class JaroMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter =>
+  /**
+   * @return None when either filtered input is empty, Some(1d) when both are identical, Some(0d) when no characters
+   *         match; otherwise the mean of the two match ratios and the share of matches that are not transposed.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Double] = {
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2)
+
+    if (left.length == 0 || right.length == 0) None
+    else if (left.sameElements(right)) Some(1d)
+    else {
+      val matched = `match`((left, right))
+      val matches = scoreMatches(matched)
+
+      if (matches == 0) Some(0d)
+      else {
+        val m = matches.toDouble
+        val transpositions = scoreTranspositions(matched)
+
+        Some(((m / left.length) + (m / right.length) + ((m - transpositions) / matches)) / 3)
+      }
+    }
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)
+
+  /**
+   * Pairs each character of the first array with the first unclaimed, equal character of the second array inside
+   * the search window around its index. Returns the matched characters of each array in index order.
+   */
+  private[this] def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = {
+    val window = math.abs(math.max(ct._1.length, ct._2.length) / 2 - 1)
+    val lastIndex = ct._2.length - 1
+    val one = ArrayBuffer.empty[Int] // Matched indices of the first array.
+    val two = HashSet.empty[Int] // Claimed indices of the second array.
+    var i = 0
+    var exhausted = false
+
+    while (i < ct._1.length && !exhausted) {
+      val start = math.max(0, i - window)
+      val end = math.min(lastIndex, i + window)
+
+      // Once the window starts beyond the second array, no later character can match either.
+      if (start > lastIndex) exhausted = true
+      else {
+        (start to end).find(ii => !two.contains(ii) && ct._1(i) == ct._2(ii)).foreach { ii =>
+          one += i
+          two += ii
+        }
+
+        i += 1
+      }
+    }
+
+    (one.toArray.map(ct._1(_)), two.toArray.sorted.map(ct._2(_)))
+  }
+
+  /** Number of matched characters; both halves of the tuple must be equally long. */
+  private[this] def scoreMatches(mt: MatchTuple[Char]) = {
+    require(mt._1.length == mt._2.length)
+
+    mt._1.length
+  }
+
+  /** Half the number of positions at which the matched characters differ, rounded down. */
+  private[this] def scoreTranspositions(mt: MatchTuple[Char]) = {
+    require(mt._1.length == mt._2.length)
+
+    mt._1.zip(mt._2).count { case (a, b) => a != b } / 2
+  }
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object JaroMetric {
+  private lazy val self = apply()
+
+  def apply(): JaroMetric = new JaroMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
new file mode 100755
index 0000000..4e9aebd
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
@@ -0,0 +1,48 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+
+/**
+ * An implementation of the Jaro-Winkler metric. One differing detail in this implementation is that if a character is
+ * matched in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios
+ * (e.g. comparing henka and henkan distance is 0.9666 versus the typical 0.9722).
+ */
+class JaroWinklerMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter =>
+  /**
+   * Adds to the Jaro score of the filtered inputs a tenth of the remaining distance to 1 for each leading character
+   * the inputs share, counting at most four such characters.
+   *
+   * @return None when JaroMetric yields None; a Jaro score of exactly 0 or 1 is returned as is.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Double] = {
+
+    val left = filter(charArray1)
+    val right = filter(charArray2)
+
+    JaroMetric.compare(left, right).map { jaro =>
+      if (jaro == 0d) 0d
+      else if (jaro == 1d) 1d
+      else {
+        val commonPrefix = left.zip(right).takeWhile { case (a, b) => a == b }.length
+
+        jaro + (math.min(commonPrefix, 4) * 0.1d * (1 - jaro))
+      }
+    }
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object JaroWinklerMetric {
+  private lazy val self = apply()
+
+  def apply(): JaroWinklerMetric = new JaroWinklerMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
new file mode 100755
index 0000000..47dff23
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
@@ -0,0 +1,67 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+
+/** An implementation of the Levenshtein metric. */
+class LevenshteinMetric extends StringMetric[DummyImplicit, Int] { this: StringFilter =>
+  /** @return None when either filtered input is empty; otherwise the edit distance between the filtered inputs. */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Int] = {
+
+    val fca1 = filter(charArray1)
+    lazy val fca2 = filter(charArray2)
+
+    if (fca1.length == 0 || fca2.length == 0) None
+    else if (fca1.sameElements(fca2)) Some(0)
+    else Some(levenshtein((fca1, fca2)))
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Int] =
+    compare(string1.toCharArray, string2.toCharArray)
+
+  /**
+   * Computes the edit distance row by row. Each row is derived from the one before it, so the work is iterative:
+   * no call stack grows with the input length and only two rows are alive at any time.
+   */
+  private[this] def levenshtein(ct: CompareTuple[Char]): Int = {
+    val source = ct._1
+    val target = ct._2
+    // Row zero: turning the empty prefix of source into a prefix of target takes one insert per character.
+    val firstRow = Array.range(0, target.length + 1)
+
+    val lastRow = source.foldLeft(firstRow) { (previous, sc) =>
+      val current = new Array[Int](previous.length)
+
+      // Turning a prefix of source into the empty prefix of target takes one delete per character.
+      current(0) = previous(0) + 1
+
+      for (c <- 1 until current.length) {
+        current(c) =
+          if (sc == target(c - 1)) previous(c - 1)
+          else math.min(
+            math.min(
+              previous(c) + 1, // Delete (left).
+              current(c - 1) + 1 // Insert (up).
+            ),
+            previous(c - 1) + 1 // Substitute (left-up).
+          )
+      }
+
+      current
+    }
+
+    lastRow(target.length)
+  }
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object LevenshteinMetric {
+  private lazy val self = apply()
+
+  def apply(): LevenshteinMetric = new LevenshteinMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
new file mode 100755
index 0000000..e74e8eb
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
@@ -0,0 +1,52 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
+import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+import scala.math
+
+/** An implementation of the N-Gram metric. */
+class NGramMetric extends StringMetric[Int, Double] { this: StringFilter =>
+  /**
+   * Scores the filtered inputs as the number of shared n-grams divided by the larger of the two n-gram counts.
+   *
+   * @return None when either filtered input is shorter than n, Some(1d) when both are identical.
+   * @throws IllegalArgumentException when n is not positive.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2)
+
+    // An input shorter than n has no n-gram, so it is not possible to compare.
+    if (left.length < n || right.length < n) None
+    else if (left.sameElements(right)) Some(1d)
+    else
+      for {
+        grams1 <- NGramTokenizer.tokenize(left)(n)
+        grams2 <- NGramTokenizer.tokenize(right)(n)
+      } yield {
+        val matches = scoreMatches((grams1.map(_.mkString), grams2.map(_.mkString)))
+
+        matches.toDouble / math.max(grams1.length, grams2.length)
+      }
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)(n)
+
+  /** Number of n-grams the two token arrays have in common, as counted by intersect. */
+  private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object NGramMetric {
+  private lazy val self = apply()
+
+  def apply(): NGramMetric = new NGramMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
+
+  def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
new file mode 100755
index 0000000..a543a7e
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
@@ -0,0 +1,52 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
+import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+import scala.math
+
+/** An implementation of the overlap metric. */
+class OverlapMetric extends StringMetric[Int, Double] { this: StringFilter =>
+  /**
+   * Scores the filtered inputs as the number of shared n-grams divided by the smaller of the two n-gram counts.
+   *
+   * @return None when either filtered input is shorter than n, Some(1d) when both are identical.
+   * @throws IllegalArgumentException when n is not positive.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2)
+
+    // An input shorter than n has no n-gram, so it is not possible to compare.
+    if (left.length < n || right.length < n) None
+    else if (left.sameElements(right)) Some(1d)
+    else
+      for {
+        grams1 <- NGramTokenizer.tokenize(left)(n)
+        grams2 <- NGramTokenizer.tokenize(right)(n)
+      } yield {
+        val matches = scoreMatches((grams1.map(_.mkString), grams2.map(_.mkString)))
+
+        matches.toDouble / math.min(grams1.length, grams2.length)
+      }
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)(n)
+
+  /** Number of n-grams the two token arrays have in common, as counted by intersect. */
+  private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object OverlapMetric {
+  private lazy val self = apply()
+
+  def apply(): OverlapMetric = new OverlapMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
+
+  def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
new file mode 100755
index 0000000..1017b1f
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
@@ -0,0 +1,80 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+
+/** An implementation of the Ratcliff/Obershelp metric. */
+class RatcliffObershelpMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter =>
+  /**
+   * Scores the filtered inputs as twice the total length of their common sequences divided by their combined length.
+   *
+   * @return None when either filtered input is empty, Some(1d) when both are identical.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Double] = {
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2)
+
+    if (left.length == 0 || right.length == 0) None
+    else if (left.sameElements(right)) Some(1d)
+    else {
+      val common = commonSequences((left, right)).map(_.length).sum
+
+      Some(2d * common / (left.length + right.length))
+    }
+  }
+
+  /** String overload; converts both arguments to character arrays and delegates. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)
+
+  /**
+   * Finds the longest run of characters that appears contiguously in both arrays; the first one found wins a tie.
+   *
+   * @return the run length followed by the exclusive end index of the run in the first and in the second array.
+   */
+  private[this] def longestCommonSubstring(ct: CompareTuple[Char]): (Int, Int, Int) = {
+    // suffixes(r + 1)(c + 1) holds the length of the common run that ends at ct._1(r) and ct._2(c).
+    val suffixes = Array.ofDim[Int](ct._1.length + 1, ct._2.length + 1)
+    var best = (0, 0, 0) // Length, row, column.
+
+    for {
+      r <- 0 until ct._1.length
+      c <- 0 until ct._2.length
+      if ct._1(r) == ct._2(c)
+    } {
+      val length = suffixes(r)(c) + 1
+
+      suffixes(r + 1)(c + 1) = length
+      if (length > best._1) best = (length, r + 1, c + 1)
+    }
+
+    best
+  }
+
+  /** Collects the longest common run and, recursively, the common runs to the left and to the right of it. */
+  private[this] def commonSequences(ct: CompareTuple[Char]): Array[Array[Char]] = {
+    val (length, end1, end2) = longestCommonSubstring(ct)
+
+    if (length == 0) Array.empty
+    else {
+      val start1 = end1 - length
+      val start2 = end2 - length
+      val before = commonSequences((ct._1.take(start1), ct._2.take(start2)))
+      val after = commonSequences((ct._1.drop(end1), ct._2.drop(end2)))
+
+      Array(ct._1.slice(start1, end1)) ++ before ++ after
+    }
+  }
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object RatcliffObershelpMetric {
+  private lazy val self = apply()
+
+  def apply(): RatcliffObershelpMetric = new RatcliffObershelpMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
new file mode 100755
index 0000000..976b01a
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
@@ -0,0 +1,73 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, StringMetric, StringFilter}
+import scala.math.BigDecimal
+
+/** An implementation of a weighted Levenshtein metric. */
+class WeightedLevenshteinMetric
+  extends StringMetric[(BigDecimal, BigDecimal, BigDecimal), Double] { this: StringFilter =>
+
+  /**
+   * Options order is delete, insert, then substitute weight.
+   *
+   * @return None when either filtered input is empty; otherwise the weighted edit distance of the filtered inputs.
+   * @throws IllegalArgumentException when any weight is negative.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit options: (BigDecimal, BigDecimal, BigDecimal)): Option[Double] = {
+
+    if (Seq(options._1, options._2, options._3).exists(_ < 0))
+      throw new IllegalArgumentException("Expected valid weight options.")
+
+    val left = filter(charArray1)
+    lazy val right = filter(charArray2)
+
+    if (left.length == 0 || right.length == 0) None
+    else if (left.sameElements(right)) Some(0d)
+    else Some(weightedLevenshtein((left, right), options).toDouble)
+  }
+
+  /** Options order is delete, insert, then substitute weight. Delegates to the character array variant. */
+  final override def compare(string1: String, string2: String)
+    (implicit options: (BigDecimal, BigDecimal, BigDecimal)): Option[Double] =
+
+    compare(string1.toCharArray, string2.toCharArray)(options)
+
+  /** Fills the whole cost matrix and returns its last cell; w holds the delete, insert and substitute weights. */
+  private[this] def weightedLevenshtein(ct: CompareTuple[Char], w: (BigDecimal, BigDecimal, BigDecimal)) = {
+    val rows = ct._1.length
+    val columns = ct._2.length
+    val costs = Array.ofDim[BigDecimal](rows + 1, columns + 1)
+
+    // Borders: deleting r leading characters, respectively inserting c leading characters.
+    for (r <- 0 to rows) costs(r)(0) = w._1 * r
+    for (c <- 0 to columns) costs(0)(c) = w._2 * c
+
+    for (r <- 1 to rows; c <- 1 to columns) {
+      costs(r)(c) =
+        if (ct._1(r - 1) == ct._2(c - 1)) costs(r - 1)(c - 1)
+        else {
+          val deleted = costs(r - 1)(c) + w._1 // Delete (left).
+          val inserted = costs(r)(c - 1) + w._2 // Insert (up).
+          val substituted = costs(r - 1)(c - 1) + w._3 // Substitute (left-up).
+
+          deleted.min(inserted.min(substituted))
+        }
+    }
+
+    costs(rows)(columns)
+  }
+}
+
+/** Exposes compare on a lazily created instance that mixes in the plain StringFilter. */
+object WeightedLevenshteinMetric {
+  private lazy val self = apply()
+
+  def apply(): WeightedLevenshteinMetric = new WeightedLevenshteinMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(options: (BigDecimal, BigDecimal, BigDecimal)) =
+    self.compare(charArray1, charArray2)(options)
+
+  def compare(string1: String, string2: String)(options: (BigDecimal, BigDecimal, BigDecimal)) =
+    self.compare(string1, string2)(options)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala b/core/source/core/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala
new file mode 100755
index 0000000..d66fd62
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala
@@ -0,0 +1,38 @@
+package com.rockymadden.stringmetric.tokenization
+
+import com.rockymadden.stringmetric.{StringFilter, StringTokenizer}
+import scala.annotation.tailrec
+
+/** An implementation of the N-Gram tokenizer. */
+class NGramTokenizer extends StringTokenizer[Int, Array[String]] { this: StringFilter =>
+  /**
+   * Splits the filtered input into every run of n consecutive characters, in order of appearance.
+   *
+   * @return None when the filtered input holds fewer than n characters; otherwise its length - n + 1 n-grams.
+   * @throws IllegalArgumentException when n is not positive.
+   */
+  final override def tokenize(charArray: Array[Char])(implicit n: Int): Option[Array[Array[Char]]] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val fca = filter(charArray)
+
+    if (fca.length < n) None
+    // The input holds at least n characters here, so every window produced by sliding is exactly n long.
+    else Some(fca.sliding(n).toArray)
+  }
+
+  /** String overload; every n-gram is rendered as a String. */
+  final override def tokenize(string: String)(implicit n: Int): Option[Array[String]] =
+    tokenize(string.toCharArray)(n).map(_.map(_.mkString))
+}
+
+/** Exposes tokenize on a lazily created instance that mixes in the plain StringFilter. */
+object NGramTokenizer {
+  private lazy val self = apply()
+
+  def apply(): NGramTokenizer = new NGramTokenizer with StringFilter
+
+  def tokenize(charArray: Array[Char])(n: Int) = self.tokenize(charArray)(n)
+
+  def tokenize(string: String)(n: Int) = self.tokenize(string)(n)
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala
new file mode 100755
index 0000000..5bb503a
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala
@@ -0,0 +1,96 @@
+package com.rockymadden.stringmetric
+
+import com.rockymadden.stringmetric.Alphabet.{Alpha, Vowel}
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for the isSuperset overloads (Char, Array[Char], String), exercised through Alpha and Vowel. */
+@RunWith(classOf[JUnitRunner])
+final class AlphabetSpec extends ScalaTest {
+ "Alphabet" should provide {
+ "an overloaded isSuperset method which accepts Char" when passed { // Char overload
+ "non-alphabet argument" should returns {
+ "false" in {
+ Alpha isSuperset '0' should be (false)
+ }
+ }
+ "alphabet argument" should returns {
+ "true" in {
+ Alpha isSuperset 'a' should be (true) // both letter cases are expected to match
+ Alpha isSuperset 'A' should be (true)
+ }
+ }
+ "non-vowel argument" should returns {
+ "false" in {
+ Vowel isSuperset 'y' should be (false) // 'y' is expected not to count as a vowel
+ }
+ }
+ "vowel argument" should returns {
+ "true" in {
+ Vowel isSuperset 'a' should be (true)
+ Vowel isSuperset 'A' should be (true)
+ }
+ }
+ }
+ "an overloaded isSuperset method which accepts Array[Char]" when passed { // Array[Char] overload
+ "empty argument" should returns {
+ "false" in {
+ Alpha isSuperset Array.empty[Char] should be (false) // empty input is expected to yield false
+ }
+ }
+ "non-alphabet argument" should returns {
+ "false" in {
+ Alpha isSuperset "hi!".toCharArray should be (false) // one non-letter makes the whole input fail
+ Alpha isSuperset "helloworld!".toCharArray should be (false)
+ }
+ }
+ "alphabet argument" should returns {
+ "true" in {
+ Alpha isSuperset "hi".toCharArray should be (true)
+ Alpha isSuperset "helloworld".toCharArray should be (true)
+ Alpha isSuperset "HI".toCharArray should be (true)
+ Alpha isSuperset "HELLOWORLD".toCharArray should be (true)
+ }
+ }
+ "non-vowel argument" should returns {
+ "false" in {
+ Vowel isSuperset "y".toCharArray should be (false)
+ }
+ }
+ "vowel argument" should returns {
+ "true" in {
+ Vowel isSuperset "a".toCharArray should be (true)
+ Vowel isSuperset "A".toCharArray should be (true)
+ }
+ }
+ }
+ "an overloaded isSuperset method which accepts String" when passed { // String overload
+ "empty argument" should returns {
+ "false" in {
+ Alpha isSuperset "" should be (false) // empty input is expected to yield false
+ }
+ }
+ "non-alphabet argument" should returns {
+ "false" in {
+ Alpha isSuperset "helloworld!" should be (false)
+ }
+ }
+ "alphabet argument" should returns {
+ "true" in {
+ Alpha isSuperset "helloworld" should be (true)
+ Alpha isSuperset "HELLOWORLD" should be (true)
+ }
+ }
+ "non-vowel argument" should returns {
+ "false" in {
+ Vowel isSuperset "y" should be (false)
+ }
+ }
+ "vowel argument" should returns {
+ "true" in {
+ Vowel isSuperset "a" should be (true)
+ Vowel isSuperset "A" should be (true)
+ }
+ }
+ }
+ }
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/FilterDecoratedSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/FilterDecoratedSpec.scala
new file mode 100755
index 0000000..e900f83
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/FilterDecoratedSpec.scala
@@ -0,0 +1,38 @@
+package com.rockymadden.stringmetric
+
+import com.rockymadden.stringmetric.filter.AsciiNumberFilter
+import com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm
+import com.rockymadden.stringmetric.similarity.DiceSorensenMetric
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification checking that a metric and an algorithm decorated with AsciiNumberFilter return filtered results. */
+@RunWith(classOf[JUnitRunner])
+final class FilterDecoratedSpec extends ScalaTest {
+ import FilterDecoratedSpec.{Algorithm, Metric}
+
+ "Filter decorated metrics" should provide {
+ "compare method" when passed {
+ "filterable arguments" should returns {
+ "filtered results" in {
+ Metric.compare("123", "456")(1).isDefined should be (false) // digit-only inputs are expected to give no result
+ Metric.compare("ni123ght", "na456cht")(1).get should be (0.6) // expected score with the embedded digits filtered
+ }
+ }
+ }
+ }
+ "Filter decorated algorithms" should provide {
+ "compute method" when passed {
+ "filterable argument" should returns {
+ "filtered results" in {
+ Algorithm.compute("456").isDefined should be (false) // digit-only input is expected to give no result
+ Algorithm.compute("du123mb456").get should equal ("tm") // expected code with the embedded digits filtered
+ }
+ }
+ }
+ }
+}
+/** Subjects under test: MetaphoneAlgorithm and DiceSorensenMetric, each with AsciiNumberFilter mixed in. */
+object FilterDecoratedSpec {
+ private final val Algorithm = new MetaphoneAlgorithm with AsciiNumberFilter
+ private final val Metric = new DiceSorensenMetric with AsciiNumberFilter
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala b/core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala
new file mode 100755
index 0000000..5f4ab62
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala
@@ -0,0 +1,18 @@
+package com.rockymadden.stringmetric
+
+import org.scalatest.{BeforeAndAfter, ParallelTestExecution, WordSpec}
+import org.scalatest.matchers.ShouldMatchers
+/** Base trait for the specs: WordSpec with matchers plus the after-words used in clauses such as `should provide { ... }`. */
+trait ScalaTest extends WordSpec with ShouldMatchers with BeforeAndAfter with ParallelTestExecution {
+ def allows = afterWord("allow") // each after-word is built via afterWord from the quoted word
+
+ def executes = afterWord("execute")
+
+ def passed = afterWord("passed") // used as `when passed { ... }` in the specs
+
+ def provide = afterWord("provide") // used as `should provide { ... }` in the specs
+
+ def returns = afterWord("return") // used as `should returns { ... }` in the specs
+
+ def throws = afterWord("throw")
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala
new file mode 100755
index 0000000..7ce0c24
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala
@@ -0,0 +1,59 @@
+package com.rockymadden.stringmetric
+
+import com.rockymadden.stringmetric.phonetic._
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification checking that StringAlgorithm's types, factories and computeWith* methods agree with direct use of each algorithm. */
+@RunWith(classOf[JUnitRunner])
+final class StringAlgorithmSpec extends ScalaTest {
+ "StringAlgorithm standalone object" should provide {
+ "compute method, type, and companion object pass-throughs" in {
+ val metaphone: StringAlgorithm.Metaphone = StringAlgorithm.Metaphone() // type alias and factory pass-through
+
+ metaphone.compute("testone").get should
+ equal (StringAlgorithm.computeWithMetaphone("testone").get)
+ metaphone.compute("testone".toCharArray).get should
+ equal (StringAlgorithm.computeWithMetaphone("testone".toCharArray).get)
+ metaphone.compute("testone".toCharArray).get should
+ equal (MetaphoneAlgorithm.compute("testone".toCharArray).get) // must also match the algorithm's own companion
+
+ val nysiis: StringAlgorithm.Nysiis = StringAlgorithm.Nysiis() // same three checks for Nysiis
+
+ nysiis.compute("testone").get should
+ equal (StringAlgorithm.computeWithNysiis("testone").get)
+ nysiis.compute("testone".toCharArray).get should
+ equal (StringAlgorithm.computeWithNysiis("testone".toCharArray).get)
+ nysiis.compute("testone".toCharArray).get should
+ equal (NysiisAlgorithm.compute("testone".toCharArray).get)
+
+ val refinedNysiis: StringAlgorithm.RefinedNysiis = StringAlgorithm.RefinedNysiis() // same three checks for RefinedNysiis
+
+ refinedNysiis.compute("testone").get should
+ equal (StringAlgorithm.computeWithRefinedNysiis("testone").get)
+ refinedNysiis.compute("testone".toCharArray).get should
+ equal (StringAlgorithm.computeWithRefinedNysiis("testone".toCharArray).get)
+ refinedNysiis.compute("testone".toCharArray).get should
+ equal (RefinedNysiisAlgorithm.compute("testone".toCharArray).get)
+
+ val refinedSoundex: StringAlgorithm.RefinedSoundex = StringAlgorithm.RefinedSoundex() // same three checks for RefinedSoundex
+
+ refinedSoundex.compute("testone").get should
+ equal (StringAlgorithm.computeWithRefinedSoundex("testone").get)
+ refinedSoundex.compute("testone".toCharArray).get should
+ equal (StringAlgorithm.computeWithRefinedSoundex("testone".toCharArray).get)
+ refinedSoundex.compute("testone".toCharArray).get should
+ equal (RefinedSoundexAlgorithm.compute("testone".toCharArray).get)
+
+ val soundex: StringAlgorithm.Soundex = StringAlgorithm.Soundex() // same three checks for Soundex
+
+ soundex.compute("testone").get should
+ equal (StringAlgorithm.computeWithSoundex("testone").get)
+ soundex.compute("testone".toCharArray).get should
+ equal (StringAlgorithm.computeWithSoundex("testone".toCharArray).get)
+ soundex.compute("testone".toCharArray).get should
+ equal (SoundexAlgorithm.compute("testone".toCharArray).get)
+ }
+ }
+}
+
+
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala
new file mode 100755
index 0000000..ca99bff
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala
@@ -0,0 +1,141 @@
+package com.rockymadden.stringmetric
+
+import com.rockymadden.stringmetric.phonetic._
+import com.rockymadden.stringmetric.similarity._
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification checking that StringMetric's types, factories and compareWith* methods agree with direct use of each metric. */
+@RunWith(classOf[JUnitRunner])
+final class StringMetricSpec extends ScalaTest {
+ "StringMetric standalone object" should provide {
+ "compare method, type, and companion object pass-throughs" in {
+ val diceSorensen: StringMetric.DiceSorensen = StringMetric.DiceSorensen() // type alias and factory pass-through
+
+ diceSorensen.compare("testone", "testtwo")(1).get should
+ equal (StringMetric.compareWithDiceSorensen("testone", "testtwo")(1).get)
+ diceSorensen.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should
+ equal (StringMetric.compareWithDiceSorensen("testone".toCharArray, "testtwo".toCharArray)(1).get)
+ diceSorensen.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should
+ equal (DiceSorensenMetric.compare("testone".toCharArray, "testtwo".toCharArray)(1).get) // must also match the metric's own companion
+
+ val hamming: StringMetric.Hamming = StringMetric.Hamming() // same three checks, no extra argument list
+
+ hamming.compare("testone", "testtwo").get should
+ equal (StringMetric.compareWithHamming("testone", "testtwo").get)
+ hamming.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (StringMetric.compareWithHamming("testone".toCharArray, "testtwo".toCharArray).get)
+ hamming.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (HammingMetric.compare("testone".toCharArray, "testtwo".toCharArray).get)
+
+ val jaccard: StringMetric.Jaccard = StringMetric.Jaccard() // takes a second argument list, here (1)
+
+ jaccard.compare("testone", "testtwo")(1).get should
+ equal (StringMetric.compareWithJaccard("testone", "testtwo")(1).get)
+ jaccard.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should
+ equal (StringMetric.compareWithJaccard("testone".toCharArray, "testtwo".toCharArray)(1).get)
+ jaccard.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should
+ equal (JaccardMetric.compare("testone".toCharArray, "testtwo".toCharArray)(1).get)
+
+ val jaro: StringMetric.Jaro = StringMetric.Jaro()
+
+ jaro.compare("testone", "testtwo").get should
+ equal (StringMetric.compareWithJaro("testone", "testtwo").get)
+ jaro.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (StringMetric.compareWithJaro("testone".toCharArray, "testtwo".toCharArray).get)
+ jaro.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (JaroMetric.compare("testone".toCharArray, "testtwo".toCharArray).get)
+
+ val jaroWinkler: StringMetric.JaroWinkler = StringMetric.JaroWinkler()
+
+ jaroWinkler.compare("testone", "testtwo").get should
+ equal (StringMetric.compareWithJaroWinkler("testone", "testtwo").get)
+ jaroWinkler.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (StringMetric.compareWithJaroWinkler("testone".toCharArray, "testtwo".toCharArray).get)
+ jaroWinkler.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (JaroWinklerMetric.compare("testone".toCharArray, "testtwo".toCharArray).get)
+
+ val levenshtein: StringMetric.Levenshtein = StringMetric.Levenshtein()
+
+ levenshtein.compare("testone", "testtwo").get should
+ equal (StringMetric.compareWithLevenshtein("testone", "testtwo").get)
+ levenshtein.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (StringMetric.compareWithLevenshtein("testone".toCharArray, "testtwo".toCharArray).get)
+ levenshtein.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (LevenshteinMetric.compare("testone".toCharArray, "testtwo".toCharArray).get)
+
+ val metaphone: StringMetric.Metaphone = StringMetric.Metaphone()
+
+ metaphone.compare("testone", "testtwo").get should
+ equal (StringMetric.compareWithMetaphone("testone", "testtwo").get)
+ metaphone.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (StringMetric.compareWithMetaphone("testone".toCharArray, "testtwo".toCharArray).get)
+ metaphone.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (MetaphoneMetric.compare("testone".toCharArray, "testtwo".toCharArray).get)
+
+ val nGram: StringMetric.NGram = StringMetric.NGram() // takes a second argument list, here (1)
+
+ nGram.compare("testone", "testtwo")(1).get should
+ equal (StringMetric.compareWithNGram("testone", "testtwo")(1).get)
+ nGram.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should
+ equal (StringMetric.compareWithNGram("testone".toCharArray, "testtwo".toCharArray)(1).get)
+ nGram.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should
+ equal (NGramMetric.compare("testone".toCharArray, "testtwo".toCharArray)(1).get)
+
+ val nysiis: StringMetric.Nysiis = StringMetric.Nysiis()
+
+ nysiis.compare("testone", "testtwo").get should
+ equal (StringMetric.compareWithNysiis("testone", "testtwo").get)
+ nysiis.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (StringMetric.compareWithNysiis("testone".toCharArray, "testtwo".toCharArray).get)
+ nysiis.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (NysiisMetric.compare("testone".toCharArray, "testtwo".toCharArray).get)
+
+ val overlap: StringMetric.Overlap = StringMetric.Overlap() // takes a second argument list, here (1)
+
+ overlap.compare("testone", "testtwo")(1).get should
+ equal (StringMetric.compareWithOverlap("testone", "testtwo")(1).get)
+ overlap.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should
+ equal (StringMetric.compareWithOverlap("testone".toCharArray, "testtwo".toCharArray)(1).get)
+ overlap.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should
+ equal (OverlapMetric.compare("testone".toCharArray, "testtwo".toCharArray)(1).get)
+
+ val refinedNysiis: StringMetric.RefinedNysiis = StringMetric.RefinedNysiis()
+
+ refinedNysiis.compare("testone", "testtwo").get should
+ equal (StringMetric.compareWithRefinedNysiis("testone", "testtwo").get)
+ refinedNysiis.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (StringMetric.compareWithRefinedNysiis("testone".toCharArray, "testtwo".toCharArray).get)
+ refinedNysiis.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (RefinedNysiisMetric.compare("testone".toCharArray, "testtwo".toCharArray).get)
+
+ val refinedSoundex: StringMetric.RefinedSoundex = StringMetric.RefinedSoundex()
+
+ refinedSoundex.compare("testone", "testtwo").get should
+ equal (StringMetric.compareWithRefinedSoundex("testone", "testtwo").get)
+ refinedSoundex.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (StringMetric.compareWithRefinedSoundex("testone".toCharArray, "testtwo".toCharArray).get)
+ refinedSoundex.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (RefinedSoundexMetric.compare("testone".toCharArray, "testtwo".toCharArray).get)
+
+ val soundex: StringMetric.Soundex = StringMetric.Soundex()
+
+ soundex.compare("testone", "testtwo").get should
+ equal (StringMetric.compareWithSoundex("testone", "testtwo").get)
+ soundex.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (StringMetric.compareWithSoundex("testone".toCharArray, "testtwo".toCharArray).get)
+ soundex.compare("testone".toCharArray, "testtwo".toCharArray).get should
+ equal (SoundexMetric.compare("testone".toCharArray, "testtwo".toCharArray).get)
+
+ val weightedLevenshtein: StringMetric.WeightedLevenshtein = StringMetric.WeightedLevenshtein() // second argument list carries three values, here (1, 2, 3)
+
+ weightedLevenshtein.compare("testone", "testtwo")(1, 2, 3).get should
+ equal (StringMetric.compareWithWeightedLevenshtein("testone", "testtwo")(1, 2, 3).get)
+ weightedLevenshtein.compare("testone".toCharArray, "testtwo".toCharArray)(1, 2, 3).get should
+ equal (StringMetric.compareWithWeightedLevenshtein("testone".toCharArray, "testtwo".toCharArray)(1, 2, 3).get)
+ weightedLevenshtein.compare("testone".toCharArray, "testtwo".toCharArray)(1, 2, 3).get should
+ equal (WeightedLevenshteinMetric.compare("testone".toCharArray, "testtwo".toCharArray)(1, 2, 3).get)
+ }
+ }
+}
+
+
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala
new file mode 100755
index 0000000..8837c25
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala
@@ -0,0 +1,23 @@
+package com.rockymadden.stringmetric
+
+import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification checking that StringTokenizer's NGram type, factory and tokenizeWithNGram agree with NGramTokenizer. */
+@RunWith(classOf[JUnitRunner])
+final class StringTokenizerSpec extends ScalaTest {
+ "StringTokenizer standalone object" should provide {
+ "tokenize method, type, and companion object pass-throughs" in {
+ val nGram: StringTokenizer.NGram = StringTokenizer.NGram() // type alias and factory pass-through
+
+ nGram.tokenize("testone")(1).get should
+ equal (StringTokenizer.tokenizeWithNGram("testone")(1).get)
+ nGram.tokenize("testone".toCharArray)(1).get should
+ equal (StringTokenizer.tokenizeWithNGram("testone".toCharArray)(1).get)
+ nGram.tokenize("testone".toCharArray)(1).get should
+ equal (NGramTokenizer.tokenize("testone".toCharArray)(1).get) // must also match the tokenizer's own companion
+ }
+ }
+}
+
+
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlFilterSpec.scala
new file mode 100755
index 0000000..1cce0c9
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlFilterSpec.scala
@@ -0,0 +1,33 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiControlFilter: the String and Array[Char] overloads of filter are expected to drop controls. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiControlFilterSpec extends ScalaTest {
+ import AsciiControlFilterSpec.Filter
+
+ "AsciiControlFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with controls" should returns {
+ "String with controls removed" in {
+ Filter.filter(" HelloWorld") should equal ("HelloWorld") // NOTE(review): the removed character shows as a plain space here; presumably a control character in the original source - confirm
+ Filter.filter("HelloWorld ") should equal ("HelloWorld") // trailing position
+ Filter.filter("Hello World") should equal ("HelloWorld") // middle position
+ }
+ }
+ "character array with controls" should returns {
+ "character array with controls removed" in {
+ Filter.filter(" HelloWorld".toCharArray) should equal ("HelloWorld".toCharArray) // same cases through the Array[Char] overload
+ Filter.filter("HelloWorld ".toCharArray) should equal ("HelloWorld".toCharArray)
+ Filter.filter("Hello World".toCharArray) should equal ("HelloWorld".toCharArray)
+ }
+ }
+ }
+ }
+}
+
+object AsciiControlFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiControlFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilterSpec.scala
new file mode 100755
index 0000000..958c8ba
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilterSpec.scala
@@ -0,0 +1,33 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiControlOnlyFilter: the String and Array[Char] overloads of filter are expected to drop non-controls. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiControlOnlyFilterSpec extends ScalaTest {
+ import AsciiControlOnlyFilterSpec.Filter
+
+ "AsciiControlOnlyFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with mixed characters" should returns {
+ "String with non-controls removed" in {
+ Filter.filter("!@#$% ^&*()abc") should equal (" ") // NOTE(review): the kept character shows as a plain space here; presumably a control character in the original source - confirm
+ Filter.filter(" ^&*()abc") should equal (" ") // leading position
+ Filter.filter("%^&*()abc ") should equal (" ") // trailing position
+ }
+ }
+ "character array with mixed characters" should returns {
+ "character array with non-controls removed" in {
+ Filter.filter("!@#$% ^&*()abc".toCharArray) should equal (" ".toCharArray) // same cases through the Array[Char] overload
+ Filter.filter(" ^&*()abc".toCharArray) should equal (" ".toCharArray)
+ Filter.filter("%^&*()abc ".toCharArray) should equal (" ".toCharArray)
+ }
+ }
+ }
+ }
+}
+
+object AsciiControlOnlyFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiControlOnlyFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilterSpec.scala
new file mode 100755
index 0000000..d86e7a5
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilterSpec.scala
@@ -0,0 +1,29 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiLetterFilter: the String and Array[Char] overloads of filter are expected to drop letters. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiLetterFilterSpec extends ScalaTest {
+ import AsciiLetterFilterSpec.Filter
+
+ "AsciiLetterFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with letters" should returns {
+ "String with letters removed" in {
+ Filter.filter(" Hello123World!") should equal (" 123!") // space, digits and symbol are expected to survive
+ }
+ }
+ "character array with letters" should returns {
+ "character array with letters removed" in {
+ Filter.filter(" Hello123World!".toCharArray) should equal (" 123!".toCharArray) // same case through the Array[Char] overload
+ }
+ }
+ }
+ }
+}
+
+object AsciiLetterFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiLetterFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilterSpec.scala
new file mode 100755
index 0000000..edfdce6
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilterSpec.scala
@@ -0,0 +1,33 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiLetterNumberFilter: the String and Array[Char] overloads of filter are expected to drop letters and numbers. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiLetterNumberFilterSpec extends ScalaTest {
+ import AsciiLetterNumberFilterSpec.Filter
+
+ "AsciiLetterNumberFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with letters and numbers" should returns {
+ "String with letters and numbers removed" in {
+ Filter.filter(" Hello123World!") should equal (" !") // only the space and symbol are expected to survive
+ Filter.filter("Hello123 !World") should equal (" !")
+ Filter.filter("!Hello123World ") should equal ("! ") // survivors keep their original order
+ }
+ }
+ "character array with letters and numbers" should returns {
+ "character array with letters and numbers removed" in {
+ Filter.filter(" Hello123World!".toCharArray) should equal (" !".toCharArray) // same cases through the Array[Char] overload
+ Filter.filter("Hello123 !World".toCharArray) should equal (" !".toCharArray)
+ Filter.filter("!Hello123World ".toCharArray) should equal ("! ".toCharArray)
+ }
+ }
+ }
+ }
+}
+
+object AsciiLetterNumberFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiLetterNumberFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilterSpec.scala
new file mode 100755
index 0000000..7998e39
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilterSpec.scala
@@ -0,0 +1,35 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiLetterNumberOnlyFilter: the String and Array[Char] overloads of filter are expected to keep only letters and numbers. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiLetterNumberOnlyFilterSpec extends ScalaTest {
+ import AsciiLetterNumberOnlyFilterSpec.Filter
+
+ "AsciiLetterNumberOnlyFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with mixed characters" should returns {
+ "String with non-letters and non-numbers removed" in {
+ Filter.filter("!@#$%^&*()abc") should equal ("abc") // symbols first, letters last
+ Filter.filter("!@#$%^&*()abc123") should equal ("abc123")
+ Filter.filter("abc123!@#$%^&*()") should equal ("abc123") // symbols last
+ Filter.filter("!@#$%abc123^&*()") should equal ("abc123") // symbols on both sides
+ }
+ }
+ "character array with mixed characters" should returns {
+ "character array with non-letters and non-numbers removed" in {
+ Filter.filter("!@#$%^&*()abc".toCharArray) should equal ("abc".toCharArray) // same cases through the Array[Char] overload
+ Filter.filter("!@#$%^&*()abc123".toCharArray) should equal ("abc123".toCharArray)
+ Filter.filter("abc123!@#$%^&*()".toCharArray) should equal ("abc123".toCharArray)
+ Filter.filter("!@#$%abc123^&*()".toCharArray) should equal ("abc123".toCharArray)
+ }
+ }
+ }
+ }
+}
+
+object AsciiLetterNumberOnlyFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiLetterNumberOnlyFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilterSpec.scala
new file mode 100755
index 0000000..d134792
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilterSpec.scala
@@ -0,0 +1,33 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiLetterOnlyFilter: the String and Array[Char] overloads of filter are expected to keep only letters. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiLetterOnlyFilterSpec extends ScalaTest {
+ import AsciiLetterOnlyFilterSpec.Filter
+
+ "AsciiLetterOnlyFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with mixed characters" should returns {
+ "String with non-letters removed" in {
+ Filter.filter("!@#$%^&*()abc") should equal ("abc")
+ Filter.filter("!@#$%^&*()abc123") should equal ("abc") // digits are expected to be dropped as well
+ Filter.filter("abc!@#$%^&*()123") should equal ("abc")
+ }
+ }
+ "character array with mixed characters" should returns {
+ "character array with non-letters removed" in {
+ Filter.filter("!@#$%^&*()abc".toCharArray) should equal ("abc".toCharArray) // same cases through the Array[Char] overload
+ Filter.filter("!@#$%^&*()abc123".toCharArray) should equal ("abc".toCharArray)
+ Filter.filter("abc!@#$%^&*()123".toCharArray) should equal ("abc".toCharArray)
+ }
+ }
+ }
+ }
+}
+
+object AsciiLetterOnlyFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiLetterOnlyFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilterSpec.scala
new file mode 100755
index 0000000..7c24d45
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilterSpec.scala
@@ -0,0 +1,33 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiNumberFilter: the String and Array[Char] overloads of filter are expected to drop numbers. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiNumberFilterSpec extends ScalaTest {
+ import AsciiNumberFilterSpec.Filter
+
+ "AsciiNumberFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with numbers" should returns {
+ "String with numbers removed" in {
+ Filter.filter(" Hello123World!") should equal (" HelloWorld!") // digits in the middle
+ Filter.filter("123 HelloWorld!") should equal (" HelloWorld!") // digits at the start
+ Filter.filter(" HelloWorld!123") should equal (" HelloWorld!") // digits at the end
+ }
+ }
+ "character array with numbers" should returns {
+ "character array with numbers removed" in {
+ Filter.filter(" Hello123World!".toCharArray) should equal (" HelloWorld!".toCharArray) // same cases through the Array[Char] overload
+ Filter.filter("123 HelloWorld!".toCharArray) should equal (" HelloWorld!".toCharArray)
+ Filter.filter(" HelloWorld!123".toCharArray) should equal (" HelloWorld!".toCharArray)
+ }
+ }
+ }
+ }
+}
+
+object AsciiNumberFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiNumberFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilterSpec.scala
new file mode 100755
index 0000000..db49d25
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilterSpec.scala
@@ -0,0 +1,33 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiNumberOnlyFilter: the String and Array[Char] overloads of filter are expected to keep only numbers. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiNumberOnlyFilterSpec extends ScalaTest {
+ import AsciiNumberOnlyFilterSpec.Filter
+
+ "AsciiNumberOnlyFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with mixed characters" should returns {
+ "String with non-numbers removed" in {
+ Filter.filter("!@#$%^&*()abc123") should equal ("123") // digits at the end
+ Filter.filter("123!@#$%^&*()abc") should equal ("123") // digits at the start
+ Filter.filter("!@#$%^123&*()abc") should equal ("123") // digits in the middle
+ }
+ }
+ "character array with mixed characters" should returns {
+ "character array with non-numbers removed" in {
+ Filter.filter("!@#$%^&*()abc123".toCharArray) should equal ("123".toCharArray) // same cases through the Array[Char] overload
+ Filter.filter("123!@#$%^&*()abc".toCharArray) should equal ("123".toCharArray)
+ Filter.filter("!@#$%^123&*()abc".toCharArray) should equal ("123".toCharArray)
+ }
+ }
+ }
+ }
+}
+
+object AsciiNumberOnlyFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiNumberOnlyFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilterSpec.scala
new file mode 100755
index 0000000..759db3d
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilterSpec.scala
@@ -0,0 +1,37 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiSpaceFilter: the String and Array[Char] overloads of filter are expected to drop spaces. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiSpaceFilterSpec extends ScalaTest {
+ import AsciiSpaceFilterSpec.Filter
+
+ "AsciiSpaceFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with spaces" should returns {
+ "String with spaces removed" in {
+ Filter.filter("HelloWorld") should equal ("HelloWorld") // input without spaces is expected to come back unchanged
+ Filter.filter(" HelloWorld ") should equal ("HelloWorld") // leading and trailing spaces
+ Filter.filter("Hello World") should equal ("HelloWorld")
+ Filter.filter("H e l l o W o r l d") should equal ("HelloWorld")
+ Filter.filter("H e l l o W o r l d") should equal ("HelloWorld") // NOTE(review): identical to the previous line as shown; presumably the original used different spacing - confirm
+ }
+ }
+ "character array with spaces" should returns {
+ "character array with spaces removed" in {
+ Filter.filter("HelloWorld".toCharArray) should equal ("HelloWorld".toCharArray) // same cases through the Array[Char] overload
+ Filter.filter(" HelloWorld ".toCharArray) should equal ("HelloWorld".toCharArray)
+ Filter.filter("Hello World".toCharArray) should equal ("HelloWorld".toCharArray)
+ Filter.filter("H e l l o W o r l d".toCharArray) should equal ("HelloWorld".toCharArray)
+ Filter.filter("H e l l o W o r l d".toCharArray) should equal ("HelloWorld".toCharArray) // NOTE(review): identical to the previous line as shown - confirm
+ }
+ }
+ }
+ }
+}
+
+object AsciiSpaceFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiSpaceFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilterSpec.scala
new file mode 100755
index 0000000..99be533
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilterSpec.scala
@@ -0,0 +1,31 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+/** Specification for AsciiSymbolFilter: the String and Array[Char] overloads of filter are expected to drop symbols. */
+@RunWith(classOf[JUnitRunner])
+final class AsciiSymbolFilterSpec extends ScalaTest {
+ import AsciiSymbolFilterSpec.Filter
+
+ "AsciiSymbolFilter" should provide {
+ "overloaded filter method" when passed {
+ "String with symbols" should returns {
+ "String with symbols removed" in {
+ Filter.filter("[HelloWorld]") should equal ("HelloWorld") // symbols at both ends
+ Filter.filter("Hello!World") should equal ("HelloWorld") // symbol in the middle
+ }
+ }
+ "character array with symbols" should returns {
+ "character array with symbols removed" in {
+ Filter.filter("[HelloWorld]".toCharArray) should equal ("HelloWorld".toCharArray) // same cases through the Array[Char] overload
+ Filter.filter("Hello!World".toCharArray) should equal ("HelloWorld".toCharArray)
+ }
+ }
+ }
+ }
+}
+
+object AsciiSymbolFilterSpec {
+ private final val Filter = new StringFilterDelegate with AsciiSymbolFilter // instance under test
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilterSpec.scala
new file mode 100755
index 0000000..c6f1899
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilterSpec.scala
@@ -0,0 +1,33 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class AsciiSymbolOnlyFilterSpec extends ScalaTest {
+  import AsciiSymbolOnlyFilterSpec.Filter
+
+  // The letters and digits sit after, before and in the middle of the symbols; in every
+  // position only the symbols are expected to survive.
+  "AsciiSymbolOnlyFilter" should provide {
+    "overloaded filter method" when passed {
+      "String with mixed characters" should returns {
+        "String with non-symbols removed" in {
+          for (raw <- Seq("!@#$%^&*()abc123", "abc123!@#$%^&*()", "!@#$%abc123^&*()"))
+            Filter.filter(raw) should equal ("!@#$%^&*()")
+        }
+      }
+      "character array with mixed characters" should returns {
+        "character array with non-symbols removed" in {
+          for (raw <- Seq("!@#$%^&*()abc123", "abc123!@#$%^&*()", "!@#$%abc123^&*()"))
+            Filter.filter(raw.toCharArray) should equal ("!@#$%^&*()".toCharArray)
+        }
+      }
+    }
+  }
+}
+
+object AsciiSymbolOnlyFilterSpec {
+  // Filter instance imported by the spec class: StringFilterDelegate with AsciiSymbolOnlyFilter mixed in.
+  private final val Filter: StringFilterDelegate with AsciiSymbolOnlyFilter =
+    new StringFilterDelegate with AsciiSymbolOnlyFilter
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilterSpec.scala
new file mode 100755
index 0000000..a04a0ae
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilterSpec.scala
@@ -0,0 +1,41 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class IgnoreAsciiLetterCaseFilterSpec extends ScalaTest {
+  import IgnoreAsciiLetterCaseFilterSpec.Filter
+
+  // The spec does not pin the direction of normalisation: every expectation accepts either the
+  // all-lower-case or the all-upper-case rendering of the input.
+  "IgnoreAsciiLetterCaseFilter" should provide {
+    "overloaded filter method" when passed {
+      "String with mixed case" should returns {
+        "String with the same case" in {
+          Filter.filter("HelloWorld") should (equal ("helloworld") or equal ("HELLOWORLD"))
+          Filter.filter("Hello World") should (equal ("hello world") or equal ("HELLO WORLD"))
+          Filter.filter("H e l l o W o r l d") should
+            (equal ("h e l l o w o r l d") or equal ("H E L L O W O R L D"))
+          // Wider gaps between letters; this case used to be a verbatim repeat of the previous one.
+          Filter.filter("H  e  l  l  o  W  o  r  l  d") should
+            (equal ("h  e  l  l  o  w  o  r  l  d") or equal ("H  E  L  L  O  W  O  R  L  D"))
+        }
+      }
+      "character array with mixed case" should returns {
+        "character array with the same case" in {
+          Filter.filter("HelloWorld".toCharArray) should
+            (equal ("helloworld".toCharArray) or equal ("HELLOWORLD".toCharArray))
+          Filter.filter("Hello World".toCharArray) should
+            (equal ("hello world".toCharArray) or equal ("HELLO WORLD".toCharArray))
+          Filter.filter("H e l l o W o r l d".toCharArray) should
+            (equal ("h e l l o w o r l d".toCharArray) or equal ("H E L L O W O R L D".toCharArray))
+          // Wider gaps between letters; this case used to be a verbatim repeat of the previous one.
+          Filter.filter("H  e  l  l  o  W  o  r  l  d".toCharArray) should
+            (equal ("h  e  l  l  o  w  o  r  l  d".toCharArray) or
+              equal ("H  E  L  L  O  W  O  R  L  D".toCharArray))
+        }
+      }
+    }
+  }
+}
+
+object IgnoreAsciiLetterCaseFilterSpec {
+  // Filter instance imported by the spec class: StringFilterDelegate with
+  // IgnoreAsciiLetterCaseFilter mixed in.
+  private final val Filter: StringFilterDelegate with IgnoreAsciiLetterCaseFilter =
+    new StringFilterDelegate with IgnoreAsciiLetterCaseFilter
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/StringFilterDelegateSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/StringFilterDelegateSpec.scala
new file mode 100755
index 0000000..132156b
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/StringFilterDelegateSpec.scala
@@ -0,0 +1,31 @@
+package com.rockymadden.stringmetric.filter
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class StringFilterDelegateSpec extends ScalaTest {
+  import StringFilterDelegateSpec.Filter
+
+  // A bare StringFilterDelegate (no filter traits mixed in) is expected to hand its input back
+  // untouched, including leading spaces and symbols.
+  // The subject label names the class under test, matching the sibling filter specs.
+  "StringFilterDelegate" should provide {
+    "overloaded filter method" when passed {
+      "String" should returns {
+        "the same String" in {
+          Filter.filter("Hello World") should equal ("Hello World")
+          Filter.filter(" Hello! World]") should equal (" Hello! World]")
+        }
+      }
+      "character array" should returns {
+        "the same character array" in {
+          Filter.filter("Hello World".toCharArray) should equal ("Hello World".toCharArray)
+          Filter.filter(" Hello! World]".toCharArray) should equal (" Hello! World]".toCharArray)
+        }
+      }
+    }
+  }
+}
+
+object StringFilterDelegateSpec {
+  // Filter instance imported by the spec class: a plain StringFilterDelegate with nothing mixed in.
+  private final val Filter: StringFilterDelegate = new StringFilterDelegate
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala
new file mode 100755
index 0000000..1f904d5
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala
@@ -0,0 +1,226 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class MetaphoneAlgorithmSpec extends ScalaTest { // Expectation table for MetaphoneAlgorithm.compute on String input.
+ import MetaphoneAlgorithmSpec.Algorithm
+
+ "MetaphoneAlgorithm" should provide {
+ "compute method" when passed {
+ "empty argument" should returns {
+ "None" in { // "" yields no code, so the result must be undefined
+ Algorithm.compute("").isDefined should be (false)
+ }
+ }
+ "non-phonetic argument" should returns {
+ "None" in { // digits only: nothing to encode, so the result must be undefined
+ Algorithm.compute("123").isDefined should be (false)
+ }
+ }
+ "phonetic argument" should returns {
+ "Some" in { // sections run z down to a; trailing "php"/"others" notes flag deliberate disagreement with other implementations (presumably PHP's metaphone() among them -- confirm)
+ // z
+ Algorithm.compute("z").get should equal ("s")
+ Algorithm.compute("zz").get should equal ("s")
+
+ // y (a lone y or w is expected to produce no code at all)
+ Algorithm.compute("y").isDefined should be (false)
+ Algorithm.compute("zy").get should equal ("s")
+ Algorithm.compute("zyz").get should equal ("ss")
+ Algorithm.compute("zya").get should equal ("sy")
+
+ // x
+ Algorithm.compute("x").get should equal ("s")
+ Algorithm.compute("zx").get should equal ("sks")
+ Algorithm.compute("zxz").get should equal ("skss")
+
+ // w
+ Algorithm.compute("w").isDefined should be (false)
+ Algorithm.compute("zw").get should equal ("s")
+ Algorithm.compute("zwz").get should equal ("ss")
+ Algorithm.compute("zwa").get should equal ("sw")
+
+ // v
+ Algorithm.compute("v").get should equal ("f")
+ Algorithm.compute("zv").get should equal ("sf")
+ Algorithm.compute("zvz").get should equal ("sfs")
+
+ // u
+ Algorithm.compute("u").get should equal ("u")
+ Algorithm.compute("zu").get should equal ("s")
+
+ // t
+ Algorithm.compute("t").get should equal ("t")
+ Algorithm.compute("ztiaz").get should equal ("sxs")
+ Algorithm.compute("ztioz").get should equal ("sxs")
+ Algorithm.compute("zthz").get should equal ("s0s") // "th" is expected to encode as the digit 0
+ Algorithm.compute("ztchz").get should equal ("sxs")
+ Algorithm.compute("ztz").get should equal ("sts")
+
+ // s
+ Algorithm.compute("s").get should equal ("s")
+ Algorithm.compute("zshz").get should equal ("sxs")
+ Algorithm.compute("zsioz").get should equal ("sxs")
+ Algorithm.compute("zsiaz").get should equal ("sxs")
+ Algorithm.compute("zs").get should equal ("ss")
+ Algorithm.compute("zsz").get should equal ("sss")
+
+ // r
+ Algorithm.compute("r").get should equal ("r")
+ Algorithm.compute("zr").get should equal ("sr")
+ Algorithm.compute("zrz").get should equal ("srs")
+
+ // q
+ Algorithm.compute("q").get should equal ("k")
+ Algorithm.compute("zq").get should equal ("sk")
+ Algorithm.compute("zqz").get should equal ("sks")
+
+ // p
+ Algorithm.compute("p").get should equal ("p")
+ Algorithm.compute("zp").get should equal ("sp")
+ Algorithm.compute("zph").get should equal ("sf")
+ Algorithm.compute("zpz").get should equal ("sps")
+
+ // o
+ Algorithm.compute("o").get should equal ("o")
+ Algorithm.compute("zo").get should equal ("s")
+
+ // n
+ Algorithm.compute("n").get should equal ("n")
+ Algorithm.compute("zn").get should equal ("sn")
+ Algorithm.compute("znz").get should equal ("sns")
+
+ // m
+ Algorithm.compute("m").get should equal ("m")
+ Algorithm.compute("zm").get should equal ("sm")
+ Algorithm.compute("zmz").get should equal ("sms")
+
+ // l
+ Algorithm.compute("l").get should equal ("l")
+ Algorithm.compute("zl").get should equal ("sl")
+ Algorithm.compute("zlz").get should equal ("sls")
+
+ // k
+ Algorithm.compute("k").get should equal ("k")
+ Algorithm.compute("zk").get should equal ("sk")
+ Algorithm.compute("zck").get should equal ("sk")
+
+ // j
+ Algorithm.compute("j").get should equal ("j")
+ Algorithm.compute("zj").get should equal ("sj")
+ Algorithm.compute("zjz").get should equal ("sjs")
+
+ // i
+ Algorithm.compute("i").get should equal ("i")
+ Algorithm.compute("zi").get should equal ("s")
+
+ // h
+ Algorithm.compute("h").get should equal ("h") // php wrongly says nothing
+ Algorithm.compute("zh").get should equal ("sh") // php wrongly says s
+ Algorithm.compute("zah").get should equal ("s")
+ Algorithm.compute("zchh").get should equal ("sx")
+ Algorithm.compute("ha").get should equal ("h")
+
+ // g
+ Algorithm.compute("g").get should equal ("k")
+ Algorithm.compute("zg").get should equal ("sk")
+ Algorithm.compute("zgh").get should equal ("skh") // php wrongly says sf
+ Algorithm.compute("zghz").get should equal ("shs") // php wrongly says sfs
+ Algorithm.compute("zgha").get should equal ("sh") // php wrongly says sf others wrongly say skh
+ Algorithm.compute("zgn").get should equal ("sn")
+ Algorithm.compute("zgns").get should equal ("skns")
+ Algorithm.compute("zgned").get should equal ("snt") // others wrongly says sknt
+ Algorithm.compute("zgneds").get should equal ("sknts") // php wrongly says snts
+ Algorithm.compute("zgi").get should equal ("sj")
+ Algorithm.compute("zgiz").get should equal ("sjs")
+ Algorithm.compute("zge").get should equal ("sj")
+ Algorithm.compute("zgez").get should equal ("sjs")
+ Algorithm.compute("zgy").get should equal ("sj")
+ Algorithm.compute("zgyz").get should equal ("sjs")
+ Algorithm.compute("zgz").get should equal ("sks")
+
+ // f
+ Algorithm.compute("f").get should equal ("f")
+ Algorithm.compute("zf").get should equal ("sf")
+ Algorithm.compute("zfz").get should equal ("sfs")
+
+ // e
+ Algorithm.compute("e").get should equal ("e")
+ Algorithm.compute("ze").get should equal ("s")
+
+ // d
+ Algorithm.compute("d").get should equal ("t")
+ Algorithm.compute("fudge").get should equal ("fjj") // php wrongly says fj
+ Algorithm.compute("dodgy").get should equal ("tjj") // php wrongly says tj others wrongly say tjjy
+ Algorithm.compute("dodgi").get should equal ("tjj") // php wrongly says tj
+ Algorithm.compute("zd").get should equal ("st")
+ Algorithm.compute("zdz").get should equal ("sts")
+
+ // c
+ Algorithm.compute("c").get should equal ("k")
+ Algorithm.compute("zcia").get should equal ("sx")
+ Algorithm.compute("zciaz").get should equal ("sxs")
+ Algorithm.compute("zch").get should equal ("sx")
+ Algorithm.compute("zchz").get should equal ("sxs")
+ Algorithm.compute("zci").get should equal ("ss")
+ Algorithm.compute("zciz").get should equal ("sss")
+ Algorithm.compute("zce").get should equal ("ss")
+ Algorithm.compute("zcez").get should equal ("sss")
+ Algorithm.compute("zcy").get should equal ("ss")
+ Algorithm.compute("zcyz").get should equal ("sss")
+ Algorithm.compute("zsci").get should equal ("ss")
+ Algorithm.compute("zsciz").get should equal ("sss")
+ Algorithm.compute("zsce").get should equal ("ss")
+ Algorithm.compute("zscez").get should equal ("sss")
+ Algorithm.compute("zscy").get should equal ("ss")
+ Algorithm.compute("zscyz").get should equal ("sss")
+ Algorithm.compute("zsch").get should equal ("sskh") // php wrongly says ssx
+ Algorithm.compute("zc").get should equal ("sk")
+ Algorithm.compute("zcz").get should equal ("sks")
+
+ // b
+ Algorithm.compute("b").get should equal ("b")
+ Algorithm.compute("zb").get should equal ("sb")
+ Algorithm.compute("zbz").get should equal ("sbs")
+ Algorithm.compute("zmb").get should equal ("sm")
+
+ // a
+ Algorithm.compute("a").get should equal ("a")
+ Algorithm.compute("za").get should equal ("s")
+
+ // Miscellaneous: whole words rather than single-letter probes.
+ Algorithm.compute("dumb").get should equal ("tm")
+ Algorithm.compute("smith").get should equal ("sm0")
+ Algorithm.compute("school").get should equal ("skhl") // php wrongly says sxl
+ Algorithm.compute("merci").get should equal ("mrs")
+ Algorithm.compute("cool").get should equal ("kl")
+ Algorithm.compute("aebersold").get should equal ("ebrslt")
+ Algorithm.compute("gnagy").get should equal ("nj")
+ Algorithm.compute("knuth").get should equal ("n0")
+ Algorithm.compute("pniewski").get should equal ("nsk")
+ Algorithm.compute("wright").get should equal ("rht") // php wrongly says rft
+ Algorithm.compute("phone").get should equal ("fn")
+ Algorithm.compute("aggregate").get should equal ("akrkt")
+ Algorithm.compute("accuracy").get should equal ("akkrs")
+ Algorithm.compute("encyclopedia").get should equal ("ensklpt")
+ Algorithm.compute("honorificabilitudinitatibus").get should equal ("hnrfkblttnttbs")
+ Algorithm.compute("antidisestablishmentarianism").get should equal ("anttsstblxmntrnsm")
+ }
+ }
+ }
+ }
+ "MetaphoneAlgorithm companion object" should provide {
+ "pass-through compute method" should returns {
+ "same value as class" in { // same input/expectation as the "dumb" row in the table above
+ MetaphoneAlgorithm.compute("dumb").get should equal ("tm")
+ }
+ }
+ }
+}
+
+object MetaphoneAlgorithmSpec {
+  // Algorithm instance imported by the spec class, obtained from MetaphoneAlgorithm's no-arg apply.
+  private final val Algorithm = MetaphoneAlgorithm()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala
new file mode 100755
index 0000000..9a029d8
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala
@@ -0,0 +1,54 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class MetaphoneMetricSpec extends ScalaTest {
+  import MetaphoneMetricSpec.Metric
+
+  // Each example walks a small table of argument pairs through Metric.compare and checks
+  // either that no result is defined or the Boolean the result carries.
+  "MetaphoneMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          for ((left, right) <- Seq(("", ""), ("abc", ""), ("", "xyz")))
+            Metric.compare(left, right).isDefined should be (false)
+        }
+      }
+      "non-phonetic arguments" should returns {
+        "None" in {
+          for ((left, right) <- Seq(("123", "123"), ("123", ""), ("", "123")))
+            Metric.compare(left, right).isDefined should be (false)
+        }
+      }
+      "phonetically similar arguments" should returns {
+        "Boolean indicating true" in {
+          for ((left, right) <- Seq(("dumb", "dum"), ("smith", "smeth"), ("merci", "mercy")))
+            Metric.compare(left, right).get should be (true)
+        }
+      }
+      "phonetically dissimilar arguments" should returns {
+        "Boolean indicating false" in {
+          for ((left, right) <- Seq(("dumb", "gum"), ("smith", "kiss"), ("merci", "burpy")))
+            Metric.compare(left, right).get should be (false)
+        }
+      }
+    }
+  }
+  "MetaphoneMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        // Same pair as the first dissimilar example above, called on the companion directly.
+        val viaCompanion = MetaphoneMetric.compare("dumb", "gum")
+        viaCompanion.get should be (false)
+      }
+    }
+  }
+}
+
+object MetaphoneMetricSpec {
+  // Metric instance imported by the spec class, obtained from MetaphoneMetric's no-arg apply.
+  private final val Metric = MetaphoneMetric()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala
new file mode 100755
index 0000000..6a0f113
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala
@@ -0,0 +1,204 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class NysiisAlgorithmSpec extends ScalaTest { // Expectation table for NysiisAlgorithm.compute on String input.
+ import NysiisAlgorithmSpec.Algorithm
+
+ "NysiisAlgorithm" should provide {
+ "compute method" when passed {
+ "empty argument" should returns {
+ "None" in { // "" yields no code, so the result must be undefined
+ Algorithm.compute("").isDefined should be (false)
+ }
+ }
+ "non-phonetic argument" should returns {
+ "None" in { // digits only: nothing to encode, so the result must be undefined
+ Algorithm.compute("123").isDefined should be (false)
+ }
+ }
+ "phonetic argument" should returns {
+ "Some" in { // each letter is probed alone and doubled, then grouped cases follow; "dropby" notes flag deliberate disagreement with another implementation (which one is not shown here)
+ // a
+ Algorithm.compute("a").get should equal ("a")
+ Algorithm.compute("aa").get should equal ("a")
+
+ // b
+ Algorithm.compute("b").get should equal ("b")
+ Algorithm.compute("bb").get should equal ("bb")
+
+ // c
+ Algorithm.compute("c").get should equal ("c")
+ Algorithm.compute("cc").get should equal ("cc")
+
+ // d
+ Algorithm.compute("d").get should equal ("d")
+ Algorithm.compute("dd").get should equal ("dd")
+
+ // e
+ Algorithm.compute("e").get should equal ("e")
+ Algorithm.compute("ee").get should equal ("y")
+
+ // f
+ Algorithm.compute("f").get should equal ("f")
+ Algorithm.compute("ff").get should equal ("ff")
+
+ // g
+ Algorithm.compute("g").get should equal ("g")
+ Algorithm.compute("gg").get should equal ("gg")
+
+ // h
+ Algorithm.compute("h").get should equal ("h")
+ Algorithm.compute("hh").get should equal ("hh")
+
+ // i
+ Algorithm.compute("i").get should equal ("i")
+ Algorithm.compute("ii").get should equal ("i")
+
+ // j
+ Algorithm.compute("j").get should equal ("j")
+ Algorithm.compute("jj").get should equal ("jj")
+
+ // k
+ Algorithm.compute("k").get should equal ("c")
+ Algorithm.compute("kk").get should equal ("cc")
+
+ // l
+ Algorithm.compute("l").get should equal ("l")
+ Algorithm.compute("ll").get should equal ("ll")
+
+ // m
+ Algorithm.compute("m").get should equal ("m")
+ Algorithm.compute("mm").get should equal ("mn")
+
+ // n
+ Algorithm.compute("n").get should equal ("n")
+ Algorithm.compute("nn").get should equal ("nn")
+
+ // o
+ Algorithm.compute("o").get should equal ("o")
+ Algorithm.compute("oo").get should equal ("o")
+
+ // p
+ Algorithm.compute("p").get should equal ("p")
+ Algorithm.compute("pp").get should equal ("pp")
+
+ // q
+ Algorithm.compute("q").get should equal ("q")
+ Algorithm.compute("qq").get should equal ("qg")
+
+ // r
+ Algorithm.compute("r").get should equal ("r")
+ Algorithm.compute("rr").get should equal ("rr")
+
+ // s
+ Algorithm.compute("s").get should equal ("s")
+ Algorithm.compute("ss").get should equal ("s")
+
+ // t
+ Algorithm.compute("t").get should equal ("t")
+ Algorithm.compute("tt").get should equal ("tt")
+
+ // u
+ Algorithm.compute("u").get should equal ("u")
+ Algorithm.compute("uu").get should equal ("u")
+
+ // v
+ Algorithm.compute("v").get should equal ("v")
+ Algorithm.compute("vv").get should equal ("vv")
+
+ // w
+ Algorithm.compute("w").get should equal ("w")
+ Algorithm.compute("ww").get should equal ("ww")
+
+ // x
+ Algorithm.compute("x").get should equal ("x")
+ Algorithm.compute("xx").get should equal ("xx")
+
+ // y
+ Algorithm.compute("y").get should equal ("y")
+ Algorithm.compute("yy").get should equal ("yy")
+
+ // z
+ Algorithm.compute("z").get should equal ("z")
+ Algorithm.compute("zz").get should equal ("z")
+
+ // Head cases. (the "k" row repeats the single-letter probe from the k section)
+ Algorithm.compute("mac").get should equal ("mc")
+ Algorithm.compute("kn").get should equal ("nn")
+ Algorithm.compute("k").get should equal ("c")
+ Algorithm.compute("ph").get should equal ("ff")
+ Algorithm.compute("pf").get should equal ("ff")
+ Algorithm.compute("sch").get should equal ("s") // dropby wrongly says ss
+
+ // Last cases. (the "ee" row repeats the doubled-letter probe from the e section)
+ Algorithm.compute("ee").get should equal ("y")
+ Algorithm.compute("ie").get should equal ("y")
+ Algorithm.compute("dt").get should equal ("d")
+ Algorithm.compute("rt").get should equal ("d")
+ Algorithm.compute("rd").get should equal ("d")
+ Algorithm.compute("nt").get should equal ("d")
+ Algorithm.compute("nd").get should equal ("d")
+
+ // Core cases.
+ Algorithm.compute("eev").get should equal ("eaf")
+ Algorithm.compute("zev").get should equal ("zaf")
+ Algorithm.compute("kkn").get should equal ("cn")
+ Algorithm.compute("sschn").get should equal ("ssn")
+ Algorithm.compute("pph").get should equal ("pf")
+
+ // Miscellaneous: whole words rather than short probes.
+ Algorithm.compute("macdonald").get should equal ("mcdanald")
+ Algorithm.compute("phone").get should equal ("ffan")
+ Algorithm.compute("aggregate").get should equal ("agragat")
+ Algorithm.compute("accuracy").get should equal ("acaracy")
+ Algorithm.compute("encyclopedia").get should equal ("encyclapad")
+ Algorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab")
+ Algorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn")
+
+ // Dropby. (surname list; presumably taken from the same "dropby" source the trailing notes refer to -- confirm)
+ Algorithm.compute("macintosh").get should equal ("mcant")
+ Algorithm.compute("knuth").get should equal ("nnat")
+ Algorithm.compute("koehn").get should equal ("can") // dropby wrongly says c
+ Algorithm.compute("phillipson").get should equal ("ffalapsan")
+ Algorithm.compute("pfeister").get should equal ("ffastar")
+ Algorithm.compute("schoenhoeft").get should equal ("ssanaft")
+ Algorithm.compute("mckee").get should equal ("mcy")
+ Algorithm.compute("heitschmedt").get should equal ("hatsnad")
+ Algorithm.compute("bart").get should equal ("bad")
+ Algorithm.compute("hurd").get should equal ("had")
+ Algorithm.compute("hunt").get should equal ("had")
+ Algorithm.compute("westerlund").get should equal ("wastarlad")
+ Algorithm.compute("casstevens").get should equal ("castafan")
+ Algorithm.compute("vasquez").get should equal ("vasg")
+ Algorithm.compute("frazier").get should equal ("frasar")
+ Algorithm.compute("bowman").get should equal ("banan")
+ Algorithm.compute("mcknight").get should equal ("mcnagt")
+ Algorithm.compute("rickert").get should equal ("racad")
+ Algorithm.compute("deutsch").get should equal ("dat") // dropby wrongly says dats
+ Algorithm.compute("westphal").get should equal ("wastfal")
+ Algorithm.compute("shriver").get should equal ("shravar")
+ Algorithm.compute("kuhl").get should equal ("cal") // dropby wrongly says c
+ Algorithm.compute("rawson").get should equal ("rasan")
+ Algorithm.compute("jiles").get should equal ("jal")
+ Algorithm.compute("carraway").get should equal ("caray")
+ Algorithm.compute("yamada").get should equal ("yanad")
+ }
+ }
+ }
+ }
+ "NysiisAlgorithm companion object" should provide {
+ "pass-through compute method" should returns {
+ "same value as class" in { // same input/expectation as the "macdonald" row in the table above
+ NysiisAlgorithm.compute("macdonald").get should equal ("mcdanald")
+ }
+ }
+ }
+}
+
+object NysiisAlgorithmSpec {
+  // Algorithm instance imported by the spec class, obtained from NysiisAlgorithm's no-arg apply.
+  private final val Algorithm = NysiisAlgorithm()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala
new file mode 100755
index 0000000..c6929a2
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala
@@ -0,0 +1,50 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class NysiisMetricSpec extends ScalaTest {
+  import NysiisMetricSpec.Metric
+
+  // The undefined-result examples walk a small table of argument pairs; the Boolean examples
+  // each check a single pair.
+  "NysiisMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          for ((left, right) <- Seq(("", ""), ("abc", ""), ("", "xyz")))
+            Metric.compare(left, right).isDefined should be (false)
+        }
+      }
+      "non-phonetic arguments" should returns {
+        "None" in {
+          for ((left, right) <- Seq(("123", "123"), ("123", ""), ("", "123")))
+            Metric.compare(left, right).isDefined should be (false)
+        }
+      }
+      "phonetically similar arguments" should returns {
+        "Boolean indicating true" in {
+          val similar = Metric.compare("ham", "hum")
+          similar.get should be (true)
+        }
+      }
+      "phonetically dissimilar arguments" should returns {
+        "Boolean indicating false" in {
+          val dissimilar = Metric.compare("dumb", "gum")
+          dissimilar.get should be (false)
+        }
+      }
+    }
+  }
+  "NysiisMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        // Same pair as the dissimilar example above, called on the companion directly.
+        val viaCompanion = NysiisMetric.compare("dumb", "gum")
+        viaCompanion.get should be (false)
+      }
+    }
+  }
+}
+
+object NysiisMetricSpec {
+  // Metric instance imported by the spec class, obtained from NysiisMetric's no-arg apply.
+  private final val Metric = NysiisMetric()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala
new file mode 100755
index 0000000..1298d2d
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala
@@ -0,0 +1,221 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class RefinedNysiisAlgorithmSpec extends ScalaTest { // Expectation table for RefinedNysiisAlgorithm.compute on String input.
+ import RefinedNysiisAlgorithmSpec.Algorithm
+
+ "RefinedNysiisAlgorithm" should provide {
+ "compute method" when passed {
+ "empty argument" should returns {
+ "None" in { // "" yields no code, so the result must be undefined
+ Algorithm.compute("").isDefined should be (false)
+ }
+ }
+ "non-phonetic argument" should returns {
+ "None" in { // digits only: nothing to encode, so the result must be undefined
+ Algorithm.compute("123").isDefined should be (false)
+ }
+ }
+ "phonetic argument" should returns {
+ "Some" in { // each letter is probed alone and doubled, then grouped cases follow; the "dropby" note flags deliberate disagreement with another implementation (which one is not shown here)
+ // a
+ Algorithm.compute("a").get should equal ("a")
+ Algorithm.compute("aa").get should equal ("a")
+
+ // b
+ Algorithm.compute("b").get should equal ("b")
+ Algorithm.compute("bb").get should equal ("b")
+
+ // c
+ Algorithm.compute("c").get should equal ("c")
+ Algorithm.compute("cc").get should equal ("c")
+
+ // d
+ Algorithm.compute("d").get should equal ("d")
+ Algorithm.compute("dd").get should equal ("d")
+
+ // e
+ Algorithm.compute("e").get should equal ("e")
+ Algorithm.compute("ee").get should equal ("y")
+
+ // f
+ Algorithm.compute("f").get should equal ("f")
+ Algorithm.compute("ff").get should equal ("f")
+
+ // g
+ Algorithm.compute("g").get should equal ("g")
+ Algorithm.compute("gg").get should equal ("g")
+
+ // h
+ Algorithm.compute("h").get should equal ("h")
+ Algorithm.compute("hh").get should equal ("h")
+
+ // i
+ Algorithm.compute("i").get should equal ("i")
+ Algorithm.compute("ii").get should equal ("i")
+
+ // j
+ Algorithm.compute("j").get should equal ("j")
+ Algorithm.compute("jj").get should equal ("j")
+
+ // k
+ Algorithm.compute("k").get should equal ("c")
+ Algorithm.compute("kk").get should equal ("c")
+
+ // l
+ Algorithm.compute("l").get should equal ("l")
+ Algorithm.compute("ll").get should equal ("l")
+
+ // m
+ Algorithm.compute("m").get should equal ("m")
+ Algorithm.compute("mm").get should equal ("mn")
+
+ // n
+ Algorithm.compute("n").get should equal ("n")
+ Algorithm.compute("nn").get should equal ("n")
+
+ // o
+ Algorithm.compute("o").get should equal ("o")
+ Algorithm.compute("oo").get should equal ("o")
+
+ // p
+ Algorithm.compute("p").get should equal ("p")
+ Algorithm.compute("pp").get should equal ("p")
+
+ // q
+ Algorithm.compute("q").get should equal ("q")
+ Algorithm.compute("qq").get should equal ("qg")
+
+ // r
+ Algorithm.compute("r").get should equal ("r")
+ Algorithm.compute("rr").get should equal ("r")
+
+ // s
+ Algorithm.compute("s").get should equal ("s")
+ Algorithm.compute("ss").get should equal ("s")
+
+ // t
+ Algorithm.compute("t").get should equal ("t")
+ Algorithm.compute("tt").get should equal ("t")
+
+ // u
+ Algorithm.compute("u").get should equal ("u")
+ Algorithm.compute("uu").get should equal ("u")
+
+ // v
+ Algorithm.compute("v").get should equal ("v")
+ Algorithm.compute("vv").get should equal ("v")
+
+ // w
+ Algorithm.compute("w").get should equal ("w")
+ Algorithm.compute("ww").get should equal ("w")
+
+ // x
+ Algorithm.compute("x").get should equal ("x")
+ Algorithm.compute("xx").get should equal ("x")
+
+ // y
+ Algorithm.compute("y").get should equal ("y")
+ Algorithm.compute("yy").get should equal ("y")
+ Algorithm.compute("ybyb").get should equal ("ybab")
+
+ // z
+ Algorithm.compute("z").get should equal ("z")
+ Algorithm.compute("zz").get should equal ("z")
+
+ // Head cases.
+ Algorithm.compute("mac").get should equal ("mc")
+ Algorithm.compute("pf").get should equal ("f")
+
+ // Last cases. (the "ee" row repeats the doubled-letter probe from the e section)
+ Algorithm.compute("ix").get should equal ("ic")
+ Algorithm.compute("ex").get should equal ("ec")
+ Algorithm.compute("ye").get should equal ("y")
+ Algorithm.compute("ee").get should equal ("y")
+ Algorithm.compute("ie").get should equal ("y")
+ Algorithm.compute("dt").get should equal ("d")
+ Algorithm.compute("rt").get should equal ("d")
+ Algorithm.compute("rd").get should equal ("d")
+ Algorithm.compute("nt").get should equal ("d")
+ Algorithm.compute("nd").get should equal ("d")
+
+ // Core cases. (most probes wrap the letters under test in b...b so they sit mid-word)
+ Algorithm.compute("bevb").get should equal ("bafb")
+ Algorithm.compute("bghtb").get should equal ("bgtb")
+ Algorithm.compute("bdgb").get should equal ("bgb")
+ Algorithm.compute("bphb").get should equal ("bfb")
+ Algorithm.compute("bknb").get should equal ("bnb")
+ Algorithm.compute("bshb").get should equal ("bsb")
+ Algorithm.compute("bschb").get should equal ("bsb")
+ Algorithm.compute("bywb").get should equal ("bab")
+ Algorithm.compute("byw").get should equal ("by")
+ Algorithm.compute("ywb").get should equal ("yb")
+ Algorithm.compute("bwrb").get should equal ("brb")
+
+ // Transcode cases.
+ Algorithm.compute("bay").get should equal ("by")
+
+ // Miscellaneous: whole words rather than short probes.
+ Algorithm.compute("macdonald").get should equal ("mcdanald")
+ Algorithm.compute("phone").get should equal ("fan")
+ Algorithm.compute("aggregate").get should equal ("agragat")
+ Algorithm.compute("accuracy").get should equal ("acaracy")
+ Algorithm.compute("encyclopedia").get should equal ("encaclapad")
+ Algorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab")
+ Algorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn")
+
+ // Dropby. (surname list; presumably taken from the same "dropby" source the trailing note refers to -- confirm)
+ Algorithm.compute("edwards").get should equal ("edwad")
+ Algorithm.compute("parez").get should equal ("par")
+ Algorithm.compute("macintosh").get should equal ("mcantas")
+ Algorithm.compute("phillipson").get should equal ("falapsan")
+ Algorithm.compute("haddix").get should equal ("hadac")
+ Algorithm.compute("essex").get should equal ("esac")
+ Algorithm.compute("moye").get should equal ("my")
+ Algorithm.compute("mckee").get should equal ("mcy")
+ Algorithm.compute("mackie").get should equal ("mcy")
+ Algorithm.compute("heitschmidt").get should equal ("hatsnad")
+ Algorithm.compute("bart").get should equal ("bad")
+ Algorithm.compute("hurd").get should equal ("had")
+ Algorithm.compute("hunt").get should equal ("had")
+ Algorithm.compute("westerlund").get should equal ("wastarlad")
+ Algorithm.compute("evers").get should equal ("evar")
+ Algorithm.compute("devito").get should equal ("dafat")
+ Algorithm.compute("rawson").get should equal ("rasan")
+ Algorithm.compute("shoulders").get should equal ("saldar")
+ Algorithm.compute("leighton").get should equal ("lagtan")
+ Algorithm.compute("wooldridge").get should equal ("waldrag")
+ Algorithm.compute("oliphant").get should equal ("olafad")
+ Algorithm.compute("hatchett").get should equal ("hatcat")
+ Algorithm.compute("mcknight").get should equal ("mcnagt")
+ Algorithm.compute("rickert").get should equal ("racad")
+ Algorithm.compute("bowman").get should equal ("banan")
+ Algorithm.compute("vasquez").get should equal ("vasg")
+ Algorithm.compute("bashaw").get should equal ("bas")
+ Algorithm.compute("schoenhoeft").get should equal ("sanaft") // dropby wrongly says scanaft
+ Algorithm.compute("heywood").get should equal ("had")
+ Algorithm.compute("hayman").get should equal ("hanan")
+ Algorithm.compute("seawright").get should equal ("saragt")
+ Algorithm.compute("kratzer").get should equal ("cratsar")
+ Algorithm.compute("canaday").get should equal ("canady")
+ Algorithm.compute("crepeau").get should equal ("crap")
+ }
+ }
+ }
+ }
+ "RefinedNysiisAlgorithm companion object" should provide {
+ "pass-through compute method" should returns {
+ "same value as class" in { // same input/expectation as the "macdonald" row in the table above
+ RefinedNysiisAlgorithm.compute("macdonald").get should equal ("mcdanald")
+ }
+ }
+ }
+}
+
+object RefinedNysiisAlgorithmSpec {
+  // Algorithm instance imported by the spec class, obtained from RefinedNysiisAlgorithm's no-arg apply.
+  private final val Algorithm = RefinedNysiisAlgorithm()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala
new file mode 100755
index 0000000..ca9d2ec
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala
@@ -0,0 +1,50 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compare` method of [[RefinedNysiisMetric]], executed through the JUnit runner.
+ *
+ * Asserted behaviour: `None` for the empty and digit-only inputs listed below, a defined
+ * `true` for "ham"/"hum" and a defined `false` for "dumb"/"gum". The companion object's
+ * pass-through `compare` is checked with the same dissimilar pair.
+ */
+@RunWith(classOf[JUnitRunner])
+final class RefinedNysiisMetricSpec extends ScalaTest {
+  import RefinedNysiisMetricSpec.Metric
+
+  "RefinedNysiisMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          // Both sides empty, then each side empty in turn.
+          Seq(("", ""), ("abc", ""), ("", "xyz")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "non-phonetic arguments" should returns {
+        "None" in {
+          // Digit-only input on either or both sides.
+          Seq(("123", "123"), ("123", ""), ("", "123")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "phonetically similar arguments" should returns {
+        "Boolean indicating true" in {
+          val outcome = Metric.compare("ham", "hum")
+          outcome.get should be (true)
+        }
+      }
+      "phonetically dissimilar arguments" should returns {
+        "Boolean indicating false" in {
+          val outcome = Metric.compare("dumb", "gum")
+          outcome.get should be (false)
+        }
+      }
+    }
+  }
+  "RefinedNysiisMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        val outcome = RefinedNysiisMetric.compare("dumb", "gum")
+        outcome.get should be (false)
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the metric instance that the spec class imports as `Metric`. */
+object RefinedNysiisMetricSpec {
+  // Access modifier first, then `final`, matching the similarity specs in this change set.
+  private final val Metric = RefinedNysiisMetric()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala
new file mode 100755
index 0000000..254bf06
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala
@@ -0,0 +1,175 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compute` method of [[RefinedSoundexAlgorithm]], executed through the JUnit runner.
+ *
+ * Asserted behaviour: `None` for "" and "123"; for every letter a-z, the single letter and the
+ * doubled letter produce the same two-character code; digits trailing a leading letter do not
+ * alter the code; and each group of similar surnames below shares one code.
+ */
+@RunWith(classOf[JUnitRunner])
+final class RefinedSoundexAlgorithmSpec extends ScalaTest {
+  import RefinedSoundexAlgorithmSpec.Algorithm
+
+  "RefinedSoundexAlgorithm" should provide {
+    "compute method" when passed {
+      "empty argument" should returns {
+        "None" in {
+          Algorithm.compute("").isDefined should be (false)
+        }
+      }
+      "non-phonetic argument" should returns {
+        "None" in {
+          Algorithm.compute("123").isDefined should be (false)
+        }
+      }
+      "phonetic argument" should returns {
+        "Some" in {
+          // Expected code per letter; the doubled letter must yield the same code.
+          val codeByLetter = Seq(
+            ("a", "a0"), ("b", "b1"), ("c", "c3"), ("d", "d6"), ("e", "e0"),
+            ("f", "f2"), ("g", "g4"), ("h", "h0"), ("i", "i0"), ("j", "j4"),
+            ("k", "k3"), ("l", "l7"), ("m", "m8"), ("n", "n8"), ("o", "o0"),
+            ("p", "p1"), ("q", "q5"), ("r", "r9"), ("s", "s3"), ("t", "t6"),
+            ("u", "u0"), ("v", "v2"), ("w", "w0"), ("x", "x5"), ("y", "y0"),
+            ("z", "z5")
+          )
+          codeByLetter.foreach { case (letter, code) =>
+            Algorithm.compute(letter).get should equal (code)
+            Algorithm.compute(letter + letter).get should equal (code)
+          }
+
+          // Starting with letter then numbers.
+          Seq(("x123456", "x5"), ("a123456", "a0"), ("f123456", "f2")).foreach {
+            case (input, code) => Algorithm.compute(input).get should equal (code)
+          }
+
+          // Miscellaneous: every word in a group must map to the group's code.
+          val wordsByCode = Seq(
+            ("b1905", Seq("braz", "broz")),
+            ("c30908", Seq("caren", "carren", "coram", "corran", "curreen", "curwen")),
+            ("h093", Seq("hairs", "hark", "hars", "hayers", "heers", "hiers")),
+            ("l7081096", Seq(
+              "lambard", "lambart", "lambert", "lambird", "lampaert",
+              "lampart", "lamport", "limbert", "lombard"
+            )),
+            ("n807608", Seq("nolton", "noulton"))
+          )
+          for ((code, words) <- wordsByCode; word <- words) {
+            Algorithm.compute(word).get should equal (code)
+          }
+        }
+      }
+    }
+  }
+  "RefinedSoundexAlgorithm companion object" should provide {
+    "pass-through compute method" should returns {
+      "same value as class" in {
+        val encoded = RefinedSoundexAlgorithm.compute("braz")
+        encoded.get should equal ("b1905")
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the algorithm instance that the spec class imports as `Algorithm`. */
+object RefinedSoundexAlgorithmSpec {
+  // Access modifier first, then `final`, matching the similarity specs in this change set.
+  private final val Algorithm = RefinedSoundexAlgorithm()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala
new file mode 100755
index 0000000..cb6a222
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala
@@ -0,0 +1,50 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compare` method of [[RefinedSoundexMetric]], executed through the JUnit runner.
+ *
+ * Asserted behaviour: `None` for the empty and digit-only inputs listed below, a defined
+ * `true` for "robert"/"rupert" and a defined `false` for "robert"/"rubin". The companion
+ * object's pass-through `compare` is checked with the same dissimilar pair.
+ */
+@RunWith(classOf[JUnitRunner])
+final class RefinedSoundexMetricSpec extends ScalaTest {
+  import RefinedSoundexMetricSpec.Metric
+
+  "RefinedSoundexMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          // Both sides empty, then each side empty in turn.
+          Seq(("", ""), ("abc", ""), ("", "xyz")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "non-phonetic arguments" should returns {
+        "None" in {
+          // Digit-only input on either or both sides.
+          Seq(("123", "123"), ("123", ""), ("", "123")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "phonetically similar arguments" should returns {
+        "Boolean indicating true" in {
+          val outcome = Metric.compare("robert", "rupert")
+          outcome.get should be (true)
+        }
+      }
+      "phonetically dissimilar arguments" should returns {
+        "Boolean indicating false" in {
+          val outcome = Metric.compare("robert", "rubin")
+          outcome.get should be (false)
+        }
+      }
+    }
+  }
+  "RefinedSoundexMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        val outcome = RefinedSoundexMetric.compare("robert", "rubin")
+        outcome.get should be (false)
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the metric instance that the spec class imports as `Metric`. */
+object RefinedSoundexMetricSpec {
+  // Access modifier first, then `final`, matching the similarity specs in this change set.
+  private final val Metric = RefinedSoundexMetric()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala
new file mode 100755
index 0000000..5b7deaa
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala
@@ -0,0 +1,174 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compute` method of [[SoundexAlgorithm]], executed through the JUnit runner.
+ *
+ * Asserted behaviour: `None` for "" and "123"; for every letter a-z, the single letter and the
+ * doubled letter both encode to that letter followed by "000"; digits trailing a leading
+ * letter do not alter the code; and the sample words below encode to the listed
+ * four-character codes.
+ */
+@RunWith(classOf[JUnitRunner])
+final class SoundexAlgorithmSpec extends ScalaTest {
+  import SoundexAlgorithmSpec.Algorithm
+
+  "SoundexAlgorithm" should provide {
+    "compute method" when passed {
+      "empty argument" should returns {
+        "None" in {
+          Algorithm.compute("").isDefined should be (false)
+        }
+      }
+      "non-phonetic argument" should returns {
+        "None" in {
+          Algorithm.compute("123").isDefined should be (false)
+        }
+      }
+      "phonetic argument" should returns {
+        "Some" in {
+          // A lone letter, or the same letter doubled, encodes to the letter padded with zeros.
+          ('a' to 'z').map(_.toString).foreach { letter =>
+            Algorithm.compute(letter).get should equal (letter + "000")
+            Algorithm.compute(letter + letter).get should equal (letter + "000")
+          }
+
+          // Starting with letter then numbers.
+          Seq("x", "a", "f").foreach { letter =>
+            Algorithm.compute(letter + "123456").get should equal (letter + "000")
+          }
+
+          // Miscellaneous. Each word appears once: the original list repeated the
+          // "tymczak" and "pfister" assertions verbatim.
+          val expectedCodes = Seq(
+            ("abc", "a120"),
+            ("xyz", "x200"),
+            ("robert", "r163"),
+            ("rupert", "r163"),
+            ("rubin", "r150"),
+            ("ashcraft", "a261"),
+            ("tymczak", "t522"),
+            ("pfister", "p236"),
+            ("euler", "e460"),
+            ("gauss", "g200"),
+            ("hilbert", "h416"),
+            ("knuth", "k530"),
+            ("lloyd", "l300"),
+            ("lukasiewicz", "l222"),
+            ("ashcroft", "a261"),
+            ("ellery", "e460"),
+            ("ghosh", "g200"),
+            ("heilbronn", "h416"),
+            ("kant", "k530"),
+            ("ladd", "l300"),
+            ("lissajous", "l222"),
+            ("fusedale", "f234")
+          )
+          expectedCodes.foreach { case (word, code) =>
+            Algorithm.compute(word).get should equal (code)
+          }
+        }
+      }
+    }
+  }
+  "SoundexAlgorithm companion object" should provide {
+    "pass-through compute method" should returns {
+      "same value as class" in {
+        SoundexAlgorithm.compute("abc").get should equal ("a120")
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the algorithm instance that the spec class imports as `Algorithm`. */
+object SoundexAlgorithmSpec {
+  // Access modifier first, then `final`, matching the similarity specs in this change set.
+  private final val Algorithm = SoundexAlgorithm()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala
new file mode 100755
index 0000000..9fc47d8
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala
@@ -0,0 +1,50 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compare` method of [[SoundexMetric]], executed through the JUnit runner.
+ *
+ * Asserted behaviour: `None` for the empty and digit-only inputs listed below, a defined
+ * `true` for "robert"/"rupert" and a defined `false` for "robert"/"rubin". The companion
+ * object's pass-through `compare` is checked with the same dissimilar pair.
+ */
+@RunWith(classOf[JUnitRunner])
+final class SoundexMetricSpec extends ScalaTest {
+  import SoundexMetricSpec.Metric
+
+  "SoundexMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          // Both sides empty, then each side empty in turn.
+          Seq(("", ""), ("abc", ""), ("", "xyz")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "non-phonetic arguments" should returns {
+        "None" in {
+          // Digit-only input on either or both sides.
+          Seq(("123", "123"), ("123", ""), ("", "123")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "phonetically similar arguments" should returns {
+        "Boolean indicating true" in {
+          val outcome = Metric.compare("robert", "rupert")
+          outcome.get should be (true)
+        }
+      }
+      "phonetically dissimilar arguments" should returns {
+        "Boolean indicating false" in {
+          val outcome = Metric.compare("robert", "rubin")
+          outcome.get should be (false)
+        }
+      }
+    }
+  }
+  "SoundexMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        val outcome = SoundexMetric.compare("robert", "rubin")
+        outcome.get should be (false)
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the metric instance that the spec class imports as `Metric`. */
+object SoundexMetricSpec {
+  // Access modifier first, then `final`, matching the similarity specs in this change set.
+  private final val Metric = SoundexMetric()
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala
new file mode 100755
index 0000000..5ddfc06
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala
@@ -0,0 +1,75 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compare` method of [[DiceSorensenMetric]], executed through the JUnit runner.
+ *
+ * `compare` takes two strings plus a trailing `n` argument (presumably the n-gram size:
+ * inputs shorter than `n` are asserted to yield `None` -- confirm against the metric).
+ * Although one test label says "distance", the asserted scores are 1 for identical input
+ * and 0 for input with nothing in common.
+ */
+@RunWith(classOf[JUnitRunner])
+final class DiceSorensenMetricSpec extends ScalaTest {
+  import DiceSorensenMetricSpec.Metric
+
+  // Unwraps the score for the given `n`; `get` throws if compare returned None.
+  private def scoreOf(first: String, second: String, n: Int) =
+    Metric.compare(first, second)(n).get
+
+  "DiceSorensenMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          Seq(("", ""), ("abc", ""), ("", "xyz")).foreach { case (first, second) =>
+            Metric.compare(first, second)(1).isDefined should be (false)
+          }
+        }
+      }
+      "equal arguments" should returns {
+        "1" in {
+          (1 to 3).foreach { n => scoreOf("abc", "abc", n) should be (1) }
+        }
+      }
+      "unequal arguments" should returns {
+        "0" in {
+          (1 to 3).foreach { n => scoreOf("abc", "xyz", n) should be (0) }
+        }
+      }
+      "invalid arguments" should returns {
+        "None" in {
+          // One argument is shorter than n in every case.
+          val tooShort = Seq(
+            ("n", "naght", 2),
+            ("night", "n", 2),
+            ("ni", "naght", 3),
+            ("night", "na", 3)
+          )
+          tooShort.foreach { case (first, second, n) =>
+            Metric.compare(first, second)(n).isDefined should be (false)
+          }
+        }
+      }
+      "valid arguments" should returns {
+        "Double indicating distance" in {
+          // n = 1
+          scoreOf("night", "nacht", 1) should be (0.6)
+          scoreOf("night", "naght", 1) should be (0.8)
+          scoreOf("context", "contact", 1) should be (0.7142857142857143)
+
+          // n = 2
+          scoreOf("night", "nacht", 2) should be (0.25)
+          scoreOf("night", "naght", 2) should be (0.5)
+          scoreOf("context", "contact", 2) should be (0.5)
+          scoreOf("contextcontext", "contact", 2) should be (0.3157894736842105)
+          scoreOf("context", "contactcontact", 2) should be (0.3157894736842105)
+          scoreOf("ht", "nacht", 2) should be (0.4)
+          scoreOf("xp", "nacht", 2) should be (0)
+          scoreOf("ht", "hththt", 2) should be (0.3333333333333333)
+
+          // n = 3
+          scoreOf("night", "nacht", 3) should be (0)
+          scoreOf("night", "naght", 3) should be (0.3333333333333333)
+          scoreOf("context", "contact", 3) should be (0.4)
+        }
+      }
+    }
+  }
+  "DiceSorensenMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        val outcome = DiceSorensenMetric.compare("context", "contact")(3)
+        outcome.get should be (0.4)
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the metric instance that the spec class imports as `Metric`. */
+object DiceSorensenMetricSpec { private final val Metric = DiceSorensenMetric() }
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala
new file mode 100755
index 0000000..c69d860
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala
@@ -0,0 +1,52 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compare` method of [[HammingMetric]], executed through the JUnit runner.
+ *
+ * Asserted behaviour: `None` when either argument is empty, 0 for identical input, and
+ * otherwise the integer listed beside each pair below.
+ */
+@RunWith(classOf[JUnitRunner])
+final class HammingMetricSpec extends ScalaTest {
+  import HammingMetricSpec.Metric
+
+  "HammingMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          Seq(("", ""), ("abc", ""), ("", "xyz")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "equal arguments" should returns {
+        "0" in {
+          Seq("abc", "123").foreach { same =>
+            Metric.compare(same, same).get should be (0)
+          }
+        }
+      }
+      "unequal arguments" should returns {
+        "Int indicating distance" in {
+          Seq(("abc", "xyz"), ("123", "456")).foreach { case (first, second) =>
+            Metric.compare(first, second).get should be (3)
+          }
+        }
+      }
+      "valid arguments" should returns {
+        "Int indicating distance" in {
+          val expectedDistances = Seq(
+            ("toned", "roses", 3),
+            ("1011101", "1001001", 2),
+            ("2173896", "2233796", 3)
+          )
+          expectedDistances.foreach { case (first, second, distance) =>
+            Metric.compare(first, second).get should be (distance)
+          }
+        }
+      }
+    }
+  }
+  "HammingMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        val outcome = HammingMetric.compare("2173896", "2233796")
+        outcome.get should be (3)
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the metric instance that the spec class imports as `Metric`. */
+object HammingMetricSpec { private final val Metric = HammingMetric() }
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala
new file mode 100755
index 0000000..17bc3ef
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala
@@ -0,0 +1,77 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compare` method of [[JaccardMetric]], executed through the JUnit runner.
+ *
+ * `compare` takes two strings plus a trailing `n` argument (presumably the n-gram size:
+ * inputs shorter than `n` are asserted to yield `None` -- confirm against the metric).
+ * Although one test label says "distance", the asserted scores are 1 for identical input
+ * and 0 for input with nothing in common.
+ */
+@RunWith(classOf[JUnitRunner])
+final class JaccardMetricSpec extends ScalaTest {
+  import JaccardMetricSpec.Metric
+
+  // Unwraps the score for the given `n`; `get` throws if compare returned None.
+  private def scoreOf(first: String, second: String, n: Int) =
+    Metric.compare(first, second)(n).get
+
+  "JaccardMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          Seq(("", ""), ("abc", ""), ("", "xyz")).foreach { case (first, second) =>
+            Metric.compare(first, second)(1).isDefined should be (false)
+          }
+        }
+      }
+      "equal arguments" should returns {
+        "1" in {
+          (1 to 3).foreach { n => scoreOf("abc", "abc", n) should be (1) }
+        }
+      }
+      "unequal arguments" should returns {
+        "0" in {
+          (1 to 3).foreach { n => scoreOf("abc", "xyz", n) should be (0) }
+        }
+      }
+      "invalid arguments" should returns {
+        "None" in {
+          // One argument is shorter than n in every case.
+          val tooShort = Seq(
+            ("n", "naght", 2),
+            ("night", "n", 2),
+            ("ni", "naght", 3),
+            ("night", "na", 3)
+          )
+          tooShort.foreach { case (first, second, n) =>
+            Metric.compare(first, second)(n).isDefined should be (false)
+          }
+        }
+      }
+      "valid arguments" should returns {
+        "Double indicating distance" in {
+          // n = 1
+          scoreOf("night", "nacht", 1) should be (0.42857142857142855)
+          scoreOf("night", "naght", 1) should be (0.6666666666666666)
+          scoreOf("context", "contact", 1) should be (0.5555555555555556)
+
+          // n = 2
+          scoreOf("night", "nacht", 2) should be (0.14285714285714285)
+          scoreOf("night", "naght", 2) should be (0.3333333333333333)
+          scoreOf("context", "contact", 2) should be (0.3333333333333333)
+          scoreOf("contextcontext", "contact", 2) should be (0.1875)
+          scoreOf("context", "contactcontact", 2) should be (0.1875)
+          scoreOf("ht", "nacht", 2) should be (0.25)
+          scoreOf("xp", "nacht", 2) should be (0)
+          scoreOf("ht", "hththt", 2) should be (0.2)
+
+          // n = 3
+          scoreOf("night", "nacht", 3) should be (0)
+          scoreOf("night", "naght", 3) should be (0.2)
+          scoreOf("context", "contact", 3) should be (0.25)
+        }
+      }
+    }
+  }
+  "JaccardMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        val outcome = JaccardMetric.compare("context", "contact")(3)
+        outcome.get should be (0.25)
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the metric instance that the spec class imports as `Metric`. */
+object JaccardMetricSpec { private final val Metric = JaccardMetric() }
+
+
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala
new file mode 100755
index 0000000..00f4daf
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala
@@ -0,0 +1,66 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compare` method of [[JaroMetric]], executed through the JUnit runner.
+ *
+ * Asserted behaviour: `None` when either argument is empty, 1 for identical input, 0 for
+ * input with nothing in common, and the exact scores listed below otherwise. Although one
+ * test label says "distance", higher scores are asserted for the closer pairs.
+ */
+@RunWith(classOf[JUnitRunner])
+final class JaroMetricSpec extends ScalaTest {
+  import JaroMetricSpec.Metric
+
+  // Unwraps the score; `get` throws if compare returned None.
+  private def scoreOf(first: String, second: String) = Metric.compare(first, second).get
+
+  "JaroMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          Seq(("", ""), ("abc", ""), ("", "xyz")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "equal arguments" should returns {
+        "1" in {
+          Seq("a", "abc", "123").foreach { same => scoreOf(same, same) should be (1) }
+        }
+      }
+      "unequal arguments" should returns {
+        "0" in {
+          Seq(("abc", "xyz"), ("123", "456")).foreach { case (first, second) =>
+            scoreOf(first, second) should be (0)
+          }
+        }
+      }
+      "valid arguments" should returns {
+        "Double indicating distance" in {
+          scoreOf("aa", "a") should be (0.8333333333333334)
+          scoreOf("a", "aa") should be (0.8333333333333334)
+          scoreOf("veryveryverylong", "v") should be (0.6875)
+          scoreOf("v", "veryveryverylong") should be (0.6875)
+          scoreOf("martha", "marhta") should be (0.9444444444444445)
+          scoreOf("dwayne", "duane") should be (0.8222222222222223)
+          scoreOf("dixon", "dicksonx") should be (0.7666666666666666)
+          scoreOf("abcvwxyz", "cabvwxyz") should be (0.9583333333333334)
+          scoreOf("jones", "johnson") should be (0.7904761904761904)
+          scoreOf("henka", "henkan") should be (0.9444444444444445)
+          scoreOf("fvie", "ten") should be (0)
+
+          // Relative ordering: the closer spelling must outscore the more distant one.
+          scoreOf("zac ephron", "zac efron") should be >
+            scoreOf("zac ephron", "kai ephron")
+          scoreOf("brittney spears", "britney spears") should be >
+            scoreOf("brittney spears", "brittney startzman")
+        }
+      }
+    }
+  }
+  "JaroMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        val outcome = JaroMetric.compare("fvie", "ten")
+        outcome.get should be (0)
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the metric instance that the spec class imports as `Metric`. */
+object JaroMetricSpec { private final val Metric = JaroMetric() }
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala
new file mode 100755
index 0000000..06421a8
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala
@@ -0,0 +1,66 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compare` method of [[JaroWinklerMetric]], executed through the JUnit runner.
+ *
+ * Asserted behaviour: `None` when either argument is empty, 1 for identical input, 0 for
+ * input with nothing in common, and the exact scores listed below otherwise. Although one
+ * test label says "distance", higher scores are asserted for the closer pairs.
+ */
+@RunWith(classOf[JUnitRunner])
+final class JaroWinklerMetricSpec extends ScalaTest {
+  import JaroWinklerMetricSpec.Metric
+
+  // Unwraps the score; `get` throws if compare returned None.
+  private def scoreOf(first: String, second: String) = Metric.compare(first, second).get
+
+  "JaroWinklerMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          Seq(("", ""), ("abc", ""), ("", "xyz")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "equal arguments" should returns {
+        "1" in {
+          Seq("a", "abc", "123").foreach { same => scoreOf(same, same) should be (1) }
+        }
+      }
+      "unequal arguments" should returns {
+        "0" in {
+          Seq(("abc", "xyz"), ("123", "456")).foreach { case (first, second) =>
+            scoreOf(first, second) should be (0)
+          }
+        }
+      }
+      "valid arguments" should returns {
+        "Double indicating distance" in {
+          scoreOf("aa", "a") should be (0.8500000000000001)
+          scoreOf("a", "aa") should be (0.8500000000000001)
+          scoreOf("veryveryverylong", "v") should be (0.71875)
+          scoreOf("v", "veryveryverylong") should be (0.71875)
+          scoreOf("martha", "marhta") should be (0.9611111111111111)
+          scoreOf("dwayne", "duane") should be (0.8400000000000001)
+          scoreOf("dixon", "dicksonx") should be (0.8133333333333332)
+          scoreOf("abcvwxyz", "cabvwxyz") should be (0.9583333333333334)
+          scoreOf("jones", "johnson") should be (0.8323809523809523)
+          scoreOf("henka", "henkan") should be (0.9666666666666667)
+          scoreOf("fvie", "ten") should be (0)
+
+          // Relative ordering: the closer spelling must outscore the more distant one.
+          scoreOf("zac ephron", "zac efron") should be >
+            scoreOf("zac ephron", "kai ephron")
+          scoreOf("brittney spears", "britney spears") should be >
+            scoreOf("brittney spears", "brittney startzman")
+        }
+      }
+    }
+  }
+  "JaroWinklerMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        val outcome = JaroWinklerMetric.compare("fvie", "ten")
+        outcome.get should be (0)
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the metric instance that the spec class imports as `Metric`. */
+object JaroWinklerMetricSpec { private final val Metric = JaroWinklerMetric() }
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala
new file mode 100755
index 0000000..51de2ca
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala
@@ -0,0 +1,65 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Spec for the `compare` method of [[LevenshteinMetric]], executed through the JUnit runner.
+ *
+ * Asserted behaviour: `None` when either argument is empty, 0 for identical input, and
+ * otherwise the integer listed beside each pair below. Several pairs are checked in both
+ * argument orders with the same expected result.
+ */
+@RunWith(classOf[JUnitRunner])
+final class LevenshteinMetricSpec extends ScalaTest {
+  import LevenshteinMetricSpec.Metric
+
+  "LevenshteinMetric" should provide {
+    "compare method" when passed {
+      "empty arguments" should returns {
+        "None" in {
+          Seq(("", ""), ("abc", ""), ("", "xyz")).foreach { case (first, second) =>
+            Metric.compare(first, second).isDefined should be (false)
+          }
+        }
+      }
+      "equal arguments" should returns {
+        "0" in {
+          Seq("abc", "123").foreach { same =>
+            Metric.compare(same, same).get should be (0)
+          }
+        }
+      }
+      "unequal arguments" should returns {
+        "Int indicating distance" in {
+          Seq(("abc", "xyz"), ("123", "456")).foreach { case (first, second) =>
+            Metric.compare(first, second).get should be (3)
+          }
+        }
+      }
+      "valid arguments" should returns {
+        "Int indicating distance" in {
+          val expectedDistances = Seq(
+            ("abc", "a", 2),
+            ("a", "abc", 2),
+            ("abc", "c", 2),
+            ("c", "abc", 2),
+            ("sitting", "kitten", 3),
+            ("kitten", "sitting", 3),
+            ("cake", "drake", 2),
+            ("drake", "cake", 2),
+            ("saturday", "sunday", 3),
+            ("sunday", "saturday", 3),
+            ("book", "back", 2),
+            ("dog", "fog", 1),
+            ("foq", "fog", 1),
+            ("fvg", "fog", 1),
+            ("encyclopedia", "encyclopediaz", 1),
+            ("encyclopediz", "encyclopediaz", 1)
+          )
+          expectedDistances.foreach { case (first, second, distance) =>
+            Metric.compare(first, second).get should be (distance)
+          }
+        }
+      }
+    }
+  }
+  "LevenshteinMetric companion object" should provide {
+    "pass-through compare method" should returns {
+      "same value as class" in {
+        val outcome = LevenshteinMetric.compare("fvg", "fog")
+        outcome.get should be (1)
+      }
+    }
+  }
+}
+
+/** Companion fixture: holds the metric instance that the spec class imports as `Metric`. */
+object LevenshteinMetricSpec { private final val Metric = LevenshteinMetric() }
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala
new file mode 100755
index 0000000..39d97e5
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala
@@ -0,0 +1,75 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class NGramMetricSpec extends ScalaTest { // Spec for NGramMetric.compare(a, b)(n), run through JUnit.
+ import NGramMetricSpec.Metric
+
+ "NGramMetric" should provide {
+ "compare method" when passed {
+ "empty arguments" should returns {
+ "None" in {
+ Metric.compare("", "")(1).isDefined should be (false)
+ Metric.compare("abc", "")(1).isDefined should be (false)
+ Metric.compare("", "xyz")(1).isDefined should be (false)
+ }
+ }
+ "equal arguments" should returns {
+ "1" in {
+ Metric.compare("abc", "abc")(1).get should be (1)
+ Metric.compare("abc", "abc")(2).get should be (1)
+ Metric.compare("abc", "abc")(3).get should be (1)
+ }
+ }
+ "unequal arguments" should returns {
+ "0" in {
+ Metric.compare("abc", "xyz")(1).get should be (0)
+ Metric.compare("abc", "xyz")(2).get should be (0)
+ Metric.compare("abc", "xyz")(3).get should be (0)
+ }
+ }
+ "invalid arguments" should returns { // Each case passes one input shorter than the Int in the second argument list.
+ "None" in {
+ Metric.compare("n", "naght")(2).isDefined should be (false)
+ Metric.compare("night", "n")(2).isDefined should be (false)
+ Metric.compare("ni", "naght")(3).isDefined should be (false)
+ Metric.compare("night", "na")(3).isDefined should be (false)
+ }
+ }
+ "valid arguments" should returns {
+ "Double indicating similarity" in { // A similarity, not a distance: equal inputs give 1 and disjoint inputs give 0 above.
+ Metric.compare("night", "nacht")(1).get should be (0.6)
+ Metric.compare("night", "naght")(1).get should be (0.8)
+ Metric.compare("context", "contact")(1).get should be (0.7142857142857143)
+
+ Metric.compare("night", "nacht")(2).get should be (0.25)
+ Metric.compare("night", "naght")(2).get should be (0.5)
+ Metric.compare("context", "contact")(2).get should be (0.5)
+ Metric.compare("contextcontext", "contact")(2).get should be (0.23076923076923078)
+ Metric.compare("context", "contactcontact")(2).get should be (0.23076923076923078)
+ Metric.compare("ht", "nacht")(2).get should be (0.25)
+ Metric.compare("xp", "nacht")(2).get should be (0)
+ Metric.compare("ht", "hththt")(2).get should be (0.2)
+
+ Metric.compare("night", "nacht")(3).get should be (0)
+ Metric.compare("night", "naght")(3).get should be (0.3333333333333333)
+ Metric.compare("context", "contact")(3).get should be (0.4)
+ }
+ }
+ }
+ }
+ "NGramMetric companion object" should provide {
+ "pass-through compare method" should returns {
+ "same value as class" in { // Same inputs and expectation as the last instance-level case.
+ NGramMetric.compare("context", "contact")(3).get should be (0.4)
+ }
+ }
+ }
+}
+
+object NGramMetricSpec { // Fixture holder imported by the NGramMetricSpec class.
+ private final val Metric = NGramMetric() // One metric instance, created with no arguments.
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala
new file mode 100755
index 0000000..32c9650
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala
@@ -0,0 +1,77 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class OverlapMetricSpec extends ScalaTest { // Spec for OverlapMetric.compare(a, b)(n), run through JUnit.
+ import OverlapMetricSpec.Metric
+
+ "OverlapMetric" should provide {
+ "compare method" when passed {
+ "empty arguments" should returns {
+ "None" in {
+ Metric.compare("", "")(1).isDefined should be (false)
+ Metric.compare("abc", "")(1).isDefined should be (false)
+ Metric.compare("", "xyz")(1).isDefined should be (false)
+ }
+ }
+ "equal arguments" should returns {
+ "1" in {
+ Metric.compare("abc", "abc")(1).get should be (1)
+ Metric.compare("abc", "abc")(2).get should be (1)
+ Metric.compare("abc", "abc")(3).get should be (1)
+ }
+ }
+ "unequal arguments" should returns {
+ "0" in {
+ Metric.compare("abc", "xyz")(1).get should be (0)
+ Metric.compare("abc", "xyz")(2).get should be (0)
+ Metric.compare("abc", "xyz")(3).get should be (0)
+ }
+ }
+ "invalid arguments" should returns { // Each case passes one input shorter than the Int in the second argument list.
+ "None" in {
+ Metric.compare("n", "naght")(2).isDefined should be (false)
+ Metric.compare("night", "n")(2).isDefined should be (false)
+ Metric.compare("ni", "naght")(3).isDefined should be (false)
+ Metric.compare("night", "na")(3).isDefined should be (false)
+ }
+ }
+ "valid arguments" should returns {
+ "Double indicating similarity" in { // A similarity, not a distance: equal inputs give 1 and disjoint inputs give 0 above.
+ Metric.compare("bob", "bobman")(1).get should be (1)
+ Metric.compare("bob", "manbobman")(1).get should be (1)
+ Metric.compare("night", "nacht")(1).get should be (0.6)
+ Metric.compare("night", "naght")(1).get should be (0.8)
+ Metric.compare("context", "contact")(1).get should be (0.7142857142857143)
+
+ Metric.compare("night", "nacht")(2).get should be (0.25)
+ Metric.compare("night", "naght")(2).get should be (0.5)
+ Metric.compare("context", "contact")(2).get should be (0.5)
+ Metric.compare("contextcontext", "contact")(2).get should be (0.5)
+ Metric.compare("context", "contactcontact")(2).get should be (0.5)
+ Metric.compare("ht", "nacht")(2).get should be (1)
+ Metric.compare("xp", "nacht")(2).get should be (0)
+ Metric.compare("ht", "hththt")(2).get should be (1)
+
+ Metric.compare("night", "nacht")(3).get should be (0)
+ Metric.compare("night", "naght")(3).get should be (0.3333333333333333)
+ Metric.compare("context", "contact")(3).get should be (0.4)
+ }
+ }
+ }
+ }
+ "OverlapMetric companion object" should provide {
+ "pass-through compare method" should returns {
+ "same value as class" in { // Same inputs and expectation as the last instance-level case.
+ OverlapMetric.compare("context", "contact")(3).get should be (0.4)
+ }
+ }
+ }
+}
+
+object OverlapMetricSpec { // Fixture holder imported by the OverlapMetricSpec class.
+ private final val Metric = OverlapMetric() // One metric instance, created with no arguments.
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
new file mode 100755
index 0000000..638536f
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
@@ -0,0 +1,56 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class RatcliffObershelpMetricSpec extends ScalaTest { // Spec for RatcliffObershelpMetric.compare(a, b), run through JUnit.
+ import RatcliffObershelpMetricSpec.Metric
+
+ "RatcliffObershelpMetric" should provide {
+ "compare method" when passed {
+ "empty arguments" should returns {
+ "None" in {
+ Metric.compare("", "").isDefined should be (false)
+ Metric.compare("abc", "").isDefined should be (false)
+ Metric.compare("", "xyz").isDefined should be (false)
+ }
+ }
+ "equal arguments" should returns {
+ "1" in { // Test name now matches the asserted value.
+ Metric.compare("abc", "abc").get should be (1)
+ Metric.compare("123", "123").get should be (1)
+ }
+ }
+ "unequal arguments" should returns {
+ "0" in { // Test name now matches the asserted value.
+ Metric.compare("abc", "xyz").get should be (0)
+ Metric.compare("123", "456").get should be (0)
+ }
+ }
+ "valid arguments" should returns {
+ "Double indicating similarity" in { // Each pair is checked in both argument orders with the same expectation.
+ Metric.compare("aleksander", "alexandre").get should be (0.7368421052631579)
+ Metric.compare("alexandre", "aleksander").get should be (0.7368421052631579)
+ Metric.compare("pennsylvania", "pencilvaneya").get should be (0.6666666666666666)
+ Metric.compare("pencilvaneya", "pennsylvania").get should be (0.6666666666666666)
+ Metric.compare("abcefglmn", "abefglmo").get should be (0.8235294117647058)
+ Metric.compare("abefglmo", "abcefglmn").get should be (0.8235294117647058)
+ }
+ }
+ }
+ }
+ "RatcliffObershelpMetric companion object" should provide {
+ "pass-through compare method" should returns {
+ "same value as class" in { // Same inputs and expectation as the last instance-level case.
+ RatcliffObershelpMetric.compare("abefglmo", "abcefglmn").get should be (0.8235294117647058)
+ }
+ }
+ }
+}
+
+object RatcliffObershelpMetricSpec { // Fixture holder imported by the RatcliffObershelpMetricSpec class.
+ private final val Metric = RatcliffObershelpMetric() // One metric instance, created with no arguments.
+}
+
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala
new file mode 100755
index 0000000..9c46c89
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala
@@ -0,0 +1,64 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class WeightedLevenshteinMetricSpec extends ScalaTest { // Spec for WeightedLevenshteinMetric.compare(a, b)(weights), run through JUnit.
+ import WeightedLevenshteinMetricSpec.{Metric, Options}
+
+ "WeightedLevenshteinMetric" should provide {
+ "compare method" when passed {
+ "empty arguments" should returns {
+ "None" in {
+ Metric.compare("", "")(Options).isDefined should be (false)
+ Metric.compare("abc", "")(Options).isDefined should be (false)
+ Metric.compare("", "xyz")(Options).isDefined should be (false)
+ }
+ }
+ "equal arguments" should returns {
+ "0" in {
+ Metric.compare("abc", "abc")(Options).get should be (0)
+ Metric.compare("123", "123")(Options).get should be (0)
+ }
+ }
+ "unequal arguments" should returns {
+ "Double indicating distance" in {
+ Metric.compare("abc", "xyz")(Options).get should be (3)
+ Metric.compare("123", "456")(Options).get should be (3)
+ }
+ }
+ "valid arguments" should returns {
+ "Double indicating distance" in { // Expectations imply the Options tuple (10, 0.1, 1) is read as (delete, insert, substitute) weights.
+ Metric.compare("az", "z")(Options).get should be (10) // Dropping one character costs 10.
+ Metric.compare("z", "az")(Options).get should be (0.1) // Adding one character costs 0.1.
+ Metric.compare("a", "z")(Options).get should be (1) // Replacing one character costs 1.
+ Metric.compare("z", "a")(Options).get should be (1)
+ Metric.compare("ab", "yz")(Options).get should be (2)
+ Metric.compare("yz", "ab")(Options).get should be (2)
+ Metric.compare("0", "0123456789")(Options).get should be (0.9)
+ Metric.compare("0123456789", "0")(Options).get should be (90)
+ Metric.compare("book", "back")(Options).get should be (2)
+ Metric.compare("back", "book")(Options).get should be (2)
+ Metric.compare("hosp", "hospital")(Options).get should be (0.4)
+ Metric.compare("hospital", "hosp")(Options).get should be (40)
+ Metric.compare("clmbs blvd", "columbus boulevard")(Options).get should be (0.8)
+ Metric.compare("columbus boulevard", "clmbs blvd")(Options).get should be (80)
+ }
+ }
+ }
+ }
+ "WeightedLevenshteinMetric companion object" should provide {
+ "pass-through compare method" should returns {
+ "same value as class" in { // Same inputs and expectation as one of the instance-level cases.
+ WeightedLevenshteinMetric.compare("hospital", "hosp")(Options).get should be (40)
+ }
+ }
+ }
+}
+
+object WeightedLevenshteinMetricSpec { // Fixture holder imported by the WeightedLevenshteinMetricSpec class.
+ private final val Options = Tuple3[BigDecimal, BigDecimal, BigDecimal](10, 0.1, 1) // Weights the spec passes as compare's second argument list.
+ private final val Metric = WeightedLevenshteinMetric() // One metric instance, created with no arguments.
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala
new file mode 100755
index 0000000..56fdc13
--- /dev/null
+++ b/core/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala
@@ -0,0 +1,69 @@
+package com.rockymadden.stringmetric.tokenization
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class NGramTokenizerSpec extends ScalaTest { // Spec for NGramTokenizer.tokenize(s)(n), run through JUnit.
+ import NGramTokenizerSpec.Tokenizer
+
+ "NGramTokenizer" should provide {
+ "tokenize method" when passed {
+ "empty argument" should returns {
+ "None" in {
+ Tokenizer.tokenize("")(1).isDefined should be (false)
+ }
+ }
+ "invalid n argument" should throws {
+ "IllegalArgumentException" in { // Only the thrown exception is checked, so nothing is asserted on a result.
+ evaluating {
+ Tokenizer.tokenize("")(0)
+ } should produce [IllegalArgumentException]
+
+ evaluating {
+ Tokenizer.tokenize("")(-1)
+ } should produce [IllegalArgumentException]
+ }
+ }
+ "valid argument" should returns {
+ "Array[String]" in { // The same 26-letter input is tokenized with n = 1, 2 and 3.
+ Tokenizer.tokenize("abcdefghijklmnopqrstuvwxyz")(1).get should equal (
+ Array(
+ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
+ "s", "t", "u", "v", "w", "x", "y", "z"
+ )
+ )
+ Tokenizer.tokenize("abcdefghijklmnopqrstuvwxyz")(2).get should equal (
+ Array(
+ "ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl", "lm", "mn", "no", "op",
+ "pq", "qr", "rs", "st", "tu", "uv", "vw", "wx", "xy", "yz"
+ )
+ )
+ Tokenizer.tokenize("abcdefghijklmnopqrstuvwxyz")(3).get should equal (
+ Array(
+ "abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl", "klm", "lmn", "mno",
+ "nop", "opq", "pqr", "qrs", "rst", "stu", "tuv", "uvw", "vwx", "wxy", "xyz"
+ )
+ )
+ }
+ }
+ }
+ }
+ "NGramTokenizer companion object" should provide {
+ "pass-through tokenize method" should returns {
+ "same value as class" in { // Same input and expectation as the n = 1 instance-level case.
+ NGramTokenizer.tokenize("abcdefghijklmnopqrstuvwxyz")(1).get should equal (
+ Array(
+ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
+ "s", "t", "u", "v", "w", "x", "y", "z"
+ )
+ )
+ }
+ }
+ }
+}
+
+object NGramTokenizerSpec { // Fixture holder imported by the NGramTokenizerSpec class.
+ private final val Tokenizer = NGramTokenizer() // One tokenizer instance, created with no arguments.
+}