From ab32aed4dfa68df86c00dd0c75a41932d16a659c Mon Sep 17 00:00:00 2001 From: Rocky Madden Date: Wed, 27 Nov 2013 08:03:12 -0700 Subject: Preferred minimal repetition naming. --- core/build.gradle | 27 +++ .../stringmetric/CaliperBenchmark.scala | 7 + .../rockymadden/stringmetric/CaliperRunner.scala | 7 + .../phonetic/MetaphoneAlgorithmBenchmark.scala | 32 +++ .../phonetic/MetaphoneMetricBenchmark.scala | 55 +++++ .../phonetic/NysiisAlgorithmBenchmark.scala | 32 +++ .../phonetic/NysiisMetricBenchmark.scala | 55 +++++ .../phonetic/RefinedNysiisAlgorithmBenchmark.scala | 32 +++ .../phonetic/RefinedNysiisMetricBenchmark.scala | 55 +++++ .../RefinedSoundexAlgorithmBenchmark.scala | 32 +++ .../phonetic/RefinedSoundexMetricBenchmark.scala | 55 +++++ .../phonetic/SoundexAlgorithmBenchmark.scala | 32 +++ .../phonetic/SoundexMetricBenchmark.scala | 55 +++++ .../similarity/DiceSorensenMetricBenchmark.scala | 55 +++++ .../similarity/HammingMetricBenchmark.scala | 55 +++++ .../similarity/JaccardMetricBenchmark.scala | 55 +++++ .../similarity/JaroMetricBenchmark.scala | 55 +++++ .../similarity/JaroWinklerMetricBenchmark.scala | 55 +++++ .../similarity/LevenshteinMetricBenchmark.scala | 55 +++++ .../similarity/NGramMetricBenchmark.scala | 58 ++++++ .../similarity/OverlapMetricBenchmark.scala | 55 +++++ .../RatcliffObershelpMetricBenchmark.scala | 55 +++++ .../WeightedLevenshteinMetricBenchmark.scala | 55 +++++ .../tokenization/NGramTokenizerBenchmark.scala | 35 ++++ .../com/rockymadden/stringmetric/Algorithm.scala | 5 + .../com/rockymadden/stringmetric/Alphabet.scala | 55 +++++ .../com/rockymadden/stringmetric/Filter.scala | 5 + .../com/rockymadden/stringmetric/Filterable.scala | 5 + .../com/rockymadden/stringmetric/Metric.scala | 5 + .../rockymadden/stringmetric/StringAlgorithm.scala | 42 ++++ .../rockymadden/stringmetric/StringFilter.scala | 45 ++++ .../stringmetric/StringFilterable.scala | 5 + .../rockymadden/stringmetric/StringMetric.scala | 120 +++++++++++ 
.../rockymadden/stringmetric/StringTokenizer.scala | 14 ++ .../com/rockymadden/stringmetric/Tokenizer.scala | 5 + .../stringmetric/filter/AsciiControlFilter.scala | 11 + .../filter/AsciiControlOnlyFilter.scala | 11 + .../stringmetric/filter/AsciiLetterFilter.scala | 11 + .../filter/AsciiLetterNumberFilter.scala | 15 ++ .../filter/AsciiLetterNumberOnlyFilter.scala | 15 ++ .../filter/AsciiLetterOnlyFilter.scala | 11 + .../stringmetric/filter/AsciiNumberFilter.scala | 11 + .../filter/AsciiNumberOnlyFilter.scala | 11 + .../stringmetric/filter/AsciiSpaceFilter.scala | 10 + .../stringmetric/filter/AsciiSymbolFilter.scala | 15 ++ .../filter/AsciiSymbolOnlyFilter.scala | 15 ++ .../filter/IgnoreAsciiLetterCaseFilter.scala | 11 + .../stringmetric/filter/StringFilterDelegate.scala | 9 + .../com/rockymadden/stringmetric/package.scala | 7 + .../stringmetric/phonetic/MetaphoneAlgorithm.scala | 122 +++++++++++ .../stringmetric/phonetic/MetaphoneMetric.scala | 32 +++ .../stringmetric/phonetic/NysiisAlgorithm.scala | 131 ++++++++++++ .../stringmetric/phonetic/NysiisMetric.scala | 40 ++++ .../phonetic/RefinedNysiisAlgorithm.scala | 135 ++++++++++++ .../phonetic/RefinedNysiisMetric.scala | 40 ++++ .../phonetic/RefinedSoundexAlgorithm.scala | 75 +++++++ .../phonetic/RefinedSoundexMetric.scala | 33 +++ .../stringmetric/phonetic/SoundexAlgorithm.scala | 73 +++++++ .../stringmetric/phonetic/SoundexMetric.scala | 33 +++ .../similarity/DiceSorensenMetric.scala | 42 ++++ .../stringmetric/similarity/HammingMetric.scala | 37 ++++ .../stringmetric/similarity/JaccardMetric.scala | 37 ++++ .../stringmetric/similarity/JaroMetric.scala | 87 ++++++++ .../similarity/JaroWinklerMetric.scala | 40 ++++ .../similarity/LevenshteinMetric.scala | 58 ++++++ .../stringmetric/similarity/NGramMetric.scala | 40 ++++ .../stringmetric/similarity/OverlapMetric.scala | 40 ++++ .../similarity/RatcliffObershelpMetric.scala | 57 ++++++ .../similarity/WeightedLevenshteinMetric.scala | 61 ++++++ 
.../stringmetric/tokenization/NGramTokenizer.scala | 37 ++++ .../rockymadden/stringmetric/AlphabetSpec.scala | 96 +++++++++ .../stringmetric/FilterDecoratedSpec.scala | 38 ++++ .../com/rockymadden/stringmetric/ScalaTest.scala | 18 ++ .../stringmetric/StringAlgorithmSpec.scala | 59 ++++++ .../stringmetric/StringMetricSpec.scala | 141 +++++++++++++ .../stringmetric/StringTokenizerSpec.scala | 23 +++ .../filter/AsciiControlFilterSpec.scala | 33 +++ .../filter/AsciiControlOnlyFilterSpec.scala | 33 +++ .../filter/AsciiLetterFilterSpec.scala | 29 +++ .../filter/AsciiLetterNumberFilterSpec.scala | 33 +++ .../filter/AsciiLetterNumberOnlyFilterSpec.scala | 35 ++++ .../filter/AsciiLetterOnlyFilterSpec.scala | 33 +++ .../filter/AsciiNumberFilterSpec.scala | 33 +++ .../filter/AsciiNumberOnlyFilterSpec.scala | 33 +++ .../stringmetric/filter/AsciiSpaceFilterSpec.scala | 37 ++++ .../filter/AsciiSymbolFilterSpec.scala | 31 +++ .../filter/AsciiSymbolOnlyFilterSpec.scala | 33 +++ .../filter/IgnoreAsciiLetterCaseFilterSpec.scala | 41 ++++ .../filter/StringFilterDelegateSpec.scala | 31 +++ .../phonetic/MetaphoneAlgorithmSpec.scala | 226 +++++++++++++++++++++ .../phonetic/MetaphoneMetricSpec.scala | 54 +++++ .../phonetic/NysiisAlgorithmSpec.scala | 204 +++++++++++++++++++ .../stringmetric/phonetic/NysiisMetricSpec.scala | 50 +++++ .../phonetic/RefinedNysiisAlgorithmSpec.scala | 221 ++++++++++++++++++++ .../phonetic/RefinedNysiisMetricSpec.scala | 50 +++++ .../phonetic/RefinedSoundexAlgorithmSpec.scala | 175 ++++++++++++++++ .../phonetic/RefinedSoundexMetricSpec.scala | 50 +++++ .../phonetic/SoundexAlgorithmSpec.scala | 174 ++++++++++++++++ .../stringmetric/phonetic/SoundexMetricSpec.scala | 50 +++++ .../similarity/DiceSorensenMetricSpec.scala | 75 +++++++ .../similarity/HammingMetricSpec.scala | 52 +++++ .../similarity/JaccardMetricSpec.scala | 77 +++++++ .../stringmetric/similarity/JaroMetricSpec.scala | 66 ++++++ .../similarity/JaroWinklerMetricSpec.scala | 66 ++++++ 
.../similarity/LevenshteinMetricSpec.scala | 65 ++++++ .../stringmetric/similarity/NGramMetricSpec.scala | 75 +++++++ .../similarity/OverlapMetricSpec.scala | 77 +++++++ .../similarity/RatcliffObershelpMetricSpec.scala | 56 +++++ .../similarity/WeightedLevenshteinMetricSpec.scala | 64 ++++++ .../tokenization/NGramTokenizerSpec.scala | 69 +++++++ 110 files changed, 5589 insertions(+) create mode 100755 core/build.gradle create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala create mode 100755 
core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerBenchmark.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/Algorithm.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/Alphabet.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/Filter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/Filterable.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/Metric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/StringAlgorithm.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/StringFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/StringFilterable.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/StringMetric.scala create mode 100755 
core/source/core/scala/com/rockymadden/stringmetric/StringTokenizer.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/Tokenizer.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/package.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala create mode 100755 
core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala create mode 100755 core/source/core/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala create mode 100755 
core/source/test/scala/com/rockymadden/stringmetric/FilterDecoratedSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilterSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/filter/StringFilterDelegateSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala create mode 
100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala create mode 100755 
core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala create mode 100755 core/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala (limited to 'core') diff --git a/core/build.gradle b/core/build.gradle new file mode 100755 index 0000000..5c92253 --- /dev/null +++ b/core/build.gradle @@ -0,0 +1,27 @@ +apply from: '../deploy.gradle' + +dependencies { testCompile 'com.google.caliper:caliper:0.5-rc1' } + +sourceSets { + benchmark { + output.resourcesDir "${project.buildDir}/classes/benchmark" + + java { srcDir 'source/benchmark/java' } + resources { srcDir 'source/benchmark/resource' } + scala { srcDir 'source/benchmark/scala' } + } + main { + output.resourcesDir "${project.buildDir}/classes/main" + + java { srcDir 'source/core/java' } + resources { srcDir 'source/core/resource' } + scala { srcDir 'source/core/scala' } + } + test { + output.resourcesDir "${project.buildDir}/classes/test" + + java { srcDir 'source/test/java' } + resources { srcDir 'source/test/resource' } + scala { srcDir 'source/test/scala' } + } +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala new file mode 100755 index 0000000..55a6238 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala @@ -0,0 +1,7 @@ +package com.rockymadden.stringmetric + +import com.google.caliper.SimpleBenchmark + +trait CaliperBenchmark extends SimpleBenchmark { + def run(reps: Int)(code: => Unit) = (0 until reps).foreach(i => code) +} \ No newline at end of file diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala new file mode 100755 index 0000000..4474a8d --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala @@ -0,0 
+1,7 @@ +package com.rockymadden.stringmetric + +import com.google.caliper.{Benchmark, Runner} + +abstract class CaliperRunner(private[this] val suite: java.lang.Class[_ <: Benchmark]) { + def main(args: Array[String]): Unit = Runner.main(suite, args) +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala new file mode 100755 index 0000000..bfc17f2 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala @@ -0,0 +1,32 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class MetaphoneAlgorithmBenchmark extends CaliperBenchmark { + import MetaphoneAlgorithmBenchmark.Algorithm + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + Algorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + Algorithm.compute(string) + } +} + +object MetaphoneAlgorithmBenchmark extends CaliperRunner(classOf[MetaphoneAlgorithmBenchmark]) { + private final val Algorithm = MetaphoneAlgorithm() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala new file mode 100755 index 0000000..147ab54 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.phonetic + 
+import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class MetaphoneMetricBenchmark extends CaliperBenchmark { + import MetaphoneMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1) + } +} + +object MetaphoneMetricBenchmark extends CaliperRunner(classOf[MetaphoneMetricBenchmark]) { + private final val Metric = MetaphoneMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala new file mode 100755 index 0000000..79f9e16 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala @@ -0,0 +1,32 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + 
+final class NysiisAlgorithmBenchmark extends CaliperBenchmark { + import NysiisAlgorithmBenchmark.Algorithm + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + Algorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + Algorithm.compute(string) + } +} + +object NysiisAlgorithmBenchmark extends CaliperRunner(classOf[NysiisAlgorithmBenchmark]) { + private final val Algorithm = NysiisAlgorithm() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala new file mode 100755 index 0000000..620d054 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class NysiisMetricBenchmark extends CaliperBenchmark { + import NysiisMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def 
timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1) + } +} + +object NysiisMetricBenchmark extends CaliperRunner(classOf[NysiisMetricBenchmark]) { + private final val Metric = NysiisMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala new file mode 100755 index 0000000..885bc3a --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala @@ -0,0 +1,32 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class RefinedNysiisAlgorithmBenchmark extends CaliperBenchmark { + import RefinedNysiisAlgorithmBenchmark.Algorithm + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + Algorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + Algorithm.compute(string) + } +} + +object RefinedNysiisAlgorithmBenchmark extends CaliperRunner(classOf[RefinedNysiisAlgorithmBenchmark]) { + private final val Algorithm = RefinedNysiisAlgorithm() +} diff --git 
a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala new file mode 100755 index 0000000..d0da4e5 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class RefinedNysiisMetricBenchmark extends CaliperBenchmark { + import RefinedNysiisMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1) + } +} + +object RefinedNysiisMetricBenchmark extends CaliperRunner(classOf[RefinedNysiisMetricBenchmark]) { + private final val Metric = RefinedNysiisMetric() +} diff --git 
a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala new file mode 100755 index 0000000..99aa3df --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala @@ -0,0 +1,32 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class RefinedSoundexAlgorithmBenchmark extends CaliperBenchmark { + import RefinedSoundexAlgorithmBenchmark.Algorithm + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + Algorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + Algorithm.compute(string) + } +} + +object RefinedSoundexAlgorithmBenchmark extends CaliperRunner(classOf[RefinedSoundexAlgorithmBenchmark]) { + private final val Algorithm = RefinedSoundexAlgorithm() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala new file mode 100755 index 0000000..8e24650 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class RefinedSoundexMetricBenchmark 
extends CaliperBenchmark { + import RefinedSoundexMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1) + } +} + +object RefinedSoundexMetricBenchmark extends CaliperRunner(classOf[RefinedSoundexMetricBenchmark]) { + private final val Metric = RefinedSoundexMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala new file mode 100755 index 0000000..5c195a6 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala @@ -0,0 +1,32 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class SoundexAlgorithmBenchmark extends CaliperBenchmark { + import SoundexAlgorithmBenchmark.Algorithm + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ 
+ + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + Algorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + Algorithm.compute(string) + } +} + +object SoundexAlgorithmBenchmark extends CaliperRunner(classOf[SoundexAlgorithmBenchmark]) { + private final val Algorithm = SoundexAlgorithm() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala new file mode 100755 index 0000000..6534d72 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class SoundexMetricBenchmark extends CaliperBenchmark { + import SoundexMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + 
Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1) + } +} + +object SoundexMetricBenchmark extends CaliperRunner(classOf[SoundexMetricBenchmark]) { + private final val Metric = SoundexMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala new file mode 100755 index 0000000..88f5d4c --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class DiceSorensenMetricBenchmark extends CaliperBenchmark { + import DiceSorensenMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2)(2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2)(2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + 
Metric.compare(charArray1, charArray1)(2) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1)(2) + } +} + +object DiceSorensenMetricBenchmark extends CaliperRunner(classOf[DiceSorensenMetricBenchmark]) { + private final val Metric = DiceSorensenMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala new file mode 100755 index 0000000..60ef7d4 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class HammingMetricBenchmark extends CaliperBenchmark { + import HammingMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, 
string1) + } +} + +object HammingMetricBenchmark extends CaliperRunner(classOf[HammingMetricBenchmark]) { + private final val Metric = HammingMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala new file mode 100755 index 0000000..3f6a59f --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class JaccardMetricBenchmark extends CaliperBenchmark { + import JaccardMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2)(2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2)(2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1)(2) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1)(2) + } +} + +object JaccardMetricBenchmark extends CaliperRunner(classOf[JaccardMetricBenchmark]) { + private final val Metric = 
JaccardMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala new file mode 100755 index 0000000..c9c9b2c --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class JaroMetricBenchmark extends CaliperBenchmark { + import JaroMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1) + } +} + +object JaroMetricBenchmark extends CaliperRunner(classOf[JaroMetricBenchmark]) { + private final val Metric = JaroMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala 
b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala new file mode 100755 index 0000000..b3da154 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class JaroWinklerMetricBenchmark extends CaliperBenchmark { + import JaroWinklerMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1) + } +} + +object JaroWinklerMetricBenchmark extends CaliperRunner(classOf[JaroWinklerMetricBenchmark]) { + private final val Metric = JaroWinklerMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala new file mode 100755 
index 0000000..d47e138 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class LevenshteinMetricBenchmark extends CaliperBenchmark { + import LevenshteinMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1) + } +} + +object LevenshteinMetricBenchmark extends CaliperRunner(classOf[LevenshteinMetricBenchmark]) { + private final val Metric = LevenshteinMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala new file mode 100755 index 0000000..4250e66 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala 
@@ -0,0 +1,58 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class NGramMetricBenchmark extends CaliperBenchmark { + import NGramMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + @Param(Array("2", "3")) + var n: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2)(n) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2)(n) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1)(n) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1)(n) + } +} + +object NGramMetricBenchmark extends CaliperRunner(classOf[NGramMetricBenchmark]) { + private final val Metric = NGramMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala new file mode 100755 index 0000000..4d7ce0a --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import 
com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class OverlapMetricBenchmark extends CaliperBenchmark { + import OverlapMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2)(2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2)(2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1)(2) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1)(2) + } +} + +object OverlapMetricBenchmark extends CaliperRunner(classOf[OverlapMetricBenchmark]) { + private final val Metric = OverlapMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala new file mode 100755 index 0000000..edc1527 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import 
scala.util.Random + +final class RatcliffObershelpMetricBenchmark extends CaliperBenchmark { + import RatcliffObershelpMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1) + } +} + +object RatcliffObershelpMetricBenchmark extends CaliperRunner(classOf[RatcliffObershelpMetricBenchmark]) { + private final val Metric = RatcliffObershelpMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala new file mode 100755 index 0000000..b511654 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class WeightedLevenshteinMetricBenchmark 
extends CaliperBenchmark { + import WeightedLevenshteinMetricBenchmark.Metric + + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray2)(1, 1, 1) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + Metric.compare(string1, string2)(1, 1, 1) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + Metric.compare(charArray1, charArray1)(1, 1, 1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + Metric.compare(string1, string1)(1, 1, 1) + } +} + +object WeightedLevenshteinMetricBenchmark extends CaliperRunner(classOf[WeightedLevenshteinMetricBenchmark]) { + private final val Metric = WeightedLevenshteinMetric() +} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerBenchmark.scala new file mode 100755 index 0000000..7e62662 --- /dev/null +++ b/core/source/benchmark/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerBenchmark.scala @@ -0,0 +1,35 @@ +package com.rockymadden.stringmetric.tokenization + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class NGramTokenizerBenchmark extends CaliperBenchmark { + import NGramTokenizerBenchmark.Tokenizer + + @Param(Array("0", 
"1", "2", "4", "8", "16")) + var length: Int = _ + + @Param(Array("2", "3")) + var n: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + Tokenizer.tokenize(charArray)(n) + } + + def timeComputeWithString(reps: Int) = run(reps) { + Tokenizer.tokenize(string)(n) + } +} + +object NGramTokenizerBenchmark extends CaliperRunner(classOf[NGramTokenizerBenchmark]) { + private final val Tokenizer = NGramTokenizer() +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Algorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/Algorithm.scala new file mode 100755 index 0000000..10bc2cd --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/Algorithm.scala @@ -0,0 +1,5 @@ +package com.rockymadden.stringmetric + +trait Algorithm[A, B, C] { + def compute(a: A)(implicit b: B): Option[C] +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Alphabet.scala b/core/source/core/scala/com/rockymadden/stringmetric/Alphabet.scala new file mode 100755 index 0000000..d2ede81 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/Alphabet.scala @@ -0,0 +1,55 @@ +package com.rockymadden.stringmetric + +import scala.collection.immutable.Set + +object Alphabet { + protected sealed abstract class AlphabetSet { + protected[Alphabet] val Chars: Set[Char] + + def isSuperset(char: Char): Boolean = Chars.contains(char) + + def isSuperset(charArray: Array[Char]): Boolean = + charArray.length > 0 && charArray.takeWhile(Chars.contains(_)).length == charArray.length + + def isSuperset(string: String): Boolean = isSuperset(string.toCharArray) + } + + case object LowercaseConsonant extends AlphabetSet { + override protected[Alphabet] final val Chars = + Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 
't', 'v', 'w', 'x' ,'z') + } + case object UppercaseConsonant extends AlphabetSet { + override protected[Alphabet] final val Chars = + Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X' ,'Z') + } + case object Consonant extends AlphabetSet { + override protected[Alphabet] final val Chars = LowercaseConsonant.Chars ++ UppercaseConsonant.Chars + } + case object LowercaseVowel extends AlphabetSet { + override protected[Alphabet] final val Chars = Set('a', 'e', 'i', 'o', 'u') + } + case object UppercaseVowel extends AlphabetSet { + override protected[Alphabet] final val Chars = Set('A', 'E', 'I', 'O', 'U') + } + case object Vowel extends AlphabetSet { + override protected[Alphabet] final val Chars = LowercaseVowel.Chars ++ UppercaseVowel.Chars + } + case object LowercaseY extends AlphabetSet { + override protected[Alphabet] final val Chars = Set('y') + } + case object UppercaseY extends AlphabetSet { + override protected[Alphabet] final val Chars = Set('Y') + } + case object Y extends AlphabetSet { + override protected[Alphabet] final val Chars = LowercaseY.Chars ++ UppercaseY.Chars + } + case object LowercaseAlpha extends AlphabetSet { + override protected[Alphabet] final val Chars = LowercaseConsonant.Chars ++ LowercaseVowel.Chars ++ LowercaseY.Chars + } + case object UppercaseAlpha extends AlphabetSet { + override protected[Alphabet] final val Chars = UppercaseConsonant.Chars ++ UppercaseVowel.Chars ++ UppercaseY.Chars + } + case object Alpha extends AlphabetSet { + override protected[Alphabet] final val Chars = LowercaseAlpha.Chars ++ UppercaseAlpha.Chars + } +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Filter.scala b/core/source/core/scala/com/rockymadden/stringmetric/Filter.scala new file mode 100755 index 0000000..2a02f6b --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/Filter.scala @@ -0,0 +1,5 @@ +package com.rockymadden.stringmetric + +trait Filter[A] extends 
Filterable[A] { + override def filter(a: A): A = a +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Filterable.scala b/core/source/core/scala/com/rockymadden/stringmetric/Filterable.scala new file mode 100755 index 0000000..77dc0bf --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/Filterable.scala @@ -0,0 +1,5 @@ +package com.rockymadden.stringmetric + +trait Filterable[A] { + def filter(a: A): A +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Metric.scala b/core/source/core/scala/com/rockymadden/stringmetric/Metric.scala new file mode 100755 index 0000000..6862321 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/Metric.scala @@ -0,0 +1,5 @@ +package com.rockymadden.stringmetric + +trait Metric[A, B, C] { + def compare(a1: A, a2: A)(implicit b: B): Option[C] +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/StringAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringAlgorithm.scala new file mode 100755 index 0000000..0d194da --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/StringAlgorithm.scala @@ -0,0 +1,42 @@ +package com.rockymadden.stringmetric + +trait StringAlgorithm[A, B] extends Algorithm[String, A, B] { + def compute(charArray: Array[Char])(implicit a: A): Option[Array[Char]] +} + +object StringAlgorithm { + type Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm + val Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm + + type Nysiis = com.rockymadden.stringmetric.phonetic.NysiisAlgorithm + val Nysiis = com.rockymadden.stringmetric.phonetic.NysiisAlgorithm + + type RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm + val RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm + + type RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm + val RefinedSoundex = 
com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm + + type Soundex = com.rockymadden.stringmetric.phonetic.SoundexAlgorithm + val Soundex = com.rockymadden.stringmetric.phonetic.SoundexAlgorithm + + def computeWithMetaphone(charArray: Array[Char]) = Metaphone.compute(charArray) + + def computeWithMetaphone(string: String) = Metaphone.compute(string) + + def computeWithNysiis(charArray: Array[Char]) = Nysiis.compute(charArray) + + def computeWithNysiis(string: String) = Nysiis.compute(string) + + def computeWithRefinedNysiis(charArray: Array[Char]) = RefinedNysiis.compute(charArray) + + def computeWithRefinedNysiis(string: String) = RefinedNysiis.compute(string) + + def computeWithRefinedSoundex(charArray: Array[Char]) = RefinedSoundex.compute(charArray) + + def computeWithRefinedSoundex(string: String) = RefinedSoundex.compute(string) + + def computeWithSoundex(charArray: Array[Char]) = Soundex.compute(charArray) + + def computeWithSoundex(string: String) = Soundex.compute(string) +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/StringFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringFilter.scala new file mode 100755 index 0000000..1430d34 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/StringFilter.scala @@ -0,0 +1,45 @@ +package com.rockymadden.stringmetric + +import com.rockymadden.stringmetric.filter.StringFilterDelegate + +trait StringFilter extends Filter[String] with StringFilterable { + override def filter(charArray: Array[Char]): Array[Char] = charArray +} + +object StringFilter { + type AsciiControl = com.rockymadden.stringmetric.filter.AsciiControlFilter + lazy val asciiControl = new StringFilterDelegate with AsciiControl + + type AsciiControlOnly = com.rockymadden.stringmetric.filter.AsciiControlOnlyFilter + lazy val asciiControlOnly = new StringFilterDelegate with AsciiControlOnly + + type AsciiLetterNumber = com.rockymadden.stringmetric.filter.AsciiLetterNumberFilter + lazy 
val asciiLetterNumber = new StringFilterDelegate with AsciiLetterNumber + + type AsciiLetterNumberOnly = com.rockymadden.stringmetric.filter.AsciiLetterNumberOnlyFilter + lazy val asciiLetterNumberOnly = new StringFilterDelegate with AsciiLetterNumberOnly + + type AsciiLetter = com.rockymadden.stringmetric.filter.AsciiLetterFilter + lazy val asciiLetter = new StringFilterDelegate with AsciiLetter + + type AsciiLetterOnly = com.rockymadden.stringmetric.filter.AsciiLetterOnlyFilter + lazy val asciiLetterOnly = new StringFilterDelegate with AsciiLetterOnly + + type AsciiNumber = com.rockymadden.stringmetric.filter.AsciiNumberFilter + lazy val asciiNumber = new StringFilterDelegate with AsciiNumber + + type AsciiNumberOnly = com.rockymadden.stringmetric.filter.AsciiNumberOnlyFilter + lazy val asciiNumberOnly = new StringFilterDelegate with AsciiNumberOnly + + type AsciiSpace = com.rockymadden.stringmetric.filter.AsciiSpaceFilter + lazy val asciiSpace = new StringFilterDelegate with AsciiSpace + + type AsciiSymbol = com.rockymadden.stringmetric.filter.AsciiSymbolFilter + lazy val asciiSymbol = new StringFilterDelegate with AsciiSymbol + + type AsciiSymbolOnly = com.rockymadden.stringmetric.filter.AsciiSymbolOnlyFilter + lazy val asciiSymbolOnly = new StringFilterDelegate with AsciiSymbolOnly + + type IgnoreAsciiLetterCase = com.rockymadden.stringmetric.filter.IgnoreAsciiLetterCaseFilter + lazy val ignoreAsciiLetterCase = new StringFilterDelegate with IgnoreAsciiLetterCase +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/StringFilterable.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringFilterable.scala new file mode 100755 index 0000000..d639dfb --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/StringFilterable.scala @@ -0,0 +1,5 @@ +package com.rockymadden.stringmetric + +trait StringFilterable extends Filterable[String] { + def filter(charArray: Array[Char]): Array[Char] +} diff --git 
a/core/source/core/scala/com/rockymadden/stringmetric/StringMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringMetric.scala new file mode 100755 index 0000000..212f76d --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/StringMetric.scala @@ -0,0 +1,120 @@ +package com.rockymadden.stringmetric + +trait StringMetric[A, B] extends Metric[String, A, B] { + def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit a: A): Option[B] +} + +object StringMetric { + type DiceSorensen = com.rockymadden.stringmetric.similarity.DiceSorensenMetric + val DiceSorensen = com.rockymadden.stringmetric.similarity.DiceSorensenMetric + + type Hamming = com.rockymadden.stringmetric.similarity.HammingMetric + val Hamming = com.rockymadden.stringmetric.similarity.HammingMetric + + type Jaccard = com.rockymadden.stringmetric.similarity.JaccardMetric + val Jaccard = com.rockymadden.stringmetric.similarity.JaccardMetric + + type Jaro = com.rockymadden.stringmetric.similarity.JaroMetric + val Jaro = com.rockymadden.stringmetric.similarity.JaroMetric + + type JaroWinkler = com.rockymadden.stringmetric.similarity.JaroWinklerMetric + val JaroWinkler = com.rockymadden.stringmetric.similarity.JaroWinklerMetric + + type Levenshtein = com.rockymadden.stringmetric.similarity.LevenshteinMetric + val Levenshtein = com.rockymadden.stringmetric.similarity.LevenshteinMetric + + type Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneMetric + val Metaphone = com.rockymadden.stringmetric.phonetic.MetaphoneMetric + + type NGram = com.rockymadden.stringmetric.similarity.NGramMetric + val NGram = com.rockymadden.stringmetric.similarity.NGramMetric + + type Nysiis = com.rockymadden.stringmetric.phonetic.NysiisMetric + val Nysiis = com.rockymadden.stringmetric.phonetic.NysiisMetric + + type Overlap = com.rockymadden.stringmetric.similarity.OverlapMetric + val Overlap = com.rockymadden.stringmetric.similarity.OverlapMetric + + type RefinedNysiis = 
com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric + val RefinedNysiis = com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric + + type RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric + val RefinedSoundex = com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric + + type Soundex = com.rockymadden.stringmetric.phonetic.SoundexMetric + val Soundex = com.rockymadden.stringmetric.phonetic.SoundexMetric + + type WeightedLevenshtein = com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric + val WeightedLevenshtein = com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric + + def compareWithDiceSorensen(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = + DiceSorensen.compare(charArray1, charArray2)(n) + + def compareWithDiceSorensen(string1: String, string2: String)(n: Int) = DiceSorensen.compare(string1, string2)(n) + + def compareWithHamming(charArray1: Array[Char], charArray2: Array[Char]) = Hamming.compare(charArray1, charArray2) + + def compareWithHamming(string1: String, string2: String)= Hamming.compare(string1, string2) + + def compareWithJaccard(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = + Jaccard.compare(charArray1, charArray2)(n) + + def compareWithJaccard(string1: String, string2: String)(n: Int) = Jaccard.compare(string1, string2)(n) + + def compareWithJaro(charArray1: Array[Char], charArray2: Array[Char]) = Jaro.compare(charArray1, charArray2) + + def compareWithJaro(string1: String, string2: String) = Jaro.compare(string1, string2) + + def compareWithJaroWinkler(charArray1: Array[Char], charArray2: Array[Char]) = + JaroWinkler.compare(charArray1, charArray2) + + def compareWithJaroWinkler(string1: String, string2: String) = JaroWinkler.compare(string1, string2) + + def compareWithLevenshtein(charArray1: Array[Char], charArray2: Array[Char]) = + Levenshtein.compare(charArray1, charArray2) + + def compareWithLevenshtein(string1: String, string2: String) = 
Levenshtein.compare(string1, string2) + + def compareWithMetaphone(charArray1: Array[Char], charArray2: Array[Char]) = + Metaphone.compare(charArray1, charArray2) + + def compareWithMetaphone(string1: String, string2: String) = Metaphone.compare(string1, string2) + + def compareWithNGram(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = + NGram.compare(charArray1, charArray2)(n) + + def compareWithNGram(string1: String, string2: String)(n: Int) = NGram.compare(string1, string2)(n) + + def compareWithNysiis(charArray1: Array[Char], charArray2: Array[Char]) = Nysiis.compare(charArray1, charArray2) + + def compareWithNysiis(string1: String, string2: String) = Nysiis.compare(string1, string2) + + def compareWithOverlap(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = + Overlap.compare(charArray1, charArray2)(n) + + def compareWithOverlap(string1: String, string2: String)(n: Int) = Overlap.compare(string1, string2)(n) + + def compareWithRefinedNysiis(charArray1: Array[Char], charArray2: Array[Char]) = + RefinedNysiis.compare(charArray1, charArray2) + + def compareWithRefinedNysiis(string1: String, string2: String) = RefinedNysiis.compare(string1, string2) + + def compareWithRefinedSoundex(charArray1: Array[Char], charArray2: Array[Char]) = + RefinedSoundex.compare(charArray1, charArray2) + + def compareWithRefinedSoundex(string1: String, string2: String) = RefinedSoundex.compare(string1, string2) + + def compareWithSoundex(charArray1: Array[Char], charArray2: Array[Char]) = Soundex.compare(charArray1, charArray2) + + def compareWithSoundex(string1: String, string2: String) = Soundex.compare(string1, string2) + + def compareWithWeightedLevenshtein(charArray1: Array[Char], charArray2: Array[Char]) + (options: (BigDecimal, BigDecimal, BigDecimal)) = + + WeightedLevenshtein.compare(charArray1, charArray2)(options) + + def compareWithWeightedLevenshtein(string1: String, string2: String) + (options: (BigDecimal, BigDecimal, BigDecimal)) = + + 
WeightedLevenshtein.compare(string1, string2)(options) +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/StringTokenizer.scala b/core/source/core/scala/com/rockymadden/stringmetric/StringTokenizer.scala new file mode 100755 index 0000000..bef56d9 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/StringTokenizer.scala @@ -0,0 +1,14 @@ +package com.rockymadden.stringmetric + +trait StringTokenizer[A, B] extends Tokenizer[String, A, B] { + def tokenize(charArray: Array[Char])(implicit a: A): Option[Array[Array[Char]]] +} + +object StringTokenizer { + type NGram = com.rockymadden.stringmetric.tokenization.NGramTokenizer + val NGram = com.rockymadden.stringmetric.tokenization.NGramTokenizer + + def tokenizeWithNGram(charArray: Array[Char])(n: Int) = NGram.tokenize(charArray)(n) + + def tokenizeWithNGram(string: String)(n: Int) = NGram.tokenize(string)(n) +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/Tokenizer.scala b/core/source/core/scala/com/rockymadden/stringmetric/Tokenizer.scala new file mode 100755 index 0000000..c9edae5 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/Tokenizer.scala @@ -0,0 +1,5 @@ +package com.rockymadden.stringmetric + +trait Tokenizer[A, B, C] { + def tokenize(a: A)(implicit b: B): Option[C] +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala new file mode 100755 index 0000000..bd45ecf --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlFilter.scala @@ -0,0 +1,11 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures ASCII controls do not matter. 
*/ +trait AsciiControlFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter(charArray.filter(c => !(c <= 31 || c == 127))) + /* Above: removes every char whose code is <= 31 or == 127, then passes the remainder to super.filter. Below: the String overload converts to Array[Char], calls the Array[Char] overload, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala new file mode 100755 index 0000000..c08b686 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilter.scala @@ -0,0 +1,11 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures only ASCII control characters matter: keeps the chars whose code is <= 31 or == 127, discards all others, then passes the result to super.filter. Declared `abstract override`, so it must be mixed into a concrete StringFilter. */ +trait AsciiControlOnlyFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter(charArray.filter(c => (c <= 31 || c == 127))) + /* String overload: converts to Array[Char], calls the Array[Char] overload above, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala new file mode 100755 index 0000000..24509cb --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilter.scala @@ -0,0 +1,11 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures ASCII letters do not matter. 
*/ +trait AsciiLetterFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter(charArray.filter(c => !((c >= 65 && c <= 90 ) || (c >= 97 && c <= 122)))) + /* Above: removes chars with codes 65-90 (A-Z) and 97-122 (a-z), then passes the remainder to super.filter. Below: the String overload converts to Array[Char], calls the Array[Char] overload, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala new file mode 100755 index 0000000..e17c715 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilter.scala @@ -0,0 +1,15 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures ASCII letters and numbers do not matter: removes chars with codes 48-57 (0-9), 65-90 (A-Z) and 97-122 (a-z), then passes the remainder to super.filter. Declared `abstract override`, so it must be mixed into a concrete StringFilter. */ +trait AsciiLetterNumberFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter( + charArray.filter(c => + !((c >= 48 && c <= 57 ) || (c >= 65 && c <= 90 ) || (c >= 97 && c <= 122)) + ) + ) + /* String overload: converts to Array[Char], calls the Array[Char] overload above, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala new file mode 100755 index 0000000..7cf97ba --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilter.scala @@ -0,0 +1,15 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures only ASCII letters and numbers matter. 
*/ +trait AsciiLetterNumberOnlyFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter( + charArray.filter(c => + ((c >= 48 && c <= 57 ) || (c >= 65 && c <= 90 ) || (c >= 97 && c <= 122)) + ) + ) + /* Above: keeps only chars with codes 48-57 (0-9), 65-90 (A-Z) and 97-122 (a-z), then passes them to super.filter. Below: the String overload converts to Array[Char], calls the Array[Char] overload, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala new file mode 100755 index 0000000..70032d9 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilter.scala @@ -0,0 +1,11 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures only ASCII letters matter: keeps only chars with codes 65-90 (A-Z) and 97-122 (a-z), then passes them to super.filter. Declared `abstract override`, so it must be mixed into a concrete StringFilter. */ +trait AsciiLetterOnlyFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter(charArray.filter(c => ((c >= 65 && c <= 90 ) || (c >= 97 && c <= 122)))) + /* String overload: converts to Array[Char], calls the Array[Char] overload above, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala new file mode 100755 index 0000000..42fe77e --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilter.scala @@ -0,0 +1,11 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures ASCII numbers do not matter. 
*/ +trait AsciiNumberFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter(charArray.filter(c => !(c >= 48 && c <= 57))) + /* Above: removes chars with codes 48-57 (0-9), then passes the remainder to super.filter. Below: the String overload converts to Array[Char], calls the Array[Char] overload, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala new file mode 100755 index 0000000..3f17099 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilter.scala @@ -0,0 +1,11 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures only ASCII numbers matter: keeps only chars with codes 48-57 (0-9), then passes them to super.filter. Declared `abstract override`, so it must be mixed into a concrete StringFilter. */ +trait AsciiNumberOnlyFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter(charArray.filter(c => (c >= 48 && c <= 57 ))) + /* String overload: converts to Array[Char], calls the Array[Char] overload above, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala new file mode 100755 index 0000000..538107d --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilter.scala @@ -0,0 +1,10 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures ASCII spaces do not matter. 
*/ +trait AsciiSpaceFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = super.filter(charArray.filter(_ != ' ')) + /* Above: removes only the literal space character (no other whitespace is tested), then passes the remainder to super.filter. Below: the String overload converts to Array[Char], calls the Array[Char] overload, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala new file mode 100755 index 0000000..7b0c810 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilter.scala @@ -0,0 +1,15 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures ASCII symbols do not matter: removes chars with codes 32-47, 58-64, 91-96 and 123-126, then passes the remainder to super.filter. Code 32 is the space character, so spaces are removed as well. Declared `abstract override`, so it must be mixed into a concrete StringFilter. */ +trait AsciiSymbolFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter( + charArray.filter(c => + !((c >= 32 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126)) + ) + ) + /* String overload: converts to Array[Char], calls the Array[Char] overload above, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala new file mode 100755 index 0000000..5cb5e94 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilter.scala @@ -0,0 +1,15 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures only ASCII symbols matter. 
*/ +trait AsciiSymbolOnlyFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter( + charArray.filter(c => + ((c >= 32 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126)) + ) + ) + /* Above: keeps only chars with codes 32-47, 58-64, 91-96 and 123-126 (code 32, the space character, is kept too), then passes them to super.filter. Below: the String overload converts to Array[Char], calls the Array[Char] overload, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala new file mode 100755 index 0000000..54fe66f --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilter.scala @@ -0,0 +1,11 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + +/** Ensures ASCII letter case-sensitivity does not matter: maps each char with code 65-90 (A-Z) to the char 32 codes higher (its lowercase form), leaves every other char unchanged, then passes the result to super.filter. Unlike the other filters it never removes chars. Declared `abstract override`, so it must be mixed into a concrete StringFilter. */ +trait IgnoreAsciiLetterCaseFilter extends StringFilter { + abstract override def filter(charArray: Array[Char]): Array[Char] = + super.filter(charArray.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c)) + /* String overload: converts to Array[Char], calls the Array[Char] overload above, and rebuilds the String with mkString. */ + abstract override def filter(string: String): String = filter(string.toCharArray).mkString +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala b/core/source/core/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala new file mode 100755 index 0000000..8ece42d --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/filter/StringFilterDelegate.scala @@ -0,0 +1,9 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.StringFilter + /** Concrete StringFilter whose two overloads return their argument unchanged; it is the base onto which the `abstract override` filter traits are mixed (e.g. `new StringFilterDelegate with AsciiSpace`). */ +class StringFilterDelegate extends StringFilter { + override def filter(charArray: Array[Char]): Array[Char] = charArray + /* Identity for Strings as well: no conversion through Array[Char] happens here. */ + override def filter(string: String): String = string +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/package.scala b/core/source/core/scala/com/rockymadden/stringmetric/package.scala 
new file mode 100755 index 0000000..6752f4d --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/package.scala @@ -0,0 +1,7 @@ +package com.rockymadden + +package object stringmetric { + type CompareTuple[T] = (Array[T], Array[T]) + + type MatchTuple[T] = (Array[T], Array[T]) +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala new file mode 100755 index 0000000..c580fd3 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala @@ -0,0 +1,122 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter} +import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} +import scala.annotation.{switch, tailrec} + +/** An implementation of the Metaphone algorithm. */ +class MetaphoneAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter => + final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = { + val fca = filter(charArray) + + if (fca.length == 0 || !(Alpha isSuperset fca.head)) None + else { + val th = deduplicate(transcodeHead(fca.map(_.toLower))) + val t = transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char]) + + if (t.length == 0) None else Some(t) // Single Y or W would have 0 length. 
+ } + } + + final override def compute(string: String)(implicit di: DummyImplicit): Option[String] = + compute(string.toCharArray).map(_.mkString) + + private[this] def deduplicate(ca: Array[Char]) = + if (ca.length <= 1) ca + else ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last + + @tailrec + private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { + if (c == '\0' && r.length == 0) o + else { + def shift(d: Int, ca: Array[Char]) = { + val sca = r.splitAt(d - 1) + + ( + if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, + if (sca._2.length > 0) sca._2.head else '\0', + if (sca._2.length > 1) sca._2.tail else Array.empty[Char], + ca + ) + } + + val t = { + (c: @switch) match { + case 'a' | 'e' | 'i' | 'o' | 'u' => if (l.length == 0) shift(1, o:+ c) else shift(1, o) + case 'f' | 'j' | 'l' | 'm' | 'n' | 'r' => shift(1, o :+ c) + case 'b' => if (l.length >= 1 && l.last == 'm' && r.length == 0) shift(1, o) else shift(1, o :+ 'b') + case 'c' => + if (r.length >= 1 && r.head == 'h' && l.length >= 1 && l.last == 's') shift(1, o :+ 'k') + else if (r.length >= 2 && r.head == 'i' && r(1) == 'a') shift(3, o :+ 'x') + else if ((r.length >= 1 && r.head == 'h') + || (l.length >= 1 && r.length >= 1 && l.last == 's' && r.head == 'h')) shift(2, o :+ 'x') + else if (l.length >= 1 && r.length >= 1 && l.last == 's' + && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o) + else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o :+ 's') + else shift(1, o :+ 'k') + case 'd' => + if (r.length >= 2 && r.head == 'g' + && (r(1) == 'e' || r(1) == 'y' || r(1) == 'i')) shift(1, o :+ 'j') + else shift(1, o :+ 't') + case 'g' => + if ((r.length > 1 && r.head == 'h') + || (r.length == 1 && r.head == 'n') + || (r.length == 3 && r.head == 'n' && r(1) == 'e' && r(2) == 'd')) shift(1, o) + else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 
'y')) shift(2, o :+ 'j') + else shift(1, o :+ 'k') + case 'h' => + if ((l.length >= 1 && (LowercaseVowel isSuperset l.last) && (r.length == 0 || !(LowercaseVowel isSuperset r.head))) + || (l.length >= 2 && l.last == 'h' + && (l(l.length - 2) == 'c' || l(l.length - 2) == 's' || l(l.length - 2) == 'p' + || l(l.length - 2) == 't' || l(l.length - 2) == 'g'))) shift(1, o) + else shift(1, o :+ 'h') + case 'k' => if (l.length >= 1 && l.last == 'c') shift(1, o) else shift(1, o :+ 'k') + case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ 'p') + case 'q' => shift(1, o :+ 'k') + case 's' => + if (r.length >= 2 && r.head == 'i' && (r(1) == 'o' || r(1) == 'a')) shift(3, o :+ 'x') + else if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'x') + else shift(1, o :+ 's') + case 't' => + if (r.length >= 2 && r.head == 'i' && (r(1) == 'a' || r(1) == 'o')) shift(3, o :+ 'x') + else if (r.length >= 1 && r.head == 'h') shift(2, o :+ '0') + else if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(1, o) + else shift(1, o :+ 't') + case 'v' => shift(1, o :+ 'f') + case 'w' | 'y' => if (r.length == 0 || !(LowercaseVowel isSuperset r.head)) shift(1, o) else shift(1, o :+ c) + case 'x' => shift(1, (o :+ 'k') :+ 's') + case 'z' => shift(1, o :+ 's') + case _ => shift(1, o) + } + } + + transcode(t._1, t._2, t._3, t._4) + } + } + + private[this] def transcodeHead(ca: Array[Char]) = { + (ca.length: @switch) match { + case 0 => ca + case 1 => if (ca.head == 'x') Array('s') else ca + case _ => + (ca.head: @switch) match { + case 'a' if (ca(1) == 'e') => ca.tail + case 'g' | 'k' | 'p' if (ca(1) == 'n') => ca.tail + case 'w' if (ca(1) == 'r') => ca.tail + case 'w' if (ca(1) == 'h') => 'w' +: ca.drop(2) + case 'x' => 's' +: ca.tail + case _ => ca + } + } + } +} + +object MetaphoneAlgorithm { + private lazy val self = apply() + + def apply(): MetaphoneAlgorithm = new MetaphoneAlgorithm with StringFilter + + def compute(charArray: Array[Char]) = 
self.compute(charArray) + + def compute(string: String) = self.compute(string) +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala new file mode 100755 index 0000000..2975ad3 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala @@ -0,0 +1,32 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.{StringFilter, StringMetric} +import com.rockymadden.stringmetric.Alphabet.Alpha + +/** An implementation of the Metaphone metric. */ +class MetaphoneMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter => + final override def compare(charArray1: Array[Char], charArray2: Array[Char]) + (implicit di: DummyImplicit): Option[Boolean] = { + + val fca1 = filter(charArray1) + lazy val fca2 = filter(charArray2) + + if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None + else MetaphoneAlgorithm.compute(fca1).filter(_.length > 0).flatMap(mp1 => + MetaphoneAlgorithm.compute(fca2).filter(_.length > 0).map(mp1.sameElements(_)) + ) + } + + final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = + compare(string1.toCharArray, string2.toCharArray) +} + +object MetaphoneMetric { + private lazy val self = apply() + + def apply(): MetaphoneMetric = new MetaphoneMetric with StringFilter + + def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) + + def compare(string1: String, string2: String) = self.compare(string1, string2) +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala new file mode 100755 index 0000000..ff0b3d6 --- /dev/null +++ 
b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala @@ -0,0 +1,131 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter} +import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} +import scala.annotation.{switch, tailrec} + +/** An implementation of the NYSIIS algorithm. */ +class NysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter => + final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = { + val fca = filter(charArray) + + if (fca.length == 0 || !(Alpha isSuperset fca.head)) None + else { + val tr = transcodeRight(fca.map(_.toLower)) + val tl = transcodeLeft(tr._1) + val t = + if (tl._2.length == 0) tl._1 ++ tr._2 + else tl._1 ++ transcodeCenter( + Array.empty[Char], + tl._2.head, + if (tl._2.length > 1) tl._2.tail else Array.empty[Char], + Array.empty[Char] + ) ++ tr._2 + + if (t.length == 1) Some(t) + else Some(t.head +: deduplicate(cleanTerminal(cleanLast(t.tail)))) + } + } + + final override def compute(string: String)(implicit di: DummyImplicit): Option[String] = + compute(string.toCharArray).map(_.mkString) + + private[this] def cleanLast(ca: Array[Char]) = + if (ca.length == 0) ca + else if(ca.last == 'a' || ca.last == 's') ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length) + else ca + + private[this] def cleanTerminal(ca: Array[Char]) = + if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y' + else ca + + private[this] def deduplicate(ca: Array[Char]) = + if (ca.length <= 1) ca + else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last + + @tailrec + private[this] def transcodeCenter(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = { + if (c == '\0' && r.length == 0) o + else { + def shift(d: Int, ca: Array[Char]) = { + val sca = r.splitAt(d - 1) + + ( 
+ if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, + if (sca._2.length > 0) sca._2.head else '\0', + if (sca._2.length > 1) sca._2.tail else Array.empty[Char], + ca + ) + } + + val t = { + (c: @switch) match { + case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a') + case 'b' | 'c' | 'd' | 'f' | 'g' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' | 'y' => shift(1, o :+ c) + case 'e' => + if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f')) + else shift(1, o :+ 'a') + case 'h' => + if (l.length >= 1 && (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head)))) shift(1, o) + else shift(1, o :+ c) + case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c') + case 'm' => shift(1, o :+ 'n') + case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c) + case 'q' => shift(1, o :+ 'g') + case 's' => + if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c) + else shift(1, o :+ c) + case 'w' => + if (l.length >= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o) + else shift(1, o :+ c) + case 'z' => shift(1, o :+ 's') + case _ => shift(1, o) + } + } + + transcodeCenter(t._1, t._2, t._3, t._4) + } + } + + private[this] def transcodeLeft(ca: Array[Char]) = { + if (ca.length == 0) (Array.empty[Char], ca) + else { + lazy val tr2 = ca.takeRight(ca.length - 2) + lazy val tr3 = ca.takeRight(ca.length - 3) + + (ca.head: @switch) match { + case 'k' if (ca.length >= 2 && ca(1) == 'n') => (Array('n', 'n'), tr2) + case 'k' => (Array('c'), ca.tail) + case 'm' if (ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c')) => (Array('m', 'c'), tr3) + case 'p' if (ca.length >= 2 && (ca(1) == 'h' || ca(1) == 'f')) => (Array('f', 'f'), tr2) + case 's' if (ca.length >= 3 && (ca(1) == 'c' && ca(2) == 'h')) => (Array('s', 's'), tr3) + case _ => (Array(ca.head), ca.tail) + } + } + } + + private[this] def transcodeRight(ca: Array[Char]) = { + if (ca.length >= 2) { + val lc = 
ca(ca.length - 1) + val lcm1 = ca(ca.length - 2) + lazy val t2 = ca.take(ca.length - 2) + + (lc: @switch) match { + case 'd' if (lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d')) + case 'e' if (lcm1 == 'e' || lcm1 == 'i') => (t2, Array('y')) + case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => (t2, Array('d')) + case _ => (ca, Array.empty[Char]) + } + } else (ca, Array.empty[Char]) + } +} + +object NysiisAlgorithm { + private lazy val self = apply() + + def apply(): NysiisAlgorithm = new NysiisAlgorithm with StringFilter + + def compute(charArray: Array[Char]) = self.compute(charArray) + + def compute(string: String) = self.compute(string) +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala new file mode 100755 index 0000000..6d1c22c --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala @@ -0,0 +1,40 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.{StringFilter, StringMetric} +import com.rockymadden.stringmetric.Alphabet.Alpha + +/** An implementation of the NYSIIS metric. 
*/ +class NysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter => + final override def compare(charArray1: Array[Char], charArray2: Array[Char]) + (implicit di: DummyImplicit): Option[Boolean] = { + /* Result: None when either filtered input is empty or its first char fails the Alpha check; Some(false) when the first chars differ; otherwise Some(whether the two NYSIIS codes have the same elements). */ + val unequal = (c1: Char, c2: Char) => { + val lc1 = c1.toLower + val lc2 = c2.toLower + /* Case-insensitive first-char test that treats 'k' as 'c'. */ + (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2) + } + /* Inputs go through the mixed-in StringFilter first; fca2 is lazy, so it is only filtered if the fca1 checks below do not already short-circuit to None. */ + val fca1 = filter(charArray1) + lazy val fca2 = filter(charArray2) + /* NOTE(review): `Alpha isSuperset` is defined in Alphabet, outside this view - presumably an alphabetic-character test; confirm. The `_.length > 0` guards turn an empty code from either side into None. */ + if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None + else if (unequal(fca1.head, fca2.head)) Some(false) + else NysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(ny1 => + NysiisAlgorithm.compute(fca2).filter(_.length > 0).map(ny1.sameElements(_)) + ) + } + /* String overload: converts both arguments to Array[Char] and calls the overload above. */ + final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = + compare(string1.toCharArray, string2.toCharArray) +} + /* Companion: `self` is a lazily created default instance built by apply(); both compare overloads below forward to it. */ +object NysiisMetric { + private lazy val self = apply() + /* Builds a new NysiisMetric with the plain StringFilter trait mixed in. */ + def apply(): NysiisMetric = new NysiisMetric with StringFilter + /* Array[Char] convenience entry point using the shared default instance. */ + def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2) + /* String convenience entry point using the shared default instance. */ + def compare(string1: String, string2: String) = self.compare(string1, string2) +} diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala new file mode 100755 index 0000000..334e9e3 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala @@ -0,0 +1,135 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter} +import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} +import scala.annotation.{switch, tailrec} + +/** An implementation of the refined NYSIIS algorithm. 
*/
+class RefinedNysiisAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
+  /**
+   * Computes the refined NYSIIS code of a character array.
+   *
+   * Yields None when the filtered input is empty or starts with a character that Alpha is not a superset of.
+   * Otherwise the input is lower-cased, trailing 's'/'z' characters after the first character are dropped, the head
+   * and the last two characters are rewritten, and the result is transcoded character by character. Unless the
+   * transcoded result is a single character, trailing 'a' characters are dropped, a terminal "ay" becomes "y" and
+   * adjacent duplicates are collapsed.
+   */
+  final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
+    val fca = filter(charArray)
+
+    if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+    else {
+      val lfca = fca.map(_.toLower)
+      // The first character is kept apart so that it is never removed by the trailing 's'/'z' clean up.
+      val tlh = transcodeLast(transcodeHead(lfca.head +: cleanLast(lfca.tail, Set('s', 'z'))))
+      val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char])
+
+      if (t.length == 1) Some(t)
+      else Some(deduplicate(t.head +: cleanTerminal(cleanLast(t.tail, Set('a')))))
+    }
+  }
+
+  /** String variant; delegates to the character array computation and joins the result into a string. */
+  final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
+    compute(string.toCharArray).map(_.mkString)
+
+  // Drops the whole trailing run of characters that are contained in s.
+  private[this] def cleanLast(ca: Array[Char], s: Set[Char]) =
+    if (ca.length == 0) ca
+    else if(s.contains(ca.last)) ca.dropRight(ca.reverseIterator.takeWhile(c => s.contains(c)).length)
+    else ca
+
+  // Replaces a terminal "ay" with "y".
+  private[this] def cleanTerminal(ca: Array[Char]) =
+    if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y'
+    else ca
+
+  // Collapses runs of equal adjacent characters: a character is kept only when its successor differs, and the last
+  // character is always appended.
+  private[this] def deduplicate(ca: Array[Char]) =
+    if (ca.length <= 1) ca
+    else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last
+
+  // Walks the input one step at a time. l holds the characters already consumed, c is the current character, r the
+  // characters still to come and o the output built so far. '\0' marks "no current character"; the walk ends once
+  // that marker is reached and r is empty.
+  @tailrec
+  private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
+    if (c == '\0' && r.length == 0) o
+    else {
+      // Consumes d characters (c plus the first d - 1 characters of r) and yields the next (l, c, r, o) state with
+      // ca as the new output.
+      def shift(d: Int, ca: Array[Char]) = {
+        val sca = r.splitAt(d - 1)
+
+        (
+          if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c,
+          if (sca._2.length > 0) sca._2.head else '\0',
+          if (sca._2.length > 1) sca._2.tail else Array.empty[Char],
+          ca
+        )
+      }
+
+      val t = {
+        (c: @switch) match {
+          // These vowels are kept as-is in first position and become 'a' anywhere else.
+          case 'a' | 'i' | 'o' | 'u' =>
+            if (l.length == 0) shift(1, o :+ c)
+            else shift(1, o :+ 'a')
+          case 'b' | 'c' | 'f' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' => shift(1, o :+ c)
+          // "dg" becomes 'g'.
+          case 'd' =>
+            if (r.length >= 1 && r.head == 'g') shift(2, o :+ 'g') else shift(1, o :+ c)
+          // Outside first position "ev" becomes "af" and a lone 'e' becomes 'a'.
+          case 'e' =>
+            if (l.length == 0) shift(1, o :+ c)
+            else if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f'))
+            else shift(1, o :+ 'a')
+          // "ght" becomes "gt".
+          case 'g' =>
+            if (r.length >= 2 && r.head == 'h' && r(1) == 't') shift(3, o ++ Array('g', 't'))
+            else shift(1, o :+ c)
+          // Outside first position 'h' is dropped when LowercaseVowel is not a superset of the previous character
+          // or of the next one.
+          case 'h' =>
+            if (l.length == 0) shift(1, o :+ c)
+            else if (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head))) shift(1, o)
+            else shift(1, o :+ c)
+          // "kn" becomes 'n', any other 'k' becomes 'c'.
+          case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c')
+          case 'm' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'n')
+          // "ph" becomes 'f'.
+          case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c)
+          case 'q' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'g')
+          // "sch" and "sh" both become 's'.
+          case 's' =>
+            if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c)
+            else if (r.length >= 1 && r.head == 'h') shift(2, o :+ c)
+            else shift(1, o :+ c)
+          // 'w' is dropped when LowercaseVowel is a superset of the previous character; "wr" becomes 'r'.
+          case 'w' =>
+            if (l.length >= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o)
+            else if (r.length >= 1 && r.head == 'r') shift(2, o :+ 'r')
+            else shift(1, o :+ c)
+          // "yw" collapses to a single character; a 'y' with characters on both sides becomes 'a'.
+          case 'y' =>
+            if (l.length >= 1 && r.length >= 2 && r.head == 'w') shift(2, o :+ 'a')
+            else if (r.length >= 1 && r.head == 'w') shift(2, o :+ c)
+            else if (l.length >= 1 && r.length >= 1) shift(1, o :+ 'a')
+            else shift(1, o :+ c)
+          case 'z' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 's')
+          // Anything else is consumed without producing output.
+          case _ => shift(1, o)
+        }
+      }
+
+      transcode(t._1, t._2, t._3, t._4)
+    }
+  }
+
+  // Rewrites a leading "mac" to "mc" and a leading "pf" to "f".
+  private[this] def transcodeHead(ca: Array[Char]) = {
+    if (ca.length == 0) ca
+    else
+      (ca.head: @switch) match {
+        case 'm' if (ca.length >= 3 && ca(1) == 'a' && ca(2) == 'c') => Array('m', 'c') ++ ca.takeRight(ca.length - 3)
+        case 'p' if (ca.length >= 2 && ca(1) == 'f') => 'f' +: ca.takeRight(ca.length - 2)
+        case _ => ca
+      }
+  }
+
+  // Rewrites the last two characters: "nd"/"rd" and "dt"/"nt"/"rt" become 'd', "ee"/"ie"/"ye" become 'y', "ex"
+  // becomes "ec" and "ix" becomes "ic". Inputs shorter than two characters are returned unchanged.
+  private[this] def transcodeLast(ca: Array[Char]) = {
+    if (ca.length >= 2) {
+      val lc = ca(ca.length - 1)
+      val lcm1 = ca(ca.length - 2)
+      lazy val t2 = ca.take(ca.length - 2) // Everything before the last two characters; only built when needed.
+
+      (lc: @switch) match {
+        case 'd' if (lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd'
+        case 'e' if (lcm1 == 'e' || lcm1 == 'i' || lcm1 =='y') => t2 :+ 'y'
+        case 't' if (lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r') => t2 :+ 'd'
+        case 'x' if (lcm1 == 'e') => t2 ++ Array('e', 'c')
+        case 'x' if (lcm1 == 'i') => t2 ++ Array('i', 'c')
+        case _ => ca
+      }
+    } else ca
+  }
+}
+
+// Convenience access to a lazily created RefinedNysiisAlgorithm that mixes in StringFilter directly.
+object RefinedNysiisAlgorithm {
+  private lazy val self = apply()
+
+  def apply(): RefinedNysiisAlgorithm = new RefinedNysiisAlgorithm with StringFilter
+
+  def compute(charArray: Array[Char]) = self.compute(charArray)
+
+  def compute(string: String) = self.compute(string)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
new file mode 100755
index 0000000..c96cc52
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala
@@ -0,0 +1,40 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+
+/** An implementation of the refined NYSIIS metric.
*/
+class RefinedNysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
+  /**
+   * Compares two character arrays by their refined NYSIIS codes.
+   *
+   * Both inputs are filtered first. Yields None when either filtered input is empty or starts with a character that
+   * Alpha is not a superset of, Some(false) when the first characters differ (ignoring case, with 'k' treated as
+   * 'c'), and otherwise whether the two non-empty refined NYSIIS codes hold the same elements.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Boolean] = {
+
+    // Lower-cases a character and folds 'k' into 'c' so that both compare as equal.
+    def fold(ch: Char): Char = {
+      val lower = ch.toLower
+
+      if (lower == 'k') 'c' else lower
+    }
+
+    // True when the array is non-empty and Alpha is a superset of its first character.
+    def startsWithAlpha(ca: Array[Char]): Boolean = ca.length > 0 && (Alpha isSuperset ca.head)
+
+    val first = filter(charArray1)
+    lazy val second = filter(charArray2) // Only filtered once the first input has passed its checks.
+
+    if (!startsWithAlpha(first) || !startsWithAlpha(second)) None
+    else if (fold(first.head) != fold(second.head)) Some(false)
+    else
+      for {
+        code1 <- RefinedNysiisAlgorithm.compute(first) if code1.length > 0
+        code2 <- RefinedNysiisAlgorithm.compute(second) if code2.length > 0
+      } yield code1.sameElements(code2)
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Convenience access to a lazily created RefinedNysiisMetric that mixes in StringFilter directly. */
+object RefinedNysiisMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): RefinedNysiisMetric = new RefinedNysiisMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  /** Compares two strings with the shared instance. */
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
new file mode 100755
index 0000000..f22bde1
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala
@@ -0,0 +1,75 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+import scala.annotation.{switch, tailrec}
+
+/** An implementation of the refined Soundex algorithm.
*/
+class RefinedSoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
+  /**
+   * Computes the refined Soundex code of a character array.
+   *
+   * Yields None when the filtered input is empty or starts with a character that Alpha is not a superset of.
+   * Otherwise the code is the lower-cased first character followed by the digits produced for every character of
+   * the filtered input, the first character included.
+   */
+  final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
+    val fca = filter(charArray)
+
+    if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+    else Some(transcode(fca, Array(fca.head.toLower)))
+  }
+
+  /** String variant; delegates to the character array computation and joins the result into a string. */
+  final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
+    compute(string.toCharArray).map(_.mkString)
+
+  // Consumes i one character per step and appends the resulting digit, if any, to o.
+  @tailrec
+  private[this] def transcode(i: Array[Char], o: Array[Char]): Array[Char] = {
+    if (i.length == 0) o
+    else {
+      val c = i.head.toLower
+      // Unconditional mapping from a lower-case letter to its digit; '\0' means "no digit".
+      val m2 = (mc: Char) => (mc: @switch) match {
+        case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' => '0'
+        case 'b' | 'p' => '1'
+        case 'f' | 'v' => '2'
+        case 'c' | 'k' | 's' => '3'
+        case 'g' | 'j' => '4'
+        case 'q' | 'x' | 'z' => '5'
+        case 'd' | 't' => '6'
+        case 'l' => '7'
+        case 'm' | 'n' => '8'
+        case 'r' => '9'
+        case _ => '\0'
+      }
+      // Same mapping, except that a digit equal to the previous digit pc is suppressed ('\0').
+      val m1 = (mc: Char, pc: Char) => (mc: @switch) match {
+        case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' if pc != '0' => '0'
+        case 'b' | 'p' if pc != '1' => '1'
+        case 'f' | 'v' if pc != '2' => '2'
+        case 'c' | 'k' | 's' if pc != '3' => '3'
+        case 'g' | 'j' if pc != '4' => '4'
+        case 'q' | 'x' | 'z' if pc != '5' => '5'
+        case 'd' | 't' if pc != '6' => '6'
+        case 'l' if pc != '7' => '7'
+        case 'm' | 'n' if pc != '8' => '8'
+        case 'r' if pc != '9' => '9'
+        case _ => '\0'
+      }
+      val a =
+        // Code twice.
+        // While o only holds the leading letter, the digit is produced unconditionally.
+        if (o.length == 1) m2(c)
+        // Code once.
+        // The previous digit is the last output, mapped through m2 first when that output is not a digit.
+        else m1(
+          c,
+          (o.last: @switch) match {
+            case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => o.last
+            case _ => m2(o.last)
+          }
+        )
+
+      transcode(i.tail, if (a != '\0') o :+ a else o)
+    }
+  }
+}
+
+// Convenience access to a lazily created RefinedSoundexAlgorithm that mixes in StringFilter directly.
+object RefinedSoundexAlgorithm {
+  private lazy val self = apply()
+
+  def apply(): RefinedSoundexAlgorithm = new RefinedSoundexAlgorithm with StringFilter
+
+  def compute(charArray: Array[Char]) = self.compute(charArray)
+
+  def compute(string: String) = self.compute(string)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
new file mode 100755
index 0000000..eb2f01e
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala
@@ -0,0 +1,33 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+
+/** An implementation of the refined Soundex metric.
*/
+class RefinedSoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
+  /**
+   * Compares two character arrays by their refined Soundex codes.
+   *
+   * Both inputs are filtered first. Yields None when either filtered input is empty or starts with a character that
+   * Alpha is not a superset of, Some(false) when the first characters differ ignoring case, and otherwise whether
+   * the two non-empty refined Soundex codes hold the same elements.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Boolean] = {
+
+    // True when the array is non-empty and Alpha is a superset of its first character.
+    def startsWithAlpha(ca: Array[Char]): Boolean = ca.length > 0 && (Alpha isSuperset ca.head)
+
+    val first = filter(charArray1)
+    lazy val second = filter(charArray2) // Only filtered once the first input has passed its checks.
+
+    if (!startsWithAlpha(first) || !startsWithAlpha(second)) None
+    else if (first.head.toLower != second.head.toLower) Some(false)
+    else
+      for {
+        code1 <- RefinedSoundexAlgorithm.compute(first) if code1.length > 0
+        code2 <- RefinedSoundexAlgorithm.compute(second) if code2.length > 0
+      } yield code1.sameElements(code2)
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Convenience access to a lazily created RefinedSoundexMetric that mixes in StringFilter directly. */
+object RefinedSoundexMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): RefinedSoundexMetric = new RefinedSoundexMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  /** Compares two strings with the shared instance. */
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
new file mode 100755
index 0000000..361047d
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala
@@ -0,0 +1,73 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+import scala.annotation.{switch, tailrec}
+
+/** An implementation of the Soundex algorithm.
*/
+class SoundexAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
+  /**
+   * Computes the Soundex code of a character array.
+   *
+   * Yields None when the filtered input is empty or starts with a character that Alpha is not a superset of.
+   * Otherwise the code is the lower-cased first character followed by the digits produced for the remaining
+   * characters, padded with '0' to a length of four.
+   */
+  final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
+    val fca = filter(charArray)
+
+    if (fca.length == 0 || !(Alpha isSuperset fca.head)) None
+    else {
+      val fc = fca.head.toLower
+
+      Some(transcode(fca.tail, fc, Array(fc)).padTo(4, '0'))
+    }
+  }
+
+  /** String variant; delegates to the character array computation and joins the result into a string. */
+  final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
+    compute(string.toCharArray).map(_.mkString)
+
+  // Consumes i one character per step. pc is the previously consumed (lower-cased) character and o the output so far.
+  @tailrec
+  private[this] def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] = {
+    if (i.length == 0) o
+    else {
+      val c = i.head.toLower
+      // Unconditional mapping from a lower-case letter to its digit; '\0' means "no digit".
+      val m2 = (mc: Char) => (mc: @switch) match {
+        case 'b' | 'f' | 'p' | 'v' => '1'
+        case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2'
+        case 'd' | 't' => '3'
+        case 'l' => '4'
+        case 'm' | 'n' => '5'
+        case 'r' => '6'
+        case _ => '\0'
+      }
+      // Same mapping, except that a digit equal to the previous digit pc is suppressed ('\0').
+      val m1 = (mc: Char, pc: Char) => (mc: @switch) match {
+        case 'b' | 'f' | 'p' | 'v' if pc != '1' => '1'
+        case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' if pc != '2' => '2'
+        case 'd' | 't' if pc != '3' => '3'
+        case 'l' if pc != '4' => '4'
+        case 'm' | 'n' if pc != '5' => '5'
+        case 'r' if pc != '6' => '6'
+        case _ => '\0'
+      }
+      val a = pc match {
+        // Code twice.
+        // After one of these characters the digit is produced even when it repeats the previous digit.
+        case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c)
+        // Code once.
+        // The previous digit is the last output, mapped through m2 first when that output is not a digit.
+        case _ => m1(
+          c,
+          (o.last: @switch) match {
+            case '1' | '2' | '3' | '4' | '5' | '6' => o.last
+            case _ => m2(o.last)
+          }
+        )
+      }
+
+      // Stop as soon as the fourth character has been produced.
+      if (o.length == 3 && a != '\0') o :+ a
+      else transcode(i.tail, c, if (a != '\0') o :+ a else o)
+    }
+  }
+}
+
+// Convenience access to a lazily created SoundexAlgorithm that mixes in StringFilter directly.
+object SoundexAlgorithm {
+  private lazy val self = apply()
+
+  def apply(): SoundexAlgorithm = new SoundexAlgorithm with StringFilter
+
+  def compute(charArray: Array[Char]) = self.compute(charArray)
+
+  def compute(string: String) = self.compute(string)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
new file mode 100755
index 0000000..e4daa17
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala
@@ -0,0 +1,33 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+import com.rockymadden.stringmetric.Alphabet.Alpha
+
+/** An implementation of the Soundex metric.
*/
+class SoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter =>
+  /**
+   * Compares two character arrays by their Soundex codes.
+   *
+   * Both inputs are filtered first. Yields None when either filtered input is empty or starts with a character that
+   * Alpha is not a superset of, Some(false) when the first characters differ ignoring case, and otherwise whether
+   * the two non-empty Soundex codes hold the same elements.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Boolean] = {
+
+    // True when the array is non-empty and Alpha is a superset of its first character.
+    def startsWithAlpha(ca: Array[Char]): Boolean = ca.length > 0 && (Alpha isSuperset ca.head)
+
+    val first = filter(charArray1)
+    lazy val second = filter(charArray2) // Only filtered once the first input has passed its checks.
+
+    if (!startsWithAlpha(first) || !startsWithAlpha(second)) None
+    else if (first.head.toLower != second.head.toLower) Some(false)
+    else
+      for {
+        code1 <- SoundexAlgorithm.compute(first) if code1.length > 0
+        code2 <- SoundexAlgorithm.compute(second) if code2.length > 0
+      } yield code1.sameElements(code2)
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Convenience access to a lazily created SoundexMetric that mixes in StringFilter directly. */
+object SoundexMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): SoundexMetric = new SoundexMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  /** Compares two strings with the shared instance. */
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
new file mode 100755
index 0000000..5e01bb1
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala
@@ -0,0 +1,42 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
+import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+
+/**
+ * An implementation of the Dice/Sorensen metric. This implementation differs in that n-gram size is required.
+ * Traditionally, the algorithm uses bigrams.
+ */
+class DiceSorensenMetric extends StringMetric[Int, Double] { this: StringFilter =>
+  /**
+   * Compares two character arrays by the n-grams they share.
+   *
+   * Yields None when either filtered input is shorter than n, Some(1d) when the filtered inputs hold the same
+   * elements, and otherwise twice the number of shared n-grams divided by the total number of n-grams.
+   * Throws IllegalArgumentException when n is not positive.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val first = filter(charArray1)
+    lazy val second = filter(charArray2)
+
+    // An input shorter than n cannot be split into n-grams, so it is not possible to compare.
+    if (first.length < n || second.length < n) None
+    else if (first.sameElements(second)) Some(1d)
+    else
+      for {
+        grams1 <- NGramTokenizer.tokenize(first)(n)
+        grams2 <- NGramTokenizer.tokenize(second)(n)
+      } yield {
+        val shared = scoreMatches((grams1.map(_.mkString), grams2.map(_.mkString)))
+
+        (2d * shared) / (grams1.length + grams2.length)
+      }
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)(n)
+
+  // Number of n-grams found in both token arrays.
+  private[this] def scoreMatches(mt: MatchTuple[String]) = {
+    val common = mt._1.intersect(mt._2)
+
+    common.length
+  }
+}
+
+/** Convenience access to a lazily created DiceSorensenMetric that mixes in StringFilter directly. */
+object DiceSorensenMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): DiceSorensenMetric = new DiceSorensenMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance using n-grams of size n. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
+
+  /** Compares two strings with the shared instance using n-grams of size n. */
+  def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
new file mode 100755
index 0000000..95ff203
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala
@@ -0,0 +1,37 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+
+/** An implementation of the Hamming metric.
*/
+class HammingMetric extends StringMetric[DummyImplicit, Int] { this: StringFilter =>
+  /**
+   * Counts the positions at which two character arrays differ.
+   *
+   * Yields None when either filtered input is empty or the filtered inputs differ in length, Some(0) when they
+   * hold the same elements, and otherwise the number of differing positions.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Int] = {
+
+    val first = filter(charArray1)
+    lazy val second = filter(charArray2)
+
+    if (first.length == 0 || second.length == 0 || first.length != second.length) None
+    else if (first.sameElements(second)) Some(0)
+    else Some(hamming((first, second)))
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Int] =
+    compare(string1.toCharArray, string2.toCharArray)
+
+  // Number of indices at which the two equally long arrays hold different characters.
+  private[this] def hamming(ct: CompareTuple[Char]) = {
+    require(ct._1.length == ct._2.length)
+
+    ct._1.indices.count(i => ct._1(i) != ct._2(i))
+  }
+}
+
+/** Convenience access to a lazily created HammingMetric that mixes in StringFilter directly. */
+object HammingMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): HammingMetric = new HammingMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  /** Compares two strings with the shared instance. */
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
new file mode 100755
index 0000000..e32c926
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala
@@ -0,0 +1,37 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringMetric, StringFilter}
+import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+
+/** An implementation of the Jaccard metric.
*/
+class JaccardMetric extends StringMetric[Int, Double] { this: StringFilter =>
+  /**
+   * Compares two character arrays by the n-grams they share.
+   *
+   * Yields None when either filtered input is shorter than n, Some(1d) when the filtered inputs hold the same
+   * elements, and otherwise the number of shared n-grams divided by the number of n-grams in both inputs minus the
+   * shared ones. Throws IllegalArgumentException when n is not positive.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val first = filter(charArray1)
+    lazy val second = filter(charArray2)
+
+    // An input shorter than n cannot be split into n-grams, so it is not possible to compare.
+    if (first.length < n || second.length < n) None
+    else if (first.sameElements(second)) Some(1d)
+    else
+      for {
+        grams1 <- NGramTokenizer.tokenize(first)(n)
+        grams2 <- NGramTokenizer.tokenize(second)(n)
+      } yield {
+        val tokens1 = grams1.map(_.mkString)
+        val tokens2 = grams2.map(_.mkString)
+        val shared = tokens1.intersect(tokens2).length
+
+        shared.toDouble / (grams1.length + grams2.length - shared)
+      }
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)(n)
+}
+
+/** Convenience access to a lazily created JaccardMetric that mixes in StringFilter directly. */
+object JaccardMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): JaccardMetric = new JaccardMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance using n-grams of size n. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
+
+  /** Compares two strings with the shared instance using n-grams of size n. */
+  def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
new file mode 100755
index 0000000..b7ce2c5
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala
@@ -0,0 +1,87 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, MatchTuple, StringFilter, StringMetric}
+import scala.collection.mutable.{ArrayBuffer, HashSet}
+
+/**
+ * An implementation of the Jaro metric. One differing detail in this implementation is that if a character is matched
+ * in string2, it cannot be matched upon again.
This results in a more penalized distance in these scenarios.
+ */
+class JaroMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter =>
+  /**
+   * Compares two character arrays with the Jaro formula.
+   *
+   * Yields None when either filtered input is empty, Some(1d) when the filtered inputs hold the same elements and
+   * Some(0d) when no characters match. Otherwise the result is the mean of the match ratio against each input and
+   * the ratio of matches that are not transposed.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Double] = {
+
+    val fca1 = filter(charArray1)
+    lazy val fca2 = filter(charArray2)
+
+    if (fca1.length == 0 || fca2.length == 0) None
+    else if (fca1.sameElements(fca2)) Some(1d)
+    else {
+      val mt = `match`(fca1, fca2)
+      val ms = scoreMatches(mt._1, mt._2)
+
+      if (ms == 0) Some(0d)
+      else {
+        val ts = scoreTranspositions(mt._1, mt._2)
+
+        Some(((ms.toDouble / fca1.length) + (ms.toDouble / fca2.length) + ((ms.toDouble - ts) / ms)) / 3)
+      }
+    }
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)
+
+  // Pairs every character of the first array with the first equal, not yet used character of the second array
+  // inside a window around the same index. Yields the matched characters of the first array in the order they were
+  // found and the matched characters of the second array in index order.
+  private[this] def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = {
+    lazy val window = math.abs((math.max(ct._1.length, ct._2.length) / 2d).floor.toInt - 1)
+    val one = ArrayBuffer.empty[Int] // Matched indices of the first array.
+    val two = HashSet.empty[Int] // Matched indices of the second array; each index can be used only once.
+    var i = 0
+    var bi = false // Set once the window starts past the end of the second array, which ends the outer loop.
+
+    while (i < ct._1.length && !bi) {
+      val start = if (i - window <= 0) 0 else i - window
+      val end = if (i + window >= ct._2.length - 1) ct._2.length - 1 else i + window
+
+      if (start > ct._2.length - 1) bi = !bi
+      else {
+        var ii = start
+        var bii = false // Set on the first match, which ends the inner loop.
+
+        while (ii <= end && !bii) {
+          if (!two.contains(ii) && ct._1(i) == ct._2(ii)) {
+            one += i
+            two += ii
+            bii = !bii
+          } else ii += 1
+        }
+
+        i += 1
+      }
+    }
+
+    (one.toArray.map(ct._1(_)), two.toArray.sortWith(_ < _).map(ct._2(_)))
+  }
+
+  // Number of matched characters; both sides of the tuple must have the same length.
+  private[this] def scoreMatches(mt: MatchTuple[Char]) = {
+    require(mt._1.length == mt._2.length)
+
+    mt._1.length
+  }
+
+  // Half the number of positions at which the matched characters differ, rounded down.
+  private[this] def scoreTranspositions(mt: MatchTuple[Char]) = {
+    require(mt._1.length == mt._2.length)
+
+    (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt
+  }
+}
+
+// Convenience access to a lazily created JaroMetric that mixes in StringFilter directly.
+object JaroMetric {
+  private lazy val self = apply()
+
+  def apply(): JaroMetric = new JaroMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
new file mode 100755
index 0000000..4e9aebd
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala
@@ -0,0 +1,40 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringFilter, StringMetric}
+
+/**
+ * An implementation of the Jaro-Winkler metric. One differing detail in this implementation is that if a character is
+ * matched in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios
+ * (e.g. comparing henka and henkan distance is 0.9666 versus the typical 0.9722).
+ */
+class JaroWinklerMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter =>
+  /**
+   * Compares two character arrays with the Jaro-Winkler formula.
+   *
+   * The filtered inputs are scored by JaroMetric. Scores of exactly 0 or 1 are passed through; any other score is
+   * raised by 0.1 per shared leading character (at most four) times the remaining distance to 1.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Double] = {
+
+    val first = filter(charArray1)
+    val second = filter(charArray2)
+
+    JaroMetric.compare(first, second).map { jaro =>
+      if (jaro == 0d) 0d
+      else if (jaro == 1d) 1d
+      else {
+        // Length of the common prefix, capped at four characters.
+        val sharedPrefix = first.zip(second).takeWhile { case (a, b) => a == b }.length
+        val boosted = if (sharedPrefix <= 4) sharedPrefix else 4
+
+        jaro + (boosted * 0.1d * (1 - jaro))
+      }
+    }
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)
+}
+
+/** Convenience access to a lazily created JaroWinklerMetric that mixes in StringFilter directly. */
+object JaroWinklerMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): JaroWinklerMetric = new JaroWinklerMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  /** Compares two strings with the shared instance. */
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
new file mode 100755
index 0000000..47dff23
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala
@@ -0,0 +1,58 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+
+/** An implementation of the Levenshtein metric.
*/
+class LevenshteinMetric extends StringMetric[DummyImplicit, Int] { this: StringFilter =>
+  /**
+   * Computes the Levenshtein distance between two character arrays.
+   *
+   * Yields None when either filtered input is empty, Some(0) when the filtered inputs hold the same elements, and
+   * otherwise the minimum number of single character deletions, insertions and substitutions that turn the first
+   * input into the second.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Int] = {
+
+    val fca1 = filter(charArray1)
+    lazy val fca2 = filter(charArray2)
+
+    if (fca1.length == 0 || fca2.length == 0) None
+    else if (fca1.sameElements(fca2)) Some(0)
+    else Some(levenshtein((fca1, fca2)))
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Int] =
+    compare(string1.toCharArray, string2.toCharArray)
+
+  // Iterative, row by row evaluation of the edit distance recurrence. Only the previous and the current row are
+  // kept, so long inputs neither exhaust the call stack nor require a full matrix.
+  private[this] def levenshtein(ct: CompareTuple[Char]) = {
+    val source = ct._1
+    val target = ct._2
+    var previous = Array.range(0, target.length + 1) // Row 0: turning the empty prefix into each target prefix.
+    var current = new Array[Int](target.length + 1)
+
+    for (r <- 1 to source.length) {
+      current(0) = r // Column 0: turning each source prefix into the empty prefix.
+
+      for (c <- 1 to target.length) {
+        current(c) =
+          if (source(r - 1) == target(c - 1)) previous(c - 1)
+          else math.min(
+            math.min(
+              previous(c) + 1, // Delete (left).
+              current(c - 1) + 1 // Insert (up).
+            ),
+            previous(c - 1) + 1 // Substitute (left-up).
+          )
+      }
+
+      // The finished row becomes the previous row; the old previous row is reused as scratch space.
+      val finished = current
+      current = previous
+      previous = finished
+    }
+
+    previous(target.length)
+  }
+}
+
+/** Convenience access to a lazily created LevenshteinMetric that mixes in StringFilter directly. */
+object LevenshteinMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): LevenshteinMetric = new LevenshteinMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  /** Compares two strings with the shared instance. */
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
new file mode 100755
index 0000000..e74e8eb
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala
@@ -0,0 +1,40 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
+import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+import scala.math
+
+/** An implementation of the N-Gram metric. */
+class NGramMetric extends StringMetric[Int, Double] { this: StringFilter =>
+  /**
+   * Compares two character arrays by the n-grams they share.
+   *
+   * Yields None when either filtered input is shorter than n, Some(1d) when the filtered inputs hold the same
+   * elements, and otherwise the number of shared n-grams divided by the larger n-gram count.
+   * Throws IllegalArgumentException when n is not positive.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val fca1 = filter(charArray1)
+    lazy val fca2 = filter(charArray2)
+
+    if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare.
+    else if (fca1.sameElements(fca2)) Some(1d)
+    else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg =>
+      NGramTokenizer.tokenize(fca2)(n).map { ca2bg =>
+        val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString)))
+
+        ms.toDouble / math.max(ca1bg.length, ca2bg.length)
+      }
+    }
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)(n)
+
+  // Number of n-grams found in both token arrays.
+  private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
+}
+
+/** Convenience access to a lazily created NGramMetric that mixes in StringFilter directly. */
+object NGramMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): NGramMetric = new NGramMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance using n-grams of size n. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
+
+  /** Compares two strings with the shared instance using n-grams of size n. */
+  def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
new file mode 100755
index 0000000..a543a7e
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala
@@ -0,0 +1,40 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{StringMetric, MatchTuple, StringFilter}
+import com.rockymadden.stringmetric.tokenization.NGramTokenizer
+import scala.math
+
+/** An implementation of the overlap metric. */
+class OverlapMetric extends StringMetric[Int, Double] { this: StringFilter =>
+  /**
+   * Compares two character arrays by the n-grams they share.
+   *
+   * Yields None when either filtered input is shorter than n, Some(1d) when the filtered inputs hold the same
+   * elements, and otherwise the number of shared n-grams divided by the smaller n-gram count.
+   * Throws IllegalArgumentException when n is not positive.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit n: Int): Option[Double] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val fca1 = filter(charArray1)
+    lazy val fca2 = filter(charArray2)
+
+    if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare.
+    else if (fca1.sameElements(fca2)) Some(1d)
+    else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg =>
+      NGramTokenizer.tokenize(fca2)(n).map { ca2bg =>
+        val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString)))
+
+        ms.toDouble / (math.min(ca1bg.length, ca2bg.length))
+      }
+    }
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit n: Int): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)(n)
+
+  // Number of n-grams found in both token arrays.
+  private[this] def scoreMatches(mt: MatchTuple[String]) = mt._1.intersect(mt._2).length
+}
+
+/** Convenience access to a lazily created OverlapMetric that mixes in StringFilter directly. */
+object OverlapMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): OverlapMetric = new OverlapMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance using n-grams of size n. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int) = self.compare(charArray1, charArray2)(n)
+
+  /** Compares two strings with the shared instance using n-grams of size n. */
+  def compare(string1: String, string2: String)(n: Int) = self.compare(string1, string2)(n)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
new file mode 100755
index 0000000..1017b1f
--- /dev/null
+++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala
@@ -0,0 +1,57 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.{CompareTuple, StringFilter, StringMetric}
+
+/** An implementation of the Ratcliff/Obershelp metric.
*/
+class RatcliffObershelpMetric extends StringMetric[DummyImplicit, Double] { this: StringFilter =>
+  /**
+   * Compares two character arrays by their common contiguous runs.
+   *
+   * Yields None when either filtered input is empty, Some(1d) when the filtered inputs hold the same elements, and
+   * otherwise twice the total length of the common runs divided by the combined input length.
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit di: DummyImplicit): Option[Double] = {
+
+    val first = filter(charArray1)
+    lazy val second = filter(charArray2)
+
+    if (first.length == 0 || second.length == 0) None
+    else if (first.sameElements(second)) Some(1d)
+    else {
+      val matched = commonSequences((first, second)).foldLeft(0)((total, run) => total + run.length)
+
+      Some(2d * matched / (first.length + second.length))
+    }
+  }
+
+  /** String variant; delegates to the character array comparison. */
+  final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Double] =
+    compare(string1.toCharArray, string2.toCharArray)
+
+  // Finds the longest run of characters that appears contiguously in both arrays. Yields its length together with
+  // the exclusive end index of the run in the first and in the second array; the earliest longest run wins.
+  private[this] def longestCommonSubstring(ct: CompareTuple[Char]) = {
+    val first = ct._1
+    val second = ct._2
+    // lengths(r + 1)(c + 1) holds the length of the common run that ends at first(r) and second(c).
+    val lengths = Array.ofDim[Int](first.length + 1, second.length + 1)
+    var best = (0, 0, 0) // Length, row, column.
+
+    for {
+      r <- first.indices
+      c <- second.indices
+      if first(r) == second(c)
+    } {
+      val run = lengths(r)(c) + 1
+
+      lengths(r + 1)(c + 1) = run
+      if (run > best._1) best = (run, r + 1, c + 1)
+    }
+
+    best
+  }
+
+  // Collects the longest common run and then, recursively, the common runs to its left and to its right.
+  private[this] def commonSequences(ct: CompareTuple[Char]): Array[Array[Char]] = {
+    val (length, row, column) = longestCommonSubstring(ct)
+
+    if (length == 0) Array.empty
+    else {
+      val left = (ct._1.take(row - length), ct._2.take(column - length))
+      val right = (ct._1.drop(row), ct._2.drop(column))
+
+      Array(ct._1.slice(row - length, row)) ++ commonSequences(left) ++ commonSequences(right)
+    }
+  }
+}
+
+/** Convenience access to a lazily created RatcliffObershelpMetric that mixes in StringFilter directly. */
+object RatcliffObershelpMetric {
+  private lazy val self = apply()
+
+  /** Creates a new metric instance with StringFilter mixed in. */
+  def apply(): RatcliffObershelpMetric = new RatcliffObershelpMetric with StringFilter
+
+  /** Compares two character arrays with the shared instance. */
+  def compare(charArray1: Array[Char], charArray2: Array[Char]) = self.compare(charArray1, charArray2)
+
+  /** Compares two strings with the shared instance. */
+  def compare(string1: String, string2: String) = self.compare(string1, string2)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala
b/core/source/core/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala new file mode 100755 index 0000000..976b01a --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala @@ -0,0 +1,61 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.{CompareTuple, StringMetric, StringFilter} +import scala.math.BigDecimal
+
+/** Levenshtein distance in which delete, insert and substitute each carry their own weight. */
+class WeightedLevenshteinMetric
+  extends StringMetric[(BigDecimal, BigDecimal, BigDecimal), Double] { this: StringFilter =>
+
+  /**
+   * Computes the weighted distance between two character arrays after filtering both.
+   * The options tuple is ordered (delete weight, insert weight, substitute weight).
+   *
+   * @return None when either filtered array is empty, Some(0d) when the filtered arrays hold the same
+   *         elements, otherwise the weighted distance as a Double.
+   * @throws IllegalArgumentException when any weight is negative (checked before any filtering).
+   */
+  final override def compare(charArray1: Array[Char], charArray2: Array[Char])
+    (implicit options: (BigDecimal, BigDecimal, BigDecimal)): Option[Double] = {
+
+    val (deleteWeight, insertWeight, substituteWeight) = options
+
+    if (deleteWeight < 0 || insertWeight < 0 || substituteWeight < 0)
+      throw new IllegalArgumentException("Expected valid weight options.")
+
+    val source = filter(charArray1)
+    // Lazy so the second input is only filtered once the first is known to be non-empty.
+    lazy val target = filter(charArray2)
+
+    if (source.isEmpty || target.isEmpty) None
+    else if (source.sameElements(target)) Some(0d)
+    else Some(weightedLevenshtein((source, target), options).toDouble)
+  }
+
+  /** String overload; same options order (delete, insert, substitute). Delegates to the array overload. */
+  final override def compare(string1: String, string2: String)
+    (implicit options: (BigDecimal, BigDecimal, BigDecimal)): Option[Double] =
+
+    compare(string1.toCharArray, string2.toCharArray)(options)
+
+  /** Fills the full cost matrix and returns its bottom-right cell. */
+  private[this] def weightedLevenshtein(ct: CompareTuple[Char], w: (BigDecimal, BigDecimal, BigDecimal)) = {
+    val source = ct._1
+    val target = ct._2
+    val (deleteWeight, insertWeight, substituteWeight) = w
+    val costs = Array.ofDim[BigDecimal](source.length + 1, target.length + 1)
+
+    // First column: r deletions. First row: c insertions.
+    for (r <- 0 to source.length) costs(r)(0) = deleteWeight * r
+    for (c <- 0 to target.length) costs(0)(c) = insertWeight * c
+
+    for (r <- 1 to source.length; c <- 1 to target.length) {
+      costs(r)(c) =
+        if (source(r - 1) == target(c - 1)) costs(r - 1)(c - 1) // Characters match: no extra cost.
+        else {
+          val viaDelete = costs(r - 1)(c) + deleteWeight
+          val viaInsert = costs(r)(c - 1) + insertWeight
+          val viaSubstitute = costs(r - 1)(c - 1) + substituteWeight
+
+          viaDelete.min(viaInsert.min(viaSubstitute))
+        }
+    }
+
+    costs(source.length)(target.length)
+  }
+}
+
+/** Pass-through entry points that delegate to a lazily created default WeightedLevenshteinMetric. */
+object WeightedLevenshteinMetric {
+  private lazy val instance = apply()
+
+  /** Returns a new WeightedLevenshteinMetric with the base StringFilter mixed in. */
+  def apply(): WeightedLevenshteinMetric = new WeightedLevenshteinMetric with StringFilter
+
+  def compare(charArray1: Array[Char], charArray2: Array[Char])(options: (BigDecimal, BigDecimal, BigDecimal)) =
+    instance.compare(charArray1, charArray2)(options)
+
+  def compare(string1: String, string2: String)(options: (BigDecimal, BigDecimal, BigDecimal)) =
+    instance.compare(string1, string2)(options)
+}
diff --git a/core/source/core/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala b/core/source/core/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala new file mode 100755 index 0000000..d66fd62 --- /dev/null +++ b/core/source/core/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizer.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.tokenization + +import com.rockymadden.stringmetric.{StringFilter, StringTokenizer} +import scala.annotation.tailrec + +/** An implementation of the N-Gram tokenizer.
*/
+class NGramTokenizer extends StringTokenizer[Int, Array[String]] { this: StringFilter =>
+  /**
+   * Splits the filtered input into every window of n consecutive characters, in order.
+   *
+   * @param charArray input characters; passed through the mixed-in filter first
+   * @param n window width; must be positive
+   * @return None when the filtered input is shorter than n, otherwise the
+   *         (length - n + 1) windows of width n
+   * @throws IllegalArgumentException when n is not positive (checked before filtering)
+   */
+  final override def tokenize(charArray: Array[Char])(implicit n: Int): Option[Array[Array[Char]]] = {
+    if (n <= 0) throw new IllegalArgumentException("Expected valid n.")
+
+    val fca = filter(charArray)
+
+    // The length guard means sliding never emits a short trailing window here. It replaces a
+    // hand-rolled recursion that re-copied both the remaining input and the accumulated result
+    // on every step.
+    if (fca.length < n) None
+    else Some(fca.sliding(n).toArray)
+  }
+
+  /** String overload; tokenizes the character array and renders each window back to a String. */
+  final override def tokenize(string: String)(implicit n: Int): Option[Array[String]] =
+    tokenize(string.toCharArray)(n).map(_.map(_.mkString))
+}
+
+/** Pass-through entry points that delegate to a lazily created default NGramTokenizer. */
+object NGramTokenizer {
+  private lazy val self = apply()
+
+  /** Returns a new NGramTokenizer with the base StringFilter mixed in. */
+  def apply(): NGramTokenizer = new NGramTokenizer with StringFilter
+
+  def tokenize(charArray: Array[Char])(n: Int) = self.tokenize(charArray)(n)
+
+  def tokenize(string: String)(n: Int) = self.tokenize(string)(n)
+}
diff --git a/core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala new file mode 100755 index 0000000..5bb503a --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala @@ -0,0 +1,96 @@ +package com.rockymadden.stringmetric + +import com.rockymadden.stringmetric.Alphabet.{Alpha, Vowel} +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AlphabetSpec extends ScalaTest { + "Alphabet" should provide { + "an overloaded isSuperset method which accepts Char" when passed { + "non-alphabet argument" should returns { + "false" in { + Alpha isSuperset '0' should be (false) + } + } + "alphabet argument" should returns { + "true" in { + Alpha isSuperset 'a' should be (true) + Alpha isSuperset 'A' should be (true) + } + } + "non-vowel argument" should returns { + "false" in { + Vowel
isSuperset 'y' should be (false) + } + } + "vowel argument" should returns { + "true" in { + Vowel isSuperset 'a' should be (true) + Vowel isSuperset 'A' should be (true) + } + } + } + "an overloaded isSuperset method which accepts Array[Char]" when passed { + "empty argument" should returns { + "false" in { + Alpha isSuperset Array.empty[Char] should be (false) + } + } + "non-alphabet argument" should returns { + "false" in { + Alpha isSuperset "hi!".toCharArray should be (false) + Alpha isSuperset "helloworld!".toCharArray should be (false) + } + } + "alphabet argument" should returns { + "true" in { + Alpha isSuperset "hi".toCharArray should be (true) + Alpha isSuperset "helloworld".toCharArray should be (true) + Alpha isSuperset "HI".toCharArray should be (true) + Alpha isSuperset "HELLOWORLD".toCharArray should be (true) + } + } + "non-vowel argument" should returns { + "false" in { + Vowel isSuperset "y".toCharArray should be (false) + } + } + "vowel argument" should returns { + "true" in { + Vowel isSuperset "a".toCharArray should be (true) + Vowel isSuperset "A".toCharArray should be (true) + } + } + } + "an overloaded isSuperset method which accepts String" when passed { + "empty argument" should returns { + "false" in { + Alpha isSuperset "" should be (false) + } + } + "non-alphabet argument" should returns { + "false" in { + Alpha isSuperset "helloworld!" 
should be (false) + } + } + "alphabet argument" should returns { + "true" in { + Alpha isSuperset "helloworld" should be (true) + Alpha isSuperset "HELLOWORLD" should be (true) + } + } + "non-vowel argument" should returns { + "false" in { + Vowel isSuperset "y" should be (false) + } + } + "vowel argument" should returns { + "true" in { + Vowel isSuperset "a" should be (true) + Vowel isSuperset "A" should be (true) + } + } + } + } +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/FilterDecoratedSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/FilterDecoratedSpec.scala new file mode 100755 index 0000000..e900f83 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/FilterDecoratedSpec.scala @@ -0,0 +1,38 @@ +package com.rockymadden.stringmetric + +import com.rockymadden.stringmetric.filter.AsciiNumberFilter +import com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm +import com.rockymadden.stringmetric.similarity.DiceSorensenMetric +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class FilterDecoratedSpec extends ScalaTest { + import FilterDecoratedSpec.{Algorithm, Metric} + + "Filter decorated metrics" should provide { + "compare method" when passed { + "filterable arguments" should returns { + "filtered results" in { + Metric.compare("123", "456")(1).isDefined should be (false) + Metric.compare("ni123ght", "na456cht")(1).get should be (0.6) + } + } + } + } + "Filter decorated algorithms" should provide { + "compute method" when passed { + "filterable argument" should returns { + "filtered results" in { + Algorithm.compute("456").isDefined should be (false) + Algorithm.compute("du123mb456").get should equal ("tm") + } + } + } + } +} + +object FilterDecoratedSpec { + private final val Algorithm = new MetaphoneAlgorithm with AsciiNumberFilter + private final val Metric = new DiceSorensenMetric with AsciiNumberFilter +} diff --git 
a/core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala b/core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala new file mode 100755 index 0000000..5f4ab62 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala @@ -0,0 +1,18 @@ +package com.rockymadden.stringmetric + +import org.scalatest.{BeforeAndAfter, ParallelTestExecution, WordSpec} +import org.scalatest.matchers.ShouldMatchers + +trait ScalaTest extends WordSpec with ShouldMatchers with BeforeAndAfter with ParallelTestExecution { + def allows = afterWord("allow") + + def executes = afterWord("execute") + + def passed = afterWord("passed") + + def provide = afterWord("provide") + + def returns = afterWord("return") + + def throws = afterWord("throw") +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala new file mode 100755 index 0000000..7ce0c24 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/StringAlgorithmSpec.scala @@ -0,0 +1,59 @@ +package com.rockymadden.stringmetric + +import com.rockymadden.stringmetric.phonetic._ +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class StringAlgorithmSpec extends ScalaTest { + "StringAlgorithm standalone object" should provide { + "compute method, type, and companion object pass-throughs" in { + val metaphone: StringAlgorithm.Metaphone = StringAlgorithm.Metaphone() + + metaphone.compute("testone").get should + equal (StringAlgorithm.computeWithMetaphone("testone").get) + metaphone.compute("testone".toCharArray).get should + equal (StringAlgorithm.computeWithMetaphone("testone".toCharArray).get) + metaphone.compute("testone".toCharArray).get should + equal (MetaphoneAlgorithm.compute("testone".toCharArray).get) + + val nysiis: StringAlgorithm.Nysiis = StringAlgorithm.Nysiis() + + 
nysiis.compute("testone").get should + equal (StringAlgorithm.computeWithNysiis("testone").get) + nysiis.compute("testone".toCharArray).get should + equal (StringAlgorithm.computeWithNysiis("testone".toCharArray).get) + nysiis.compute("testone".toCharArray).get should + equal (NysiisAlgorithm.compute("testone".toCharArray).get) + + val refinedNysiis: StringAlgorithm.RefinedNysiis = StringAlgorithm.RefinedNysiis() + + refinedNysiis.compute("testone").get should + equal (StringAlgorithm.computeWithRefinedNysiis("testone").get) + refinedNysiis.compute("testone".toCharArray).get should + equal (StringAlgorithm.computeWithRefinedNysiis("testone".toCharArray).get) + refinedNysiis.compute("testone".toCharArray).get should + equal (RefinedNysiisAlgorithm.compute("testone".toCharArray).get) + + val refinedSoundex: StringAlgorithm.RefinedSoundex = StringAlgorithm.RefinedSoundex() + + refinedSoundex.compute("testone").get should + equal (StringAlgorithm.computeWithRefinedSoundex("testone").get) + refinedSoundex.compute("testone".toCharArray).get should + equal (StringAlgorithm.computeWithRefinedSoundex("testone".toCharArray).get) + refinedSoundex.compute("testone".toCharArray).get should + equal (RefinedSoundexAlgorithm.compute("testone".toCharArray).get) + + val soundex: StringAlgorithm.Soundex = StringAlgorithm.Soundex() + + soundex.compute("testone").get should + equal (StringAlgorithm.computeWithSoundex("testone").get) + soundex.compute("testone".toCharArray).get should + equal (StringAlgorithm.computeWithSoundex("testone".toCharArray).get) + soundex.compute("testone".toCharArray).get should + equal (SoundexAlgorithm.compute("testone".toCharArray).get) + } + } +} + + diff --git a/core/source/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala new file mode 100755 index 0000000..ca99bff --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/StringMetricSpec.scala @@ 
-0,0 +1,141 @@ +package com.rockymadden.stringmetric + +import com.rockymadden.stringmetric.phonetic._ +import com.rockymadden.stringmetric.similarity._ +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class StringMetricSpec extends ScalaTest { + "StringMetric standalone object" should provide { + "compare method, type, and companion object pass-throughs" in { + val diceSorensen: StringMetric.DiceSorensen = StringMetric.DiceSorensen() + + diceSorensen.compare("testone", "testtwo")(1).get should + equal (StringMetric.compareWithDiceSorensen("testone", "testtwo")(1).get) + diceSorensen.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should + equal (StringMetric.compareWithDiceSorensen("testone".toCharArray, "testtwo".toCharArray)(1).get) + diceSorensen.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should + equal (DiceSorensenMetric.compare("testone".toCharArray, "testtwo".toCharArray)(1).get) + + val hamming: StringMetric.Hamming = StringMetric.Hamming() + + hamming.compare("testone", "testtwo").get should + equal (StringMetric.compareWithHamming("testone", "testtwo").get) + hamming.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (StringMetric.compareWithHamming("testone".toCharArray, "testtwo".toCharArray).get) + hamming.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (HammingMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + + val jaccard: StringMetric.Jaccard = StringMetric.Jaccard() + + jaccard.compare("testone", "testtwo")(1).get should + equal (StringMetric.compareWithJaccard("testone", "testtwo")(1).get) + jaccard.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should + equal (StringMetric.compareWithJaccard("testone".toCharArray, "testtwo".toCharArray)(1).get) + jaccard.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should + equal (JaccardMetric.compare("testone".toCharArray, 
"testtwo".toCharArray)(1).get) + + val jaro: StringMetric.Jaro = StringMetric.Jaro() + + jaro.compare("testone", "testtwo").get should + equal (StringMetric.compareWithJaro("testone", "testtwo").get) + jaro.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (StringMetric.compareWithJaro("testone".toCharArray, "testtwo".toCharArray).get) + jaro.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (JaroMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + + val jaroWinkler: StringMetric.JaroWinkler = StringMetric.JaroWinkler() + + jaroWinkler.compare("testone", "testtwo").get should + equal (StringMetric.compareWithJaroWinkler("testone", "testtwo").get) + jaroWinkler.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (StringMetric.compareWithJaroWinkler("testone".toCharArray, "testtwo".toCharArray).get) + jaroWinkler.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (JaroWinklerMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + + val levenshtein: StringMetric.Levenshtein = StringMetric.Levenshtein() + + levenshtein.compare("testone", "testtwo").get should + equal (StringMetric.compareWithLevenshtein("testone", "testtwo").get) + levenshtein.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (StringMetric.compareWithLevenshtein("testone".toCharArray, "testtwo".toCharArray).get) + levenshtein.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (LevenshteinMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + + val metaphone: StringMetric.Metaphone = StringMetric.Metaphone() + + metaphone.compare("testone", "testtwo").get should + equal (StringMetric.compareWithMetaphone("testone", "testtwo").get) + metaphone.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (StringMetric.compareWithMetaphone("testone".toCharArray, "testtwo".toCharArray).get) + 
metaphone.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (MetaphoneMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + + val nGram: StringMetric.NGram = StringMetric.NGram() + + nGram.compare("testone", "testtwo")(1).get should + equal (StringMetric.compareWithNGram("testone", "testtwo")(1).get) + nGram.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should + equal (StringMetric.compareWithNGram("testone".toCharArray, "testtwo".toCharArray)(1).get) + nGram.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should + equal (NGramMetric.compare("testone".toCharArray, "testtwo".toCharArray)(1).get) + + val nysiis: StringMetric.Nysiis = StringMetric.Nysiis() + + nysiis.compare("testone", "testtwo").get should + equal (StringMetric.compareWithNysiis("testone", "testtwo").get) + nysiis.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (StringMetric.compareWithNysiis("testone".toCharArray, "testtwo".toCharArray).get) + nysiis.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (NysiisMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + + val overlap: StringMetric.Overlap = StringMetric.Overlap() + + overlap.compare("testone", "testtwo")(1).get should + equal (StringMetric.compareWithOverlap("testone", "testtwo")(1).get) + overlap.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should + equal (StringMetric.compareWithOverlap("testone".toCharArray, "testtwo".toCharArray)(1).get) + overlap.compare("testone".toCharArray, "testtwo".toCharArray)(1).get should + equal (OverlapMetric.compare("testone".toCharArray, "testtwo".toCharArray)(1).get) + + val refinedNysiis: StringMetric.RefinedNysiis = StringMetric.RefinedNysiis() + + refinedNysiis.compare("testone", "testtwo").get should + equal (StringMetric.compareWithRefinedNysiis("testone", "testtwo").get) + refinedNysiis.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal 
(StringMetric.compareWithRefinedNysiis("testone".toCharArray, "testtwo".toCharArray).get) + refinedNysiis.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (RefinedNysiisMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + + val refinedSoundex: StringMetric.RefinedSoundex = StringMetric.RefinedSoundex() + + refinedSoundex.compare("testone", "testtwo").get should + equal (StringMetric.compareWithRefinedSoundex("testone", "testtwo").get) + refinedSoundex.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (StringMetric.compareWithRefinedSoundex("testone".toCharArray, "testtwo".toCharArray).get) + refinedSoundex.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (RefinedSoundexMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + + val soundex: StringMetric.Soundex = StringMetric.Soundex() + + soundex.compare("testone", "testtwo").get should + equal (StringMetric.compareWithSoundex("testone", "testtwo").get) + soundex.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (StringMetric.compareWithSoundex("testone".toCharArray, "testtwo".toCharArray).get) + soundex.compare("testone".toCharArray, "testtwo".toCharArray).get should + equal (SoundexMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + + val weightedLevenshtein: StringMetric.WeightedLevenshtein = StringMetric.WeightedLevenshtein() + + weightedLevenshtein.compare("testone", "testtwo")(1, 2, 3).get should + equal (StringMetric.compareWithWeightedLevenshtein("testone", "testtwo")(1, 2, 3).get) + weightedLevenshtein.compare("testone".toCharArray, "testtwo".toCharArray)(1, 2, 3).get should + equal (StringMetric.compareWithWeightedLevenshtein("testone".toCharArray, "testtwo".toCharArray)(1, 2, 3).get) + weightedLevenshtein.compare("testone".toCharArray, "testtwo".toCharArray)(1, 2, 3).get should + equal (WeightedLevenshteinMetric.compare("testone".toCharArray, "testtwo".toCharArray)(1, 2, 
3).get) + } + } +} + + diff --git a/core/source/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala new file mode 100755 index 0000000..8837c25 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/StringTokenizerSpec.scala @@ -0,0 +1,23 @@ +package com.rockymadden.stringmetric + +import com.rockymadden.stringmetric.tokenization.NGramTokenizer +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class StringTokenizerSpec extends ScalaTest { + "StringTokenizer standalone object" should provide { + "tokenize method, type, and companion object pass-throughs" in { + val nGram: StringTokenizer.NGram = StringTokenizer.NGram() + + nGram.tokenize("testone")(1).get should + equal (StringTokenizer.tokenizeWithNGram("testone")(1).get) + nGram.tokenize("testone".toCharArray)(1).get should + equal (StringTokenizer.tokenizeWithNGram("testone".toCharArray)(1).get) + nGram.tokenize("testone".toCharArray)(1).get should + equal (NGramTokenizer.tokenize("testone".toCharArray)(1).get) + } + } +} + + diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlFilterSpec.scala new file mode 100755 index 0000000..1cce0c9 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlFilterSpec.scala @@ -0,0 +1,33 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiControlFilterSpec extends ScalaTest { + import AsciiControlFilterSpec.Filter + + "AsciiControlFilter" should provide { + "overloaded filter method" when passed { + "String with controls" should returns { + "String with controls removed" in { + 
Filter.filter(" HelloWorld") should equal ("HelloWorld") + Filter.filter("HelloWorld ") should equal ("HelloWorld") + Filter.filter("Hello World") should equal ("HelloWorld") + } + } + "character array with controls" should returns { + "character array with controls removed" in { + Filter.filter(" HelloWorld".toCharArray) should equal ("HelloWorld".toCharArray) + Filter.filter("HelloWorld ".toCharArray) should equal ("HelloWorld".toCharArray) + Filter.filter("Hello World".toCharArray) should equal ("HelloWorld".toCharArray) + } + } + } + } +} + +object AsciiControlFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiControlFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilterSpec.scala new file mode 100755 index 0000000..958c8ba --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiControlOnlyFilterSpec.scala @@ -0,0 +1,33 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiControlOnlyFilterSpec extends ScalaTest { + import AsciiControlOnlyFilterSpec.Filter + + "AsciiControlOnlyFilter" should provide { + "overloaded filter method" when passed { + "String with mixed characters" should returns { + "String with non-controls removed" in { + Filter.filter("!@#$% ^&*()abc") should equal (" ") + Filter.filter(" ^&*()abc") should equal (" ") + Filter.filter("%^&*()abc ") should equal (" ") + } + } + "character array with mixed characters" should returns { + "character array with non-controls removed" in { + Filter.filter("!@#$% ^&*()abc".toCharArray) should equal (" ".toCharArray) + Filter.filter(" ^&*()abc".toCharArray) should equal (" ".toCharArray) + Filter.filter("%^&*()abc ".toCharArray) should equal (" 
".toCharArray) + } + } + } + } +} + +object AsciiControlOnlyFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiControlOnlyFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilterSpec.scala new file mode 100755 index 0000000..d86e7a5 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterFilterSpec.scala @@ -0,0 +1,29 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiLetterFilterSpec extends ScalaTest { + import AsciiLetterFilterSpec.Filter + + "AsciiLetterFilter" should provide { + "overloaded filter method" when passed { + "String with letters" should returns { + "String with letters removed" in { + Filter.filter(" Hello123World!") should equal (" 123!") + } + } + "character array with letters" should returns { + "character array with letters removed" in { + Filter.filter(" Hello123World!".toCharArray) should equal (" 123!".toCharArray) + } + } + } + } +} + +object AsciiLetterFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiLetterFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilterSpec.scala new file mode 100755 index 0000000..edfdce6 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberFilterSpec.scala @@ -0,0 +1,33 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiLetterNumberFilterSpec extends ScalaTest { + import 
AsciiLetterNumberFilterSpec.Filter + + "AsciiLetterNumberFilter" should provide { + "overloaded filter method" when passed { + "String with letters and numbers" should returns { + "String with letters and numbers removed" in { + Filter.filter(" Hello123World!") should equal (" !") + Filter.filter("Hello123 !World") should equal (" !") + Filter.filter("!Hello123World ") should equal ("! ") + } + } + "character array with letters and numbers" should returns { + "character array with letters and numbers removed" in { + Filter.filter(" Hello123World!".toCharArray) should equal (" !".toCharArray) + Filter.filter("Hello123 !World".toCharArray) should equal (" !".toCharArray) + Filter.filter("!Hello123World ".toCharArray) should equal ("! ".toCharArray) + } + } + } + } +} + +object AsciiLetterNumberFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiLetterNumberFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilterSpec.scala new file mode 100755 index 0000000..7998e39 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterNumberOnlyFilterSpec.scala @@ -0,0 +1,35 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiLetterNumberOnlyFilterSpec extends ScalaTest { + import AsciiLetterNumberOnlyFilterSpec.Filter + + "AsciiLetterNumberOnlyFilter" should provide { + "overloaded filter method" when passed { + "String with mixed characters" should returns { + "String with non-letters and non-numbers removed" in { + Filter.filter("!@#$%^&*()abc") should equal ("abc") + Filter.filter("!@#$%^&*()abc123") should equal ("abc123") + Filter.filter("abc123!@#$%^&*()") should equal ("abc123") + 
Filter.filter("!@#$%abc123^&*()") should equal ("abc123") + } + } + "character array with mixed characters" should returns { + "character array with non-letters and non-numbers removed" in { + Filter.filter("!@#$%^&*()abc".toCharArray) should equal ("abc".toCharArray) + Filter.filter("!@#$%^&*()abc123".toCharArray) should equal ("abc123".toCharArray) + Filter.filter("abc123!@#$%^&*()".toCharArray) should equal ("abc123".toCharArray) + Filter.filter("!@#$%abc123^&*()".toCharArray) should equal ("abc123".toCharArray) + } + } + } + } +} + +object AsciiLetterNumberOnlyFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiLetterNumberOnlyFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilterSpec.scala new file mode 100755 index 0000000..d134792 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiLetterOnlyFilterSpec.scala @@ -0,0 +1,33 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiLetterOnlyFilterSpec extends ScalaTest { + import AsciiLetterOnlyFilterSpec.Filter + + "AsciiLetterOnlyFilter" should provide { + "overloaded filter method" when passed { + "String with mixed characters" should returns { + "String with non-letters removed" in { + Filter.filter("!@#$%^&*()abc") should equal ("abc") + Filter.filter("!@#$%^&*()abc123") should equal ("abc") + Filter.filter("abc!@#$%^&*()123") should equal ("abc") + } + } + "character array with mixed characters" should returns { + "character array with non-letters removed" in { + Filter.filter("!@#$%^&*()abc".toCharArray) should equal ("abc".toCharArray) + Filter.filter("!@#$%^&*()abc123".toCharArray) should equal ("abc".toCharArray) + 
Filter.filter("abc!@#$%^&*()123".toCharArray) should equal ("abc".toCharArray) + } + } + } + } +} + +object AsciiLetterOnlyFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiLetterOnlyFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilterSpec.scala new file mode 100755 index 0000000..7c24d45 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberFilterSpec.scala @@ -0,0 +1,33 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiNumberFilterSpec extends ScalaTest { + import AsciiNumberFilterSpec.Filter + + "AsciiNumberFilter" should provide { + "overloaded filter method" when passed { + "String with numbers" should returns { + "String with numbers removed" in { + Filter.filter(" Hello123World!") should equal (" HelloWorld!") + Filter.filter("123 HelloWorld!") should equal (" HelloWorld!") + Filter.filter(" HelloWorld!123") should equal (" HelloWorld!") + } + } + "character array with numbers" should returns { + "character array with numbers removed" in { + Filter.filter(" Hello123World!".toCharArray) should equal (" HelloWorld!".toCharArray) + Filter.filter("123 HelloWorld!".toCharArray) should equal (" HelloWorld!".toCharArray) + Filter.filter(" HelloWorld!123".toCharArray) should equal (" HelloWorld!".toCharArray) + } + } + } + } +} + +object AsciiNumberFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiNumberFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilterSpec.scala new file mode 100755 index 0000000..db49d25 --- /dev/null +++ 
b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiNumberOnlyFilterSpec.scala @@ -0,0 +1,33 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiNumberOnlyFilterSpec extends ScalaTest { + import AsciiNumberOnlyFilterSpec.Filter + + "AsciiNumberOnlyFilter" should provide { + "overloaded filter method" when passed { + "String with mixed characters" should returns { + "String with non-numbers removed" in { + Filter.filter("!@#$%^&*()abc123") should equal ("123") + Filter.filter("123!@#$%^&*()abc") should equal ("123") + Filter.filter("!@#$%^123&*()abc") should equal ("123") + } + } + "character array with mixed characters" should returns { + "character array with non-numbers removed" in { + Filter.filter("!@#$%^&*()abc123".toCharArray) should equal ("123".toCharArray) + Filter.filter("123!@#$%^&*()abc".toCharArray) should equal ("123".toCharArray) + Filter.filter("!@#$%^123&*()abc".toCharArray) should equal ("123".toCharArray) + } + } + } + } +} + +object AsciiNumberOnlyFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiNumberOnlyFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilterSpec.scala new file mode 100755 index 0000000..759db3d --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSpaceFilterSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiSpaceFilterSpec extends ScalaTest { + import AsciiSpaceFilterSpec.Filter + + "AsciiSpaceFilter" should provide { + "overloaded filter method" when passed { + 
"String with spaces" should returns { + "String with spaces removed" in { + Filter.filter("HelloWorld") should equal ("HelloWorld") + Filter.filter(" HelloWorld ") should equal ("HelloWorld") + Filter.filter("Hello World") should equal ("HelloWorld") + Filter.filter("H e l l o W o r l d") should equal ("HelloWorld") + Filter.filter("H e l l o W o r l d") should equal ("HelloWorld") + } + } + "character array with spaces" should returns { + "character array with spaces removed" in { + Filter.filter("HelloWorld".toCharArray) should equal ("HelloWorld".toCharArray) + Filter.filter(" HelloWorld ".toCharArray) should equal ("HelloWorld".toCharArray) + Filter.filter("Hello World".toCharArray) should equal ("HelloWorld".toCharArray) + Filter.filter("H e l l o W o r l d".toCharArray) should equal ("HelloWorld".toCharArray) + Filter.filter("H e l l o W o r l d".toCharArray) should equal ("HelloWorld".toCharArray) + } + } + } + } +} + +object AsciiSpaceFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiSpaceFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilterSpec.scala new file mode 100755 index 0000000..99be533 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolFilterSpec.scala @@ -0,0 +1,31 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiSymbolFilterSpec extends ScalaTest { + import AsciiSymbolFilterSpec.Filter + + "AsciiSymbolFilter" should provide { + "overloaded filter method" when passed { + "String with symbols" should returns { + "String with symbols removed" in { + Filter.filter("[HelloWorld]") should equal ("HelloWorld") + Filter.filter("Hello!World") should equal ("HelloWorld") + } + } + 
"character array with symbols" should returns { + "character array with symbols removed" in { + Filter.filter("[HelloWorld]".toCharArray) should equal ("HelloWorld".toCharArray) + Filter.filter("Hello!World".toCharArray) should equal ("HelloWorld".toCharArray) + } + } + } + } +} + +object AsciiSymbolFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiSymbolFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilterSpec.scala new file mode 100755 index 0000000..c6f1899 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/AsciiSymbolOnlyFilterSpec.scala @@ -0,0 +1,33 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AsciiSymbolOnlyFilterSpec extends ScalaTest { + import AsciiSymbolOnlyFilterSpec.Filter + + "AsciiSymbolOnlyFilter" should provide { + "overloaded filter method" when passed { + "String with mixed characters" should returns { + "String with non-symbols removed" in { + Filter.filter("!@#$%^&*()abc123") should equal ("!@#$%^&*()") + Filter.filter("abc123!@#$%^&*()") should equal ("!@#$%^&*()") + Filter.filter("!@#$%abc123^&*()") should equal ("!@#$%^&*()") + } + } + "character array with mixed characters" should returns { + "character array with non-symbols removed" in { + Filter.filter("!@#$%^&*()abc123".toCharArray) should equal ("!@#$%^&*()".toCharArray) + Filter.filter("abc123!@#$%^&*()".toCharArray) should equal ("!@#$%^&*()".toCharArray) + Filter.filter("!@#$%abc123^&*()".toCharArray) should equal ("!@#$%^&*()".toCharArray) + } + } + } + } +} + +object AsciiSymbolOnlyFilterSpec { + private final val Filter = new StringFilterDelegate with AsciiSymbolOnlyFilter +} diff --git 
a/core/source/test/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilterSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilterSpec.scala new file mode 100755 index 0000000..a04a0ae --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/IgnoreAsciiLetterCaseFilterSpec.scala @@ -0,0 +1,41 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class IgnoreAsciiLetterCaseFilterSpec extends ScalaTest { + import IgnoreAsciiLetterCaseFilterSpec.Filter + + "IgnoreAsciiLetterCaseFilter" should provide { + "overloaded filter method" when passed { + "String with mixed case" should returns { + "String with the same case" in { + Filter.filter("HelloWorld") should (equal ("helloworld") or equal ("HELLOWORLD")) + Filter.filter("Hello World") should (equal ("hello world") or equal ("HELLO WORLD")) + Filter.filter("H e l l o W o r l d") should + (equal ("h e l l o w o r l d") or equal ("H E L L O W O R L D")) + Filter.filter("H e l l o W o r l d") should + (equal ("h e l l o w o r l d") or equal ("H E L L O W O R L D")) + } + } + "character array with mixed case" should returns { + "character array with the same case" in { + Filter.filter("HelloWorld".toCharArray) should + (equal ("helloworld".toCharArray) or equal ("HELLOWORLD".toCharArray)) + Filter.filter("Hello World".toCharArray) should + (equal ("hello world".toCharArray) or equal ("HELLO WORLD".toCharArray)) + Filter.filter("H e l l o W o r l d".toCharArray) should + (equal ("h e l l o w o r l d".toCharArray) or equal ("H E L L O W O R L D".toCharArray)) + Filter.filter("H e l l o W o r l d".toCharArray) should + (equal ("h e l l o w o r l d".toCharArray) or equal ("H E L L O W O R L D".toCharArray)) + } + } + } + } +} + +object IgnoreAsciiLetterCaseFilterSpec { + private final val Filter 
= new StringFilterDelegate with IgnoreAsciiLetterCaseFilter +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/filter/StringFilterDelegateSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/filter/StringFilterDelegateSpec.scala new file mode 100755 index 0000000..132156b --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/filter/StringFilterDelegateSpec.scala @@ -0,0 +1,31 @@ +package com.rockymadden.stringmetric.filter + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class StringFilterDelegateSpec extends ScalaTest { + import StringFilterDelegateSpec.Filter + + "StringFilter" should provide { + "overloaded filter method" when passed { + "String" should returns { + "the same String" in { + Filter.filter("Hello World") should equal ("Hello World") + Filter.filter(" Hello! World]") should equal (" Hello! World]") + } + } + "character array" should returns { + "the same character array" in { + Filter.filter("Hello World".toCharArray) should equal ("Hello World".toCharArray) + Filter.filter(" Hello! World]".toCharArray) should equal (" Hello! 
World]".toCharArray) + } + } + } + } +} + +object StringFilterDelegateSpec { + private final val Filter = new StringFilterDelegate +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala new file mode 100755 index 0000000..1f904d5 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala @@ -0,0 +1,226 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class MetaphoneAlgorithmSpec extends ScalaTest { + import MetaphoneAlgorithmSpec.Algorithm + + "MetaphoneAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + Algorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + Algorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // z + Algorithm.compute("z").get should equal ("s") + Algorithm.compute("zz").get should equal ("s") + + // y + Algorithm.compute("y").isDefined should be (false) + Algorithm.compute("zy").get should equal ("s") + Algorithm.compute("zyz").get should equal ("ss") + Algorithm.compute("zya").get should equal ("sy") + + // x + Algorithm.compute("x").get should equal ("s") + Algorithm.compute("zx").get should equal ("sks") + Algorithm.compute("zxz").get should equal ("skss") + + // w + Algorithm.compute("w").isDefined should be (false) + Algorithm.compute("zw").get should equal ("s") + Algorithm.compute("zwz").get should equal ("ss") + Algorithm.compute("zwa").get should equal ("sw") + + // v + Algorithm.compute("v").get should equal ("f") + Algorithm.compute("zv").get should equal ("sf") + Algorithm.compute("zvz").get should 
equal ("sfs") + + // u + Algorithm.compute("u").get should equal ("u") + Algorithm.compute("zu").get should equal ("s") + + // t + Algorithm.compute("t").get should equal ("t") + Algorithm.compute("ztiaz").get should equal ("sxs") + Algorithm.compute("ztioz").get should equal ("sxs") + Algorithm.compute("zthz").get should equal ("s0s") + Algorithm.compute("ztchz").get should equal ("sxs") + Algorithm.compute("ztz").get should equal ("sts") + + // s + Algorithm.compute("s").get should equal ("s") + Algorithm.compute("zshz").get should equal ("sxs") + Algorithm.compute("zsioz").get should equal ("sxs") + Algorithm.compute("zsiaz").get should equal ("sxs") + Algorithm.compute("zs").get should equal ("ss") + Algorithm.compute("zsz").get should equal ("sss") + + // r + Algorithm.compute("r").get should equal ("r") + Algorithm.compute("zr").get should equal ("sr") + Algorithm.compute("zrz").get should equal ("srs") + + // q + Algorithm.compute("q").get should equal ("k") + Algorithm.compute("zq").get should equal ("sk") + Algorithm.compute("zqz").get should equal ("sks") + + // p + Algorithm.compute("p").get should equal ("p") + Algorithm.compute("zp").get should equal ("sp") + Algorithm.compute("zph").get should equal ("sf") + Algorithm.compute("zpz").get should equal ("sps") + + // o + Algorithm.compute("o").get should equal ("o") + Algorithm.compute("zo").get should equal ("s") + + // n + Algorithm.compute("n").get should equal ("n") + Algorithm.compute("zn").get should equal ("sn") + Algorithm.compute("znz").get should equal ("sns") + + // m + Algorithm.compute("m").get should equal ("m") + Algorithm.compute("zm").get should equal ("sm") + Algorithm.compute("zmz").get should equal ("sms") + + // l + Algorithm.compute("l").get should equal ("l") + Algorithm.compute("zl").get should equal ("sl") + Algorithm.compute("zlz").get should equal ("sls") + + // k + Algorithm.compute("k").get should equal ("k") + Algorithm.compute("zk").get should equal ("sk") + 
Algorithm.compute("zck").get should equal ("sk") + + // j + Algorithm.compute("j").get should equal ("j") + Algorithm.compute("zj").get should equal ("sj") + Algorithm.compute("zjz").get should equal ("sjs") + + // i + Algorithm.compute("i").get should equal ("i") + Algorithm.compute("zi").get should equal ("s") + + // h + Algorithm.compute("h").get should equal ("h") // php wrongly says nothing + Algorithm.compute("zh").get should equal ("sh") // php wrongly says s + Algorithm.compute("zah").get should equal ("s") + Algorithm.compute("zchh").get should equal ("sx") + Algorithm.compute("ha").get should equal ("h") + + // g + Algorithm.compute("g").get should equal ("k") + Algorithm.compute("zg").get should equal ("sk") + Algorithm.compute("zgh").get should equal ("skh") // php wrongly says sf + Algorithm.compute("zghz").get should equal ("shs") // php wrongly says sfs + Algorithm.compute("zgha").get should equal ("sh") // php wrongly says sf; others wrongly say skh + Algorithm.compute("zgn").get should equal ("sn") + Algorithm.compute("zgns").get should equal ("skns") + Algorithm.compute("zgned").get should equal ("snt") // others wrongly say sknt + Algorithm.compute("zgneds").get should equal ("sknts") // php wrongly says snts + Algorithm.compute("zgi").get should equal ("sj") + Algorithm.compute("zgiz").get should equal ("sjs") + Algorithm.compute("zge").get should equal ("sj") + Algorithm.compute("zgez").get should equal ("sjs") + Algorithm.compute("zgy").get should equal ("sj") + Algorithm.compute("zgyz").get should equal ("sjs") + Algorithm.compute("zgz").get should equal ("sks") + + // f + Algorithm.compute("f").get should equal ("f") + Algorithm.compute("zf").get should equal ("sf") + Algorithm.compute("zfz").get should equal ("sfs") + + // e + Algorithm.compute("e").get should equal ("e") + Algorithm.compute("ze").get should equal ("s") + + // d + Algorithm.compute("d").get should equal ("t") + Algorithm.compute("fudge").get should equal ("fjj") // php
wrongly says fj + Algorithm.compute("dodgy").get should equal ("tjj") // php wrongly says tj; others wrongly say tjjy + Algorithm.compute("dodgi").get should equal ("tjj") // php wrongly says tj + Algorithm.compute("zd").get should equal ("st") + Algorithm.compute("zdz").get should equal ("sts") + + // c + Algorithm.compute("c").get should equal ("k") + Algorithm.compute("zcia").get should equal ("sx") + Algorithm.compute("zciaz").get should equal ("sxs") + Algorithm.compute("zch").get should equal ("sx") + Algorithm.compute("zchz").get should equal ("sxs") + Algorithm.compute("zci").get should equal ("ss") + Algorithm.compute("zciz").get should equal ("sss") + Algorithm.compute("zce").get should equal ("ss") + Algorithm.compute("zcez").get should equal ("sss") + Algorithm.compute("zcy").get should equal ("ss") + Algorithm.compute("zcyz").get should equal ("sss") + Algorithm.compute("zsci").get should equal ("ss") + Algorithm.compute("zsciz").get should equal ("sss") + Algorithm.compute("zsce").get should equal ("ss") + Algorithm.compute("zscez").get should equal ("sss") + Algorithm.compute("zscy").get should equal ("ss") + Algorithm.compute("zscyz").get should equal ("sss") + Algorithm.compute("zsch").get should equal ("sskh") // php wrongly says ssx + Algorithm.compute("zc").get should equal ("sk") + Algorithm.compute("zcz").get should equal ("sks") + + // b + Algorithm.compute("b").get should equal ("b") + Algorithm.compute("zb").get should equal ("sb") + Algorithm.compute("zbz").get should equal ("sbs") + Algorithm.compute("zmb").get should equal ("sm") + + // a + Algorithm.compute("a").get should equal ("a") + Algorithm.compute("za").get should equal ("s") + + // Miscellaneous.
+ Algorithm.compute("dumb").get should equal ("tm") + Algorithm.compute("smith").get should equal ("sm0") + Algorithm.compute("school").get should equal ("skhl") // php wrongly says sxl + Algorithm.compute("merci").get should equal ("mrs") + Algorithm.compute("cool").get should equal ("kl") + Algorithm.compute("aebersold").get should equal ("ebrslt") + Algorithm.compute("gnagy").get should equal ("nj") + Algorithm.compute("knuth").get should equal ("n0") + Algorithm.compute("pniewski").get should equal ("nsk") + Algorithm.compute("wright").get should equal ("rht") // php wrongly says rft + Algorithm.compute("phone").get should equal ("fn") + Algorithm.compute("aggregate").get should equal ("akrkt") + Algorithm.compute("accuracy").get should equal ("akkrs") + Algorithm.compute("encyclopedia").get should equal ("ensklpt") + Algorithm.compute("honorificabilitudinitatibus").get should equal ("hnrfkblttnttbs") + Algorithm.compute("antidisestablishmentarianism").get should equal ("anttsstblxmntrnsm") + } + } + } + } + "MetaphoneAlgorithm companion object" should provide { + "pass-through compute method" should returns { + "same value as class" in { + MetaphoneAlgorithm.compute("dumb").get should equal ("tm") + } + } + } +} + +object MetaphoneAlgorithmSpec { + final private val Algorithm = MetaphoneAlgorithm() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala new file mode 100755 index 0000000..9a029d8 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala @@ -0,0 +1,54 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class MetaphoneMetricSpec extends ScalaTest { + import MetaphoneMetricSpec.Metric + + "MetaphoneMetric" 
should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + Metric.compare("123", "123").isDefined should be (false) + Metric.compare("123", "").isDefined should be (false) + Metric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + "Boolean indicating true" in { + Metric.compare("dumb", "dum").get should be (true) + Metric.compare("smith", "smeth").get should be (true) + Metric.compare("merci", "mercy").get should be (true) + } + } + "phonetically dissimilar arguments" should returns { + "Boolean indicating false" in { + Metric.compare("dumb", "gum").get should be (false) + Metric.compare("smith", "kiss").get should be (false) + Metric.compare("merci", "burpy").get should be (false) + } + } + } + } + "MetaphoneMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + MetaphoneMetric.compare("dumb", "gum").get should be (false) + } + } + } +} + +object MetaphoneMetricSpec { + final private val Metric = MetaphoneMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala new file mode 100755 index 0000000..6a0f113 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala @@ -0,0 +1,204 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class NysiisAlgorithmSpec extends ScalaTest { + import NysiisAlgorithmSpec.Algorithm + + 
"NysiisAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + Algorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + Algorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // a + Algorithm.compute("a").get should equal ("a") + Algorithm.compute("aa").get should equal ("a") + + // b + Algorithm.compute("b").get should equal ("b") + Algorithm.compute("bb").get should equal ("bb") + + // c + Algorithm.compute("c").get should equal ("c") + Algorithm.compute("cc").get should equal ("cc") + + // d + Algorithm.compute("d").get should equal ("d") + Algorithm.compute("dd").get should equal ("dd") + + // e + Algorithm.compute("e").get should equal ("e") + Algorithm.compute("ee").get should equal ("y") + + // f + Algorithm.compute("f").get should equal ("f") + Algorithm.compute("ff").get should equal ("ff") + + // g + Algorithm.compute("g").get should equal ("g") + Algorithm.compute("gg").get should equal ("gg") + + // h + Algorithm.compute("h").get should equal ("h") + Algorithm.compute("hh").get should equal ("hh") + + // i + Algorithm.compute("i").get should equal ("i") + Algorithm.compute("ii").get should equal ("i") + + // j + Algorithm.compute("j").get should equal ("j") + Algorithm.compute("jj").get should equal ("jj") + + // k + Algorithm.compute("k").get should equal ("c") + Algorithm.compute("kk").get should equal ("cc") + + // l + Algorithm.compute("l").get should equal ("l") + Algorithm.compute("ll").get should equal ("ll") + + // m + Algorithm.compute("m").get should equal ("m") + Algorithm.compute("mm").get should equal ("mn") + + // n + Algorithm.compute("n").get should equal ("n") + Algorithm.compute("nn").get should equal ("nn") + + // o + Algorithm.compute("o").get should equal ("o") + Algorithm.compute("oo").get should equal ("o") + + // p + Algorithm.compute("p").get should 
equal ("p") + Algorithm.compute("pp").get should equal ("pp") + + // q + Algorithm.compute("q").get should equal ("q") + Algorithm.compute("qq").get should equal ("qg") + + // r + Algorithm.compute("r").get should equal ("r") + Algorithm.compute("rr").get should equal ("rr") + + // s + Algorithm.compute("s").get should equal ("s") + Algorithm.compute("ss").get should equal ("s") + + // t + Algorithm.compute("t").get should equal ("t") + Algorithm.compute("tt").get should equal ("tt") + + // u + Algorithm.compute("u").get should equal ("u") + Algorithm.compute("uu").get should equal ("u") + + // v + Algorithm.compute("v").get should equal ("v") + Algorithm.compute("vv").get should equal ("vv") + + // w + Algorithm.compute("w").get should equal ("w") + Algorithm.compute("ww").get should equal ("ww") + + // x + Algorithm.compute("x").get should equal ("x") + Algorithm.compute("xx").get should equal ("xx") + + // y + Algorithm.compute("y").get should equal ("y") + Algorithm.compute("yy").get should equal ("yy") + + // z + Algorithm.compute("z").get should equal ("z") + Algorithm.compute("zz").get should equal ("z") + + // Head cases. + Algorithm.compute("mac").get should equal ("mc") + Algorithm.compute("kn").get should equal ("nn") + Algorithm.compute("k").get should equal ("c") + Algorithm.compute("ph").get should equal ("ff") + Algorithm.compute("pf").get should equal ("ff") + Algorithm.compute("sch").get should equal ("s") // dropby wrongly says ss + + // Last cases. + Algorithm.compute("ee").get should equal ("y") + Algorithm.compute("ie").get should equal ("y") + Algorithm.compute("dt").get should equal ("d") + Algorithm.compute("rt").get should equal ("d") + Algorithm.compute("rd").get should equal ("d") + Algorithm.compute("nt").get should equal ("d") + Algorithm.compute("nd").get should equal ("d") + + // Core cases. 
+ Algorithm.compute("eev").get should equal ("eaf") + Algorithm.compute("zev").get should equal ("zaf") + Algorithm.compute("kkn").get should equal ("cn") + Algorithm.compute("sschn").get should equal ("ssn") + Algorithm.compute("pph").get should equal ("pf") + + // Miscellaneous. + Algorithm.compute("macdonald").get should equal ("mcdanald") + Algorithm.compute("phone").get should equal ("ffan") + Algorithm.compute("aggregate").get should equal ("agragat") + Algorithm.compute("accuracy").get should equal ("acaracy") + Algorithm.compute("encyclopedia").get should equal ("encyclapad") + Algorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab") + Algorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn") + + // Dropby. + Algorithm.compute("macintosh").get should equal ("mcant") + Algorithm.compute("knuth").get should equal ("nnat") + Algorithm.compute("koehn").get should equal ("can") // dropby wrongly says c + Algorithm.compute("phillipson").get should equal ("ffalapsan") + Algorithm.compute("pfeister").get should equal ("ffastar") + Algorithm.compute("schoenhoeft").get should equal ("ssanaft") + Algorithm.compute("mckee").get should equal ("mcy") + Algorithm.compute("heitschmedt").get should equal ("hatsnad") + Algorithm.compute("bart").get should equal ("bad") + Algorithm.compute("hurd").get should equal ("had") + Algorithm.compute("hunt").get should equal ("had") + Algorithm.compute("westerlund").get should equal ("wastarlad") + Algorithm.compute("casstevens").get should equal ("castafan") + Algorithm.compute("vasquez").get should equal ("vasg") + Algorithm.compute("frazier").get should equal ("frasar") + Algorithm.compute("bowman").get should equal ("banan") + Algorithm.compute("mcknight").get should equal ("mcnagt") + Algorithm.compute("rickert").get should equal ("racad") + Algorithm.compute("deutsch").get should equal ("dat") // dropby wrongly says dats + 
Algorithm.compute("westphal").get should equal ("wastfal") + Algorithm.compute("shriver").get should equal ("shravar") + Algorithm.compute("kuhl").get should equal ("cal") // dropby wrongly says c + Algorithm.compute("rawson").get should equal ("rasan") + Algorithm.compute("jiles").get should equal ("jal") + Algorithm.compute("carraway").get should equal ("caray") + Algorithm.compute("yamada").get should equal ("yanad") + } + } + } + } + "NysiisAlgorithm companion object" should provide { + "pass-through compute method" should returns { + "same value as class" in { + NysiisAlgorithm.compute("macdonald").get should equal ("mcdanald") + } + } + } +} + +object NysiisAlgorithmSpec { + final private val Algorithm = NysiisAlgorithm() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala new file mode 100755 index 0000000..c6929a2 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala @@ -0,0 +1,50 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class NysiisMetricSpec extends ScalaTest { + import NysiisMetricSpec.Metric + + "NysiisMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + Metric.compare("123", "123").isDefined should be (false) + Metric.compare("123", "").isDefined should be (false) + Metric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + "Boolean indicating true" in { + 
Metric.compare("ham", "hum").get should be (true) + } + } + "phonetically dissimilar arguments" should returns { + "Boolean indicating false" in { + Metric.compare("dumb", "gum").get should be (false) + } + } + } + } + "NysiisMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + NysiisMetric.compare("dumb", "gum").get should be (false) + } + } + } +} + +object NysiisMetricSpec { + final private val Metric = NysiisMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala new file mode 100755 index 0000000..1298d2d --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala @@ -0,0 +1,221 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RefinedNysiisAlgorithmSpec extends ScalaTest { + import RefinedNysiisAlgorithmSpec.Algorithm + + "RefinedNysiisAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + Algorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + Algorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // a + Algorithm.compute("a").get should equal ("a") + Algorithm.compute("aa").get should equal ("a") + + // b + Algorithm.compute("b").get should equal ("b") + Algorithm.compute("bb").get should equal ("b") + + // c + Algorithm.compute("c").get should equal ("c") + Algorithm.compute("cc").get should equal ("c") + + // d + Algorithm.compute("d").get should equal ("d") + Algorithm.compute("dd").get should equal ("d") + + // e + 
Algorithm.compute("e").get should equal ("e") + Algorithm.compute("ee").get should equal ("y") + + // f + Algorithm.compute("f").get should equal ("f") + Algorithm.compute("ff").get should equal ("f") + + // g + Algorithm.compute("g").get should equal ("g") + Algorithm.compute("gg").get should equal ("g") + + // h + Algorithm.compute("h").get should equal ("h") + Algorithm.compute("hh").get should equal ("h") + + // i + Algorithm.compute("i").get should equal ("i") + Algorithm.compute("ii").get should equal ("i") + + // j + Algorithm.compute("j").get should equal ("j") + Algorithm.compute("jj").get should equal ("j") + + // k + Algorithm.compute("k").get should equal ("c") + Algorithm.compute("kk").get should equal ("c") + + // l + Algorithm.compute("l").get should equal ("l") + Algorithm.compute("ll").get should equal ("l") + + // m + Algorithm.compute("m").get should equal ("m") + Algorithm.compute("mm").get should equal ("mn") + + // n + Algorithm.compute("n").get should equal ("n") + Algorithm.compute("nn").get should equal ("n") + + // o + Algorithm.compute("o").get should equal ("o") + Algorithm.compute("oo").get should equal ("o") + + // p + Algorithm.compute("p").get should equal ("p") + Algorithm.compute("pp").get should equal ("p") + + // q + Algorithm.compute("q").get should equal ("q") + Algorithm.compute("qq").get should equal ("qg") + + // r + Algorithm.compute("r").get should equal ("r") + Algorithm.compute("rr").get should equal ("r") + + // s + Algorithm.compute("s").get should equal ("s") + Algorithm.compute("ss").get should equal ("s") + + // t + Algorithm.compute("t").get should equal ("t") + Algorithm.compute("tt").get should equal ("t") + + // u + Algorithm.compute("u").get should equal ("u") + Algorithm.compute("uu").get should equal ("u") + + // v + Algorithm.compute("v").get should equal ("v") + Algorithm.compute("vv").get should equal ("v") + + // w + Algorithm.compute("w").get should equal ("w") + Algorithm.compute("ww").get should equal 
("w") + + // x + Algorithm.compute("x").get should equal ("x") + Algorithm.compute("xx").get should equal ("x") + + // y + Algorithm.compute("y").get should equal ("y") + Algorithm.compute("yy").get should equal ("y") + Algorithm.compute("ybyb").get should equal ("ybab") + + // z + Algorithm.compute("z").get should equal ("z") + Algorithm.compute("zz").get should equal ("z") + + // Head cases. + Algorithm.compute("mac").get should equal ("mc") + Algorithm.compute("pf").get should equal ("f") + + // Last cases. + Algorithm.compute("ix").get should equal ("ic") + Algorithm.compute("ex").get should equal ("ec") + Algorithm.compute("ye").get should equal ("y") + Algorithm.compute("ee").get should equal ("y") + Algorithm.compute("ie").get should equal ("y") + Algorithm.compute("dt").get should equal ("d") + Algorithm.compute("rt").get should equal ("d") + Algorithm.compute("rd").get should equal ("d") + Algorithm.compute("nt").get should equal ("d") + Algorithm.compute("nd").get should equal ("d") + + // Core cases. + Algorithm.compute("bevb").get should equal ("bafb") + Algorithm.compute("bghtb").get should equal ("bgtb") + Algorithm.compute("bdgb").get should equal ("bgb") + Algorithm.compute("bphb").get should equal ("bfb") + Algorithm.compute("bknb").get should equal ("bnb") + Algorithm.compute("bshb").get should equal ("bsb") + Algorithm.compute("bschb").get should equal ("bsb") + Algorithm.compute("bywb").get should equal ("bab") + Algorithm.compute("byw").get should equal ("by") + Algorithm.compute("ywb").get should equal ("yb") + Algorithm.compute("bwrb").get should equal ("brb") + + // Transcode cases. + Algorithm.compute("bay").get should equal ("by") + + // Miscellaneous. 
+ Algorithm.compute("macdonald").get should equal ("mcdanald") + Algorithm.compute("phone").get should equal ("fan") + Algorithm.compute("aggregate").get should equal ("agragat") + Algorithm.compute("accuracy").get should equal ("acaracy") + Algorithm.compute("encyclopedia").get should equal ("encaclapad") + Algorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab") + Algorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn") + + // Dropby. + Algorithm.compute("edwards").get should equal ("edwad") + Algorithm.compute("parez").get should equal ("par") + Algorithm.compute("macintosh").get should equal ("mcantas") + Algorithm.compute("phillipson").get should equal ("falapsan") + Algorithm.compute("haddix").get should equal ("hadac") + Algorithm.compute("essex").get should equal ("esac") + Algorithm.compute("moye").get should equal ("my") + Algorithm.compute("mckee").get should equal ("mcy") + Algorithm.compute("mackie").get should equal ("mcy") + Algorithm.compute("heitschmidt").get should equal ("hatsnad") + Algorithm.compute("bart").get should equal ("bad") + Algorithm.compute("hurd").get should equal ("had") + Algorithm.compute("hunt").get should equal ("had") + Algorithm.compute("westerlund").get should equal ("wastarlad") + Algorithm.compute("evers").get should equal ("evar") + Algorithm.compute("devito").get should equal ("dafat") + Algorithm.compute("rawson").get should equal ("rasan") + Algorithm.compute("shoulders").get should equal ("saldar") + Algorithm.compute("leighton").get should equal ("lagtan") + Algorithm.compute("wooldridge").get should equal ("waldrag") + Algorithm.compute("oliphant").get should equal ("olafad") + Algorithm.compute("hatchett").get should equal ("hatcat") + Algorithm.compute("mcknight").get should equal ("mcnagt") + Algorithm.compute("rickert").get should equal ("racad") + Algorithm.compute("bowman").get should equal ("banan") + 
Algorithm.compute("vasquez").get should equal ("vasg") + Algorithm.compute("bashaw").get should equal ("bas") + Algorithm.compute("schoenhoeft").get should equal ("sanaft") // dropby wrongly says scanaft + Algorithm.compute("heywood").get should equal ("had") + Algorithm.compute("hayman").get should equal ("hanan") + Algorithm.compute("seawright").get should equal ("saragt") + Algorithm.compute("kratzer").get should equal ("cratsar") + Algorithm.compute("canaday").get should equal ("canady") + Algorithm.compute("crepeau").get should equal ("crap") + } + } + } + } + "RefinedNysiisAlgorithm companion object" should provide { + "pass-through compute method" should returns { + "same value as class" in { + RefinedNysiisAlgorithm.compute("macdonald").get should equal ("mcdanald") + } + } + } +} + +object RefinedNysiisAlgorithmSpec { + final private val Algorithm = RefinedNysiisAlgorithm() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala new file mode 100755 index 0000000..ca9d2ec --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala @@ -0,0 +1,50 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RefinedNysiisMetricSpec extends ScalaTest { + import RefinedNysiisMetricSpec.Metric + + "RefinedNysiisMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + Metric.compare("123", "123").isDefined should be (false) + 
Metric.compare("123", "").isDefined should be (false) + Metric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + "Boolean indicating true" in { + Metric.compare("ham", "hum").get should be (true) + } + } + "phonetically dissimilar arguments" should returns { + "Boolean indicating false" in { + Metric.compare("dumb", "gum").get should be (false) + } + } + } + } + "RefinedNysiisMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + RefinedNysiisMetric.compare("dumb", "gum").get should be (false) + } + } + } +} + +object RefinedNysiisMetricSpec { + final private val Metric = RefinedNysiisMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala new file mode 100755 index 0000000..254bf06 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala @@ -0,0 +1,175 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RefinedSoundexAlgorithmSpec extends ScalaTest { + import RefinedSoundexAlgorithmSpec.Algorithm + + "RefinedSoundexAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + Algorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + Algorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // a + Algorithm.compute("a").get should equal ("a0") + Algorithm.compute("aa").get should equal ("a0") + + // b + Algorithm.compute("b").get should equal ("b1") + Algorithm.compute("bb").get should equal ("b1") 
+ + // c + Algorithm.compute("c").get should equal ("c3") + Algorithm.compute("cc").get should equal ("c3") + + // d + Algorithm.compute("d").get should equal ("d6") + Algorithm.compute("dd").get should equal ("d6") + + // e + Algorithm.compute("e").get should equal ("e0") + Algorithm.compute("ee").get should equal ("e0") + + // f + Algorithm.compute("f").get should equal ("f2") + Algorithm.compute("ff").get should equal ("f2") + + // g + Algorithm.compute("g").get should equal ("g4") + Algorithm.compute("gg").get should equal ("g4") + + // h + Algorithm.compute("h").get should equal ("h0") + Algorithm.compute("hh").get should equal ("h0") + + // i + Algorithm.compute("i").get should equal ("i0") + Algorithm.compute("ii").get should equal ("i0") + + // j + Algorithm.compute("j").get should equal ("j4") + Algorithm.compute("jj").get should equal ("j4") + + // k + Algorithm.compute("k").get should equal ("k3") + Algorithm.compute("kk").get should equal ("k3") + + // l + Algorithm.compute("l").get should equal ("l7") + Algorithm.compute("ll").get should equal ("l7") + + // m + Algorithm.compute("m").get should equal ("m8") + Algorithm.compute("mm").get should equal ("m8") + + // n + Algorithm.compute("n").get should equal ("n8") + Algorithm.compute("nn").get should equal ("n8") + + // o + Algorithm.compute("o").get should equal ("o0") + Algorithm.compute("oo").get should equal ("o0") + + // p + Algorithm.compute("p").get should equal ("p1") + Algorithm.compute("pp").get should equal ("p1") + + // q + Algorithm.compute("q").get should equal ("q5") + Algorithm.compute("qq").get should equal ("q5") + + // r + Algorithm.compute("r").get should equal ("r9") + Algorithm.compute("rr").get should equal ("r9") + + // s + Algorithm.compute("s").get should equal ("s3") + Algorithm.compute("ss").get should equal ("s3") + + // t + Algorithm.compute("t").get should equal ("t6") + Algorithm.compute("tt").get should equal ("t6") + + // u + Algorithm.compute("u").get should equal 
("u0") + Algorithm.compute("uu").get should equal ("u0") + + // v + Algorithm.compute("v").get should equal ("v2") + Algorithm.compute("vv").get should equal ("v2") + + // w + Algorithm.compute("w").get should equal ("w0") + Algorithm.compute("ww").get should equal ("w0") + + // x + Algorithm.compute("x").get should equal ("x5") + Algorithm.compute("xx").get should equal ("x5") + + // y + Algorithm.compute("y").get should equal ("y0") + Algorithm.compute("yy").get should equal ("y0") + + // z + Algorithm.compute("z").get should equal ("z5") + Algorithm.compute("zz").get should equal ("z5") + + // Starting with letter then numbers. + Algorithm.compute("x123456").get should equal ("x5") + Algorithm.compute("a123456").get should equal ("a0") + Algorithm.compute("f123456").get should equal ("f2") + + // Miscellaneous. + Algorithm.compute("braz").get should equal ("b1905") + Algorithm.compute("broz").get should equal ("b1905") + Algorithm.compute("caren").get should equal ("c30908") + Algorithm.compute("carren").get should equal ("c30908") + Algorithm.compute("coram").get should equal ("c30908") + Algorithm.compute("corran").get should equal ("c30908") + Algorithm.compute("curreen").get should equal ("c30908") + Algorithm.compute("curwen").get should equal ("c30908") + Algorithm.compute("hairs").get should equal ("h093") + Algorithm.compute("hark").get should equal ("h093") + Algorithm.compute("hars").get should equal ("h093") + Algorithm.compute("hayers").get should equal ("h093") + Algorithm.compute("heers").get should equal ("h093") + Algorithm.compute("hiers").get should equal ("h093") + Algorithm.compute("lambard").get should equal ("l7081096") + Algorithm.compute("lambart").get should equal ("l7081096") + Algorithm.compute("lambert").get should equal ("l7081096") + Algorithm.compute("lambird").get should equal ("l7081096") + Algorithm.compute("lampaert").get should equal ("l7081096") + Algorithm.compute("lampart").get should equal ("l7081096") + 
Algorithm.compute("lamport").get should equal ("l7081096") + Algorithm.compute("limbert").get should equal ("l7081096") + Algorithm.compute("lombard").get should equal ("l7081096") + Algorithm.compute("nolton").get should equal ("n807608") + Algorithm.compute("noulton").get should equal ("n807608") + } + } + } + } + "RefinedSoundexAlgorithm companion object" should provide { + "pass-through compute method" should returns { + "same value as class" in { + RefinedSoundexAlgorithm.compute("braz").get should equal ("b1905") + } + } + } +} + +object RefinedSoundexAlgorithmSpec { + final private val Algorithm = RefinedSoundexAlgorithm() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala new file mode 100755 index 0000000..cb6a222 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala @@ -0,0 +1,50 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RefinedSoundexMetricSpec extends ScalaTest { + import RefinedSoundexMetricSpec.Metric + + "RefinedSoundexMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + Metric.compare("123", "123").isDefined should be (false) + Metric.compare("123", "").isDefined should be (false) + Metric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + "Boolean indicating true" in { + Metric.compare("robert", "rupert").get should be (true) + } 
+ } + "phonetically dissimilar arguments" should returns { + "Boolean indicating false" in { + Metric.compare("robert", "rubin").get should be (false) + } + } + } + } + "RefinedSoundexMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + RefinedSoundexMetric.compare("robert", "rubin").get should be (false) + } + } + } +} + +object RefinedSoundexMetricSpec { + final private val Metric = RefinedSoundexMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala new file mode 100755 index 0000000..5b7deaa --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala @@ -0,0 +1,174 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class SoundexAlgorithmSpec extends ScalaTest { + import SoundexAlgorithmSpec.Algorithm + + "SoundexAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + Algorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + Algorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // a + Algorithm.compute("a").get should equal ("a000") + Algorithm.compute("aa").get should equal ("a000") + + // b + Algorithm.compute("b").get should equal ("b000") + Algorithm.compute("bb").get should equal ("b000") + + // c + Algorithm.compute("c").get should equal ("c000") + Algorithm.compute("cc").get should equal ("c000") + + // d + Algorithm.compute("d").get should equal ("d000") + Algorithm.compute("dd").get should equal ("d000") + + // e + Algorithm.compute("e").get should equal 
("e000") + Algorithm.compute("ee").get should equal ("e000") + + // f + Algorithm.compute("f").get should equal ("f000") + Algorithm.compute("ff").get should equal ("f000") + + // g + Algorithm.compute("g").get should equal ("g000") + Algorithm.compute("gg").get should equal ("g000") + + // h + Algorithm.compute("h").get should equal ("h000") + Algorithm.compute("hh").get should equal ("h000") + + // i + Algorithm.compute("i").get should equal ("i000") + Algorithm.compute("ii").get should equal ("i000") + + // j + Algorithm.compute("j").get should equal ("j000") + Algorithm.compute("jj").get should equal ("j000") + + // k + Algorithm.compute("k").get should equal ("k000") + Algorithm.compute("kk").get should equal ("k000") + + // l + Algorithm.compute("l").get should equal ("l000") + Algorithm.compute("ll").get should equal ("l000") + + // m + Algorithm.compute("m").get should equal ("m000") + Algorithm.compute("mm").get should equal ("m000") + + // n + Algorithm.compute("n").get should equal ("n000") + Algorithm.compute("nn").get should equal ("n000") + + // o + Algorithm.compute("o").get should equal ("o000") + Algorithm.compute("oo").get should equal ("o000") + + // p + Algorithm.compute("p").get should equal ("p000") + Algorithm.compute("pp").get should equal ("p000") + + // q + Algorithm.compute("q").get should equal ("q000") + Algorithm.compute("qq").get should equal ("q000") + + // r + Algorithm.compute("r").get should equal ("r000") + Algorithm.compute("rr").get should equal ("r000") + + // s + Algorithm.compute("s").get should equal ("s000") + Algorithm.compute("ss").get should equal ("s000") + + // t + Algorithm.compute("t").get should equal ("t000") + Algorithm.compute("tt").get should equal ("t000") + + // u + Algorithm.compute("u").get should equal ("u000") + Algorithm.compute("uu").get should equal ("u000") + + // v + Algorithm.compute("v").get should equal ("v000") + Algorithm.compute("vv").get should equal ("v000") + + // w + 
Algorithm.compute("w").get should equal ("w000") + Algorithm.compute("ww").get should equal ("w000") + + // x + Algorithm.compute("x").get should equal ("x000") + Algorithm.compute("xx").get should equal ("x000") + + // y + Algorithm.compute("y").get should equal ("y000") + Algorithm.compute("yy").get should equal ("y000") + + // z + Algorithm.compute("z").get should equal ("z000") + Algorithm.compute("zz").get should equal ("z000") + + // Starting with letter then numbers. + Algorithm.compute("x123456").get should equal ("x000") + Algorithm.compute("a123456").get should equal ("a000") + Algorithm.compute("f123456").get should equal ("f000") + + // Miscellaneous. + Algorithm.compute("abc").get should equal ("a120") + Algorithm.compute("xyz").get should equal ("x200") + Algorithm.compute("robert").get should equal ("r163") + Algorithm.compute("rupert").get should equal ("r163") + Algorithm.compute("rubin").get should equal ("r150") + Algorithm.compute("ashcraft").get should equal ("a261") + Algorithm.compute("tymczak").get should equal ("t522") + Algorithm.compute("pfister").get should equal ("p236") + Algorithm.compute("euler").get should equal ("e460") + Algorithm.compute("gauss").get should equal ("g200") + Algorithm.compute("hilbert").get should equal ("h416") + Algorithm.compute("knuth").get should equal ("k530") + Algorithm.compute("lloyd").get should equal ("l300") + Algorithm.compute("lukasiewicz").get should equal ("l222") + Algorithm.compute("ashcroft").get should equal ("a261") + Algorithm.compute("tymczak").get should equal ("t522") + Algorithm.compute("pfister").get should equal ("p236") + Algorithm.compute("ellery").get should equal ("e460") + Algorithm.compute("ghosh").get should equal ("g200") + Algorithm.compute("heilbronn").get should equal ("h416") + Algorithm.compute("kant").get should equal ("k530") + Algorithm.compute("ladd").get should equal ("l300") + Algorithm.compute("lissajous").get should equal ("l222") + Algorithm.compute("fusedale").get 
should equal ("f234") + } + } + } + } + "SoundexAlgorithm companion object" should provide { + "pass-through compute method" should returns { + "same value as class" in { + SoundexAlgorithm.compute("abc").get should equal ("a120") + } + } + } +} + +object SoundexAlgorithmSpec { + final private val Algorithm = SoundexAlgorithm() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala new file mode 100755 index 0000000..9fc47d8 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala @@ -0,0 +1,50 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class SoundexMetricSpec extends ScalaTest { + import SoundexMetricSpec.Metric + + "SoundexMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + Metric.compare("123", "123").isDefined should be (false) + Metric.compare("123", "").isDefined should be (false) + Metric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + "Boolean indicating true" in { + Metric.compare("robert", "rupert").get should be (true) + } + } + "phonetically dissimilar arguments" should returns { + "Boolean indicating false" in { + Metric.compare("robert", "rubin").get should be (false) + } + } + } + } + "SoundexMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + SoundexMetric.compare("robert", "rubin").get 
should be (false) + } + } + } +} + +object SoundexMetricSpec { + final private val Metric = SoundexMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala new file mode 100755 index 0000000..5ddfc06 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala @@ -0,0 +1,75 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class DiceSorensenMetricSpec extends ScalaTest { + import DiceSorensenMetricSpec.Metric + + "DiceSorensenMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "")(1).isDefined should be (false) + Metric.compare("abc", "")(1).isDefined should be (false) + Metric.compare("", "xyz")(1).isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + Metric.compare("abc", "abc")(1).get should be (1) + Metric.compare("abc", "abc")(2).get should be (1) + Metric.compare("abc", "abc")(3).get should be (1) + } + } + "unequal arguments" should returns { + "0" in { + Metric.compare("abc", "xyz")(1).get should be (0) + Metric.compare("abc", "xyz")(2).get should be (0) + Metric.compare("abc", "xyz")(3).get should be (0) + } + } + "invalid arguments" should returns { + "None" in { + Metric.compare("n", "naght")(2).isDefined should be (false) + Metric.compare("night", "n")(2).isDefined should be (false) + Metric.compare("ni", "naght")(3).isDefined should be (false) + Metric.compare("night", "na")(3).isDefined should be (false) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + Metric.compare("night", "nacht")(1).get should be (0.6) + Metric.compare("night", 
"naght")(1).get should be (0.8) + Metric.compare("context", "contact")(1).get should be (0.7142857142857143) + + Metric.compare("night", "nacht")(2).get should be (0.25) + Metric.compare("night", "naght")(2).get should be (0.5) + Metric.compare("context", "contact")(2).get should be (0.5) + Metric.compare("contextcontext", "contact")(2).get should be (0.3157894736842105) + Metric.compare("context", "contactcontact")(2).get should be (0.3157894736842105) + Metric.compare("ht", "nacht")(2).get should be (0.4) + Metric.compare("xp", "nacht")(2).get should be (0) + Metric.compare("ht", "hththt")(2).get should be (0.3333333333333333) + + Metric.compare("night", "nacht")(3).get should be (0) + Metric.compare("night", "naght")(3).get should be (0.3333333333333333) + Metric.compare("context", "contact")(3).get should be (0.4) + } + } + } + } + "DiceSorensenMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + DiceSorensenMetric.compare("context", "contact")(3).get should be (0.4) + } + } + } +} + +object DiceSorensenMetricSpec { + private final val Metric = DiceSorensenMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala new file mode 100755 index 0000000..c69d860 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala @@ -0,0 +1,52 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class HammingMetricSpec extends ScalaTest { + import HammingMetricSpec.Metric + + "HammingMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + 
Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "0" in { + Metric.compare("abc", "abc").get should be (0) + Metric.compare("123", "123").get should be (0) + } + } + "unequal arguments" should returns { + "Int indicating distance" in { + Metric.compare("abc", "xyz").get should be (3) + Metric.compare("123", "456").get should be (3) + } + } + "valid arguments" should returns { + "Int indicating distance" in { + Metric.compare("toned", "roses").get should be (3) + Metric.compare("1011101", "1001001").get should be (2) + Metric.compare("2173896", "2233796").get should be (3) + } + } + } + } + "HammingMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + HammingMetric.compare("2173896", "2233796").get should be (3) + } + } + } +} + +object HammingMetricSpec { + private final val Metric = HammingMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala new file mode 100755 index 0000000..17bc3ef --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala @@ -0,0 +1,77 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class JaccardMetricSpec extends ScalaTest { + import JaccardMetricSpec.Metric + + "JaccardMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "")(1).isDefined should be (false) + Metric.compare("abc", "")(1).isDefined should be (false) + Metric.compare("", "xyz")(1).isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + 
Metric.compare("abc", "abc")(1).get should be (1) + Metric.compare("abc", "abc")(2).get should be (1) + Metric.compare("abc", "abc")(3).get should be (1) + } + } + "unequal arguments" should returns { + "0" in { + Metric.compare("abc", "xyz")(1).get should be (0) + Metric.compare("abc", "xyz")(2).get should be (0) + Metric.compare("abc", "xyz")(3).get should be (0) + } + } + "invalid arguments" should returns { + "None" in { + Metric.compare("n", "naght")(2).isDefined should be (false) + Metric.compare("night", "n")(2).isDefined should be (false) + Metric.compare("ni", "naght")(3).isDefined should be (false) + Metric.compare("night", "na")(3).isDefined should be (false) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + Metric.compare("night", "nacht")(1).get should be (0.42857142857142855) + Metric.compare("night", "naght")(1).get should be (0.6666666666666666) + Metric.compare("context", "contact")(1).get should be (0.5555555555555556) + + Metric.compare("night", "nacht")(2).get should be (0.14285714285714285) + Metric.compare("night", "naght")(2).get should be (0.3333333333333333) + Metric.compare("context", "contact")(2).get should be (0.3333333333333333) + Metric.compare("contextcontext", "contact")(2).get should be (0.1875) + Metric.compare("context", "contactcontact")(2).get should be (0.1875) + Metric.compare("ht", "nacht")(2).get should be (0.25) + Metric.compare("xp", "nacht")(2).get should be (0) + Metric.compare("ht", "hththt")(2).get should be (0.2) + + Metric.compare("night", "nacht")(3).get should be (0) + Metric.compare("night", "naght")(3).get should be (0.2) + Metric.compare("context", "contact")(3).get should be (0.25) + } + } + } + } + "JaccardMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + JaccardMetric.compare("context", "contact")(3).get should be (0.25) + } + } + } +} + +object JaccardMetricSpec { + private final val Metric = 
JaccardMetric() +} + + diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala new file mode 100755 index 0000000..00f4daf --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala @@ -0,0 +1,66 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class JaroMetricSpec extends ScalaTest { + import JaroMetricSpec.Metric + + "JaroMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + Metric.compare("a", "a").get should be (1) + Metric.compare("abc", "abc").get should be (1) + Metric.compare("123", "123").get should be (1) + } + } + "unequal arguments" should returns { + "0" in { + Metric.compare("abc", "xyz").get should be (0) + Metric.compare("123", "456").get should be (0) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + Metric.compare("aa", "a").get should be (0.8333333333333334) + Metric.compare("a", "aa").get should be (0.8333333333333334) + Metric.compare("veryveryverylong", "v").get should be (0.6875) + Metric.compare("v", "veryveryverylong").get should be (0.6875) + Metric.compare("martha", "marhta").get should be (0.9444444444444445) + Metric.compare("dwayne", "duane").get should be (0.8222222222222223) + Metric.compare("dixon", "dicksonx").get should be (0.7666666666666666) + Metric.compare("abcvwxyz", "cabvwxyz").get should be (0.9583333333333334) + Metric.compare("jones", "johnson").get should be (0.7904761904761904) + 
Metric.compare("henka", "henkan").get should be (0.9444444444444445) + Metric.compare("fvie", "ten").get should be (0) + + Metric.compare("zac ephron", "zac efron").get should be > + Metric.compare("zac ephron", "kai ephron").get + Metric.compare("brittney spears", "britney spears").get should be > + Metric.compare("brittney spears", "brittney startzman").get + } + } + } + } + "JaroMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + JaroMetric.compare("fvie", "ten").get should be (0) + } + } + } +} + +object JaroMetricSpec { + private final val Metric = JaroMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala new file mode 100755 index 0000000..06421a8 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala @@ -0,0 +1,66 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class JaroWinklerMetricSpec extends ScalaTest { + import JaroWinklerMetricSpec.Metric + + "JaroWinklerMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + Metric.compare("a", "a").get should be (1) + Metric.compare("abc", "abc").get should be (1) + Metric.compare("123", "123").get should be (1) + } + } + "unequal arguments" should returns { + "0" in { + Metric.compare("abc", "xyz").get should be (0) + Metric.compare("123", "456").get should be (0) + } + } + "valid arguments" should 
returns { + "Double indicating distance" in { + Metric.compare("aa", "a").get should be (0.8500000000000001) + Metric.compare("a", "aa").get should be (0.8500000000000001) + Metric.compare("veryveryverylong", "v").get should be (0.71875) + Metric.compare("v", "veryveryverylong").get should be (0.71875) + Metric.compare("martha", "marhta").get should be (0.9611111111111111) + Metric.compare("dwayne", "duane").get should be (0.8400000000000001) + Metric.compare("dixon", "dicksonx").get should be (0.8133333333333332) + Metric.compare("abcvwxyz", "cabvwxyz").get should be (0.9583333333333334) + Metric.compare("jones", "johnson").get should be (0.8323809523809523) + Metric.compare("henka", "henkan").get should be (0.9666666666666667) + Metric.compare("fvie", "ten").get should be (0) + + Metric.compare("zac ephron", "zac efron").get should be > + Metric.compare("zac ephron", "kai ephron").get + Metric.compare("brittney spears", "britney spears").get should be > + Metric.compare("brittney spears", "brittney startzman").get + } + } + } + } + "JaroWinklerMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + JaroWinklerMetric.compare("fvie", "ten").get should be (0) + } + } + } +} + +object JaroWinklerMetricSpec { + private final val Metric = JaroWinklerMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala new file mode 100755 index 0000000..51de2ca --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala @@ -0,0 +1,65 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class LevenshteinMetricSpec extends ScalaTest { + import 
LevenshteinMetricSpec.Metric + + "LevenshteinMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "0" in { + Metric.compare("abc", "abc").get should be (0) + Metric.compare("123", "123").get should be (0) + } + } + "unequal arguments" should returns { + "Int indicating distance" in { + Metric.compare("abc", "xyz").get should be (3) + Metric.compare("123", "456").get should be (3) + } + } + "valid arguments" should returns { + "Int indicating distance" in { + Metric.compare("abc", "a").get should be (2) + Metric.compare("a", "abc").get should be (2) + Metric.compare("abc", "c").get should be (2) + Metric.compare("c", "abc").get should be (2) + Metric.compare("sitting", "kitten").get should be (3) + Metric.compare("kitten", "sitting").get should be (3) + Metric.compare("cake", "drake").get should be (2) + Metric.compare("drake", "cake").get should be (2) + Metric.compare("saturday", "sunday").get should be (3) + Metric.compare("sunday", "saturday").get should be (3) + Metric.compare("book", "back").get should be (2) + Metric.compare("dog", "fog").get should be (1) + Metric.compare("foq", "fog").get should be (1) + Metric.compare("fvg", "fog").get should be (1) + Metric.compare("encyclopedia", "encyclopediaz").get should be (1) + Metric.compare("encyclopediz", "encyclopediaz").get should be (1) + } + } + } + } + "LevenshteinMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + LevenshteinMetric.compare("fvg", "fog").get should be (1) + } + } + } +} + +object LevenshteinMetricSpec { + private final val Metric = LevenshteinMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala 
b/core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala new file mode 100755 index 0000000..39d97e5 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala @@ -0,0 +1,75 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class NGramMetricSpec extends ScalaTest { + import NGramMetricSpec.Metric + + "NGramMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "")(1).isDefined should be (false) + Metric.compare("abc", "")(1).isDefined should be (false) + Metric.compare("", "xyz")(1).isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + Metric.compare("abc", "abc")(1).get should be (1) + Metric.compare("abc", "abc")(2).get should be (1) + Metric.compare("abc", "abc")(3).get should be (1) + } + } + "unequal arguments" should returns { + "0" in { + Metric.compare("abc", "xyz")(1).get should be (0) + Metric.compare("abc", "xyz")(2).get should be (0) + Metric.compare("abc", "xyz")(3).get should be (0) + } + } + "invalid arguments" should returns { + "None" in { + Metric.compare("n", "naght")(2).isDefined should be (false) + Metric.compare("night", "n")(2).isDefined should be (false) + Metric.compare("ni", "naght")(3).isDefined should be (false) + Metric.compare("night", "na")(3).isDefined should be (false) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + Metric.compare("night", "nacht")(1).get should be (0.6) + Metric.compare("night", "naght")(1).get should be (0.8) + Metric.compare("context", "contact")(1).get should be (0.7142857142857143) + + Metric.compare("night", "nacht")(2).get should be (0.25) + Metric.compare("night", "naght")(2).get should be (0.5) + Metric.compare("context", 
"contact")(2).get should be (0.5) + Metric.compare("contextcontext", "contact")(2).get should be (0.23076923076923078) + Metric.compare("context", "contactcontact")(2).get should be (0.23076923076923078) + Metric.compare("ht", "nacht")(2).get should be (0.25) + Metric.compare("xp", "nacht")(2).get should be (0) + Metric.compare("ht", "hththt")(2).get should be (0.2) + + Metric.compare("night", "nacht")(3).get should be (0) + Metric.compare("night", "naght")(3).get should be (0.3333333333333333) + Metric.compare("context", "contact")(3).get should be (0.4) + } + } + } + } + "NGramMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + NGramMetric.compare("context", "contact")(3).get should be (0.4) + } + } + } +} + +object NGramMetricSpec { + private final val Metric = NGramMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala new file mode 100755 index 0000000..32c9650 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala @@ -0,0 +1,77 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class OverlapMetricSpec extends ScalaTest { + import OverlapMetricSpec.Metric + + "OverlapMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "")(1).isDefined should be (false) + Metric.compare("abc", "")(1).isDefined should be (false) + Metric.compare("", "xyz")(1).isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + Metric.compare("abc", "abc")(1).get should be (1) + Metric.compare("abc", "abc")(2).get should be (1) + Metric.compare("abc", 
"abc")(3).get should be (1) + } + } + "unequal arguments" should returns { + "0" in { + Metric.compare("abc", "xyz")(1).get should be (0) + Metric.compare("abc", "xyz")(2).get should be (0) + Metric.compare("abc", "xyz")(3).get should be (0) + } + } + "invalid arguments" should returns { + "None" in { + Metric.compare("n", "naght")(2).isDefined should be (false) + Metric.compare("night", "n")(2).isDefined should be (false) + Metric.compare("ni", "naght")(3).isDefined should be (false) + Metric.compare("night", "na")(3).isDefined should be (false) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + Metric.compare("bob", "bobman") (1).get should be (1) + Metric.compare("bob", "manbobman") (1).get should be (1) + Metric.compare("night", "nacht")(1).get should be (0.6) + Metric.compare("night", "naght")(1).get should be (0.8) + Metric.compare("context", "contact")(1).get should be (0.7142857142857143) + + Metric.compare("night", "nacht")(2).get should be (0.25) + Metric.compare("night", "naght")(2).get should be (0.5) + Metric.compare("context", "contact")(2).get should be (0.5) + Metric.compare("contextcontext", "contact")(2).get should be (0.5) + Metric.compare("context", "contactcontact")(2).get should be (0.5) + Metric.compare("ht", "nacht")(2).get should be (1) + Metric.compare("xp", "nacht")(2).get should be (0) + Metric.compare("ht", "hththt")(2).get should be (1) + + Metric.compare("night", "nacht")(3).get should be (0) + Metric.compare("night", "naght")(3).get should be (0.3333333333333333) + Metric.compare("context", "contact")(3).get should be (0.4) + } + } + } + } + "OverlapMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + OverlapMetric.compare("context", "contact")(3).get should be (0.4) + } + } + } +} + +object OverlapMetricSpec { + private final val Metric = OverlapMetric() +} diff --git 
a/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala new file mode 100755 index 0000000..638536f --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala @@ -0,0 +1,56 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RatcliffObershelpMetricSpec extends ScalaTest { + import RatcliffObershelpMetricSpec.Metric + + "RatcliffObershelpMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "").isDefined should be (false) + Metric.compare("abc", "").isDefined should be (false) + Metric.compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + Metric.compare("abc", "abc").get should be (1) + Metric.compare("123", "123").get should be (1) + } + } + "unequal arguments" should returns { + "Double indicating distance" in { + Metric.compare("abc", "xyz").get should be (0) + Metric.compare("123", "456").get should be (0) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + Metric.compare("aleksander", "alexandre").get should be (0.7368421052631579) + Metric.compare("alexandre", "aleksander").get should be (0.7368421052631579) + Metric.compare("pennsylvania", "pencilvaneya").get should be (0.6666666666666666) + Metric.compare("pencilvaneya", "pennsylvania").get should be (0.6666666666666666) + Metric.compare("abcefglmn", "abefglmo").get should be (0.8235294117647058) + Metric.compare("abefglmo", "abcefglmn").get should be (0.8235294117647058) + } + } + } + } + "RatcliffObershelpMetric companion object" should provide { + "pass-through compare method" should
returns { + "same value as class" in { + RatcliffObershelpMetric.compare("abefglmo", "abcefglmn").get should be (0.8235294117647058) + } + } + } +} + +object RatcliffObershelpMetricSpec { + private final val Metric = RatcliffObershelpMetric() +} + diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala new file mode 100755 index 0000000..9c46c89 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala @@ -0,0 +1,64 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class WeightedLevenshteinMetricSpec extends ScalaTest { + import WeightedLevenshteinMetricSpec.{Metric, Options} + + "WeightedLevenshteinMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + Metric.compare("", "")(Options).isDefined should be (false) + Metric.compare("abc", "")(Options).isDefined should be (false) + Metric.compare("", "xyz")(Options).isDefined should be (false) + } + } + "equal arguments" should returns { + "0" in { + Metric.compare("abc", "abc")(Options).get should be (0) + Metric.compare("123", "123")(Options).get should be (0) + } + } + "unequal arguments" should returns { + "Double indicating distance" in { + Metric.compare("abc", "xyz")(Options).get should be (3) + Metric.compare("123", "456")(Options).get should be (3) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + Metric.compare("az", "z")(Options).get should be (10) + Metric.compare("z", "az")(Options).get should be (0.1) + Metric.compare("a", "z")(Options).get should be (1) + Metric.compare("z", "a")(Options).get should be (1) + Metric.compare("ab", 
"yz")(Options).get should be (2) + Metric.compare("yz", "ab")(Options).get should be (2) + Metric.compare("0", "0123456789")(Options).get should be (0.9) + Metric.compare("0123456789", "0")(Options).get should be (90) + Metric.compare("book", "back")(Options).get should be (2) + Metric.compare("back", "book")(Options).get should be (2) + Metric.compare("hosp", "hospital")(Options).get should be (0.4) + Metric.compare("hospital", "hosp")(Options).get should be (40) + Metric.compare("clmbs blvd", "columbus boulevard")(Options).get should be (0.8) + Metric.compare("columbus boulevard", "clmbs blvd")(Options).get should be (80) + } + } + } + } + "WeightedLevenshteinMetric companion object" should provide { + "pass-through compare method" should returns { + "same value as class" in { + WeightedLevenshteinMetric.compare("hospital", "hosp")(Options).get should be (40) + } + } + } +} + +object WeightedLevenshteinMetricSpec { + private final val Options = Tuple3[BigDecimal, BigDecimal, BigDecimal](10, 0.1, 1) + private final val Metric = WeightedLevenshteinMetric() +} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala new file mode 100755 index 0000000..56fdc13 --- /dev/null +++ b/core/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala @@ -0,0 +1,69 @@ +package com.rockymadden.stringmetric.tokenization + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class NGramTokenizerSpec extends ScalaTest { + import NGramTokenizerSpec.Tokenizer + + "NGramTokenizer" should provide { + "tokenize method" when passed { + "empty argument" should returns { + "None" in { + Tokenizer.tokenize("")(1).isDefined should be (false) + } + } + "invalid n argument" should throws { + "IllegalArgumentException" in { + 
evaluating { + Tokenizer.tokenize("")(0).isDefined should be (false) + } should produce [IllegalArgumentException] + + evaluating { + Tokenizer.tokenize("")(-1).isDefined should be (false) + } should produce [IllegalArgumentException] + } + } + "valid argument" should returns { + "Array[String]" in { + Tokenizer.tokenize("abcdefghijklmnopqrstuvwxyz")(1).get should equal ( + Array( + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", + "s", "t", "u", "v", "w", "x", "y", "z" + ) + ) + Tokenizer.tokenize("abcdefghijklmnopqrstuvwxyz")(2).get should equal ( + Array( + "ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl", "lm", "mn", "no", "op", + "pq", "qr", "rs", "st", "tu", "uv", "vw", "wx", "xy", "yz" + ) + ) + Tokenizer.tokenize("abcdefghijklmnopqrstuvwxyz")(3).get should equal ( + Array( + "abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl", "klm", "lmn", "mno", + "nop", "opq", "pqr", "qrs", "rst", "stu", "tuv", "uvw", "vwx", "wxy", "xyz" + ) + ) + } + } + } + } + "NGramTokenizer companion object" should provide { + "pass-through tokenize method" should returns { + "same value as class" in { + NGramTokenizer.tokenize("abcdefghijklmnopqrstuvwxyz")(1).get should equal ( + Array( + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", + "s", "t", "u", "v", "w", "x", "y", "z" + ) + ) + } + } + } +} + +object NGramTokenizerSpec { + private final val Tokenizer = NGramTokenizer() +} -- cgit v1.2.3