From 49de854bb464f1be37fbb27f942b9b65e52df751 Mon Sep 17 00:00:00 2001 From: Rocky Madden Date: Thu, 2 Jan 2014 13:47:43 -0700 Subject: Moved from gradle to sbt. --- .gitignore | 2 - .jvmopts | 7 + .travis.yml | 2 - build.gradle | 9 - cli/build.gradle | 105 ---------- .../com/rockymadden/stringmetric/cli/package.scala | 105 ---------- .../cli/phonetic/metaphonealgorithm.scala | 16 -- .../cli/phonetic/metaphonemetric.scala | 21 -- .../cli/phonetic/nysiisalgorithm.scala | 16 -- .../stringmetric/cli/phonetic/nysiismetric.scala | 21 -- .../cli/phonetic/refinednysiisalgorithm.scala | 16 -- .../cli/phonetic/refinednysiismetric.scala | 21 -- .../cli/phonetic/refinedsoundexalgorithm.scala | 16 -- .../cli/phonetic/refinedsoundexmetric.scala | 21 -- .../cli/phonetic/soundexalgorithm.scala | 16 -- .../stringmetric/cli/phonetic/soundexmetric.scala | 21 -- .../cli/similarity/dicesorensenmetric.scala | 26 --- .../cli/similarity/hammingmetric.scala | 22 --- .../cli/similarity/jaccardmetric.scala | 26 --- .../stringmetric/cli/similarity/jarometric.scala | 22 --- .../cli/similarity/jarowinklermetric.scala | 22 --- .../cli/similarity/levenshteinmetric.scala | 23 --- .../stringmetric/cli/similarity/ngrammetric.scala | 26 --- .../cli/similarity/overlapmetric.scala | 26 --- .../cli/similarity/ratcliffobershelpmetric.scala | 22 --- .../cli/similarity/weightedlevenshteinmetric.scala | 33 ---- .../stringmetric/cli/OptionMapSpec.scala | 82 -------- .../cli/phonetic/metaphonealgorithmSpec.scala | 37 ---- .../cli/phonetic/metaphonemetricSpec.scala | 44 ----- .../cli/phonetic/nysiisalgorithmSpec.scala | 37 ---- .../cli/phonetic/nysiismetricSpec.scala | 44 ----- .../cli/phonetic/refinednysiisalgorithmSpec.scala | 37 ---- .../cli/phonetic/refinednysiismetricSpec.scala | 44 ----- .../cli/phonetic/refinedsoundexalgorithmSpec.scala | 37 ---- .../cli/phonetic/refinedsoundexmetricSpec.scala | 44 ----- .../cli/phonetic/soundexalgorithmSpec.scala | 37 ---- .../cli/phonetic/soundexmetricSpec.scala | 44 
----- .../cli/similarity/dicesorensenmetricSpec.scala | 37 ---- .../cli/similarity/hammingmetricSpec.scala | 37 ---- .../cli/similarity/jaccardmetricSpec.scala | 37 ---- .../cli/similarity/jarometricSpec.scala | 37 ---- .../cli/similarity/jarowinklermetricSpec.scala | 37 ---- .../cli/similarity/levenshteinmetricSpec.scala | 37 ---- .../cli/similarity/ngrammetricSpec.scala | 66 ------- .../cli/similarity/overlapmetricSpec.scala | 37 ---- .../similarity/ratcliffobershelpmetricSpec.scala | 37 ---- .../similarity/weightedlevenshteinmetricSpec.scala | 121 ------------ .../com/rockymadden/stringmetric/cli/package.scala | 105 ++++++++++ .../cli/phonetic/metaphonealgorithm.scala | 16 ++ .../cli/phonetic/metaphonemetric.scala | 21 ++ .../cli/phonetic/nysiisalgorithm.scala | 16 ++ .../stringmetric/cli/phonetic/nysiismetric.scala | 21 ++ .../cli/phonetic/refinednysiisalgorithm.scala | 16 ++ .../cli/phonetic/refinednysiismetric.scala | 21 ++ .../cli/phonetic/refinedsoundexalgorithm.scala | 16 ++ .../cli/phonetic/refinedsoundexmetric.scala | 21 ++ .../cli/phonetic/soundexalgorithm.scala | 16 ++ .../stringmetric/cli/phonetic/soundexmetric.scala | 21 ++ .../cli/similarity/dicesorensenmetric.scala | 26 +++ .../cli/similarity/hammingmetric.scala | 22 +++ .../cli/similarity/jaccardmetric.scala | 26 +++ .../stringmetric/cli/similarity/jarometric.scala | 22 +++ .../cli/similarity/jarowinklermetric.scala | 22 +++ .../cli/similarity/levenshteinmetric.scala | 23 +++ .../stringmetric/cli/similarity/ngrammetric.scala | 26 +++ .../cli/similarity/overlapmetric.scala | 26 +++ .../cli/similarity/ratcliffobershelpmetric.scala | 22 +++ .../cli/similarity/weightedlevenshteinmetric.scala | 33 ++++ .../com/rockymadden/stringmetric/cli/CliSpec.scala | 81 ++++++++ .../rockymadden/stringmetric/cli/ScalaTest.scala | 18 ++ .../cli/phonetic/metaphonealgorithmSpec.scala | 37 ++++ .../cli/phonetic/metaphonemetricSpec.scala | 44 +++++ .../cli/phonetic/nysiisalgorithmSpec.scala | 37 ++++ 
.../cli/phonetic/nysiismetricSpec.scala | 44 +++++ .../cli/phonetic/refinednysiisalgorithmSpec.scala | 37 ++++ .../cli/phonetic/refinednysiismetricSpec.scala | 44 +++++ .../cli/phonetic/refinedsoundexalgorithmSpec.scala | 37 ++++ .../cli/phonetic/refinedsoundexmetricSpec.scala | 44 +++++ .../cli/phonetic/soundexalgorithmSpec.scala | 37 ++++ .../cli/phonetic/soundexmetricSpec.scala | 44 +++++ .../cli/similarity/dicesorensenmetricSpec.scala | 37 ++++ .../cli/similarity/hammingmetricSpec.scala | 37 ++++ .../cli/similarity/jaccardmetricSpec.scala | 37 ++++ .../cli/similarity/jarometricSpec.scala | 37 ++++ .../cli/similarity/jarowinklermetricSpec.scala | 37 ++++ .../cli/similarity/levenshteinmetricSpec.scala | 37 ++++ .../cli/similarity/ngrammetricSpec.scala | 66 +++++++ .../cli/similarity/overlapmetricSpec.scala | 37 ++++ .../similarity/ratcliffobershelpmetricSpec.scala | 37 ++++ .../similarity/weightedlevenshteinmetricSpec.scala | 121 ++++++++++++ core/build.gradle | 46 ----- .../stringmetric/CaliperBenchmark.scala | 7 - .../rockymadden/stringmetric/CaliperRunner.scala | 7 - .../phonetic/MetaphoneAlgorithmBenchmark.scala | 26 --- .../phonetic/MetaphoneMetricBenchmark.scala | 49 ----- .../phonetic/NysiisAlgorithmBenchmark.scala | 26 --- .../phonetic/NysiisMetricBenchmark.scala | 49 ----- .../phonetic/RefinedNysiisAlgorithmBenchmark.scala | 26 --- .../phonetic/RefinedNysiisMetricBenchmark.scala | 49 ----- .../RefinedSoundexAlgorithmBenchmark.scala | 26 --- .../phonetic/RefinedSoundexMetricBenchmark.scala | 49 ----- .../phonetic/SoundexAlgorithmBenchmark.scala | 26 --- .../phonetic/SoundexMetricBenchmark.scala | 49 ----- .../similarity/DiceSorensenMetricBenchmark.scala | 48 ----- .../similarity/HammingMetricBenchmark.scala | 48 ----- .../similarity/JaccardMetricBenchmark.scala | 48 ----- .../similarity/JaroMetricBenchmark.scala | 48 ----- .../similarity/JaroWinklerMetricBenchmark.scala | 48 ----- .../similarity/LevenshteinMetricBenchmark.scala | 48 ----- 
.../similarity/NGramMetricBenchmark.scala | 51 ----- .../similarity/OverlapMetricBenchmark.scala | 48 ----- .../RatcliffObershelpMetricBenchmark.scala | 48 ----- .../WeightedLevenshteinMetricBenchmark.scala | 48 ----- .../tokenize/NGramTokenizerBenchmark.scala | 29 --- .../com/rockymadden/stringmetric/Algorithm.scala | 46 ----- .../com/rockymadden/stringmetric/Alphabet.scala | 42 ---- .../com/rockymadden/stringmetric/Metric.scala | 77 -------- .../com/rockymadden/stringmetric/Tokenize.scala | 33 ---- .../com/rockymadden/stringmetric/Transform.scala | 71 ------- .../com/rockymadden/stringmetric/package.scala | 17 -- .../stringmetric/phonetic/MetaphoneAlgorithm.scala | 105 ---------- .../stringmetric/phonetic/MetaphoneMetric.scala | 15 -- .../stringmetric/phonetic/NysiisAlgorithm.scala | 115 ----------- .../stringmetric/phonetic/NysiisMetric.scala | 24 --- .../phonetic/RefinedNysiisAlgorithm.scala | 121 ------------ .../phonetic/RefinedNysiisMetric.scala | 24 --- .../phonetic/RefinedSoundexAlgorithm.scala | 59 ------ .../phonetic/RefinedSoundexMetric.scala | 16 -- .../stringmetric/phonetic/SoundexAlgorithm.scala | 57 ------ .../stringmetric/phonetic/SoundexMetric.scala | 16 -- .../similarity/DiceSorensenMetric.scala | 27 --- .../stringmetric/similarity/HammingMetric.scala | 18 -- .../stringmetric/similarity/JaccardMetric.scala | 20 -- .../stringmetric/similarity/JaroMetric.scala | 66 ------- .../similarity/JaroWinklerMetric.scala | 23 --- .../similarity/LevenshteinMetric.scala | 40 ---- .../stringmetric/similarity/NGramMetric.scala | 24 --- .../stringmetric/similarity/OverlapMetric.scala | 24 --- .../similarity/RatcliffObershelpMetric.scala | 43 ----- .../similarity/WeightedLevenshteinMetric.scala | 36 ---- .../rockymadden/stringmetric/AlgorithmSpec.scala | 34 ---- .../rockymadden/stringmetric/AlphabetSpec.scala | 95 ---------- .../com/rockymadden/stringmetric/MetricSpec.scala | 56 ------ .../com/rockymadden/stringmetric/ScalaTest.scala | 18 -- 
.../rockymadden/stringmetric/TokenizeSpec.scala | 45 ----- .../rockymadden/stringmetric/TransformSpec.scala | 181 ------------------ .../phonetic/MetaphoneAlgorithmSpec.scala | 211 --------------------- .../phonetic/MetaphoneMetricSpec.scala | 39 ---- .../phonetic/NysiisAlgorithmSpec.scala | 189 ------------------ .../stringmetric/phonetic/NysiisMetricSpec.scala | 37 ---- .../phonetic/RefinedNysiisAlgorithmSpec.scala | 206 -------------------- .../phonetic/RefinedNysiisMetricSpec.scala | 35 ---- .../phonetic/RefinedSoundexAlgorithmSpec.scala | 160 ---------------- .../phonetic/RefinedSoundexMetricSpec.scala | 35 ---- .../phonetic/SoundexAlgorithmSpec.scala | 159 ---------------- .../stringmetric/phonetic/SoundexMetricSpec.scala | 35 ---- .../similarity/DiceSorensenMetricSpec.scala | 60 ------ .../similarity/HammingMetricSpec.scala | 37 ---- .../similarity/JaccardMetricSpec.scala | 60 ------ .../stringmetric/similarity/JaroMetricSpec.scala | 51 ----- .../similarity/JaroWinklerMetricSpec.scala | 51 ----- .../similarity/LevenshteinMetricSpec.scala | 50 ----- .../stringmetric/similarity/NGramMetricSpec.scala | 60 ------ .../similarity/OverlapMetricSpec.scala | 62 ------ .../similarity/RatcliffObershelpMetricSpec.scala | 40 ---- .../similarity/WeightedLevenshteinMetricSpec.scala | 48 ----- .../stringmetric/CaliperBenchmark.scala | 7 + .../rockymadden/stringmetric/CaliperRunner.scala | 7 + .../phonetic/MetaphoneAlgorithmBenchmark.scala | 26 +++ .../phonetic/MetaphoneMetricBenchmark.scala | 49 +++++ .../phonetic/NysiisAlgorithmBenchmark.scala | 26 +++ .../phonetic/NysiisMetricBenchmark.scala | 49 +++++ .../phonetic/RefinedNysiisAlgorithmBenchmark.scala | 26 +++ .../phonetic/RefinedNysiisMetricBenchmark.scala | 49 +++++ .../RefinedSoundexAlgorithmBenchmark.scala | 26 +++ .../phonetic/RefinedSoundexMetricBenchmark.scala | 49 +++++ .../phonetic/SoundexAlgorithmBenchmark.scala | 26 +++ .../phonetic/SoundexMetricBenchmark.scala | 49 +++++ 
.../similarity/DiceSorensenMetricBenchmark.scala | 48 +++++ .../similarity/HammingMetricBenchmark.scala | 48 +++++ .../similarity/JaccardMetricBenchmark.scala | 48 +++++ .../similarity/JaroMetricBenchmark.scala | 48 +++++ .../similarity/JaroWinklerMetricBenchmark.scala | 48 +++++ .../similarity/LevenshteinMetricBenchmark.scala | 48 +++++ .../similarity/NGramMetricBenchmark.scala | 51 +++++ .../similarity/OverlapMetricBenchmark.scala | 48 +++++ .../RatcliffObershelpMetricBenchmark.scala | 48 +++++ .../WeightedLevenshteinMetricBenchmark.scala | 48 +++++ .../tokenize/NGramTokenizerBenchmark.scala | 29 +++ .../com/rockymadden/stringmetric/Algorithm.scala | 46 +++++ .../com/rockymadden/stringmetric/Alphabet.scala | 42 ++++ .../com/rockymadden/stringmetric/Metric.scala | 77 ++++++++ .../com/rockymadden/stringmetric/Tokenize.scala | 33 ++++ .../com/rockymadden/stringmetric/Transform.scala | 71 +++++++ .../com/rockymadden/stringmetric/package.scala | 17 ++ .../stringmetric/phonetic/MetaphoneAlgorithm.scala | 105 ++++++++++ .../stringmetric/phonetic/MetaphoneMetric.scala | 15 ++ .../stringmetric/phonetic/NysiisAlgorithm.scala | 115 +++++++++++ .../stringmetric/phonetic/NysiisMetric.scala | 24 +++ .../phonetic/RefinedNysiisAlgorithm.scala | 121 ++++++++++++ .../phonetic/RefinedNysiisMetric.scala | 24 +++ .../phonetic/RefinedSoundexAlgorithm.scala | 59 ++++++ .../phonetic/RefinedSoundexMetric.scala | 16 ++ .../stringmetric/phonetic/SoundexAlgorithm.scala | 57 ++++++ .../stringmetric/phonetic/SoundexMetric.scala | 16 ++ .../similarity/DiceSorensenMetric.scala | 27 +++ .../stringmetric/similarity/HammingMetric.scala | 18 ++ .../stringmetric/similarity/JaccardMetric.scala | 20 ++ .../stringmetric/similarity/JaroMetric.scala | 66 +++++++ .../similarity/JaroWinklerMetric.scala | 23 +++ .../similarity/LevenshteinMetric.scala | 40 ++++ .../stringmetric/similarity/NGramMetric.scala | 24 +++ .../stringmetric/similarity/OverlapMetric.scala | 24 +++ 
.../similarity/RatcliffObershelpMetric.scala | 43 +++++ .../similarity/WeightedLevenshteinMetric.scala | 36 ++++ .../rockymadden/stringmetric/AlgorithmSpec.scala | 34 ++++ .../rockymadden/stringmetric/AlphabetSpec.scala | 95 ++++++++++ .../com/rockymadden/stringmetric/MetricSpec.scala | 56 ++++++ .../com/rockymadden/stringmetric/ScalaTest.scala | 18 ++ .../rockymadden/stringmetric/TokenizeSpec.scala | 45 +++++ .../rockymadden/stringmetric/TransformSpec.scala | 181 ++++++++++++++++++ .../phonetic/MetaphoneAlgorithmSpec.scala | 211 +++++++++++++++++++++ .../phonetic/MetaphoneMetricSpec.scala | 39 ++++ .../phonetic/NysiisAlgorithmSpec.scala | 189 ++++++++++++++++++ .../stringmetric/phonetic/NysiisMetricSpec.scala | 37 ++++ .../phonetic/RefinedNysiisAlgorithmSpec.scala | 206 ++++++++++++++++++++ .../phonetic/RefinedNysiisMetricSpec.scala | 35 ++++ .../phonetic/RefinedSoundexAlgorithmSpec.scala | 160 ++++++++++++++++ .../phonetic/RefinedSoundexMetricSpec.scala | 35 ++++ .../phonetic/SoundexAlgorithmSpec.scala | 159 ++++++++++++++++ .../stringmetric/phonetic/SoundexMetricSpec.scala | 35 ++++ .../similarity/DiceSorensenMetricSpec.scala | 60 ++++++ .../similarity/HammingMetricSpec.scala | 37 ++++ .../similarity/JaccardMetricSpec.scala | 60 ++++++ .../stringmetric/similarity/JaroMetricSpec.scala | 51 +++++ .../similarity/JaroWinklerMetricSpec.scala | 51 +++++ .../similarity/LevenshteinMetricSpec.scala | 50 +++++ .../stringmetric/similarity/NGramMetricSpec.scala | 60 ++++++ .../similarity/OverlapMetricSpec.scala | 62 ++++++ .../similarity/RatcliffObershelpMetricSpec.scala | 40 ++++ .../similarity/WeightedLevenshteinMetricSpec.scala | 48 +++++ deploy.gradle | 68 ------- gradle.properties | 2 - project/build.properties | 1 + project/build.scala | 41 ++++ settings.gradle | 6 - 246 files changed, 5688 insertions(+), 5862 deletions(-) create mode 100644 .jvmopts delete mode 100755 build.gradle delete mode 100755 cli/build.gradle delete mode 100755 
cli/source/main/scala/com/rockymadden/stringmetric/cli/package.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithm.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithm.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithm.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithm.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithm.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jarometric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetric.scala delete 
mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetric.scala delete mode 100755 cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetric.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/OptionMapSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithmSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithmSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithmSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithmSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithmSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jarometricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetricSpec.scala delete mode 
100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetricSpec.scala delete mode 100755 cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetricSpec.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/package.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithm.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithm.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithm.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithm.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithm.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetric.scala 
create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jarometric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetric.scala create mode 100755 cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetric.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/CliSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/ScalaTest.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithmSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithmSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithmSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithmSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithmSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetricSpec.scala 
create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jarometricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetricSpec.scala create mode 100755 cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetricSpec.scala delete mode 100755 core/build.gradle delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala delete mode 100755 
core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala delete mode 100755 core/source/benchmark/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerBenchmark.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala delete mode 100755 
core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/Metric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala delete mode 100644 core/source/main/scala/com/rockymadden/stringmetric/Transform.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/package.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala delete mode 100755 
core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala delete mode 100755 core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala delete mode 100644 core/source/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala delete mode 100644 core/source/test/scala/com/rockymadden/stringmetric/MetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala delete mode 100644 core/source/test/scala/com/rockymadden/stringmetric/TransformSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala delete mode 100755 
core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala delete mode 100755 core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala create mode 100755 
core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala create mode 100755 core/src/benchmark/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerBenchmark.scala create mode 100755 
core/src/main/scala/com/rockymadden/stringmetric/Algorithm.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/Alphabet.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/Metric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/Tokenize.scala create mode 100644 core/src/main/scala/com/rockymadden/stringmetric/Transform.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/package.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala create 
mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala create mode 100755 core/src/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala create mode 100644 core/src/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala create mode 100644 core/src/test/scala/com/rockymadden/stringmetric/MetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/ScalaTest.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala create mode 100644 core/src/test/scala/com/rockymadden/stringmetric/TransformSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala create mode 
100755 core/src/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala create mode 100755 core/src/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala delete mode 100755 deploy.gradle delete mode 100755 gradle.properties create mode 100644 project/build.properties create mode 100644 project/build.scala delete mode 100755 settings.gradle diff --git a/.gitignore b/.gitignore index 97edf7f..764a069 100755 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,5 @@ *.iws # Build ignores. 
-.gradle -build out target diff --git a/.jvmopts b/.jvmopts new file mode 100644 index 0000000..b0b6b75 --- /dev/null +++ b/.jvmopts @@ -0,0 +1,7 @@ +-Dfile.encoding=UTF8 +-XX:MaxPermSize=768m +-Xms1024m +-Xmx2048m +-Xss2m +-XX:+CMSClassUnloadingEnabled +-XX:+UseConcMarkSweepGC diff --git a/.travis.yml b/.travis.yml index 1c0f6fe..8b077f2 100755 --- a/.travis.yml +++ b/.travis.yml @@ -4,5 +4,3 @@ scala: jdk: - openjdk7 - oraclejdk7 -script: - - "gradle test" diff --git a/build.gradle b/build.gradle deleted file mode 100755 index 50974bb..0000000 --- a/build.gradle +++ /dev/null @@ -1,9 +0,0 @@ -description = 'String metrics and phonetic algorithms for Scala.' -group = 'com.rockymadden.stringmetric' -version = '0.26.1' - -ext.scm = 'scm:git@github.com:rockymadden/stringmetric.git' -ext.url = 'http://rockymadden.com/stringmetric/' - -allprojects { apply plugin: 'idea' } -subprojects { repositories { mavenCentral() } } diff --git a/cli/build.gradle b/cli/build.gradle deleted file mode 100755 index 1450615..0000000 --- a/cli/build.gradle +++ /dev/null @@ -1,105 +0,0 @@ -buildscript { - dependencies { classpath 'net.saliman:gradle-cobertura-plugin:2.2.2' } - repositories { mavenCentral() } -} - -evaluationDependsOn(':stringmetric-core') - -apply from: '../deploy.gradle' -apply plugin: 'cobertura' -apply plugin: 'scala' - -cobertura { - coverageFormats = ['html', 'xml'] - coverageSourceDirs = sourceSets.main.scala.srcDirs -} - -compileScala { compileScala.scalaCompileOptions.additionalParameters = ['-target:jvm-1.6', '–Xdisable-assertions'] } -compileTestScala { compileTestScala.scalaCompileOptions.additionalParameters = ['-target:jvm-1.6'] } - -dependencies { - compile project(':stringmetric-core') - compile'org.scala-lang:scala-compiler:2.10.2' - compile 'org.scala-lang:scala-library:2.10.2' - - testCompile project(':stringmetric-core').sourceSets.test.output - testCompile 'com.google.caliper:caliper:0.5-rc1' - testCompile 'junit:junit:4.11' - testCompile 
'org.scalatest:scalatest_2.10:2.0.M5b' -} - -sourceSets { - main { - output.resourcesDir "${project.buildDir}/classes/main" - - resources { srcDir 'source/main/resource' } - scala { srcDir 'source/main/scala' } - } - test { - output.resourcesDir "${project.buildDir}/classes/test" - - resources { srcDir 'source/test/resource' } - scala { srcDir 'source/test/scala' } - } -} - -task tar(description: 'Assembles a compressed tar archive of source files.', dependsOn: [':stringmetric-cli:jar', ':stringmetric-core:jar']) { - ext.sourcePath = "${project.projectDir}/source/main/scala" - ext.outputPath = "${project.buildDir}" - ext.workingPath = "${project.buildDir}/${project.name}" - - inputs.dir new File(sourcePath) - outputs.dir new File(outputPath, 'generated') - outputs.upToDateWhen { new File(workingPath).isDirectory() } - - doLast { - // Clean up working directory and tar from last execution, should they exist. - ant.delete(dir: workingPath, failOnError: false) - ant.delete(file: "${project.buildDir}/${project.name}.tar.gz", failOnError: false) - - // Create project working directory. - ant.mkdir(dir: workingPath) - - // Create scala.sh header file. - ant.echo(file: "${workingPath}/scala.sh", message: '#!/bin/bash\ndir="`dirname \\"$0\\"`"\ndir="`( cd \\"$dir\\" && pwd )`"\ncp=`echo $dir/*.jar|sed \'s/ /:/g\'`\nexec scala -classpath "$cp" -savecompiled "$0" "$@"\n!#\n//') - - // Copy source files to working directory. - ant.copy(toDir: workingPath, force: true, overwrite: true) { - fileset(dir: sourcePath) { - exclude(name: '**/cli/*.scala') - exclude(name: '**/package.scala') - } - filterchain { - concatfilter(prepend: "${workingPath}/scala.sh") - } - } - - // Delete scala.sh header file. - ant.delete(file: "${workingPath}/scala.sh") - - // Flatten and remove file extension. 
- ant.move(toDir: workingPath) { - fileset(dir: workingPath) - chainedmapper { - mapper(type: 'flatten') - mapper(from: '*.scala', to: '*', type: 'glob') - } - } - - // Clean up empty folder(s) from flatten. - ant.delete(dir: "${workingPath}/org", includeEmptyDirs: true) - - // Copy project jars into place. - ant.copy(toDir: workingPath, force: true, overwrite: true) { - fileset(dir: "${project.buildDir}/libs") - } - ant.copy(toDir: workingPath, force: true, overwrite: true) { - fileset(dir: "${project(':stringmetric-core').buildDir}/libs") - } - - // Assemble compressed tar. - ant.tar(compression: 'gzip', destFile: "${project.buildDir}/${project.name}.tar.gz") { - tarfileset(dir: workingPath, fileMode: 755, prefix: project.name) - } - } -} diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/package.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/package.scala deleted file mode 100755 index 617f4a5..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/package.scala +++ /dev/null @@ -1,105 +0,0 @@ -package com.rockymadden.stringmetric - -/** - * Provides core CLI functionality. Note that some things might look sloppy (e.g. access modifiers, broad imports, - * repetitive imports, etc), but are required because of the way scalascript is ultimately compiled. 
- */ -package object cli { - import scala.language.implicitConversions - - - implicit def optionStringToArray(os: OptionString): Array[String] = - if (os.get.length == 0) Array.empty[String] - else os.get.split(" ") - implicit def optionStringToBigDecimal(os: OptionString): BigDecimal = BigDecimal(os.get) - implicit def optionStringToBigInt(os: OptionString): BigInt = BigInt(os.get) - implicit def optionStringToDouble(os: OptionString): Double = os.get.toDouble - implicit def optionStringToFloat(os: OptionString): Float = os.get.toFloat - implicit def optionStringToInt(os: OptionString): Int = os.get.toInt - implicit def optionStringToLong(os: OptionString): Long = os.get.toLong - implicit def optionStringToShort(os: OptionString): Short = os.get.toShort - implicit def optionStringToString(os: OptionString): String = os.get - implicit def stringToOptionString(s: String): OptionString = OptionString(s) - implicit def arrayOfStringToOptionMap(stringArray: Array[String]): OptionMap = OptionMap(stringArray) - - - final val Ls = sys.props("line.separator") - final val Tab = " " - - - class OptionString(val get: String) - - object OptionString { - def apply(s: String): OptionString = new OptionString(s) - } - - - type OptionMap = Map[Symbol, OptionString] - - object OptionMap { - def apply(args: Array[String]): OptionMap = apply(args: _*) - - def apply(varargs: String*): OptionMap = { - @annotation.tailrec - def next(om: OptionMap, a: List[String]): OptionMap = { - val double = """^(--[a-zA-Z0-9]+)(=[a-zA-Z0-9\.\-_]+)?""".r - val single = """^(-[a-zA-Z0-9]+)(=[a-zA-Z0-9\.\-_]+)?""".r - val less = """([a-zA-Z0-9/\-_\$\.]+)""".r - - a match { - // Empty, return. - case Nil => om - // Double dash options without value. - case double(k, null) :: t => next(om + (Symbol(k.tail.tail) -> ""), t) - // Double dash options with value. - case double(k, v) :: t => next(om + (Symbol(k.tail.tail) -> v.tail), t) - // Single dash options without value. 
- case single(k, null) :: t => next(om + (Symbol(k.tail) -> ""), t) - // Single dash options with value. Value is discarded. - case single(k, v) :: t => next(om + (Symbol(k.tail) -> ""), t) - // Dashless options. - case less(v) :: t if v.head != '-' => - if (om.contains('dashless)) - next((om - 'dashless) + ('dashless -> (om('dashless).get + " " + v.trim)), t) - else next(om + ('dashless -> v.trim), t) - // Invalid option, ignore. - case _ :: t => next(om, t) - } - } - - next(Map.empty[Symbol, OptionString], varargs.toList) - } - } - - - abstract class Command( - protected val help: (OptionMap => String), - protected val predicate: (OptionMap => Boolean), - protected val execute: (OptionMap => String) - ) { - final def main(args: Array[String]): Unit = { - val opts: OptionMap = args - - try - if (opts.contains('h) || opts.contains('help)) { - println(help(opts)) - exit(opts) - } else if (predicate(opts)) { - println(execute(opts)) - exit(opts) - } else throw new IllegalArgumentException("Expected valid syntax. 
See --help.") - catch { case e: Throwable => error(e, opts) } - } - - private def error(error: Throwable, opts: OptionMap): Unit = - if (!isUnitTest(opts)) { - println(error.getMessage) - sys.exit(1) - } else throw error - - private def exit(opts: OptionMap): Unit = if (!isUnitTest(opts)) sys.exit(0) - - private def isUnitTest(opts: OptionMap) = - opts.contains('ut) || (opts.contains('unitTest) && opts.get('unitTest) != "false") - } -} diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithm.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithm.scala deleted file mode 100755 index 95f14b6..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithm.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm - -case object metaphonealgorithm extends Command( - (opts) => - "Returns the phonetic representation of the passed string, per the Metaphone algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "metaphonealgorithm [Options] string..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, - (opts) => MetaphoneAlgorithm.compute(opts('dashless)).getOrElse("not computable") -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetric.scala deleted file mode 100755 index 1249723..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetric.scala +++ /dev/null @@ -1,21 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.MetaphoneMetric - -case object metaphonemetric extends Command( - (opts) => - "Compares two strings to determine if they are phonetically similarly, per the Metaphone algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "metaphonemetric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - MetaphoneMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithm.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithm.scala deleted file mode 100755 index bef2bc5..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithm.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.NysiisAlgorithm - -case object nysiisalgorithm extends Command( - (opts) => - "Returns the phonetic representation of the passed string, per the NYSIIS algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "nysiisalgorithm [Options] string..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and options.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, - (opts) => NysiisAlgorithm.compute(opts('dashless)).getOrElse("not computable") -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetric.scala deleted file mode 100755 index 7857c21..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetric.scala +++ /dev/null @@ -1,21 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.NysiisMetric - -case object nysiismetric extends Command( - (opts) => - "Compares two strings to determine if they are phonetically similarly, per the NYSIIS algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "nysiismetric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - NysiisMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithm.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithm.scala deleted file mode 100755 index 3fc9f25..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithm.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm - -case object refinednysiisalgorithm extends Command( - (opts) => - "Returns the phonetic representation of the passed string, per the refined NYSIIS algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "refinednysiisalgorithm [Options] string..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, - (opts) => RefinedNysiisAlgorithm.compute(opts('dashless)).getOrElse("not computable") -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetric.scala deleted file mode 100755 index 2566fa8..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetric.scala +++ /dev/null @@ -1,21 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric - -case object refinednysiismetric extends Command( - (opts) => - "Compares two strings to determine if they are phonetically similarly, per the refined NYSIIS algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "refinednysiismetric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and options.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - RefinedNysiisMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithm.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithm.scala deleted file mode 100755 index 12ac8e0..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithm.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm - -case object refinedsoundexalgorithm extends Command( - (opts) => - "Returns the phonetic representation of the passed string, per the refined Soundex algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "refinedsoundexalgorithm [Options] string..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, - (opts) => RefinedSoundexAlgorithm.compute(opts('dashless)).getOrElse("not computable") -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetric.scala deleted file mode 100755 index 370bd71..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetric.scala +++ /dev/null @@ -1,21 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric - -case object refinedsoundexmetric extends Command( - (opts) => - "Compares two strings to determine if they are phonetically similarly, per the refined Soundex algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "refinedsoundexmetric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and options.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - RefinedSoundexMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithm.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithm.scala deleted file mode 100755 index 56cf068..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithm.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.SoundexAlgorithm - -case object soundexalgorithm extends Command( - (opts) => - "Returns the phonetic representation of the passed string, per the Soundex algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "soundexalgorithm [Options] string..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, - (opts) => SoundexAlgorithm.compute(opts('dashless)).getOrElse("not computable") -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetric.scala deleted file mode 100755 index c7ec0cd..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetric.scala +++ /dev/null @@ -1,21 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.phonetic.SoundexMetric - -case object soundexmetric extends Command( - (opts) => - "Compares two strings to determine if they are phonetically similarly, per the Soundex algorithm." + Ls + Ls + - "Syntax:" + Ls + - Tab + "soundexmetric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and options.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - SoundexMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetric.scala deleted file mode 100755 index bb7ac0b..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetric.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.DiceSorensenMetric - -case object dicesorensenmetric extends Command( - (opts) => - "Compares the similarity of two strings using the Dice / Sorensen coefficient." + Ls + Ls + - "Syntax:" + Ls + - Tab + "dicesorensenmetric [Options] string1 string2..." + Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts." 
+ - Tab + "--n" + Ls + - Tab + Tab + "The n-gram size, traditionally 2.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 && - opts.contains('n) && (opts('n): Int) >= 1, - (opts) => { - val strings: Array[String] = opts('dashless) - val n: Int = opts('n) - - DiceSorensenMetric(n).compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetric.scala deleted file mode 100755 index e3db059..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetric.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.HammingMetric - -case object hammingmetric extends Command( - (opts) => - "Compares the number of characters that two equal length strings are different from one another." + Ls + Ls + - "Syntax:" + Ls + - Tab + "hammingmetric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and options.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - - HammingMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetric.scala deleted file mode 100755 index 8301158..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetric.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.JaccardMetric - -case object jaccardmetric extends Command( - (opts) => - "Compares the similarity of two strings using the Jaccard coefficient." + Ls + Ls + - "Syntax:" + Ls + - Tab + "jaccardmetric [Options] string1 string2..." + Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts." 
+ - Tab + "--n" + Ls + - Tab + Tab + "The n-gram size.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 - && opts.contains('n) && (opts('n): Int) >= 1, - (opts) => { - val strings: Array[String] = opts('dashless) - val n: Int = opts('n) - - JaccardMetric(n).compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jarometric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jarometric.scala deleted file mode 100755 index cb7e188..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jarometric.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.JaroMetric - -case object jarometric extends Command( - (opts) => - "Compares two strings to calculate the Jaro distance." + Ls + Ls + - "Syntax:" + Ls + - Tab + "jarometric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and options.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - - JaroMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetric.scala deleted file mode 100755 index 5cfcb1e..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetric.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.JaroWinklerMetric - -case object jarowinklermetric extends Command( - (opts) => - "Compares two strings to calculate the Jaro-Winkler distance." + Ls + Ls + - "Syntax:" + Ls + - Tab + "jarowinklermetric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - - JaroWinklerMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetric.scala deleted file mode 100755 index 9cfb021..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetric.scala +++ /dev/null @@ -1,23 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.LevenshteinMetric - -case object levenshteinmetric extends Command( - (opts) => - "Compares the number of characters that two strings are different from one another via insertion, deletion, " + - "and substitution." + Ls + Ls + - "Syntax:" + Ls + - Tab + "levenshteinmetric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and options.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - - LevenshteinMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetric.scala deleted file mode 100755 index 26914c8..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetric.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.NGramMetric - -case object ngrammetric extends Command( - (opts) => - "Compares the similarity of two strings using an N-Gram similarity index." + Ls + Ls + - "Syntax:" + Ls + - Tab + "ngrammetric [Options] string1 string2..." + Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and options." 
+ - Tab + "--n" + Ls + - Tab + Tab + "The n.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 && - opts.contains('n) && (opts('n): Int) >= 1, - (opts) => { - val strings: Array[String] = opts('dashless) - val n: Int = opts('n) - - NGramMetric(n).compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetric.scala deleted file mode 100755 index a045b49..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetric.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.OverlapMetric - -case object overlapmetric extends Command( - (opts) => - "Compares the similarity of two strings using the overlap coefficient." + Ls + Ls + - "Syntax:" + Ls + - Tab + "overlapmetric [Options] string1 string2..." + Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts." 
+ - Tab + "--n" + Ls + - Tab + Tab + "The n-gram size.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 && - opts.contains('n) && (opts('n): Int) >= 1, - (opts) => { - val strings: Array[String] = opts('dashless) - val n: Int = opts('n) - - OverlapMetric(n).compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetric.scala deleted file mode 100755 index 08804b6..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetric.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.RatcliffObershelpMetric - -case object ratcliffobershelpmetric extends Command( - (opts) => - "Compares the similarity of two strings using the Ratcliff / Obershelp similarity index." + Ls + Ls + - "Syntax:" + Ls + - Tab + "ratcliffobershelpmetric [Options] string1 string2..." 
+ Ls + Ls + - "Options:" + Ls + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, - (opts) => { - val strings: Array[String] = opts('dashless) - - RatcliffObershelpMetric.compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetric.scala b/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetric.scala deleted file mode 100755 index c74f80a..0000000 --- a/cli/source/main/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetric.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.cli._ -import com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric - -case object weightedlevenshteinmetric extends Command( - (opts) => - "Compares the number of characters that two strings are different from one another via insertion, deletion, " + - "and substitution. Allows the invoker to indicate the weight each operation takes." + Ls + Ls + - "Syntax:" + Ls + - Tab + "weightedlevenshteinmetric [Options] --deleteWeight=[double] --insertWeight=[double] --substituteWeight=[double] string1 string2..." + Ls + Ls + - "Options:" + Ls + - Tab + "--deleteWeight" + Ls + - Tab + Tab + "The weight given to delete operations." + - Tab + "-h, --help" + Ls + - Tab + Tab + "Outputs description, syntax, and opts." + - Tab + "--insertWeight" + Ls + - Tab + Tab + "The weight given to insert operations." 
+ - Tab + "--substituteWeight" + Ls + - Tab + Tab + "The weight given to substitute operations.", - (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 && - opts.contains('deleteWeight) && (opts('deleteWeight): Double) >= 0 && - opts.contains('insertWeight) && (opts('insertWeight): Double) >= 0 && - opts.contains('substituteWeight) && (opts('substituteWeight): Double) >= 0, - (opts) => { - val strings: Array[String] = opts('dashless) - - WeightedLevenshteinMetric(opts('deleteWeight), opts('insertWeight), opts('substituteWeight)) - .compare(strings(0), strings(1)) - .map(_.toString) - .getOrElse("not comparable") - } -) diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/OptionMapSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/OptionMapSpec.scala deleted file mode 100755 index 8ecab11..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/OptionMapSpec.scala +++ /dev/null @@ -1,82 +0,0 @@ -package com.rockymadden.stringmetric.cli - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class OptionMapSpec extends ScalaTest { "OptionMap" should provide { - "apply method" when passed { - "single valid double dashed option" should returns { - "populated Map" in { - val opts: OptionMap = Array("--help") - - (opts('help): String) should equal ("") - } - } - "multiple valid double dashed opts" should returns { - "populated Map" in { - val opts: OptionMap = Array("--help", "--test=test") - - (opts('help): String) should equal ("") - (opts('test): String) should equal ("test") - } - } - "invalid double dashed opts" should returns { - "empty Map" in { - val opts: OptionMap = Array("--help#", "--test%=test") - - opts.keysIterator.length should be (0) - } - } - "single valid single dashed option" should returns { - "populated Map" in { - val opts: OptionMap = Array("-h") - - (opts('h): 
String) should equal ("") - } - } - "multiple valid single dashed opts" should returns { - "populated Map" in { - val opts: OptionMap = Array("-h", "-i") - - (opts('h): String) should equal ("") - (opts('i): String) should equal ("") - } - } - "invalid single dashed opts" should returns { - "empty Map" in { - val opts: OptionMap = Array("-h-i", "-i#gloo") - - opts.keysIterator.length should be (0) - } - } - "single nameless option" should returns { - "single key populated Map" in { - val opts: OptionMap = Array("filename0") - - (opts('dashless): String).count(_ == ' ') should be (0) - } - } - "multiple single nameless opts" should returns { - "single key populated Map" in { - val opts: OptionMap = Array("filename0", "filename1", "filename2") - - (opts('dashless): String).count(_ == ' ') should be (2) - } - } - "mixed opts" should returns { - "populated Map" in { - val opts: OptionMap = Array( - "-q", "--help", "--test=test", "-go", "filename0", "filename1", "filename2" - ) - - (opts('q): String) should equal ("") - (opts('help): String) should equal ("") - (opts('test): String) should equal ("test") - (opts('go): String) should equal ("") - (opts('dashless): String).count(_ == ' ') should be (2) - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithmSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithmSpec.scala deleted file mode 100755 index 403c50e..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithmSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class metaphonealgorithmSpec extends ScalaTest { "metaphonealgorithm" should provide { - "main method" when passed { - "valid dashless argument" should executes { - "print 
phonetic representation" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - metaphonealgorithm.main(Array("--unitTest", "--debug", "abc")) - ) - - out.toString should equal ("abk\n") - out.reset() - - Console.withOut(out)( - metaphonealgorithm.main(Array("--unitTest", "--debug", "1")) - ) - - out.toString should equal ("not computable\n") - out.reset() - } - } - "no dashless argument" should throws { - "IllegalArgumentException" in { - evaluating { - metaphonealgorithm.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetricSpec.scala deleted file mode 100755 index 11c84ca..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetricSpec.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class metaphonemetricSpec extends ScalaTest { "metaphonemetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - metaphonemetric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("true\n") - out.reset() - - Console.withOut(out)( - metaphonemetric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("false\n") - out.reset() - - Console.withOut(out)( - metaphonemetric.main(Array("--unitTest", "--debug", "1", "1")) - ) - - out.toString should equal ("not comparable\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - 
metaphonemetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithmSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithmSpec.scala deleted file mode 100755 index 707deb4..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithmSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class nysiisalgorithmSpec extends ScalaTest { "nysiisalgorithm" should provide { - "main method" when passed { - "valid dashless argument" should executes { - "print phonetic representation" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - nysiisalgorithm.main(Array("--unitTest", "--debug", "abc")) - ) - - out.toString should equal ("abc\n") - out.reset() - - Console.withOut(out)( - nysiisalgorithm.main(Array("--unitTest", "--debug", "1")) - ) - - out.toString should equal ("not computable\n") - out.reset() - } - } - "no dashless argument" should throws { - "IllegalArgumentException" in { - evaluating { - nysiisalgorithm.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetricSpec.scala deleted file mode 100755 index 357bf6e..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetricSpec.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - 
-@RunWith(classOf[JUnitRunner]) -final class nysiismetricSpec extends ScalaTest { "nysiismetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - nysiismetric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("true\n") - out.reset() - - Console.withOut(out)( - nysiismetric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("false\n") - out.reset() - - Console.withOut(out)( - nysiismetric.main(Array("--unitTest", "--debug", "1", "1")) - ) - - out.toString should equal ("not comparable\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - nysiismetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithmSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithmSpec.scala deleted file mode 100755 index d126b91..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithmSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class refinednysiisalgorithmSpec extends ScalaTest { "refinednysiisalgorithm" should provide { - "main method" when passed { - "valid dashless argument" should executes { - "print phonetic representation" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - refinednysiisalgorithm.main(Array("--unitTest", "--debug", "abc")) - ) - - out.toString should equal ("abc\n") - out.reset() - - Console.withOut(out)( - 
refinednysiisalgorithm.main(Array("--unitTest", "--debug", "1")) - ) - - out.toString should equal ("not computable\n") - out.reset() - } - } - "no dashless argument" should throws { - "IllegalArgumentException" in { - evaluating { - refinednysiisalgorithm.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetricSpec.scala deleted file mode 100755 index 7f2f1b3..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetricSpec.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class refinednysiismetricSpec extends ScalaTest { "refinednysiismetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - refinednysiismetric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("true\n") - out.reset() - - Console.withOut(out)( - refinednysiismetric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("false\n") - out.reset() - - Console.withOut(out)( - refinednysiismetric.main(Array("--unitTest", "--debug", "1", "1")) - ) - - out.toString should equal ("not comparable\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - refinednysiismetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git 
a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithmSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithmSpec.scala deleted file mode 100755 index 503f439..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithmSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class refinedsoundexalgorithmSpec extends ScalaTest { "refinedsoundexalgorithm" should provide { - "main method" when passed { - "valid dashless argument" should executes { - "print phonetic representation" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - refinedsoundexalgorithm.main(Array("--unitTest", "--debug", "abc")) - ) - - out.toString should equal ("a013\n") - out.reset() - - Console.withOut(out)( - refinedsoundexalgorithm.main(Array("--unitTest", "--debug", "1")) - ) - - out.toString should equal ("not computable\n") - out.reset() - } - } - "no dashless argument" should throws { - "IllegalArgumentException" in { - evaluating { - refinedsoundexalgorithm.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetricSpec.scala deleted file mode 100755 index a10f1ed..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetricSpec.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final 
class refinedsoundexmetricSpec extends ScalaTest { "refinedsoundexmetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - refinedsoundexmetric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("true\n") - out.reset() - - Console.withOut(out)( - refinedsoundexmetric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("false\n") - out.reset() - - Console.withOut(out)( - refinedsoundexmetric.main(Array("--unitTest", "--debug", "1", "1")) - ) - - out.toString should equal ("not comparable\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - refinedsoundexmetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithmSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithmSpec.scala deleted file mode 100755 index 4319226..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithmSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class soundexalgorithmSpec extends ScalaTest { "soundexalgorithm" should provide { - "main method" when passed { - "valid dashless argument" should executes { - "print phonetic representation" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - soundexalgorithm.main(Array("--unitTest", "--debug", "abc")) - ) - - out.toString should equal ("a120\n") - out.reset() - - Console.withOut(out)( - 
soundexalgorithm.main(Array("--unitTest", "--debug", "1")) - ) - - out.toString should equal ("not computable\n") - out.reset() - } - } - "no dashless argument" should throws { - "IllegalArgumentException" in { - evaluating { - soundexalgorithm.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetricSpec.scala deleted file mode 100755 index c0582fa..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetricSpec.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.rockymadden.stringmetric.cli.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class soundexmetricSpec extends ScalaTest { "soundexmetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - soundexmetric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("true\n") - out.reset() - - Console.withOut(out)( - soundexmetric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("false\n") - out.reset() - - Console.withOut(out)( - soundexmetric.main(Array("--unitTest", "--debug", "1", "1")) - ) - - out.toString should equal ("not comparable\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - soundexmetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetricSpec.scala 
b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetricSpec.scala deleted file mode 100755 index 96e6082..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class dicesorensenmetricSpec extends ScalaTest { "dicesorensenmetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - dicesorensenmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "abc")) - ) - - out.toString should equal ("1.0\n") - out.reset() - - Console.withOut(out)( - dicesorensenmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "xyz")) - ) - - out.toString should equal ("0.0\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - dicesorensenmetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetricSpec.scala deleted file mode 100755 index d3304b6..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class hammingmetricSpec extends ScalaTest { "hammingmetric" should provide { - "main method" when passed { - "valid dashless 
arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - hammingmetric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("0\n") - out.reset() - - Console.withOut(out)( - hammingmetric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("3\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - hammingmetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetricSpec.scala deleted file mode 100755 index e684ae2..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class jaccardmetricSpec extends ScalaTest { "jaccardmetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - jaccardmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "abc")) - ) - - out.toString should equal ("1.0\n") - out.reset() - - Console.withOut(out)( - jaccardmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "xyz")) - ) - - out.toString should equal ("0.0\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - jaccardmetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git 
a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jarometricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jarometricSpec.scala deleted file mode 100755 index bc07e30..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jarometricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class jarometricSpec extends ScalaTest { "jarometric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print the distance" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - jarometric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("1.0\n") - out.reset() - - Console.withOut(out)( - jarometric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("0.0\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - jarometric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetricSpec.scala deleted file mode 100755 index 46ae0c6..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class jarowinklermetricSpec extends ScalaTest { "jarowinklermetric" should provide { - "main method" 
when passed { - "valid dashless arguments" should executes { - "print the distance" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - jarowinklermetric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("1.0\n") - out.reset() - - Console.withOut(out)( - jarowinklermetric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("0.0\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - jarowinklermetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetricSpec.scala deleted file mode 100755 index 7c89405..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class levenshteinmetricSpec extends ScalaTest { "levenshteinmetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - levenshteinmetric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("0\n") - out.reset() - - Console.withOut(out)( - levenshteinmetric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("3\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - levenshteinmetric.main(Array("--unitTest", "--debug")) - } should produce 
[IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetricSpec.scala deleted file mode 100755 index 3a4277b..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetricSpec.scala +++ /dev/null @@ -1,66 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class ngrammetricSpec extends ScalaTest { "ngrammetric" should provide { - "main method" when passed { - "valid dashless arguments and valid n argument" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - ngrammetric.main( - Array( - "--unitTest", - "--debug", - "--n=1", - "abc", - "abc" - ) - ) - ) - - out.toString should equal ("1.0\n") - out.reset() - - Console.withOut(out)( - ngrammetric.main( - Array( - "--unitTest", - "--debug", - "--n=1", - "abc", - "xyz" - ) - ) - ) - - out.toString should equal ("0.0\n") - out.reset() - } - } - "valid dashless arguments and invalid n argument" should throws { - "IllegalArgumentException" in { - evaluating { - ngrammetric.main( - Array( - "--unitTest", - "abc", - "abc" - ) - ) - } should produce [IllegalArgumentException] - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - ngrammetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetricSpec.scala deleted file mode 100755 index 6e15228..0000000 --- 
a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class overlapmetricSpec extends ScalaTest { "overlapmetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - overlapmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "abc")) - ) - - out.toString should equal ("1.0\n") - out.reset() - - Console.withOut(out)( - overlapmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "xyz")) - ) - - out.toString should equal ("0.0\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - overlapmetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetricSpec.scala deleted file mode 100755 index e1d2bef..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class ratcliffobershelpmetricSpec extends ScalaTest { "ratcliffobershelpmetric" should provide { - "main method" when passed { - "valid dashless arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - 
Console.withOut(out)( - ratcliffobershelpmetric.main(Array("--unitTest", "--debug", "abc", "abc")) - ) - - out.toString should equal ("1.0\n") - out.reset() - - Console.withOut(out)( - ratcliffobershelpmetric.main(Array("--unitTest", "--debug", "abc", "xyz")) - ) - - out.toString should equal ("0.0\n") - out.reset() - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - ratcliffobershelpmetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetricSpec.scala b/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetricSpec.scala deleted file mode 100755 index f0ac8be..0000000 --- a/cli/source/test/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetricSpec.scala +++ /dev/null @@ -1,121 +0,0 @@ -package com.rockymadden.stringmetric.cli.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class weightedlevenshteinmetricSpec extends ScalaTest { "weightedlevenshteinmetric" should provide { - "main method" when passed { - "valid dashless arguments and valid weight arguments" should executes { - "print if they are a match" in { - val out = new java.io.ByteArrayOutputStream() - - Console.withOut(out)( - weightedlevenshteinmetric.main( - Array( - "--unitTest", - "--debug", - "--deleteWeight=1", - "--insertWeight=1", - "--substituteWeight=1", - "abc", - "abc" - ) - ) - ) - - out.toString should equal ("0.0\n") - out.reset() - - Console.withOut(out)( - weightedlevenshteinmetric.main( - Array( - "--unitTest", - "--debug", - "--deleteWeight=2", - "--insertWeight=2", - "--substituteWeight=1", - "abc", - "xyz" - ) - ) - ) - - out.toString should equal ("3.0\n") - out.reset() - - Console.withOut(out)( - 
weightedlevenshteinmetric.main( - Array( - "--unitTest", - "--debug", - "--deleteWeight=2", - "--insertWeight=1", - "--substituteWeight=2", - "xyz", - "xyzxyz" - ) - ) - ) - - out.toString should equal ("3.0\n") - out.reset() - - Console.withOut(out)( - weightedlevenshteinmetric.main( - Array( - "--unitTest", - "--debug", - "--deleteWeight=1", - "--insertWeight=2", - "--substituteWeight=2", - "xyzxyz", - "xyz" - ) - ) - ) - - out.toString should equal ("3.0\n") - out.reset() - } - } - "valid dashless arguments and invalid weight arguments" should throws { - "IllegalArgumentException" in { - evaluating { - weightedlevenshteinmetric.main( - Array( - "--unitTest", - "--debug", - "--deleteWeight=1", - "--substituteWeight=1", - "abc", - "abc" - ) - ) - } should produce [IllegalArgumentException] - - evaluating { - weightedlevenshteinmetric.main( - Array( - "--unitTest", - "--debug", - "--deleteWeight=1", - "--insertWeight=q", - "--substituteWeight=1", - "abc", - "abc" - ) - ) - } should produce [IllegalArgumentException] - } - } - "no dashless arguments" should throws { - "IllegalArgumentException" in { - evaluating { - weightedlevenshteinmetric.main(Array("--unitTest", "--debug")) - } should produce [IllegalArgumentException] - } - } - } -}} diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/package.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/package.scala new file mode 100755 index 0000000..617f4a5 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/package.scala @@ -0,0 +1,105 @@ +package com.rockymadden.stringmetric + +/** + * Provides core CLI functionality. Note that some things might look sloppy (e.g. access modifiers, broad imports, + * repetitive imports, etc), but are required because of the way scalascript is ultimately compiled. 
+ */ +package object cli { + import scala.language.implicitConversions + + + implicit def optionStringToArray(os: OptionString): Array[String] = + if (os.get.length == 0) Array.empty[String] + else os.get.split(" ") + implicit def optionStringToBigDecimal(os: OptionString): BigDecimal = BigDecimal(os.get) + implicit def optionStringToBigInt(os: OptionString): BigInt = BigInt(os.get) + implicit def optionStringToDouble(os: OptionString): Double = os.get.toDouble + implicit def optionStringToFloat(os: OptionString): Float = os.get.toFloat + implicit def optionStringToInt(os: OptionString): Int = os.get.toInt + implicit def optionStringToLong(os: OptionString): Long = os.get.toLong + implicit def optionStringToShort(os: OptionString): Short = os.get.toShort + implicit def optionStringToString(os: OptionString): String = os.get + implicit def stringToOptionString(s: String): OptionString = OptionString(s) + implicit def arrayOfStringToOptionMap(stringArray: Array[String]): OptionMap = OptionMap(stringArray) + + + final val Ls = sys.props("line.separator") + final val Tab = " " + + + class OptionString(val get: String) + + object OptionString { + def apply(s: String): OptionString = new OptionString(s) + } + + + type OptionMap = Map[Symbol, OptionString] + + object OptionMap { + def apply(args: Array[String]): OptionMap = apply(args: _*) + + def apply(varargs: String*): OptionMap = { + @annotation.tailrec + def next(om: OptionMap, a: List[String]): OptionMap = { + val double = """^(--[a-zA-Z0-9]+)(=[a-zA-Z0-9\.\-_]+)?""".r + val single = """^(-[a-zA-Z0-9]+)(=[a-zA-Z0-9\.\-_]+)?""".r + val less = """([a-zA-Z0-9/\-_\$\.]+)""".r + + a match { + // Empty, return. + case Nil => om + // Double dash options without value. + case double(k, null) :: t => next(om + (Symbol(k.tail.tail) -> ""), t) + // Double dash options with value. + case double(k, v) :: t => next(om + (Symbol(k.tail.tail) -> v.tail), t) + // Single dash options without value. 
+ case single(k, null) :: t => next(om + (Symbol(k.tail) -> ""), t) + // Single dash options with value. Value is discarded. + case single(k, v) :: t => next(om + (Symbol(k.tail) -> ""), t) + // Dashless options. + case less(v) :: t if v.head != '-' => + if (om.contains('dashless)) + next((om - 'dashless) + ('dashless -> (om('dashless).get + " " + v.trim)), t) + else next(om + ('dashless -> v.trim), t) + // Invalid option, ignore. + case _ :: t => next(om, t) + } + } + + next(Map.empty[Symbol, OptionString], varargs.toList) + } + } + + + abstract class Command( + protected val help: (OptionMap => String), + protected val predicate: (OptionMap => Boolean), + protected val execute: (OptionMap => String) + ) { + final def main(args: Array[String]): Unit = { + val opts: OptionMap = args + + try + if (opts.contains('h) || opts.contains('help)) { + println(help(opts)) + exit(opts) + } else if (predicate(opts)) { + println(execute(opts)) + exit(opts) + } else throw new IllegalArgumentException("Expected valid syntax. 
See --help.") + catch { case e: Throwable => error(e, opts) } + } + + private def error(error: Throwable, opts: OptionMap): Unit = + if (!isUnitTest(opts)) { + println(error.getMessage) + sys.exit(1) + } else throw error + + private def exit(opts: OptionMap): Unit = if (!isUnitTest(opts)) sys.exit(0) + + private def isUnitTest(opts: OptionMap) = /* compare the wrapped String: an Option[OptionString] never equals a String, so "--unitTest=false" used to be ignored */ + opts.contains('ut) || opts.get('unitTest).exists(_.get != "false") + } +} diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithm.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithm.scala new file mode 100755 index 0000000..95f14b6 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithm.scala @@ -0,0 +1,16 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.MetaphoneAlgorithm + +case object metaphonealgorithm extends Command( + (opts) => + "Returns the phonetic representation of the passed string, per the Metaphone algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "metaphonealgorithm [Options] string..."
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, + (opts) => MetaphoneAlgorithm.compute(opts('dashless)).getOrElse("not computable") +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetric.scala new file mode 100755 index 0000000..1249723 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetric.scala @@ -0,0 +1,21 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.MetaphoneMetric + +case object metaphonemetric extends Command( + (opts) => + "Compares two strings to determine if they are phonetically similarly, per the Metaphone algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "metaphonemetric [Options] string1 string2..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + MetaphoneMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithm.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithm.scala new file mode 100755 index 0000000..bef2bc5 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithm.scala @@ -0,0 +1,16 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.NysiisAlgorithm + +case object nysiisalgorithm extends Command( + (opts) => + "Returns the phonetic representation of the passed string, per the NYSIIS algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "nysiisalgorithm [Options] string..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and options.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, + (opts) => NysiisAlgorithm.compute(opts('dashless)).getOrElse("not computable") +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetric.scala new file mode 100755 index 0000000..7857c21 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetric.scala @@ -0,0 +1,21 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.NysiisMetric + +case object nysiismetric extends Command( + (opts) => + "Compares two strings to determine if they are phonetically similarly, per the NYSIIS algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "nysiismetric [Options] string1 string2..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + NysiisMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithm.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithm.scala new file mode 100755 index 0000000..3fc9f25 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithm.scala @@ -0,0 +1,16 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.RefinedNysiisAlgorithm + +case object refinednysiisalgorithm extends Command( + (opts) => + "Returns the phonetic representation of the passed string, per the refined NYSIIS algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "refinednysiisalgorithm [Options] string..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, + (opts) => RefinedNysiisAlgorithm.compute(opts('dashless)).getOrElse("not computable") +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetric.scala new file mode 100755 index 0000000..2566fa8 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetric.scala @@ -0,0 +1,21 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.RefinedNysiisMetric + +case object refinednysiismetric extends Command( + (opts) => + "Compares two strings to determine if they are phonetically similarly, per the refined NYSIIS algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "refinednysiismetric [Options] string1 string2..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and options.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + RefinedNysiisMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithm.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithm.scala new file mode 100755 index 0000000..12ac8e0 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithm.scala @@ -0,0 +1,16 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.RefinedSoundexAlgorithm + +case object refinedsoundexalgorithm extends Command( + (opts) => + "Returns the phonetic representation of the passed string, per the refined Soundex algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "refinedsoundexalgorithm [Options] string..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, + (opts) => RefinedSoundexAlgorithm.compute(opts('dashless)).getOrElse("not computable") +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetric.scala new file mode 100755 index 0000000..370bd71 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetric.scala @@ -0,0 +1,21 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.RefinedSoundexMetric + +case object refinedsoundexmetric extends Command( + (opts) => + "Compares two strings to determine if they are phonetically similarly, per the refined Soundex algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "refinedsoundexmetric [Options] string1 string2..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and options.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + RefinedSoundexMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithm.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithm.scala new file mode 100755 index 0000000..56cf068 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithm.scala @@ -0,0 +1,16 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.SoundexAlgorithm + +case object soundexalgorithm extends Command( + (opts) => + "Returns the phonetic representation of the passed string, per the Soundex algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "soundexalgorithm [Options] string..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1, + (opts) => SoundexAlgorithm.compute(opts('dashless)).getOrElse("not computable") +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetric.scala new file mode 100755 index 0000000..c7ec0cd --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetric.scala @@ -0,0 +1,21 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.phonetic.SoundexMetric + +case object soundexmetric extends Command( + (opts) => + "Compares two strings to determine if they are phonetically similarly, per the Soundex algorithm." + Ls + Ls + + "Syntax:" + Ls + + Tab + "soundexmetric [Options] string1 string2..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and options.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + SoundexMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetric.scala new file mode 100755 index 0000000..bb7ac0b --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetric.scala @@ -0,0 +1,26 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.DiceSorensenMetric + +case object dicesorensenmetric extends Command( + (opts) => + "Compares the similarity of two strings using the Dice / Sorensen coefficient." + Ls + Ls + + "Syntax:" + Ls + + Tab + "dicesorensenmetric [Options] string1 string2..." + Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts." 
+ Ls + + Tab + "--n" + Ls + + Tab + Tab + "The n-gram size, traditionally 2.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 && + opts.contains('n) && (opts('n): Int) >= 1, + (opts) => { + val strings: Array[String] = opts('dashless) + val n: Int = opts('n) + + DiceSorensenMetric(n).compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetric.scala new file mode 100755 index 0000000..e3db059 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetric.scala @@ -0,0 +1,22 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.HammingMetric + +case object hammingmetric extends Command( + (opts) => + "Compares the number of characters that two equal length strings are different from one another." + Ls + Ls + + "Syntax:" + Ls + + Tab + "hammingmetric [Options] string1 string2..."
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and options.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + + HammingMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetric.scala new file mode 100755 index 0000000..8301158 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetric.scala @@ -0,0 +1,26 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.JaccardMetric + +case object jaccardmetric extends Command( + (opts) => + "Compares the similarity of two strings using the Jaccard coefficient." + Ls + Ls + + "Syntax:" + Ls + + Tab + "jaccardmetric [Options] string1 string2..." + Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts." 
+ Ls + + Tab + "--n" + Ls + + Tab + Tab + "The n-gram size.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 + && opts.contains('n) && (opts('n): Int) >= 1, + (opts) => { + val strings: Array[String] = opts('dashless) + val n: Int = opts('n) + + JaccardMetric(n).compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jarometric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jarometric.scala new file mode 100755 index 0000000..cb7e188 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jarometric.scala @@ -0,0 +1,22 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.JaroMetric + +case object jarometric extends Command( + (opts) => + "Compares two strings to calculate the Jaro distance." + Ls + Ls + + "Syntax:" + Ls + + Tab + "jarometric [Options] string1 string2..."
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and options.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + + JaroMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetric.scala new file mode 100755 index 0000000..5cfcb1e --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetric.scala @@ -0,0 +1,22 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.JaroWinklerMetric + +case object jarowinklermetric extends Command( + (opts) => + "Compares two strings to calculate the Jaro-Winkler distance." + Ls + Ls + + "Syntax:" + Ls + + Tab + "jarowinklermetric [Options] string1 string2..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + + JaroWinklerMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetric.scala new file mode 100755 index 0000000..9cfb021 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetric.scala @@ -0,0 +1,23 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.LevenshteinMetric + +case object levenshteinmetric extends Command( + (opts) => + "Compares the number of characters that two strings are different from one another via insertion, deletion, " + + "and substitution." + Ls + Ls + + "Syntax:" + Ls + + Tab + "levenshteinmetric [Options] string1 string2..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and options.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + + LevenshteinMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetric.scala new file mode 100755 index 0000000..26914c8 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetric.scala @@ -0,0 +1,26 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.NGramMetric + +case object ngrammetric extends Command( + (opts) => + "Compares the similarity of two strings using an N-Gram similarity index." + Ls + Ls + + "Syntax:" + Ls + + Tab + "ngrammetric [Options] string1 string2..." + Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and options." 
+ Ls + + Tab + "--n" + Ls + + Tab + Tab + "The n.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 && + opts.contains('n) && (opts('n): Int) >= 1, + (opts) => { + val strings: Array[String] = opts('dashless) + val n: Int = opts('n) + + NGramMetric(n).compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetric.scala new file mode 100755 index 0000000..a045b49 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetric.scala @@ -0,0 +1,26 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.OverlapMetric + +case object overlapmetric extends Command( + (opts) => + "Compares the similarity of two strings using the overlap coefficient." + Ls + Ls + + "Syntax:" + Ls + + Tab + "overlapmetric [Options] string1 string2..." + Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts." + Ls
+ + Tab + "--n" + Ls + + Tab + Tab + "The n-gram size.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 && + opts.contains('n) && (opts('n): Int) >= 1, + (opts) => { + val strings: Array[String] = opts('dashless) + val n: Int = opts('n) + + OverlapMetric(n).compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetric.scala new file mode 100755 index 0000000..08804b6 --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetric.scala @@ -0,0 +1,22 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.RatcliffObershelpMetric + +case object ratcliffobershelpmetric extends Command( + (opts) => + "Compares the similarity of two strings using the Ratcliff / Obershelp similarity index." + Ls + Ls + + "Syntax:" + Ls + + Tab + "ratcliffobershelpmetric [Options] string1 string2..." 
+ Ls + Ls + + "Options:" + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2, + (opts) => { + val strings: Array[String] = opts('dashless) + + RatcliffObershelpMetric.compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetric.scala b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetric.scala new file mode 100755 index 0000000..c74f80a --- /dev/null +++ b/cli/src/main/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetric.scala @@ -0,0 +1,33 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli._ +import com.rockymadden.stringmetric.similarity.WeightedLevenshteinMetric + +case object weightedlevenshteinmetric extends Command( + (opts) => + "Compares the number of characters that two strings are different from one another via insertion, deletion, " + + "and substitution. Allows the invoker to indicate the weight each operation takes." + Ls + Ls + + "Syntax:" + Ls + + Tab + "weightedlevenshteinmetric [Options] --deleteWeight=[double] --insertWeight=[double] --substituteWeight=[double] string1 string2..." + Ls + Ls + + "Options:" + Ls + + Tab + "--deleteWeight" + Ls + + Tab + Tab + "The weight given to delete operations." + Ls + + Tab + "-h, --help" + Ls + + Tab + Tab + "Outputs description, syntax, and opts." + Ls + + Tab + "--insertWeight" + Ls + + Tab + Tab + "The weight given to insert operations." + Ls
+ + Tab + "--substituteWeight" + Ls + + Tab + Tab + "The weight given to substitute operations.", + (opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 2 && + opts.contains('deleteWeight) && (opts('deleteWeight): Double) >= 0 && + opts.contains('insertWeight) && (opts('insertWeight): Double) >= 0 && + opts.contains('substituteWeight) && (opts('substituteWeight): Double) >= 0, + (opts) => { + val strings: Array[String] = opts('dashless) + + WeightedLevenshteinMetric(opts('deleteWeight), opts('insertWeight), opts('substituteWeight)) + .compare(strings(0), strings(1)) + .map(_.toString) + .getOrElse("not comparable") + } +) diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/CliSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/CliSpec.scala new file mode 100755 index 0000000..38c6fd3 --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/CliSpec.scala @@ -0,0 +1,81 @@ +package com.rockymadden.stringmetric.cli + +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class CliSpec extends ScalaTest { "OptionMap" should provide { + "apply method" when passed { + "single valid double dashed option" should returns { + "populated Map" in { + val opts: OptionMap = Array("--help") + + (opts('help): String) should equal ("") + } + } + "multiple valid double dashed opts" should returns { + "populated Map" in { + val opts: OptionMap = Array("--help", "--test=test") + + (opts('help): String) should equal ("") + (opts('test): String) should equal ("test") + } + } + "invalid double dashed opts" should returns { + "empty Map" in { + val opts: OptionMap = Array("--help#", "--test%=test") + + opts.keysIterator.length should be (0) + } + } + "single valid single dashed option" should returns { + "populated Map" in { + val opts: OptionMap = Array("-h") + + (opts('h): String) should equal ("") + } + } + "multiple valid single dashed opts" should returns { + 
"populated Map" in { + val opts: OptionMap = Array("-h", "-i") + + (opts('h): String) should equal ("") + (opts('i): String) should equal ("") + } + } + "invalid single dashed opts" should returns { + "empty Map" in { + val opts: OptionMap = Array("-h-i", "-i#gloo") + + opts.keysIterator.length should be (0) + } + } + "single nameless option" should returns { + "single key populated Map" in { + val opts: OptionMap = Array("filename0") + + (opts('dashless): String).count(_ == ' ') should be (0) + } + } + "multiple single nameless opts" should returns { + "single key populated Map" in { + val opts: OptionMap = Array("filename0", "filename1", "filename2") + + (opts('dashless): String).count(_ == ' ') should be (2) + } + } + "mixed opts" should returns { + "populated Map" in { + val opts: OptionMap = Array( + "-q", "--help", "--test=test", "-go", "filename0", "filename1", "filename2" + ) + + (opts('q): String) should equal ("") + (opts('help): String) should equal ("") + (opts('test): String) should equal ("test") + (opts('go): String) should equal ("") + (opts('dashless): String).count(_ == ' ') should be (2) + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/ScalaTest.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/ScalaTest.scala new file mode 100755 index 0000000..4cc0eed --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/ScalaTest.scala @@ -0,0 +1,18 @@ +package com.rockymadden.stringmetric.cli + +import org.scalatest.{BeforeAndAfter, ParallelTestExecution, WordSpec} +import org.scalatest.matchers.ShouldMatchers + +trait ScalaTest extends WordSpec with ShouldMatchers with BeforeAndAfter with ParallelTestExecution { + def allows = afterWord("allow") + + def executes = afterWord("execute") + + def passed = afterWord("passed") + + def provide = afterWord("provide") + + def returns = afterWord("return") + + def throws = afterWord("throw") +} diff --git 
a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithmSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithmSpec.scala new file mode 100755 index 0000000..8b8226d --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonealgorithmSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class metaphonealgorithmSpec extends ScalaTest { "metaphonealgorithm" should provide { + "main method" when passed { + "valid dashless argument" should executes { + "print phonetic representation" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + metaphonealgorithm.main(Array("--unitTest", "--debug", "abc")) + ) + + out.toString should equal ("abk\n") + out.reset() + + Console.withOut(out)( + metaphonealgorithm.main(Array("--unitTest", "--debug", "1")) + ) + + out.toString should equal ("not computable\n") + out.reset() + } + } + "no dashless argument" should throws { + "IllegalArgumentException" in { + evaluating { + metaphonealgorithm.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetricSpec.scala new file mode 100755 index 0000000..ab8d4fb --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/metaphonemetricSpec.scala @@ -0,0 +1,44 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class metaphonemetricSpec extends ScalaTest { "metaphonemetric" should 
provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + metaphonemetric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("true\n") + out.reset() + + Console.withOut(out)( + metaphonemetric.main(Array("--unitTest", "--debug", "abc", "xyz")) + ) + + out.toString should equal ("false\n") + out.reset() + + Console.withOut(out)( + metaphonemetric.main(Array("--unitTest", "--debug", "1", "1")) + ) + + out.toString should equal ("not comparable\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + metaphonemetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithmSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithmSpec.scala new file mode 100755 index 0000000..21ae04a --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiisalgorithmSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class nysiisalgorithmSpec extends ScalaTest { "nysiisalgorithm" should provide { + "main method" when passed { + "valid dashless argument" should executes { + "print phonetic representation" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + nysiisalgorithm.main(Array("--unitTest", "--debug", "abc")) + ) + + out.toString should equal ("abc\n") + out.reset() + + Console.withOut(out)( + nysiisalgorithm.main(Array("--unitTest", "--debug", "1")) + ) + + out.toString should equal ("not computable\n") + out.reset() + } + } + "no dashless 
argument" should throws { + "IllegalArgumentException" in { + evaluating { + nysiisalgorithm.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetricSpec.scala new file mode 100755 index 0000000..aa28fe4 --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/nysiismetricSpec.scala @@ -0,0 +1,44 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class nysiismetricSpec extends ScalaTest { "nysiismetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + nysiismetric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("true\n") + out.reset() + + Console.withOut(out)( + nysiismetric.main(Array("--unitTest", "--debug", "abc", "xyz")) + ) + + out.toString should equal ("false\n") + out.reset() + + Console.withOut(out)( + nysiismetric.main(Array("--unitTest", "--debug", "1", "1")) + ) + + out.toString should equal ("not comparable\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + nysiismetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithmSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithmSpec.scala new file mode 100755 index 0000000..f2e78d9 --- /dev/null +++ 
b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiisalgorithmSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class refinednysiisalgorithmSpec extends ScalaTest { "refinednysiisalgorithm" should provide { + "main method" when passed { + "valid dashless argument" should executes { + "print phonetic representation" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + refinednysiisalgorithm.main(Array("--unitTest", "--debug", "abc")) + ) + + out.toString should equal ("abc\n") + out.reset() + + Console.withOut(out)( + refinednysiisalgorithm.main(Array("--unitTest", "--debug", "1")) + ) + + out.toString should equal ("not computable\n") + out.reset() + } + } + "no dashless argument" should throws { + "IllegalArgumentException" in { + evaluating { + refinednysiisalgorithm.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetricSpec.scala new file mode 100755 index 0000000..ce9cebd --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinednysiismetricSpec.scala @@ -0,0 +1,44 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class refinednysiismetricSpec extends ScalaTest { "refinednysiismetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + 
refinednysiismetric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("true\n") + out.reset() + + Console.withOut(out)( + refinednysiismetric.main(Array("--unitTest", "--debug", "abc", "xyz")) + ) + + out.toString should equal ("false\n") + out.reset() + + Console.withOut(out)( + refinednysiismetric.main(Array("--unitTest", "--debug", "1", "1")) + ) + + out.toString should equal ("not comparable\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + refinednysiismetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithmSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithmSpec.scala new file mode 100755 index 0000000..18d46fa --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexalgorithmSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class refinedsoundexalgorithmSpec extends ScalaTest { "refinedsoundexalgorithm" should provide { + "main method" when passed { + "valid dashless argument" should executes { + "print phonetic representation" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + refinedsoundexalgorithm.main(Array("--unitTest", "--debug", "abc")) + ) + + out.toString should equal ("a013\n") + out.reset() + + Console.withOut(out)( + refinedsoundexalgorithm.main(Array("--unitTest", "--debug", "1")) + ) + + out.toString should equal ("not computable\n") + out.reset() + } + } + "no dashless argument" should throws { + "IllegalArgumentException" in { + evaluating { + refinedsoundexalgorithm.main(Array("--unitTest", 
"--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetricSpec.scala new file mode 100755 index 0000000..041fada --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/refinedsoundexmetricSpec.scala @@ -0,0 +1,44 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class refinedsoundexmetricSpec extends ScalaTest { "refinedsoundexmetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + refinedsoundexmetric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("true\n") + out.reset() + + Console.withOut(out)( + refinedsoundexmetric.main(Array("--unitTest", "--debug", "abc", "xyz")) + ) + + out.toString should equal ("false\n") + out.reset() + + Console.withOut(out)( + refinedsoundexmetric.main(Array("--unitTest", "--debug", "1", "1")) + ) + + out.toString should equal ("not comparable\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + refinedsoundexmetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithmSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithmSpec.scala new file mode 100755 index 0000000..e23b3c3 --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexalgorithmSpec.scala @@ -0,0 +1,37 @@ +package 
com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class soundexalgorithmSpec extends ScalaTest { "soundexalgorithm" should provide { + "main method" when passed { + "valid dashless argument" should executes { + "print phonetic representation" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + soundexalgorithm.main(Array("--unitTest", "--debug", "abc")) + ) + + out.toString should equal ("a120\n") + out.reset() + + Console.withOut(out)( + soundexalgorithm.main(Array("--unitTest", "--debug", "1")) + ) + + out.toString should equal ("not computable\n") + out.reset() + } + } + "no dashless argument" should throws { + "IllegalArgumentException" in { + evaluating { + soundexalgorithm.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetricSpec.scala new file mode 100755 index 0000000..332b058 --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/phonetic/soundexmetricSpec.scala @@ -0,0 +1,44 @@ +package com.rockymadden.stringmetric.cli.phonetic + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class soundexmetricSpec extends ScalaTest { "soundexmetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + soundexmetric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("true\n") + out.reset() + + Console.withOut(out)( + soundexmetric.main(Array("--unitTest", 
"--debug", "abc", "xyz")) + ) + + out.toString should equal ("false\n") + out.reset() + + Console.withOut(out)( + soundexmetric.main(Array("--unitTest", "--debug", "1", "1")) + ) + + out.toString should equal ("not comparable\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + soundexmetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetricSpec.scala new file mode 100755 index 0000000..82cf8c2 --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/dicesorensenmetricSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class dicesorensenmetricSpec extends ScalaTest { "dicesorensenmetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + dicesorensenmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "abc")) + ) + + out.toString should equal ("1.0\n") + out.reset() + + Console.withOut(out)( + dicesorensenmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "xyz")) + ) + + out.toString should equal ("0.0\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + dicesorensenmetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetricSpec.scala 
b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetricSpec.scala new file mode 100755 index 0000000..dc34e8f --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/hammingmetricSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class hammingmetricSpec extends ScalaTest { "hammingmetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + hammingmetric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("0\n") + out.reset() + + Console.withOut(out)( + hammingmetric.main(Array("--unitTest", "--debug", "abc", "xyz")) + ) + + out.toString should equal ("3\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + hammingmetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetricSpec.scala new file mode 100755 index 0000000..52da38f --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jaccardmetricSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class jaccardmetricSpec extends ScalaTest { "jaccardmetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val 
out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + jaccardmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "abc")) + ) + + out.toString should equal ("1.0\n") + out.reset() + + Console.withOut(out)( + jaccardmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "xyz")) + ) + + out.toString should equal ("0.0\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + jaccardmetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jarometricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jarometricSpec.scala new file mode 100755 index 0000000..866ea9e --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jarometricSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class jarometricSpec extends ScalaTest { "jarometric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print the distance" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + jarometric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("1.0\n") + out.reset() + + Console.withOut(out)( + jarometric.main(Array("--unitTest", "--debug", "abc", "xyz")) + ) + + out.toString should equal ("0.0\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + jarometric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetricSpec.scala 
b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetricSpec.scala new file mode 100755 index 0000000..56bf014 --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/jarowinklermetricSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class jarowinklermetricSpec extends ScalaTest { "jarowinklermetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print the distance" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + jarowinklermetric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("1.0\n") + out.reset() + + Console.withOut(out)( + jarowinklermetric.main(Array("--unitTest", "--debug", "abc", "xyz")) + ) + + out.toString should equal ("0.0\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + jarowinklermetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetricSpec.scala new file mode 100755 index 0000000..e18133c --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/levenshteinmetricSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class levenshteinmetricSpec extends ScalaTest { "levenshteinmetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes 
{ + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + levenshteinmetric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("0\n") + out.reset() + + Console.withOut(out)( + levenshteinmetric.main(Array("--unitTest", "--debug", "abc", "xyz")) + ) + + out.toString should equal ("3\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + levenshteinmetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetricSpec.scala new file mode 100755 index 0000000..3106b02 --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/ngrammetricSpec.scala @@ -0,0 +1,66 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class ngrammetricSpec extends ScalaTest { "ngrammetric" should provide { + "main method" when passed { + "valid dashless arguments and valid n argument" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + ngrammetric.main( + Array( + "--unitTest", + "--debug", + "--n=1", + "abc", + "abc" + ) + ) + ) + + out.toString should equal ("1.0\n") + out.reset() + + Console.withOut(out)( + ngrammetric.main( + Array( + "--unitTest", + "--debug", + "--n=1", + "abc", + "xyz" + ) + ) + ) + + out.toString should equal ("0.0\n") + out.reset() + } + } + "valid dashless arguments and invalid n argument" should throws { + "IllegalArgumentException" in { + evaluating { + ngrammetric.main( + Array( + "--unitTest", + "abc", + "abc" + ) + ) + } should 
produce [IllegalArgumentException] + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + ngrammetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetricSpec.scala new file mode 100755 index 0000000..0721ebe --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/overlapmetricSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class overlapmetricSpec extends ScalaTest { "overlapmetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + overlapmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "abc")) + ) + + out.toString should equal ("1.0\n") + out.reset() + + Console.withOut(out)( + overlapmetric.main(Array("--unitTest", "--debug", "--n=2", "abc", "xyz")) + ) + + out.toString should equal ("0.0\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + overlapmetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetricSpec.scala new file mode 100755 index 0000000..b4ae512 --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/ratcliffobershelpmetricSpec.scala @@ -0,0 +1,37 @@ +package 
com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class ratcliffobershelpmetricSpec extends ScalaTest { "ratcliffobershelpmetric" should provide { + "main method" when passed { + "valid dashless arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + ratcliffobershelpmetric.main(Array("--unitTest", "--debug", "abc", "abc")) + ) + + out.toString should equal ("1.0\n") + out.reset() + + Console.withOut(out)( + ratcliffobershelpmetric.main(Array("--unitTest", "--debug", "abc", "xyz")) + ) + + out.toString should equal ("0.0\n") + out.reset() + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + ratcliffobershelpmetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetricSpec.scala b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetricSpec.scala new file mode 100755 index 0000000..568f583 --- /dev/null +++ b/cli/src/test/scala/com/rockymadden/stringmetric/cli/similarity/weightedlevenshteinmetricSpec.scala @@ -0,0 +1,121 @@ +package com.rockymadden.stringmetric.cli.similarity + +import com.rockymadden.stringmetric.cli.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class weightedlevenshteinmetricSpec extends ScalaTest { "weightedlevenshteinmetric" should provide { + "main method" when passed { + "valid dashless arguments and valid weight arguments" should executes { + "print if they are a match" in { + val out = new java.io.ByteArrayOutputStream() + + Console.withOut(out)( + weightedlevenshteinmetric.main( + Array( + 
"--unitTest", + "--debug", + "--deleteWeight=1", + "--insertWeight=1", + "--substituteWeight=1", + "abc", + "abc" + ) + ) + ) + + out.toString should equal ("0.0\n") + out.reset() + + Console.withOut(out)( + weightedlevenshteinmetric.main( + Array( + "--unitTest", + "--debug", + "--deleteWeight=2", + "--insertWeight=2", + "--substituteWeight=1", + "abc", + "xyz" + ) + ) + ) + + out.toString should equal ("3.0\n") + out.reset() + + Console.withOut(out)( + weightedlevenshteinmetric.main( + Array( + "--unitTest", + "--debug", + "--deleteWeight=2", + "--insertWeight=1", + "--substituteWeight=2", + "xyz", + "xyzxyz" + ) + ) + ) + + out.toString should equal ("3.0\n") + out.reset() + + Console.withOut(out)( + weightedlevenshteinmetric.main( + Array( + "--unitTest", + "--debug", + "--deleteWeight=1", + "--insertWeight=2", + "--substituteWeight=2", + "xyzxyz", + "xyz" + ) + ) + ) + + out.toString should equal ("3.0\n") + out.reset() + } + } + "valid dashless arguments and invalid weight arguments" should throws { + "IllegalArgumentException" in { + evaluating { + weightedlevenshteinmetric.main( + Array( + "--unitTest", + "--debug", + "--deleteWeight=1", + "--substituteWeight=1", + "abc", + "abc" + ) + ) + } should produce [IllegalArgumentException] + + evaluating { + weightedlevenshteinmetric.main( + Array( + "--unitTest", + "--debug", + "--deleteWeight=1", + "--insertWeight=q", + "--substituteWeight=1", + "abc", + "abc" + ) + ) + } should produce [IllegalArgumentException] + } + } + "no dashless arguments" should throws { + "IllegalArgumentException" in { + evaluating { + weightedlevenshteinmetric.main(Array("--unitTest", "--debug")) + } should produce [IllegalArgumentException] + } + } + } +}} diff --git a/core/build.gradle b/core/build.gradle deleted file mode 100755 index 701358e..0000000 --- a/core/build.gradle +++ /dev/null @@ -1,46 +0,0 @@ -buildscript { - dependencies { classpath 'net.saliman:gradle-cobertura-plugin:2.2.2' } - repositories { mavenCentral() } -} - 
-apply from: '../deploy.gradle' -apply plugin: 'cobertura' -apply plugin: 'scala' - -cobertura { - coverageFormats = ['html', 'xml'] - coverageSourceDirs = sourceSets.main.scala.srcDirs -} - -compileScala { compileScala.scalaCompileOptions.additionalParameters = ['-target:jvm-1.6', '–Xdisable-assertions'] } -compileTestScala { compileTestScala.scalaCompileOptions.additionalParameters = ['-target:jvm-1.6'] } - -dependencies { - compile'org.scala-lang:scala-compiler:2.10.2' - compile 'org.scala-lang:scala-library:2.10.2' - - testCompile 'com.google.caliper:caliper:0.5-rc1' - testCompile 'junit:junit:4.11' - testCompile 'org.scalatest:scalatest_2.10:2.0.M5b' -} - -sourceSets { - benchmark { - output.resourcesDir "${project.buildDir}/classes/benchmark" - - resources { srcDir 'source/benchmark/resource' } - scala { srcDir 'source/benchmark/scala' } - } - main { - output.resourcesDir "${project.buildDir}/classes/main" - - resources { srcDir 'source/main/resource' } - scala { srcDir 'source/main/scala' } - } - test { - output.resourcesDir "${project.buildDir}/classes/test" - - resources { srcDir 'source/test/resource' } - scala { srcDir 'source/test/scala' } - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala deleted file mode 100755 index 55a6238..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala +++ /dev/null @@ -1,7 +0,0 @@ -package com.rockymadden.stringmetric - -import com.google.caliper.SimpleBenchmark - -trait CaliperBenchmark extends SimpleBenchmark { - def run(reps: Int)(code: => Unit) = (0 until reps).foreach(i => code) -} \ No newline at end of file diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala deleted file mode 100755 index 4474a8d..0000000 --- 
a/core/source/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala +++ /dev/null @@ -1,7 +0,0 @@ -package com.rockymadden.stringmetric - -import com.google.caliper.{Benchmark, Runner} - -abstract class CaliperRunner(private[this] val suite: java.lang.Class[_ <: Benchmark]) { - def main(args: Array[String]): Unit = Runner.main(suite, args) -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala deleted file mode 100755 index fe5c80d..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class MetaphoneAlgorithmBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string: String = _ - var charArray: Array[Char] = _ - - override protected def setUp() { - string = Random.alphanumeric.filter(_ > '9').take(length).mkString - charArray = string.toCharArray - } - - def timeComputeWithCharArray(reps: Int) = run(reps) { - MetaphoneAlgorithm.compute(charArray) - } - - def timeComputeWithString(reps: Int) = run(reps) { - MetaphoneAlgorithm.compute(string) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala deleted file mode 100755 index c9b59fc..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala +++ /dev/null @@ -1,49 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import 
com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.annotation.tailrec -import scala.util.Random - -final class MetaphoneMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.filter(_ > '9').take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - MetaphoneMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - MetaphoneMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - MetaphoneMetric.compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - MetaphoneMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala deleted file mode 100755 index ac04cd7..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class NysiisAlgorithmBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string: String = _ - var charArray: Array[Char] 
= _ - - override protected def setUp() { - string = Random.alphanumeric.filter(_ > '9').take(length).mkString - charArray = string.toCharArray - } - - def timeComputeWithCharArray(reps: Int) = run(reps) { - NysiisAlgorithm.compute(charArray) - } - - def timeComputeWithString(reps: Int) = run(reps) { - NysiisAlgorithm.compute(string) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala deleted file mode 100755 index f975d29..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala +++ /dev/null @@ -1,49 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.annotation.tailrec -import scala.util.Random - -final class NysiisMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.filter(_ > '9').take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - NysiisMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - NysiisMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - NysiisMetric.compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - 
NysiisMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala deleted file mode 100755 index 4141b37..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class RefinedNysiisAlgorithmBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string: String = _ - var charArray: Array[Char] = _ - - override protected def setUp() { - string = Random.alphanumeric.filter(_ > '9').take(length).mkString - charArray = string.toCharArray - } - - def timeComputeWithCharArray(reps: Int) = run(reps) { - RefinedNysiisAlgorithm.compute(charArray) - } - - def timeComputeWithString(reps: Int) = run(reps) { - RefinedNysiisAlgorithm.compute(string) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala deleted file mode 100755 index d927f18..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala +++ /dev/null @@ -1,49 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.annotation.tailrec -import scala.util.Random - -final class RefinedNysiisMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: 
Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.filter(_ > '9').take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - RefinedNysiisMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - RefinedNysiisMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - RefinedNysiisMetric.compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - RefinedNysiisMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala deleted file mode 100755 index ec8d53c..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class RefinedSoundexAlgorithmBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string: String = _ - var charArray: Array[Char] = _ - - override protected def setUp() { - string = Random.alphanumeric.filter(_ > '9').take(length).mkString - charArray = string.toCharArray - } - - def timeComputeWithCharArray(reps: Int) = run(reps) { - RefinedSoundexAlgorithm.compute(charArray) - } - 
- def timeComputeWithString(reps: Int) = run(reps) { - RefinedSoundexAlgorithm.compute(string) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala deleted file mode 100755 index 6c52e8d..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala +++ /dev/null @@ -1,49 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.annotation.tailrec -import scala.util.Random - -final class RefinedSoundexMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.filter(_ > '9').take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - RefinedSoundexMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - RefinedSoundexMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - RefinedSoundexMetric.compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - RefinedSoundexMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala 
b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala deleted file mode 100755 index 9dc67de..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class SoundexAlgorithmBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string: String = _ - var charArray: Array[Char] = _ - - override protected def setUp() { - string = Random.alphanumeric.filter(_ > '9').take(length).mkString - charArray = string.toCharArray - } - - def timeComputeWithCharArray(reps: Int) = run(reps) { - SoundexAlgorithm.compute(charArray) - } - - def timeComputeWithString(reps: Int) = run(reps) { - SoundexAlgorithm.compute(string) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala deleted file mode 100755 index 7707019..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala +++ /dev/null @@ -1,49 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.annotation.tailrec -import scala.util.Random - -final class SoundexMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = 
Random.alphanumeric.filter(_ > '9').take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - SoundexMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - SoundexMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - SoundexMetric.compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - SoundexMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala deleted file mode 100755 index 5df27fb..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class DiceSorensenMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = 
run(reps) { - DiceSorensenMetric(2).compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - DiceSorensenMetric(2).compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - DiceSorensenMetric(2).compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - DiceSorensenMetric(2).compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala deleted file mode 100755 index 3d04074..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class HammingMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - HammingMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - HammingMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - HammingMetric.compare(charArray1, charArray1) - } 
- - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - HammingMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala deleted file mode 100755 index ddeef7a..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class JaccardMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - JaccardMetric(2).compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - JaccardMetric(2).compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - JaccardMetric(2).compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - JaccardMetric(2).compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala 
b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala deleted file mode 100755 index 47baa6d..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class JaroMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - JaroMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - JaroMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - JaroMetric.compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - JaroMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala deleted file mode 100755 index c11a01c..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - 
-import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class JaroWinklerMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - JaroWinklerMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - JaroWinklerMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - JaroWinklerMetric.compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - JaroWinklerMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala deleted file mode 100755 index a8460e2..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class LevenshteinMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ 
- var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - LevenshteinMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - LevenshteinMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - LevenshteinMetric.compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - LevenshteinMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala deleted file mode 100755 index 8ba1bb0..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala +++ /dev/null @@ -1,51 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class NGramMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - @Param(Array("2", "3")) - var n: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = 
Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - NGramMetric(n).compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - NGramMetric(n).compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - NGramMetric(n).compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - NGramMetric(n).compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala deleted file mode 100755 index 89207f2..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class OverlapMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - 
OverlapMetric(2).compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - OverlapMetric(2).compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - OverlapMetric(2).compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - OverlapMetric(2).compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala deleted file mode 100755 index 86196ab..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class RatcliffObershelpMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - RatcliffObershelpMetric.compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - RatcliffObershelpMetric.compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - 
RatcliffObershelpMetric.compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - RatcliffObershelpMetric.compare(string1, string1) - } -} diff --git a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala deleted file mode 100755 index 837ce01..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class WeightedLevenshteinMetricBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - var string1: String = _ - var charArray1: Array[Char] = _ - var string2: String = _ - var charArray2: Array[Char] = _ - - override protected def setUp() { - @annotation.tailrec - def random(l: Int, ps: String = null): String = - if (l == 0) "" - else { - val s = Random.alphanumeric.take(l).mkString - - if (ps == null || s != ps) s - else random(l, ps) - } - - string1 = random(length) - string2 = random(length, string1) - charArray1 = string1.toCharArray - charArray2 = string2.toCharArray - } - - def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { - WeightedLevenshteinMetric(1, 1, 1).compare(charArray1, charArray2) - } - - def timeCompareWithDifferentStrings(reps: Int) = run(reps) { - WeightedLevenshteinMetric(1, 1, 1).compare(string1, string2) - } - - def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { - WeightedLevenshteinMetric(1, 1, 1).compare(charArray1, charArray1) - } - - def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { - WeightedLevenshteinMetric(1, 1, 1).compare(string1, string1) - } -} diff --git 
a/core/source/benchmark/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerBenchmark.scala b/core/source/benchmark/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerBenchmark.scala deleted file mode 100755 index d66bf88..0000000 --- a/core/source/benchmark/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerBenchmark.scala +++ /dev/null @@ -1,29 +0,0 @@ -package com.rockymadden.stringmetric.tokenize - -import com.google.caliper.Param -import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} -import scala.util.Random - -final class NGramTokenizerBenchmark extends CaliperBenchmark { - @Param(Array("0", "1", "2", "4", "8", "16")) - var length: Int = _ - - @Param(Array("2", "3")) - var n: Int = _ - - var string: String = _ - var charArray: Array[Char] = _ - - override protected def setUp() { - string = Random.alphanumeric.take(length).mkString - charArray = string.toCharArray - } - - def timeComputeWithCharArray(reps: Int) = run(reps) { - NGramTokenizer(n).tokenize(charArray) - } - - def timeComputeWithString(reps: Int) = run(reps) { - NGramTokenizer(n).tokenize(string) - } -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala deleted file mode 100755 index 84f136d..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/Algorithm.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.rockymadden.stringmetric - -object Algorithm { - import Transform.StringTransform - - - trait Algorithm[A] { - def compute(a: A): Option[A] - } - - - trait StringAlgorithm extends Algorithm[Array[Char]] { - def compute(a: String): Option[String] - } - - - object StringAlgorithm { - final val Metaphone = phonetic.MetaphoneAlgorithm - final val Nysiis = phonetic.NysiisAlgorithm - final val RefinedNysiis = phonetic.RefinedNysiisAlgorithm - final val RefinedSoundex = phonetic.RefinedSoundexAlgorithm - final val Soundex = phonetic.SoundexAlgorithm - - def 
computeWithMetaphone(a: Array[Char]) = Metaphone.compute(a) - - def computeWithNysiis(a: Array[Char]) = Nysiis.compute(a) - - def computeWithRefinedNysiis(a: Array[Char]) = RefinedNysiis.compute(a) - - def computeWithRefinedSoundex(a: Array[Char]) = RefinedSoundex.compute(a) - - def computeWithSoundex(a: Array[Char]) = Soundex.compute(a) - } - - - final class StringAlgorithmDecorator(val sa: StringAlgorithm) { - val withTransform: (StringTransform => StringAlgorithm) = (st) => new StringAlgorithm { - private[this] val self: StringAlgorithm = sa - private[this] val transform: StringTransform = st - - override def compute(a: Array[Char]): Option[Array[Char]] = self.compute(transform(a)) - - override def compute(a: String): Option[String] = self.compute(transform(a.toCharArray)).map(_.mkString) - } - } -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala b/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala deleted file mode 100755 index 5e666d2..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/Alphabet.scala +++ /dev/null @@ -1,42 +0,0 @@ -package com.rockymadden.stringmetric - -import scala.collection.immutable.Set - -object Alphabet { - sealed abstract class AlphabetSet(val chars: Set[Char]) { - def isSuperset(a: Char): Boolean = chars.contains(a) - - def isSuperset(a: Array[Char]): Boolean = a.length > 0 && a.takeWhile(chars.contains).length == a.length - - def isSuperset(a: String): Boolean = isSuperset(a.toCharArray) - } - - - case object LowercaseConsonant extends AlphabetSet( - Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x' ,'z') - ) - - case object UppercaseConsonant extends AlphabetSet( - Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X' ,'Z') - ) - - case object Consonant extends AlphabetSet(LowercaseConsonant.chars ++ UppercaseConsonant.chars) - - case object LowercaseVowel extends 
AlphabetSet(Set('a', 'e', 'i', 'o', 'u')) - - case object UppercaseVowel extends AlphabetSet(Set('A', 'E', 'I', 'O', 'U')) - - case object Vowel extends AlphabetSet(LowercaseVowel.chars ++ UppercaseVowel.chars) - - case object LowercaseY extends AlphabetSet(Set('y')) - - case object UppercaseY extends AlphabetSet(Set('Y')) - - case object Y extends AlphabetSet(LowercaseY.chars ++ UppercaseY.chars) - - case object LowercaseAlpha extends AlphabetSet(LowercaseConsonant.chars ++ LowercaseVowel.chars ++ LowercaseY.chars) - - case object UppercaseAlpha extends AlphabetSet(UppercaseConsonant.chars ++ UppercaseVowel.chars ++ UppercaseY.chars) - - case object Alpha extends AlphabetSet(LowercaseAlpha.chars ++ UppercaseAlpha.chars) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala b/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala deleted file mode 100755 index f45dd14..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/Metric.scala +++ /dev/null @@ -1,77 +0,0 @@ -package com.rockymadden.stringmetric - -object Metric { - import Transform.StringTransform - - - trait Metric[A, B] { - def compare(a: A, b: A): Option[B] - } - - - trait StringMetric[A] extends Metric[Array[Char], A] { - def compare(a: String, b: String): Option[A] - } - - - object StringMetric { - final val DiceSorensen = similarity.DiceSorensenMetric - final val Hamming = similarity.HammingMetric - final val Jaccard = similarity.JaccardMetric - final val Jaro = similarity.JaroMetric - final val JaroWinkler = similarity.JaroWinklerMetric - final val Levenshtein = similarity.LevenshteinMetric - final val Metaphone = phonetic.MetaphoneMetric - final val NGram = similarity.NGramMetric - final val Nysiis = phonetic.NysiisMetric - final val Overlap = similarity.OverlapMetric - final val RefinedNysiis = phonetic.RefinedNysiisMetric - final val RefinedSoundex = phonetic.RefinedSoundexMetric - final val Soundex = phonetic.SoundexMetric - final val 
WeightedLevenshtein = similarity.WeightedLevenshteinMetric - - def compareWithDiceSorensen(n: Int)(a: Array[Char], b: Array[Char]) = DiceSorensen(n).compare(a, b) - - def compareWithHamming(a: Array[Char], b: Array[Char]) = Hamming.compare(a, b) - - def compareWithJaccard(n: Int)(a: Array[Char], b: Array[Char]) = Jaccard(n).compare(a, b) - - def compareWithJaro(a: Array[Char], b: Array[Char]) = Jaro.compare(a, b) - - def compareWithJaroWinkler(a: Array[Char], b: Array[Char]) = JaroWinkler.compare(a, b) - - def compareWithLevenshtein(a: Array[Char], b: Array[Char]) = Levenshtein.compare(a, b) - - def compareWithMetaphone(a: Array[Char], b: Array[Char]) = Metaphone.compare(a, b) - - def compareWithNGram(n: Int)(a: Array[Char], b: Array[Char]) = NGram(n).compare(a, b) - - def compareWithNysiis(a: Array[Char], b: Array[Char]) = Nysiis.compare(a, b) - - def compareWithOverlap(n: Int)(a: Array[Char], b: Array[Char]) = Overlap(n).compare(a, b) - - def compareWithRefinedNysiis(a: Array[Char], b: Array[Char]) = RefinedNysiis.compare(a, b) - - def compareWithRefinedSoundex(a: Array[Char], b: Array[Char]) = RefinedSoundex.compare(a, b) - - def compareWithSoundex(a: Array[Char], b: Array[Char]) = Soundex.compare(a, b) - - def compareWithWeightedLevenshtein(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal) - (a: Array[Char], b: Array[Char]) = - - WeightedLevenshtein(delete, insert, substitute).compare(a, b) - } - - final class StringMetricDecorator[A](val sm: StringMetric[A]) { - val withTransform: (StringTransform => StringMetric[A]) = (st) => new StringMetric[A] { - private[this] val self: StringMetric[A] = sm - private[this] val transform: StringTransform = st - - override def compare(a: Array[Char], b: Array[Char]): Option[A] = - self.compare(transform(a), transform(b)) - - override def compare(a: String, b: String): Option[A] = - self.compare(transform(a.toCharArray), transform(b.toCharArray)) - } - } -} diff --git 
a/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala b/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala deleted file mode 100755 index a011c96..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/Tokenize.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.rockymadden.stringmetric - -object Tokenize { - sealed trait Tokenizer[A] { - def tokenize(a: A): Option[Array[A]] - } - - - sealed trait StringTokenizer extends Tokenizer[Array[Char]] { - def tokenize(a: String): Option[Array[String]] - } - - - object StringTokenizer { - val NGram = NGramTokenizer - - def tokenizeWithNGram(n: Int)(charArray: Array[Char]) = NGram(n).tokenize(charArray) - } - - - final case class NGramTokenizer(n: Int) extends StringTokenizer { - override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] = - if (n <= 0 || a.length < n) None - else Some(sequence(a, Array.empty[Array[Char]], n)) - - override def tokenize(a: String): Option[Array[String]] = tokenize(a.toCharArray).map(_.map(_.mkString)) - - @annotation.tailrec - private val sequence: ((Array[Char], Array[Array[Char]], Int) => Array[Array[Char]]) = (i, o, n) => - if (i.length <= n) o :+ i - else sequence(i.tail, o :+ i.take(n), n) - } -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/Transform.scala b/core/source/main/scala/com/rockymadden/stringmetric/Transform.scala deleted file mode 100644 index c2cdace..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/Transform.scala +++ /dev/null @@ -1,71 +0,0 @@ -package com.rockymadden.stringmetric - -object Transform { - import scala.collection.immutable.NumericRange - - - type Transform[A] = (A => A) - type StringTransform = Transform[Array[Char]] - - - object StringTransform { - private final val Ascii = NumericRange(0x00, 0x7F, 1) - private final val ExtendedAscii = NumericRange(0x00, 0x7F, 1) - private final val Latin = NumericRange(0x00, 0x24F, 1) - private final val LowerCase = NumericRange(0x61, 
0x7A, 1) - private final val Numbers = NumericRange(0x30, 0x39, 1) - private final val UpperCase = NumericRange(0x41, 0x5A, 1) - - private final val filter: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => - ca.filter(c => f(c)).mkString - - private final val filterNot: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => - ca.filterNot(c => f(c)).mkString - - val filterAlpha: StringTransform = (ca) => filter(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) - }) - - val filterNotAlpha: StringTransform = (ca) => filterNot(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) - }) - - val filterAlphaNumeric: StringTransform = (ca) => filter(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) - }) - - val filterNotAlphaNumeric: StringTransform = (ca) => filterNot(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) - }) - - val filterAscii: StringTransform = (ca) => filter(ca, c => Ascii.contains(c.toInt)) - - val filterNotAscii: StringTransform = (ca) => filterNot(ca, c => Ascii.contains(c.toInt)) - - val filterExtendedAscii: StringTransform = (ca) => filter(ca, c => ExtendedAscii.contains(c.toInt)) - - val filterNotExtendedAscii: StringTransform = (ca) => filterNot(ca, c => ExtendedAscii.contains(c.toInt)) - - val filterLatin: StringTransform = (ca) => filter(ca, c => Latin.contains(c.toInt)) - - val filterNotLatin: StringTransform = (ca) => filterNot(ca, c => Latin.contains(c.toInt)) - - val filterLowerCase: StringTransform = (ca) => filter(ca, c => LowerCase.contains(c.toInt)) - - val filterNotLowerCase: StringTransform = (ca) => filterNot(ca, c => LowerCase.contains(c.toInt)) - - val filterNumeric: StringTransform = (ca) => filter(ca, c => Numbers.contains(c.toInt)) - - val filterNotNumeric: StringTransform = (ca) => filterNot(ca, c => Numbers.contains(c.toInt)) - - val filterUpperCase: 
StringTransform = (ca) => filter(ca, c => UpperCase.contains(c.toInt)) - - val filterNotUpperCase: StringTransform = (ca) => filterNot(ca, c => UpperCase.contains(c.toInt)) - - val ignoreAlphaCase: StringTransform = (ca) => ca.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c) - } -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/package.scala b/core/source/main/scala/com/rockymadden/stringmetric/package.scala deleted file mode 100755 index e5bc19d..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/package.scala +++ /dev/null @@ -1,17 +0,0 @@ -package com.rockymadden - -package object stringmetric { - import scala.language.implicitConversions - import Algorithm._ - import Metric._ - - type CompareTuple[T] = (Array[T], Array[T]) - type MatchTuple[T] = (Array[T], Array[T]) - - implicit def stringToCharArray(s: String): Array[Char] = - s.toCharArray - implicit def stringAlgorithmToDecoratedStringAlgorithm(sa: StringAlgorithm): StringAlgorithmDecorator = - new StringAlgorithmDecorator(sa) - implicit def stringMetricToDecoratedStringMetric[A](sa: StringMetric[A]): StringMetricDecorator[A] = - new StringMetricDecorator[A](sa) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala deleted file mode 100755 index 3abe7cc..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala +++ /dev/null @@ -1,105 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm - -case object MetaphoneAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} - - override def compute(a: Array[Char]): Option[Array[Char]] = - if (a.length == 0 || !(Alpha isSuperset a.head)) None - else { - val th = (transcodeHead andThen deduplicate)(a.map(_.toLower)) - val t = 
transcode(Array.empty[Char], th.head, th.tail, Array.empty[Char]) - - if (t.length == 0) None else Some(t) // Single Y or W would have 0 length. - } - - override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) - - private val deduplicate: (Array[Char] => Array[Char]) = (ca) => - if (ca.length <= 1) ca - else ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(_(0)).toArray[Char] :+ ca.last - - @annotation.tailrec - private val transcode: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) => - if (c == '\0' && r.length == 0) o - else { - def shift(d: Int, ca: Array[Char]) = { - val sca = r.splitAt(d - 1) - - ( - if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, - if (sca._2.length > 0) sca._2.head else '\0', - if (sca._2.length > 1) sca._2.tail else Array.empty[Char], - ca - ) - } - - val t = { - (c: @annotation.switch) match { - case 'a' | 'e' | 'i' | 'o' | 'u' => if (l.length == 0) shift(1, o:+ c) else shift(1, o) - case 'f' | 'j' | 'l' | 'm' | 'n' | 'r' => shift(1, o :+ c) - case 'b' => if (l.length >= 1 && l.last == 'm' && r.length == 0) shift(1, o) else shift(1, o :+ 'b') - case 'c' => - if (r.length >= 1 && r.head == 'h' && l.length >= 1 && l.last == 's') shift(1, o :+ 'k') - else if (r.length >= 2 && r.head == 'i' && r(1) == 'a') shift(3, o :+ 'x') - else if ((r.length >= 1 && r.head == 'h') - || (l.length >= 1 && r.length >= 1 && l.last == 's' && r.head == 'h')) shift(2, o :+ 'x') - else if (l.length >= 1 && r.length >= 1 && l.last == 's' - && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o) - else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o :+ 's') - else shift(1, o :+ 'k') - case 'd' => - if (r.length >= 2 && r.head == 'g' - && (r(1) == 'e' || r(1) == 'y' || r(1) == 'i')) shift(1, o :+ 'j') - else shift(1, o :+ 't') - case 'g' => - if ((r.length > 1 && r.head == 'h') - || (r.length == 1 && r.head == 'n') - || (r.length == 3 && 
r.head == 'n' && r(1) == 'e' && r(2) == 'd')) shift(1, o) - else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(2, o :+ 'j') - else shift(1, o :+ 'k') - case 'h' => - if ((l.length >= 1 && (LowercaseVowel isSuperset l.last) && (r.length == 0 || !(LowercaseVowel isSuperset r.head))) - || (l.length >= 2 && l.last == 'h' - && (l(l.length - 2) == 'c' || l(l.length - 2) == 's' || l(l.length - 2) == 'p' - || l(l.length - 2) == 't' || l(l.length - 2) == 'g'))) shift(1, o) - else shift(1, o :+ 'h') - case 'k' => if (l.length >= 1 && l.last == 'c') shift(1, o) else shift(1, o :+ 'k') - case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ 'p') - case 'q' => shift(1, o :+ 'k') - case 's' => - if (r.length >= 2 && r.head == 'i' && (r(1) == 'o' || r(1) == 'a')) shift(3, o :+ 'x') - else if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'x') - else shift(1, o :+ 's') - case 't' => - if (r.length >= 2 && r.head == 'i' && (r(1) == 'a' || r(1) == 'o')) shift(3, o :+ 'x') - else if (r.length >= 1 && r.head == 'h') shift(2, o :+ '0') - else if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(1, o) - else shift(1, o :+ 't') - case 'v' => shift(1, o :+ 'f') - case 'w' | 'y' => if (r.length == 0 || !(LowercaseVowel isSuperset r.head)) shift(1, o) else shift(1, o :+ c) - case 'x' => shift(1, (o :+ 'k') :+ 's') - case 'z' => shift(1, o :+ 's') - case _ => shift(1, o) - } - } - - transcode(t._1, t._2, t._3, t._4) - } - - private val transcodeHead: (Array[Char] => Array[Char]) = (ca) => - (ca.length: @annotation.switch) match { - case 0 => ca - case 1 => if (ca.head == 'x') Array('s') else ca - case _ => - (ca.head: @annotation.switch) match { - case 'a' if ca(1) == 'e' => ca.tail - case 'g' | 'k' | 'p' if ca(1) == 'n' => ca.tail - case 'w' if ca(1) == 'r' => ca.tail - case 'w' if ca(1) == 'h' => 'w' +: ca.drop(2) - case 'x' => 's' +: ca.tail - case _ => ca - } - } -} diff --git 
a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala deleted file mode 100755 index d06f774..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala +++ /dev/null @@ -1,15 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Metric.StringMetric - -case object MetaphoneMetric extends StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = - if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None - else MetaphoneAlgorithm.compute(a).filter(_.length > 0).flatMap { mp1 => - MetaphoneAlgorithm.compute(b).filter(_.length > 0).map(mp1.sameElements(_)) - } - - override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala deleted file mode 100755 index 3e46675..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala +++ /dev/null @@ -1,115 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm - -case object NysiisAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} - - override def compute(a: Array[Char]): Option[Array[Char]] = - if (a.length == 0 || !(Alpha isSuperset a.head)) None - else { - val tr = transcodeRight(a.map(_.toLower)) - val tl = transcodeLeft(tr._1) - val t = - if (tl._2.length == 0) tl._1 ++ tr._2 - else tl._1 ++ transcodeCenter( - Array.empty[Char], - tl._2.head, - if (tl._2.length > 1) tl._2.tail else Array.empty[Char], - Array.empty[Char] - ) 
++ tr._2 - - if (t.length == 1) Some(t) - else Some(t.head +: (cleanLast andThen cleanTerminal andThen deduplicate)(t.tail)) - } - - override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) - - private val cleanLast: (Array[Char] => Array[Char]) = (ca) => - if (ca.length == 0) ca - else if(ca.last == 'a' || ca.last == 's') - ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length) - else ca - - private val cleanTerminal: (Array[Char] => Array[Char]) = (ca) => - if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y' - else ca - - private val deduplicate: (Array[Char] => Array[Char]) = (ca) => - if (ca.length <= 1) ca - else ca.sliding(2).withFilter(a => a(0) != a(1)).map(_(0)).toArray[Char] :+ ca.last - - @annotation.tailrec - private val transcodeCenter: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) => - if (c == '\0' && r.length == 0) o - else { - def shift(d: Int, ca: Array[Char]) = { - val sca = r.splitAt(d - 1) - - ( - if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, - if (sca._2.length > 0) sca._2.head else '\0', - if (sca._2.length > 1) sca._2.tail else Array.empty[Char], - ca - ) - } - - val t = { - (c: @annotation.switch) match { - case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a') - case 'b' | 'c' | 'd' | 'f' | 'g' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' | 'y' => shift(1, o :+ c) - case 'e' => - if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f')) - else shift(1, o :+ 'a') - case 'h' => - if (l.length >= 1 && (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head)))) - shift(1, o) - else shift(1, o :+ c) - case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c') - case 'm' => shift(1, o :+ 'n') - case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c) - case 'q' => shift(1, o :+ 'g') - case 's' => - if (r.length >= 2 
&& r.head == 'c' && r(1) == 'h') shift(3, o :+ c) - else shift(1, o :+ c) - case 'w' => - if (l.length >= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o) - else shift(1, o :+ c) - case 'z' => shift(1, o :+ 's') - case _ => shift(1, o) - } - } - - transcodeCenter(t._1, t._2, t._3, t._4) - } - - private val transcodeLeft: (Array[Char] => (Array[Char], Array[Char])) = (ca) => - if (ca.length == 0) (Array.empty[Char], ca) - else { - lazy val tr2 = ca.takeRight(ca.length - 2) - lazy val tr3 = ca.takeRight(ca.length - 3) - - (ca.head: @annotation.switch) match { - case 'k' if ca.length >= 2 && ca(1) == 'n' => (Array('n', 'n'), tr2) - case 'k' => (Array('c'), ca.tail) - case 'm' if ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c') => (Array('m', 'c'), tr3) - case 'p' if ca.length >= 2 && (ca(1) == 'h' || ca(1) == 'f') => (Array('f', 'f'), tr2) - case 's' if ca.length >= 3 && (ca(1) == 'c' && ca(2) == 'h') => (Array('s', 's'), tr3) - case _ => (Array(ca.head), ca.tail) - } - } - - private val transcodeRight: (Array[Char] => (Array[Char], Array[Char])) = (ca) => - if (ca.length >= 2) { - val lc = ca(ca.length - 1) - val lcm1 = ca(ca.length - 2) - lazy val t2 = ca.take(ca.length - 2) - - (lc: @annotation.switch) match { - case 'd' if lcm1 == 'n' || lcm1 == 'r' => (t2, Array('d')) - case 'e' if lcm1 == 'e' || lcm1 == 'i' => (t2, Array('y')) - case 't' if lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r' => (t2, Array('d')) - case _ => (ca, Array.empty[Char]) - } - } else (ca, Array.empty[Char]) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala deleted file mode 100755 index c9a0914..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Metric.StringMetric - -case object NysiisMetric extends 
StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = { - val unequal: ((Char, Char) => Boolean) = (c1, c2) => { - val lc1 = c1.toLower - val lc2 = c2.toLower - - (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2) - } - - if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None - else if (unequal(a.head, b.head)) Some(false) - else NysiisAlgorithm.compute(a).filter(_.length > 0).flatMap { ny1 => - NysiisAlgorithm.compute(b).filter(_.length > 0).map(ny1.sameElements(_)) - } - } - - override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala deleted file mode 100755 index 9976847..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala +++ /dev/null @@ -1,121 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm - -case object RefinedNysiisAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} - - override def compute(a: Array[Char]): Option[Array[Char]] = - if (a.length == 0 || !(Alpha isSuperset a.head)) None - else { - val lca = a.map(_.toLower) - val tlh = (transcodeHead andThen transcodeLast)(lca.head +: cleanLast(lca.tail, Set('s', 'z'))) - val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char]) - - if (t.length == 1) Some(t) - else Some(deduplicate( - t.head +: (cleanLast.tupled andThen cleanTerminal)(t.tail, Set('a')) - )) - } - - override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString) - - private val cleanLast: ((Array[Char], Set[Char]) => 
Array[Char]) = (ca, s) => - if (ca.length == 0) ca - else if(s.contains(ca.last)) ca.dropRight(ca.reverseIterator.takeWhile(c => s.contains(c)).length) - else ca - - private val cleanTerminal: (Array[Char] => Array[Char]) = (ca) => - if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y' - else ca - - private val deduplicate: (Array[Char] => Array[Char]) = (ca) => - if (ca.length <= 1) ca - else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last - - @annotation.tailrec - private val transcode: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) => - if (c == '\0' && r.length == 0) o - else { - def shift(d: Int, ca: Array[Char]) = { - val sca = r.splitAt(d - 1) - - ( - if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, - if (sca._2.length > 0) sca._2.head else '\0', - if (sca._2.length > 1) sca._2.tail else Array.empty[Char], - ca - ) - } - - val t = { - (c: @annotation.switch) match { - case 'a' | 'i' | 'o' | 'u' => - if (l.length == 0) shift(1, o :+ c) - else shift(1, o :+ 'a') - case 'b' | 'c' | 'f' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' => shift(1, o :+ c) - case 'd' => - if (r.length >= 1 && r.head == 'g') shift(2, o :+ 'g') else shift(1, o :+ c) - case 'e' => - if (l.length == 0) shift(1, o :+ c) - else if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f')) - else shift(1, o :+ 'a') - case 'g' => - if (r.length >= 2 && r.head == 'h' && r(1) == 't') shift(3, o ++ Array('g', 't')) - else shift(1, o :+ c) - case 'h' => - if (l.length == 0) shift(1, o :+ c) - else if (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head))) - shift(1, o) - else shift(1, o :+ c) - case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c') - case 'm' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'n') - case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c) - 
case 'q' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'g') - case 's' => - if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c) - else if (r.length >= 1 && r.head == 'h') shift(2, o :+ c) - else shift(1, o :+ c) - case 'w' => - if (l.length >= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o) - else if (r.length >= 1 && r.head == 'r') shift(2, o :+ 'r') - else shift(1, o :+ c) - case 'y' => - if (l.length >= 1 && r.length >= 2 && r.head == 'w') shift(2, o :+ 'a') - else if (r.length >= 1 && r.head == 'w') shift(2, o :+ c) - else if (l.length >= 1 && r.length >= 1) shift(1, o :+ 'a') - else shift(1, o :+ c) - case 'z' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 's') - case _ => shift(1, o) - } - } - - transcode(t._1, t._2, t._3, t._4) - } - - private val transcodeHead: (Array[Char] => Array[Char]) = (ca) => - if (ca.length == 0) ca - else - (ca.head: @annotation.switch) match { - case 'm' if ca.length >= 3 && ca(1) == 'a' && ca(2) == 'c' => - Array('m', 'c') ++ ca.takeRight(ca.length - 3) - case 'p' if ca.length >= 2 && ca(1) == 'f' =>'f' +: ca.takeRight(ca.length - 2) - case _ => ca - } - - private val transcodeLast: (Array[Char] => Array[Char]) = (ca) => - if (ca.length >= 2) { - val lc = ca(ca.length - 1) - val lcm1 = ca(ca.length - 2) - lazy val t2 = ca.take(ca.length - 2) - - (lc: @annotation.switch) match { - case 'd' if lcm1 == 'n' || lcm1 == 'r' => t2 :+ 'd' - case 'e' if lcm1 == 'e' || lcm1 == 'i' || lcm1 =='y' => t2 :+ 'y' - case 't' if lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r' => t2 :+ 'd' - case 'x' if lcm1 == 'e' => t2 ++ Array('e', 'c') - case 'x' if lcm1 == 'i' => t2 ++ Array('i', 'c') - case _ => ca - } - } else ca -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala deleted file mode 100755 index 488f261..0000000 --- 
a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Metric.StringMetric - -case object RefinedNysiisMetric extends StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = { - val unequal = (c1: Char, c2: Char) => { - val lc1 = c1.toLower - val lc2 = c2.toLower - - (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2) - } - - if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None - else if (unequal(a.head, b.head)) Some(false) - else RefinedNysiisAlgorithm.compute(a).filter(_.length > 0).flatMap { rny1 => - RefinedNysiisAlgorithm.compute(b).filter(_.length > 0).map(rny1.sameElements(_)) - } - } - - override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala deleted file mode 100755 index e8f3af6..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala +++ /dev/null @@ -1,59 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm - -case object RefinedSoundexAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.Alpha - - override def compute(a: Array[Char]): Option[Array[Char]] = - if (a.length == 0 || !(Alpha isSuperset a.head)) None - else Some(transcode(a, Array(a.head.toLower))) - - override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) - - @annotation.tailrec - private val transcode: ((Array[Char], Array[Char]) => Array[Char]) = (i, o) => - if 
(i.length == 0) o - else { - val c = i.head.toLower - val m2 = (mc: Char) => (mc: @annotation.switch) match { - case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' => '0' - case 'b' | 'p' => '1' - case 'f' | 'v' => '2' - case 'c' | 'k' | 's' => '3' - case 'g' | 'j' => '4' - case 'q' | 'x' | 'z' => '5' - case 'd' | 't' => '6' - case 'l' => '7' - case 'm' | 'n' => '8' - case 'r' => '9' - case _ => '\0' - } - val m1 = (mc: Char, pc: Char) => (mc: @annotation.switch) match { - case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' if pc != '0' => '0' - case 'b' | 'p' if pc != '1' => '1' - case 'f' | 'v' if pc != '2' => '2' - case 'c' | 'k' | 's' if pc != '3' => '3' - case 'g' | 'j' if pc != '4' => '4' - case 'q' | 'x' | 'z' if pc != '5' => '5' - case 'd' | 't' if pc != '6' => '6' - case 'l' if pc != '7' => '7' - case 'm' | 'n' if pc != '8' => '8' - case 'r' if pc != '9' => '9' - case _ => '\0' - } - val a = - // Code twice. - if (o.length == 1) m2(c) - // Code once. - else m1( - c, - (o.last: @annotation.switch) match { - case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => o.last - case _ => m2(o.last) - } - ) - - transcode(i.tail, if (a != '\0') o :+ a else o) - } -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala deleted file mode 100755 index 289fe29..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Metric.StringMetric - -case object RefinedSoundexMetric extends StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = - if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None - else if (a.head.toLower != 
b.head.toLower) Some(false) - else RefinedSoundexAlgorithm.compute(a).filter(_.length > 0).flatMap { rse1 => - RefinedSoundexAlgorithm.compute(b).filter(_.length > 0).map(rse1.sameElements(_)) - } - - override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala deleted file mode 100755 index b211908..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala +++ /dev/null @@ -1,57 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Algorithm.StringAlgorithm - -case object SoundexAlgorithm extends StringAlgorithm { - import com.rockymadden.stringmetric.Alphabet.Alpha - - override def compute(a: Array[Char]): Option[Array[Char]] = - if (a.length == 0 || !(Alpha isSuperset a.head)) None - else { - val fc = a.head.toLower - - Some(transcode(a.tail, fc, Array(fc)).padTo(4, '0')) - } - - override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString) - - @annotation.tailrec - private val transcode: ((Array[Char], Char, Array[Char]) => Array[Char]) = (i, pc, o) => - if (i.length == 0) o - else { - val c = i.head.toLower - val m2 = (mc: Char) => (mc: @annotation.switch) match { - case 'b' | 'f' | 'p' | 'v' => '1' - case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2' - case 'd' | 't' => '3' - case 'l' => '4' - case 'm' | 'n' => '5' - case 'r' => '6' - case _ => '\0' - } - val m1 = (mc: Char, pc: Char) => (mc: @annotation.switch) match { - case 'b' | 'f' | 'p' | 'v' if pc != '1' => '1' - case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' if pc != '2' => '2' - case 'd' | 't' if pc != '3' => '3' - case 'l' if pc != '4' => '4' - case 'm' | 'n' if pc != '5' => '5' - case 'r' if pc != '6' => '6' - case _ => '\0' - } - val a = pc match { - 
// Code twice. - case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c) - // Code once. - case _ => m1( - c, - (o.last: @annotation.switch) match { - case '1' | '2' | '3' | '4' | '5' | '6' => o.last - case _ => m2(o.last) - } - ) - } - - if (o.length == 3 && a != '\0') o :+ a - else transcode(i.tail, c, if (a != '\0') o :+ a else o) - } -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala deleted file mode 100755 index eca32db..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.Metric.StringMetric - -case object SoundexMetric extends StringMetric[Boolean] { - import com.rockymadden.stringmetric.Alphabet.Alpha - - override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = - if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None - else if (a.head.toLower != b.head.toLower) Some(false) - else SoundexAlgorithm.compute(a).filter(_.length > 0).flatMap { se1 => - SoundexAlgorithm.compute(b).filter(_.length > 0).map(se1.sameElements(_)) - } - - final override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala deleted file mode 100755 index 0ad3915..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala +++ /dev/null @@ -1,27 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric - -/** - * An implementation of the Dice/Sorensen metric. This implementation differs in that n-gram size is required. 
- * Traditionally, the algorithm uses bigrams. - */ -final case class DiceSorensenMetric(n: Int) extends StringMetric[Double] { - import com.rockymadden.stringmetric.Tokenize.NGramTokenizer - import com.rockymadden.stringmetric.MatchTuple - - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = - if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. - else if (a.sameElements(b)) Some(1d) - else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => - NGramTokenizer(n).tokenize(b).map { ca2bg => - val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) - - (2d * ms) / (ca1bg.length + ca2bg.length) - } - } - - override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) - - private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala deleted file mode 100755 index 4a90f32..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric - -case object HammingMetric extends StringMetric[Int] { - import com.rockymadden.stringmetric.CompareTuple - - override def compare(a: Array[Char], b: Array[Char]): Option[Int] = - if (a.length == 0 || b.length == 0 || a.length != b.length) None - else if (a.sameElements(b)) Some(0) - else Some(hamming(a, b)) - - override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray) - - private val hamming: (CompareTuple[Char] => Int) = (ct) => - if (ct._1.length == 0) 0 - else ct._1.zip(ct._2).count(t => t._1 != t._2) -} diff --git 
a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala deleted file mode 100755 index 6ec5db4..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala +++ /dev/null @@ -1,20 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric - -final case class JaccardMetric(n: Int) extends StringMetric[Double] { - import com.rockymadden.stringmetric.Tokenize.NGramTokenizer - - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = - if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. - else if (a.sameElements(b)) Some(1d) - else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => - NGramTokenizer(n).tokenize(b).map { ca2bg => - val i = (ca1bg.map(_.mkString) intersect ca2bg.map(_.mkString)).length - - i.toDouble / (ca1bg.length + ca2bg.length - i) - } - } - - override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala deleted file mode 100755 index 575d67a..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala +++ /dev/null @@ -1,66 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric -import scala.Some - -/** - * An implementation of the Jaro metric. One differing detail in this implementation is that if a character is matched - * in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios. 
- */ -case object JaroMetric extends StringMetric[Double] { - import com.rockymadden.stringmetric.{CompareTuple, MatchTuple} - import scala.collection.mutable.{ArrayBuffer, HashSet} - - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = - if (a.length == 0 || b.length == 0) None - else if (a.sameElements(b)) Some(1d) - else { - val mt = `match`(a, b) - val ms = scoreMatches(mt._1, mt._2) - - if (ms == 0) Some(0d) - else { - val ts = scoreTranspositions(mt._1, mt._2) - - Some(((ms.toDouble / a.length) + (ms.toDouble / b.length) + ((ms.toDouble - ts) / ms)) / 3) - } - } - - override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) - - private val `match`: (CompareTuple[Char] => MatchTuple[Char]) = (ct) => { - lazy val window = math.abs((math.max(ct._1.length, ct._2.length) / 2d).floor.toInt - 1) - val one = ArrayBuffer.empty[Int] - val two = HashSet.empty[Int] - var i = 0 - var bi = false - - while (i < ct._1.length && !bi) { - val start = if (i - window <= 0) 0 else i - window - val end = if (i + window >= ct._2.length - 1) ct._2.length - 1 else i + window - - if (start > ct._2.length - 1) bi = !bi - else { - var ii = start - var bii = false - - while (ii <= end && !bii) { - if (!two.contains(ii) && ct._1(i) == ct._2(ii)) { - one += i - two += ii - bii = !bii - } else ii += 1 - } - - i += 1 - } - } - - (one.toArray.map(ct._1(_)), two.toArray.sortWith(_ < _).map(ct._2(_))) - } - - private val scoreMatches: (MatchTuple[Char] => Int) = (mt) => mt._1.length - - private val scoreTranspositions: (MatchTuple[Char] => Int) = (mt) => - (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala deleted file mode 100755 index e83f73f..0000000 --- 
a/core/source/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala +++ /dev/null @@ -1,23 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric - -/** - * An implementation of the Jaro-Winkler metric. One differing detail in this implementation is that if a character is - * matched in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios - * (e.g. comparing henka and henkan distance is 0.9666 versus the typical 0.9722). - */ -case object JaroWinklerMetric extends StringMetric[Double] { - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = - JaroMetric.compare(a, b).map { - case 0d => 0d - case 1d => 1d - case jaro => { - val prefix = a.zip(b).takeWhile(t => t._1 == t._2) - - jaro + ((if (prefix.length <= 4) prefix.length else 4) * 0.1d * (1 - jaro)) - } - } - - override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala deleted file mode 100755 index fb90cdc..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala +++ /dev/null @@ -1,40 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric - -case object LevenshteinMetric extends StringMetric[Int] { - import com.rockymadden.stringmetric.CompareTuple - - override def compare(a: Array[Char], b: Array[Char]): Option[Int] = - if (a.length == 0 || b.length == 0) None - else if (a.sameElements(b)) Some(0) - else Some(levenshtein(a, b)) - - override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray) - - private val levenshtein: (CompareTuple[Char] => Int) = (ct) => { - val m = Array.fill[Int](ct._1.length 
+ 1, ct._2.length + 1)(-1) - - def distance(t: (Int, Int)): Int = t match { - case (r, 0) => r - case (0, c) => c - case (r, c) if m(r)(c) != -1 => m(r)(c) - case (r, c) => { - val min = - if (ct._1(r - 1) == ct._2(c - 1)) distance(r - 1, c - 1) - else math.min( - math.min( - distance(r - 1, c) + 1, // Delete (left). - distance(r, c - 1) + 1 // Insert (up). - ), - distance(r - 1, c - 1) + 1 // Substitute (left-up). - ) - - m(r)(c) = min - min - } - } - - distance(ct._1.length, ct._2.length) - } -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala deleted file mode 100755 index 8c194ce..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric - -final case class NGramMetric(n: Int) extends StringMetric[Double] { - import com.rockymadden.stringmetric.MatchTuple - import com.rockymadden.stringmetric.Tokenize.NGramTokenizer - import scala.math - - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = - if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. 
- else if (a.sameElements(b)) Some(1d) - else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => - NGramTokenizer(n).tokenize(b).map { ca2bg => - val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString))) - - ms.toDouble / math.max(ca1bg.length, ca2bg.length) - } - } - - override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) - - private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala deleted file mode 100755 index 8f0418b..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric - -final case class OverlapMetric(n: Int) extends StringMetric[Double] { - import com.rockymadden.stringmetric.MatchTuple - import com.rockymadden.stringmetric.Tokenize.NGramTokenizer - import scala.math - - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = - if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. 
- else if (a.sameElements(b)) Some(1d) - else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => - NGramTokenizer(n).tokenize(b).map { ca2bg => - val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) - - ms.toDouble / math.min(ca1bg.length, ca2bg.length) - } - } - - override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) - - private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala deleted file mode 100755 index fa113bc..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala +++ /dev/null @@ -1,43 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric - -case object RatcliffObershelpMetric extends StringMetric[Double] { - import com.rockymadden.stringmetric.CompareTuple - - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = - if (a.length == 0 || b.length == 0) None - else if (a.sameElements(b)) Some(1d) - else Some(2d * commonSequences(a, b).foldLeft(0)(_ + _.length) / (a.length + b.length)) - - override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) - - private def longestCommonSubsequence(ct: CompareTuple[Char]) = { - val m = Array.ofDim[Int](ct._1.length + 1, ct._2.length + 1) - var lrc = (0, 0, 0) // Length, row, column. 
- - for (r <- 0 to ct._1.length - 1; c <- 0 to ct._2.length - 1) { - if (ct._1(r) == ct._2(c)) { - val l = m(r)(c) + 1 - m(r + 1)(c + 1) = l - if (l > lrc._1) lrc = (l, r + 1, c + 1) - } - } - - lrc - } - - private val commonSequences: (CompareTuple[Char] => Array[Array[Char]]) = (ct) => { - val lcs = longestCommonSubsequence(ct) - - if (lcs._1 == 0) Array.empty - else { - val sct1 = (ct._1.take(lcs._2 - lcs._1), ct._1.takeRight(ct._1.length - lcs._2)) - val sct2 = (ct._2.take(lcs._3 - lcs._1), ct._2.takeRight(ct._2.length - lcs._3)) - - Array(ct._1.slice(lcs._2 - lcs._1, lcs._2)) ++ - commonSequences(sct1._1, sct2._1) ++ - commonSequences(sct1._2, sct2._2) - } - } -} diff --git a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala deleted file mode 100755 index ae6f49c..0000000 --- a/core/source/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala +++ /dev/null @@ -1,36 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.Metric.StringMetric - -final case class WeightedLevenshteinMetric(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal) - extends StringMetric[Double] { - - import com.rockymadden.stringmetric.CompareTuple - - override def compare(a: Array[Char], b: Array[Char]): Option[Double] = - if (a.length == 0 || b.length == 0) None - else if (a.sameElements(b)) Some(0d) - else Some(weightedLevenshtein((a, b), (delete, insert, substitute)).toDouble) - - override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) - - private val weightedLevenshtein: ((CompareTuple[Char], (BigDecimal, BigDecimal, BigDecimal)) => BigDecimal) = - (ct, w) => { - val m = Array.ofDim[BigDecimal](ct._1.length + 1, ct._2.length + 1) - - for (r <- 0 to ct._1.length) m(r)(0) = w._1 * r - for (c <- 0 to ct._2.length) m(0)(c) = 
w._2 * c - - for (r <- 1 to ct._1.length; c <- 1 to ct._2.length) { - m(r)(c) = - if (ct._1(r - 1) == ct._2(c - 1)) m(r - 1)(c - 1) - else (m(r - 1)(c) + w._1).min( // Delete (left). - (m(r)(c - 1) + w._2).min( // Insert (up). - m(r - 1)(c - 1) + w._3 // Substitute (left-up). - ) - ) - } - - m(ct._1.length)(ct._2.length) - } -} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala deleted file mode 100644 index d727145..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.rockymadden.stringmetric - -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class AlgorithmSpec extends ScalaTest { - import phonetic._ - import Algorithm._ - import Transform.StringTransform - - "StringAlgorithm" should provide { - "compute method and companion object pass through" in { - StringAlgorithm.computeWithMetaphone("testone").get should - equal (MetaphoneAlgorithm.compute("testone".toCharArray).get) - StringAlgorithm.computeWithNysiis("testone").get should - equal (NysiisAlgorithm.compute("testone".toCharArray).get) - StringAlgorithm.computeWithRefinedNysiis("testone").get should - equal (RefinedNysiisAlgorithm.compute("testone".toCharArray).get) - StringAlgorithm.computeWithRefinedSoundex("testone").get should - equal (RefinedSoundexAlgorithm.compute("testone".toCharArray).get) - StringAlgorithm.computeWithSoundex("testone").get should - equal (SoundexAlgorithm.compute("testone".toCharArray).get) - } - } - - "StringAlgorithmDecorator" should provide { - "withTransform()" in { - (MetaphoneAlgorithm withTransform StringTransform.filterAlpha).compute("abc123").get should - equal (MetaphoneAlgorithm.compute("abc").get) - } - } -} - diff --git a/core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala 
b/core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala deleted file mode 100755 index 5a6a1dc..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala +++ /dev/null @@ -1,95 +0,0 @@ -package com.rockymadden.stringmetric - -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class AlphabetSpec extends ScalaTest { "Alphabet" should provide { - import Alphabet.{Alpha, Vowel} - - "an overloaded isSuperset method which accepts Char" when passed { - "non-alphabet argument" should returns { - "false" in { - Alpha isSuperset '0' should be (false) - } - } - "alphabet argument" should returns { - "true" in { - Alpha isSuperset 'a' should be (true) - Alpha isSuperset 'A' should be (true) - } - } - "non-vowel argument" should returns { - "false" in { - Vowel isSuperset 'y' should be (false) - } - } - "vowel argument" should returns { - "true" in { - Vowel isSuperset 'a' should be (true) - Vowel isSuperset 'A' should be (true) - } - } - } - "an overloaded isSuperset method which accepts Array[Char]" when passed { - "empty argument" should returns { - "false" in { - Alpha isSuperset Array.empty[Char] should be (false) - } - } - "non-alphabet argument" should returns { - "false" in { - Alpha isSuperset "hi!".toCharArray should be (false) - Alpha isSuperset "helloworld!".toCharArray should be (false) - } - } - "alphabet argument" should returns { - "true" in { - Alpha isSuperset "hi".toCharArray should be (true) - Alpha isSuperset "helloworld".toCharArray should be (true) - Alpha isSuperset "HI".toCharArray should be (true) - Alpha isSuperset "HELLOWORLD".toCharArray should be (true) - } - } - "non-vowel argument" should returns { - "false" in { - Vowel isSuperset "y".toCharArray should be (false) - } - } - "vowel argument" should returns { - "true" in { - Vowel isSuperset "a".toCharArray should be (true) - Vowel isSuperset "A".toCharArray should be (true) - } - } - } - "an 
overloaded isSuperset method which accepts String" when passed { - "empty argument" should returns { - "false" in { - Alpha isSuperset "" should be (false) - } - } - "non-alphabet argument" should returns { - "false" in { - Alpha isSuperset "helloworld!" should be (false) - } - } - "alphabet argument" should returns { - "true" in { - Alpha isSuperset "helloworld" should be (true) - Alpha isSuperset "HELLOWORLD" should be (true) - } - } - "non-vowel argument" should returns { - "false" in { - Vowel isSuperset "y" should be (false) - } - } - "vowel argument" should returns { - "true" in { - Vowel isSuperset "a" should be (true) - Vowel isSuperset "A" should be (true) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/MetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/MetricSpec.scala deleted file mode 100644 index 3b9021d..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/MetricSpec.scala +++ /dev/null @@ -1,56 +0,0 @@ -package com.rockymadden.stringmetric - -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class MetricSpec extends ScalaTest { - import phonetic._ - import similarity._ - import Metric._ - import Transform.StringTransform - - "StringMetric standalone object" should provide { - "compare method and companion object pass through" in { - StringMetric.compareWithDiceSorensen(1)("testone", "testtwo").get should - equal (DiceSorensenMetric(1).compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithHamming("testone", "testtwo").get should - equal (HammingMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithJaccard(1)("testone", "testtwo").get should - equal (JaccardMetric(1).compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithJaro("testone", "testtwo").get should - equal (JaroMetric.compare("testone".toCharArray, 
"testtwo".toCharArray).get) - StringMetric.compareWithJaroWinkler("testone", "testtwo").get should - equal (JaroWinklerMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithLevenshtein("testone", "testtwo").get should - equal (LevenshteinMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithMetaphone("testone", "testtwo").get should - equal (MetaphoneMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithNGram(1)("testone", "testtwo").get should - equal (NGramMetric(1).compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithNysiis("testone", "testtwo").get should - equal (NysiisMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithOverlap(1)("testone", "testtwo").get should - equal (OverlapMetric(1).compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithRefinedNysiis("testone", "testtwo").get should - equal (RefinedNysiisMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithRefinedSoundex("testone", "testtwo").get should - equal (RefinedSoundexMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithSoundex("testone", "testtwo").get should - equal (SoundexMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) - StringMetric.compareWithWeightedLevenshtein(1, 2, 3)("testone", "testtwo").get should - equal (WeightedLevenshteinMetric(1, 2, 3).compare("testone".toCharArray, "testtwo".toCharArray).get) - } - } - - "StringMetricDecorator" should provide { - "withTransform()" in { - (MetaphoneMetric withTransform StringTransform.filterAlpha).compare("abc123", "abc456").get should - equal (true) - (DiceSorensenMetric(1) withTransform StringTransform.filterAlpha).compare("abc123", "abc456").get should - equal (1.0) - - (MetaphoneMetric withTransform (StringTransform.filterAlpha andThen 
StringTransform.filterUpperCase)).compare("abc123", "abc456") - } - } -} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala b/core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala deleted file mode 100755 index 5f4ab62..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/ScalaTest.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.rockymadden.stringmetric - -import org.scalatest.{BeforeAndAfter, ParallelTestExecution, WordSpec} -import org.scalatest.matchers.ShouldMatchers - -trait ScalaTest extends WordSpec with ShouldMatchers with BeforeAndAfter with ParallelTestExecution { - def allows = afterWord("allow") - - def executes = afterWord("execute") - - def passed = afterWord("passed") - - def provide = afterWord("provide") - - def returns = afterWord("return") - - def throws = afterWord("throw") -} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala deleted file mode 100755 index cfba0f7..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala +++ /dev/null @@ -1,45 +0,0 @@ -package com.rockymadden.stringmetric - -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class TokenizeSpec extends ScalaTest { "NGramTokenizer" should provide { - import Tokenize._ - - "tokenize method" when passed { - "empty argument" should returns { - "None" in { - NGramTokenizer(1).tokenize("").isDefined should be (false) - } - } - "invalid n argument" should returns { - "None" in { - NGramTokenizer(0).tokenize("").isDefined should be (false) - NGramTokenizer(-1).tokenize("").isDefined should be (false) - } - } - "valid argument" should returns { - "Array[String]" in { - NGramTokenizer(1).tokenize("abcdefghijklmnopqrstuvwxyz").get should equal ( - Array( - "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", 
"r", - "s", "t", "u", "v", "w", "x", "y", "z" - ) - ) - NGramTokenizer(2).tokenize("abcdefghijklmnopqrstuvwxyz").get should equal ( - Array( - "ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl", "lm", "mn", "no", "op", - "pq", "qr", "rs", "st", "tu", "uv", "vw", "wx", "xy", "yz" - ) - ) - NGramTokenizer(3).tokenize("abcdefghijklmnopqrstuvwxyz").get should equal ( - Array( - "abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl", "klm", "lmn", "mno", - "nop", "opq", "pqr", "qrs", "rst", "stu", "tuv", "uvw", "vwx", "wxy", "xyz" - ) - ) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/TransformSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/TransformSpec.scala deleted file mode 100644 index 5e79c62..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/TransformSpec.scala +++ /dev/null @@ -1,181 +0,0 @@ -package com.rockymadden.stringmetric - -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class TransformSpec extends ScalaTest { "StringTransform" should provide { - import Transform._ - - "filterAlpha()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterAlpha( - ("aBc123" + 0x250.toChar).toCharArray - ) should equal ("aBc".toCharArray) - } - } - } - "filterNotAlpha()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterNotAlpha( - ("aBc123" + 0x250.toChar).toCharArray - ) should equal ( - ("123" + 0x250.toChar).toCharArray - ) - } - } - } - "filterAlphaNumeric()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterAlphaNumeric( - ("aBc123" + 0x250.toChar).toCharArray - ) should equal ("aBc123".toCharArray) - } - } - } - "filterNotAlphaNumeric()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterNotAlphaNumeric( - ("aBc123" + 
0x250.toChar).toCharArray - ) should equal ( - ("" + 0x250.toChar).toCharArray - ) - } - } - } - "filterAscii()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterAscii( - ("aBc" + 0x80.toChar).toCharArray - ) should equal ("aBc".toCharArray) - } - } - } - "filterNotAscii()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterNotAscii( - ("aBc" + 0x100.toChar).toCharArray - ) should equal ( - ("" + 0x100.toChar).toCharArray - ) - } - } - } - "filterExtendedAscii()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterExtendedAscii( - ("aBc" + 0x100.toChar).toCharArray - ) should equal ("aBc".toCharArray) - } - } - } - "filterNotExtendedAscii()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterNotExtendedAscii( - ("aBc" + 0x250.toChar).toCharArray - ) should equal ( - ("" + 0x250.toChar).toCharArray - ) - } - } - } - "filterLatin()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterLatin( - ("aBc" + 0x250.toChar).toCharArray - ) should equal ("aBc".toCharArray) - } - } - } - "filterNotLatin()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterNotLatin( - ("aBc" + 0x300.toChar).toCharArray - ) should equal ( - ("" + 0x300.toChar).toCharArray - ) - } - } - } - "filterLowerCase()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterLowerCase( - "aBc123" + 0x250.toChar - ) should equal ("ac".toCharArray) - } - } - } - "filterNotLowerCase()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterNotLowerCase( - ("aBc123" + 0x250.toChar).toCharArray - ) should equal ( - ("B123" + 0x250.toChar).toCharArray - ) - } - } - } - "filterNumeric()" when passed { - "String" should returns { - "transformed String" in { - 
StringTransform.filterNumeric( - ("aBc123" + 0x250.toChar).toCharArray - ) should equal ("123".toCharArray) - } - } - } - "filterNotNumeric()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterNotNumeric( - ("aBc123" + 0x250.toChar).toCharArray - ) should equal ( - ("aBc" + 0x250.toChar).toCharArray - ) - } - } - } - "filterUpperCase()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterUpperCase( - ("aBc123" + 0x250.toChar).toCharArray - ) should equal ("B".toCharArray) - } - } - } - "filterNotUpperCase()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.filterNotUpperCase( - ("aBc123" + 0x250.toChar).toCharArray - ) should equal ( - ("ac123" + 0x250.toChar).toCharArray - ) - } - } - } - "ignoreAlphaCase()" when passed { - "String" should returns { - "transformed String" in { - StringTransform.ignoreAlphaCase( - ("aBc123" + 0x250.toChar).toCharArray - ) should equal ( - ("abc123" + 0x250.toChar).toCharArray - ) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala deleted file mode 100755 index 6f56b6d..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala +++ /dev/null @@ -1,211 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class MetaphoneAlgorithmSpec extends ScalaTest { "MetaphoneAlgorithm" should provide { - "compute method" when passed { - "empty argument" should returns { - "None" in { - MetaphoneAlgorithm.compute("").isDefined should be (false) - } - } - "non-phonetic argument" should returns { - "None" in { - MetaphoneAlgorithm.compute("123").isDefined should 
be (false) - } - } - "phonetic argument" should returns { - "Some" in { - // z - MetaphoneAlgorithm.compute("z").get should equal ("s") - MetaphoneAlgorithm.compute("zz").get should equal ("s") - - // y - MetaphoneAlgorithm.compute("y").isDefined should be (false) - MetaphoneAlgorithm.compute("zy").get should equal ("s") - MetaphoneAlgorithm.compute("zyz").get should equal ("ss") - MetaphoneAlgorithm.compute("zya").get should equal ("sy") - - // x - MetaphoneAlgorithm.compute("x").get should equal ("s") - MetaphoneAlgorithm.compute("zx").get should equal ("sks") - MetaphoneAlgorithm.compute("zxz").get should equal ("skss") - - // w - MetaphoneAlgorithm.compute("w").isDefined should be (false) - MetaphoneAlgorithm.compute("zw").get should equal ("s") - MetaphoneAlgorithm.compute("zwz").get should equal ("ss") - MetaphoneAlgorithm.compute("zwa").get should equal ("sw") - - // v - MetaphoneAlgorithm.compute("v").get should equal ("f") - MetaphoneAlgorithm.compute("zv").get should equal ("sf") - MetaphoneAlgorithm.compute("zvz").get should equal ("sfs") - - // u - MetaphoneAlgorithm.compute("u").get should equal ("u") - MetaphoneAlgorithm.compute("zu").get should equal ("s") - - // t - MetaphoneAlgorithm.compute("t").get should equal ("t") - MetaphoneAlgorithm.compute("ztiaz").get should equal ("sxs") - MetaphoneAlgorithm.compute("ztioz").get should equal ("sxs") - MetaphoneAlgorithm.compute("zthz").get should equal ("s0s") - MetaphoneAlgorithm.compute("ztchz").get should equal ("sxs") - MetaphoneAlgorithm.compute("ztz").get should equal ("sts") - - // s - MetaphoneAlgorithm.compute("s").get should equal ("s") - MetaphoneAlgorithm.compute("zshz").get should equal ("sxs") - MetaphoneAlgorithm.compute("zsioz").get should equal ("sxs") - MetaphoneAlgorithm.compute("zsiaz").get should equal ("sxs") - MetaphoneAlgorithm.compute("zs").get should equal ("ss") - MetaphoneAlgorithm.compute("zsz").get should equal ("sss") - - // r - MetaphoneAlgorithm.compute("r").get should 
equal ("r") - MetaphoneAlgorithm.compute("zr").get should equal ("sr") - MetaphoneAlgorithm.compute("zrz").get should equal ("srs") - - // q - MetaphoneAlgorithm.compute("q").get should equal ("k") - MetaphoneAlgorithm.compute("zq").get should equal ("sk") - MetaphoneAlgorithm.compute("zqz").get should equal ("sks") - - // p - MetaphoneAlgorithm.compute("p").get should equal ("p") - MetaphoneAlgorithm.compute("zp").get should equal ("sp") - MetaphoneAlgorithm.compute("zph").get should equal ("sf") - MetaphoneAlgorithm.compute("zpz").get should equal ("sps") - - // o - MetaphoneAlgorithm.compute("o").get should equal ("o") - MetaphoneAlgorithm.compute("zo").get should equal ("s") - - // n - MetaphoneAlgorithm.compute("n").get should equal ("n") - MetaphoneAlgorithm.compute("zn").get should equal ("sn") - MetaphoneAlgorithm.compute("znz").get should equal ("sns") - - // m - MetaphoneAlgorithm.compute("m").get should equal ("m") - MetaphoneAlgorithm.compute("zm").get should equal ("sm") - MetaphoneAlgorithm.compute("zmz").get should equal ("sms") - - // l - MetaphoneAlgorithm.compute("l").get should equal ("l") - MetaphoneAlgorithm.compute("zl").get should equal ("sl") - MetaphoneAlgorithm.compute("zlz").get should equal ("sls") - - // k - MetaphoneAlgorithm.compute("k").get should equal ("k") - MetaphoneAlgorithm.compute("zk").get should equal ("sk") - MetaphoneAlgorithm.compute("zck").get should equal ("sk") - - // j - MetaphoneAlgorithm.compute("j").get should equal ("j") - MetaphoneAlgorithm.compute("zj").get should equal ("sj") - MetaphoneAlgorithm.compute("zjz").get should equal ("sjs") - - // i - MetaphoneAlgorithm.compute("i").get should equal ("i") - MetaphoneAlgorithm.compute("zi").get should equal ("s") - - // h - MetaphoneAlgorithm.compute("h").get should equal ("h") // php wrongly says nothing - MetaphoneAlgorithm.compute("zh").get should equal ("sh") // php wrongly says s - MetaphoneAlgorithm.compute("zah").get should equal ("s") - 
MetaphoneAlgorithm.compute("zchh").get should equal ("sx") - MetaphoneAlgorithm.compute("ha").get should equal ("h") - - // g - MetaphoneAlgorithm.compute("g").get should equal ("k") - MetaphoneAlgorithm.compute("zg").get should equal ("sk") - MetaphoneAlgorithm.compute("zgh").get should equal ("skh") // php wrongly says sf - MetaphoneAlgorithm.compute("zghz").get should equal ("shs") // php wrongly says sfs - MetaphoneAlgorithm.compute("zgha").get should equal ("sh") // php wrongly says sf others wrongly say skh - MetaphoneAlgorithm.compute("zgn").get should equal ("sn") - MetaphoneAlgorithm.compute("zgns").get should equal ("skns") - MetaphoneAlgorithm.compute("zgned").get should equal ("snt") // others wrongly says sknt - MetaphoneAlgorithm.compute("zgneds").get should equal ("sknts") // php wrongly says snts - MetaphoneAlgorithm.compute("zgi").get should equal ("sj") - MetaphoneAlgorithm.compute("zgiz").get should equal ("sjs") - MetaphoneAlgorithm.compute("zge").get should equal ("sj") - MetaphoneAlgorithm.compute("zgez").get should equal ("sjs") - MetaphoneAlgorithm.compute("zgy").get should equal ("sj") - MetaphoneAlgorithm.compute("zgyz").get should equal ("sjs") - MetaphoneAlgorithm.compute("zgz").get should equal ("sks") - - // f - MetaphoneAlgorithm.compute("f").get should equal ("f") - MetaphoneAlgorithm.compute("zf").get should equal ("sf") - MetaphoneAlgorithm.compute("zfz").get should equal ("sfs") - - // e - MetaphoneAlgorithm.compute("e").get should equal ("e") - MetaphoneAlgorithm.compute("ze").get should equal ("s") - - // d - MetaphoneAlgorithm.compute("d").get should equal ("t") - MetaphoneAlgorithm.compute("fudge").get should equal ("fjj") // php wrongly says fj - MetaphoneAlgorithm.compute("dodgy").get should equal ("tjj") // php wrongly says tj others wrongly say tjjy - MetaphoneAlgorithm.compute("dodgi").get should equal ("tjj") // php wrongly says tj - MetaphoneAlgorithm.compute("zd").get should equal ("st") - 
MetaphoneAlgorithm.compute("zdz").get should equal ("sts") - - // c - MetaphoneAlgorithm.compute("c").get should equal ("k") - MetaphoneAlgorithm.compute("zcia").get should equal ("sx") - MetaphoneAlgorithm.compute("zciaz").get should equal ("sxs") - MetaphoneAlgorithm.compute("zch").get should equal ("sx") - MetaphoneAlgorithm.compute("zchz").get should equal ("sxs") - MetaphoneAlgorithm.compute("zci").get should equal ("ss") - MetaphoneAlgorithm.compute("zciz").get should equal ("sss") - MetaphoneAlgorithm.compute("zce").get should equal ("ss") - MetaphoneAlgorithm.compute("zcez").get should equal ("sss") - MetaphoneAlgorithm.compute("zcy").get should equal ("ss") - MetaphoneAlgorithm.compute("zcyz").get should equal ("sss") - MetaphoneAlgorithm.compute("zsci").get should equal ("ss") - MetaphoneAlgorithm.compute("zsciz").get should equal ("sss") - MetaphoneAlgorithm.compute("zsce").get should equal ("ss") - MetaphoneAlgorithm.compute("zscez").get should equal ("sss") - MetaphoneAlgorithm.compute("zscy").get should equal ("ss") - MetaphoneAlgorithm.compute("zscyz").get should equal ("sss") - MetaphoneAlgorithm.compute("zsch").get should equal ("sskh") // php wrongly says ssx - MetaphoneAlgorithm.compute("zc").get should equal ("sk") - MetaphoneAlgorithm.compute("zcz").get should equal ("sks") - - // b - MetaphoneAlgorithm.compute("b").get should equal ("b") - MetaphoneAlgorithm.compute("zb").get should equal ("sb") - MetaphoneAlgorithm.compute("zbz").get should equal ("sbs") - MetaphoneAlgorithm.compute("zmb").get should equal ("sm") - - // a - MetaphoneAlgorithm.compute("a").get should equal ("a") - MetaphoneAlgorithm.compute("za").get should equal ("s") - - // Miscellaneous. 
- MetaphoneAlgorithm.compute("dumb").get should equal ("tm") - MetaphoneAlgorithm.compute("smith").get should equal ("sm0") - MetaphoneAlgorithm.compute("school").get should equal ("skhl") // php wrongly says sxl - MetaphoneAlgorithm.compute("merci").get should equal ("mrs") - MetaphoneAlgorithm.compute("cool").get should equal ("kl") - MetaphoneAlgorithm.compute("aebersold").get should equal ("ebrslt") - MetaphoneAlgorithm.compute("gnagy").get should equal ("nj") - MetaphoneAlgorithm.compute("knuth").get should equal ("n0") - MetaphoneAlgorithm.compute("pniewski").get should equal ("nsk") - MetaphoneAlgorithm.compute("wright").get should equal ("rht") // php wrongly says rft - MetaphoneAlgorithm.compute("phone").get should equal ("fn") - MetaphoneAlgorithm.compute("aggregate").get should equal ("akrkt") - MetaphoneAlgorithm.compute("accuracy").get should equal ("akkrs") - MetaphoneAlgorithm.compute("encyclopedia").get should equal ("ensklpt") - MetaphoneAlgorithm.compute("honorificabilitudinitatibus").get should equal ("hnrfkblttnttbs") - MetaphoneAlgorithm.compute("antidisestablishmentarianism").get should equal ("anttsstblxmntrnsm") - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala deleted file mode 100755 index 05401b0..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class MetaphoneMetricSpec extends ScalaTest { "MetaphoneMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - MetaphoneMetric.compare("", "").isDefined should be (false) - 
MetaphoneMetric.compare("abc", "").isDefined should be (false) - MetaphoneMetric.compare("", "xyz").isDefined should be (false) - } - } - "non-phonetic arguments" should returns { - "None" in { - MetaphoneMetric.compare("123", "123").isDefined should be (false) - MetaphoneMetric.compare("123", "").isDefined should be (false) - MetaphoneMetric.compare("", "123").isDefined should be (false) - } - } - "phonetically similar arguments" should returns { - "Boolean indicating true" in { - MetaphoneMetric.compare("dumb", "dum").get should be (true) - MetaphoneMetric.compare("smith", "smeth").get should be (true) - MetaphoneMetric.compare("merci", "mercy").get should be (true) - } - } - "phonetically dissimilar arguments" should returns { - "Boolean indicating false" in { - MetaphoneMetric.compare("dumb", "gum").get should be (false) - MetaphoneMetric.compare("smith", "kiss").get should be (false) - MetaphoneMetric.compare("merci", "burpy").get should be (false) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala deleted file mode 100755 index db8e073..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala +++ /dev/null @@ -1,189 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class NysiisAlgorithmSpec extends ScalaTest { "NysiisAlgorithm" should provide { - "compute method" when passed { - "empty argument" should returns { - "None" in { - NysiisAlgorithm.compute("").isDefined should be (false) - } - } - "non-phonetic argument" should returns { - "None" in { - NysiisAlgorithm.compute("123").isDefined should be (false) - } - } - "phonetic argument" should returns { - "Some" in { - // a - 
NysiisAlgorithm.compute("a").get should equal ("a") - NysiisAlgorithm.compute("aa").get should equal ("a") - - // b - NysiisAlgorithm.compute("b").get should equal ("b") - NysiisAlgorithm.compute("bb").get should equal ("bb") - - // c - NysiisAlgorithm.compute("c").get should equal ("c") - NysiisAlgorithm.compute("cc").get should equal ("cc") - - // d - NysiisAlgorithm.compute("d").get should equal ("d") - NysiisAlgorithm.compute("dd").get should equal ("dd") - - // e - NysiisAlgorithm.compute("e").get should equal ("e") - NysiisAlgorithm.compute("ee").get should equal ("y") - - // f - NysiisAlgorithm.compute("f").get should equal ("f") - NysiisAlgorithm.compute("ff").get should equal ("ff") - - // g - NysiisAlgorithm.compute("g").get should equal ("g") - NysiisAlgorithm.compute("gg").get should equal ("gg") - - // h - NysiisAlgorithm.compute("h").get should equal ("h") - NysiisAlgorithm.compute("hh").get should equal ("hh") - - // i - NysiisAlgorithm.compute("i").get should equal ("i") - NysiisAlgorithm.compute("ii").get should equal ("i") - - // j - NysiisAlgorithm.compute("j").get should equal ("j") - NysiisAlgorithm.compute("jj").get should equal ("jj") - - // k - NysiisAlgorithm.compute("k").get should equal ("c") - NysiisAlgorithm.compute("kk").get should equal ("cc") - - // l - NysiisAlgorithm.compute("l").get should equal ("l") - NysiisAlgorithm.compute("ll").get should equal ("ll") - - // m - NysiisAlgorithm.compute("m").get should equal ("m") - NysiisAlgorithm.compute("mm").get should equal ("mn") - - // n - NysiisAlgorithm.compute("n").get should equal ("n") - NysiisAlgorithm.compute("nn").get should equal ("nn") - - // o - NysiisAlgorithm.compute("o").get should equal ("o") - NysiisAlgorithm.compute("oo").get should equal ("o") - - // p - NysiisAlgorithm.compute("p").get should equal ("p") - NysiisAlgorithm.compute("pp").get should equal ("pp") - - // q - NysiisAlgorithm.compute("q").get should equal ("q") - NysiisAlgorithm.compute("qq").get should 
equal ("qg") - - // r - NysiisAlgorithm.compute("r").get should equal ("r") - NysiisAlgorithm.compute("rr").get should equal ("rr") - - // s - NysiisAlgorithm.compute("s").get should equal ("s") - NysiisAlgorithm.compute("ss").get should equal ("s") - - // t - NysiisAlgorithm.compute("t").get should equal ("t") - NysiisAlgorithm.compute("tt").get should equal ("tt") - - // u - NysiisAlgorithm.compute("u").get should equal ("u") - NysiisAlgorithm.compute("uu").get should equal ("u") - - // v - NysiisAlgorithm.compute("v").get should equal ("v") - NysiisAlgorithm.compute("vv").get should equal ("vv") - - // w - NysiisAlgorithm.compute("w").get should equal ("w") - NysiisAlgorithm.compute("ww").get should equal ("ww") - - // x - NysiisAlgorithm.compute("x").get should equal ("x") - NysiisAlgorithm.compute("xx").get should equal ("xx") - - // y - NysiisAlgorithm.compute("y").get should equal ("y") - NysiisAlgorithm.compute("yy").get should equal ("yy") - - // z - NysiisAlgorithm.compute("z").get should equal ("z") - NysiisAlgorithm.compute("zz").get should equal ("z") - - // Head cases. - NysiisAlgorithm.compute("mac").get should equal ("mc") - NysiisAlgorithm.compute("kn").get should equal ("nn") - NysiisAlgorithm.compute("k").get should equal ("c") - NysiisAlgorithm.compute("ph").get should equal ("ff") - NysiisAlgorithm.compute("pf").get should equal ("ff") - NysiisAlgorithm.compute("sch").get should equal ("s") // dropby wrongly says ss - - // Last cases. - NysiisAlgorithm.compute("ee").get should equal ("y") - NysiisAlgorithm.compute("ie").get should equal ("y") - NysiisAlgorithm.compute("dt").get should equal ("d") - NysiisAlgorithm.compute("rt").get should equal ("d") - NysiisAlgorithm.compute("rd").get should equal ("d") - NysiisAlgorithm.compute("nt").get should equal ("d") - NysiisAlgorithm.compute("nd").get should equal ("d") - - // Core cases. 
- NysiisAlgorithm.compute("eev").get should equal ("eaf") - NysiisAlgorithm.compute("zev").get should equal ("zaf") - NysiisAlgorithm.compute("kkn").get should equal ("cn") - NysiisAlgorithm.compute("sschn").get should equal ("ssn") - NysiisAlgorithm.compute("pph").get should equal ("pf") - - // Miscellaneous. - NysiisAlgorithm.compute("macdonald").get should equal ("mcdanald") - NysiisAlgorithm.compute("phone").get should equal ("ffan") - NysiisAlgorithm.compute("aggregate").get should equal ("agragat") - NysiisAlgorithm.compute("accuracy").get should equal ("acaracy") - NysiisAlgorithm.compute("encyclopedia").get should equal ("encyclapad") - NysiisAlgorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab") - NysiisAlgorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn") - - // Dropby. - NysiisAlgorithm.compute("macintosh").get should equal ("mcant") - NysiisAlgorithm.compute("knuth").get should equal ("nnat") - NysiisAlgorithm.compute("koehn").get should equal ("can") // dropby wrongly says c - NysiisAlgorithm.compute("phillipson").get should equal ("ffalapsan") - NysiisAlgorithm.compute("pfeister").get should equal ("ffastar") - NysiisAlgorithm.compute("schoenhoeft").get should equal ("ssanaft") - NysiisAlgorithm.compute("mckee").get should equal ("mcy") - NysiisAlgorithm.compute("heitschmedt").get should equal ("hatsnad") - NysiisAlgorithm.compute("bart").get should equal ("bad") - NysiisAlgorithm.compute("hurd").get should equal ("had") - NysiisAlgorithm.compute("hunt").get should equal ("had") - NysiisAlgorithm.compute("westerlund").get should equal ("wastarlad") - NysiisAlgorithm.compute("casstevens").get should equal ("castafan") - NysiisAlgorithm.compute("vasquez").get should equal ("vasg") - NysiisAlgorithm.compute("frazier").get should equal ("frasar") - NysiisAlgorithm.compute("bowman").get should equal ("banan") - NysiisAlgorithm.compute("mcknight").get should equal 
("mcnagt") - NysiisAlgorithm.compute("rickert").get should equal ("racad") - NysiisAlgorithm.compute("deutsch").get should equal ("dat") // dropby wrongly says dats - NysiisAlgorithm.compute("westphal").get should equal ("wastfal") - NysiisAlgorithm.compute("shriver").get should equal ("shravar") - NysiisAlgorithm.compute("kuhl").get should equal ("cal") // dropby wrongly says c - NysiisAlgorithm.compute("rawson").get should equal ("rasan") - NysiisAlgorithm.compute("jiles").get should equal ("jal") - NysiisAlgorithm.compute("carraway").get should equal ("caray") - NysiisAlgorithm.compute("yamada").get should equal ("yanad") - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala deleted file mode 100755 index 7e52101..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class NysiisMetricSpec extends ScalaTest { - "NysiisMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - NysiisMetric.compare("", "").isDefined should be (false) - NysiisMetric.compare("abc", "").isDefined should be (false) - NysiisMetric.compare("", "xyz").isDefined should be (false) - } - } - "non-phonetic arguments" should returns { - "None" in { - NysiisMetric.compare("123", "123").isDefined should be (false) - NysiisMetric.compare("123", "").isDefined should be (false) - NysiisMetric.compare("", "123").isDefined should be (false) - } - } - "phonetically similar arguments" should returns { - "Boolean indicating true" in { - NysiisMetric.compare("ham", "hum").get should be (true) - } - } - "phonetically dissimilar 
arguments" should returns { - "Boolean indicating false" in { - NysiisMetric.compare("dumb", "gum").get should be (false) - } - } - } - } -} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala deleted file mode 100755 index 9602f16..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala +++ /dev/null @@ -1,206 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class RefinedNysiisAlgorithmSpec extends ScalaTest { "RefinedNysiisAlgorithm" should provide { - "compute method" when passed { - "empty argument" should returns { - "None" in { - RefinedNysiisAlgorithm.compute("").isDefined should be (false) - } - } - "non-phonetic argument" should returns { - "None" in { - RefinedNysiisAlgorithm.compute("123").isDefined should be (false) - } - } - "phonetic argument" should returns { - "Some" in { - // a - RefinedNysiisAlgorithm.compute("a").get should equal ("a") - RefinedNysiisAlgorithm.compute("aa").get should equal ("a") - - // b - RefinedNysiisAlgorithm.compute("b").get should equal ("b") - RefinedNysiisAlgorithm.compute("bb").get should equal ("b") - - // c - RefinedNysiisAlgorithm.compute("c").get should equal ("c") - RefinedNysiisAlgorithm.compute("cc").get should equal ("c") - - // d - RefinedNysiisAlgorithm.compute("d").get should equal ("d") - RefinedNysiisAlgorithm.compute("dd").get should equal ("d") - - // e - RefinedNysiisAlgorithm.compute("e").get should equal ("e") - RefinedNysiisAlgorithm.compute("ee").get should equal ("y") - - // f - RefinedNysiisAlgorithm.compute("f").get should equal ("f") - RefinedNysiisAlgorithm.compute("ff").get should equal ("f") - - // g - 
RefinedNysiisAlgorithm.compute("g").get should equal ("g") - RefinedNysiisAlgorithm.compute("gg").get should equal ("g") - - // h - RefinedNysiisAlgorithm.compute("h").get should equal ("h") - RefinedNysiisAlgorithm.compute("hh").get should equal ("h") - - // i - RefinedNysiisAlgorithm.compute("i").get should equal ("i") - RefinedNysiisAlgorithm.compute("ii").get should equal ("i") - - // j - RefinedNysiisAlgorithm.compute("j").get should equal ("j") - RefinedNysiisAlgorithm.compute("jj").get should equal ("j") - - // k - RefinedNysiisAlgorithm.compute("k").get should equal ("c") - RefinedNysiisAlgorithm.compute("kk").get should equal ("c") - - // l - RefinedNysiisAlgorithm.compute("l").get should equal ("l") - RefinedNysiisAlgorithm.compute("ll").get should equal ("l") - - // m - RefinedNysiisAlgorithm.compute("m").get should equal ("m") - RefinedNysiisAlgorithm.compute("mm").get should equal ("mn") - - // n - RefinedNysiisAlgorithm.compute("n").get should equal ("n") - RefinedNysiisAlgorithm.compute("nn").get should equal ("n") - - // o - RefinedNysiisAlgorithm.compute("o").get should equal ("o") - RefinedNysiisAlgorithm.compute("oo").get should equal ("o") - - // p - RefinedNysiisAlgorithm.compute("p").get should equal ("p") - RefinedNysiisAlgorithm.compute("pp").get should equal ("p") - - // q - RefinedNysiisAlgorithm.compute("q").get should equal ("q") - RefinedNysiisAlgorithm.compute("qq").get should equal ("qg") - - // r - RefinedNysiisAlgorithm.compute("r").get should equal ("r") - RefinedNysiisAlgorithm.compute("rr").get should equal ("r") - - // s - RefinedNysiisAlgorithm.compute("s").get should equal ("s") - RefinedNysiisAlgorithm.compute("ss").get should equal ("s") - - // t - RefinedNysiisAlgorithm.compute("t").get should equal ("t") - RefinedNysiisAlgorithm.compute("tt").get should equal ("t") - - // u - RefinedNysiisAlgorithm.compute("u").get should equal ("u") - RefinedNysiisAlgorithm.compute("uu").get should equal ("u") - - // v - 
RefinedNysiisAlgorithm.compute("v").get should equal ("v") - RefinedNysiisAlgorithm.compute("vv").get should equal ("v") - - // w - RefinedNysiisAlgorithm.compute("w").get should equal ("w") - RefinedNysiisAlgorithm.compute("ww").get should equal ("w") - - // x - RefinedNysiisAlgorithm.compute("x").get should equal ("x") - RefinedNysiisAlgorithm.compute("xx").get should equal ("x") - - // y - RefinedNysiisAlgorithm.compute("y").get should equal ("y") - RefinedNysiisAlgorithm.compute("yy").get should equal ("y") - RefinedNysiisAlgorithm.compute("ybyb").get should equal ("ybab") - - // z - RefinedNysiisAlgorithm.compute("z").get should equal ("z") - RefinedNysiisAlgorithm.compute("zz").get should equal ("z") - - // Head cases. - RefinedNysiisAlgorithm.compute("mac").get should equal ("mc") - RefinedNysiisAlgorithm.compute("pf").get should equal ("f") - - // Last cases. - RefinedNysiisAlgorithm.compute("ix").get should equal ("ic") - RefinedNysiisAlgorithm.compute("ex").get should equal ("ec") - RefinedNysiisAlgorithm.compute("ye").get should equal ("y") - RefinedNysiisAlgorithm.compute("ee").get should equal ("y") - RefinedNysiisAlgorithm.compute("ie").get should equal ("y") - RefinedNysiisAlgorithm.compute("dt").get should equal ("d") - RefinedNysiisAlgorithm.compute("rt").get should equal ("d") - RefinedNysiisAlgorithm.compute("rd").get should equal ("d") - RefinedNysiisAlgorithm.compute("nt").get should equal ("d") - RefinedNysiisAlgorithm.compute("nd").get should equal ("d") - - // Core cases. 
- RefinedNysiisAlgorithm.compute("bevb").get should equal ("bafb") - RefinedNysiisAlgorithm.compute("bghtb").get should equal ("bgtb") - RefinedNysiisAlgorithm.compute("bdgb").get should equal ("bgb") - RefinedNysiisAlgorithm.compute("bphb").get should equal ("bfb") - RefinedNysiisAlgorithm.compute("bknb").get should equal ("bnb") - RefinedNysiisAlgorithm.compute("bshb").get should equal ("bsb") - RefinedNysiisAlgorithm.compute("bschb").get should equal ("bsb") - RefinedNysiisAlgorithm.compute("bywb").get should equal ("bab") - RefinedNysiisAlgorithm.compute("byw").get should equal ("by") - RefinedNysiisAlgorithm.compute("ywb").get should equal ("yb") - RefinedNysiisAlgorithm.compute("bwrb").get should equal ("brb") - - // Transcode cases. - RefinedNysiisAlgorithm.compute("bay").get should equal ("by") - - // Miscellaneous. - RefinedNysiisAlgorithm.compute("macdonald").get should equal ("mcdanald") - RefinedNysiisAlgorithm.compute("phone").get should equal ("fan") - RefinedNysiisAlgorithm.compute("aggregate").get should equal ("agragat") - RefinedNysiisAlgorithm.compute("accuracy").get should equal ("acaracy") - RefinedNysiisAlgorithm.compute("encyclopedia").get should equal ("encaclapad") - RefinedNysiisAlgorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab") - RefinedNysiisAlgorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn") - - // Dropby. 
- RefinedNysiisAlgorithm.compute("edwards").get should equal ("edwad") - RefinedNysiisAlgorithm.compute("parez").get should equal ("par") - RefinedNysiisAlgorithm.compute("macintosh").get should equal ("mcantas") - RefinedNysiisAlgorithm.compute("phillipson").get should equal ("falapsan") - RefinedNysiisAlgorithm.compute("haddix").get should equal ("hadac") - RefinedNysiisAlgorithm.compute("essex").get should equal ("esac") - RefinedNysiisAlgorithm.compute("moye").get should equal ("my") - RefinedNysiisAlgorithm.compute("mckee").get should equal ("mcy") - RefinedNysiisAlgorithm.compute("mackie").get should equal ("mcy") - RefinedNysiisAlgorithm.compute("heitschmidt").get should equal ("hatsnad") - RefinedNysiisAlgorithm.compute("bart").get should equal ("bad") - RefinedNysiisAlgorithm.compute("hurd").get should equal ("had") - RefinedNysiisAlgorithm.compute("hunt").get should equal ("had") - RefinedNysiisAlgorithm.compute("westerlund").get should equal ("wastarlad") - RefinedNysiisAlgorithm.compute("evers").get should equal ("evar") - RefinedNysiisAlgorithm.compute("devito").get should equal ("dafat") - RefinedNysiisAlgorithm.compute("rawson").get should equal ("rasan") - RefinedNysiisAlgorithm.compute("shoulders").get should equal ("saldar") - RefinedNysiisAlgorithm.compute("leighton").get should equal ("lagtan") - RefinedNysiisAlgorithm.compute("wooldridge").get should equal ("waldrag") - RefinedNysiisAlgorithm.compute("oliphant").get should equal ("olafad") - RefinedNysiisAlgorithm.compute("hatchett").get should equal ("hatcat") - RefinedNysiisAlgorithm.compute("mcknight").get should equal ("mcnagt") - RefinedNysiisAlgorithm.compute("rickert").get should equal ("racad") - RefinedNysiisAlgorithm.compute("bowman").get should equal ("banan") - RefinedNysiisAlgorithm.compute("vasquez").get should equal ("vasg") - RefinedNysiisAlgorithm.compute("bashaw").get should equal ("bas") - RefinedNysiisAlgorithm.compute("schoenhoeft").get should equal ("sanaft") // dropby 
wrongly says scanaft - RefinedNysiisAlgorithm.compute("heywood").get should equal ("had") - RefinedNysiisAlgorithm.compute("hayman").get should equal ("hanan") - RefinedNysiisAlgorithm.compute("seawright").get should equal ("saragt") - RefinedNysiisAlgorithm.compute("kratzer").get should equal ("cratsar") - RefinedNysiisAlgorithm.compute("canaday").get should equal ("canady") - RefinedNysiisAlgorithm.compute("crepeau").get should equal ("crap") - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala deleted file mode 100755 index 8c6d9c1..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala +++ /dev/null @@ -1,35 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class RefinedNysiisMetricSpec extends ScalaTest { "RefinedNysiisMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - RefinedNysiisMetric.compare("", "").isDefined should be (false) - RefinedNysiisMetric.compare("abc", "").isDefined should be (false) - RefinedNysiisMetric.compare("", "xyz").isDefined should be (false) - } - } - "non-phonetic arguments" should returns { - "None" in { - RefinedNysiisMetric.compare("123", "123").isDefined should be (false) - RefinedNysiisMetric.compare("123", "").isDefined should be (false) - RefinedNysiisMetric.compare("", "123").isDefined should be (false) - } - } - "phonetically similar arguments" should returns { - "Boolean indicating true" in { - RefinedNysiisMetric.compare("ham", "hum").get should be (true) - } - } - "phonetically dissimilar arguments" should returns { - "Boolean indicating false" in { - RefinedNysiisMetric.compare("dumb", 
"gum").get should be (false) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala deleted file mode 100755 index ca39da8..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala +++ /dev/null @@ -1,160 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class RefinedSoundexAlgorithmSpec extends ScalaTest { "RefinedSoundexAlgorithm" should provide { - "compute method" when passed { - "empty argument" should returns { - "None" in { - RefinedSoundexAlgorithm.compute("").isDefined should be (false) - } - } - "non-phonetic argument" should returns { - "None" in { - RefinedSoundexAlgorithm.compute("123").isDefined should be (false) - } - } - "phonetic argument" should returns { - "Some" in { - // a - RefinedSoundexAlgorithm.compute("a").get should equal ("a0") - RefinedSoundexAlgorithm.compute("aa").get should equal ("a0") - - // b - RefinedSoundexAlgorithm.compute("b").get should equal ("b1") - RefinedSoundexAlgorithm.compute("bb").get should equal ("b1") - - // c - RefinedSoundexAlgorithm.compute("c").get should equal ("c3") - RefinedSoundexAlgorithm.compute("cc").get should equal ("c3") - - // d - RefinedSoundexAlgorithm.compute("d").get should equal ("d6") - RefinedSoundexAlgorithm.compute("dd").get should equal ("d6") - - // e - RefinedSoundexAlgorithm.compute("e").get should equal ("e0") - RefinedSoundexAlgorithm.compute("ee").get should equal ("e0") - - // f - RefinedSoundexAlgorithm.compute("f").get should equal ("f2") - RefinedSoundexAlgorithm.compute("ff").get should equal ("f2") - - // g - RefinedSoundexAlgorithm.compute("g").get should equal ("g4") - 
RefinedSoundexAlgorithm.compute("gg").get should equal ("g4") - - // h - RefinedSoundexAlgorithm.compute("h").get should equal ("h0") - RefinedSoundexAlgorithm.compute("hh").get should equal ("h0") - - // i - RefinedSoundexAlgorithm.compute("i").get should equal ("i0") - RefinedSoundexAlgorithm.compute("ii").get should equal ("i0") - - // j - RefinedSoundexAlgorithm.compute("j").get should equal ("j4") - RefinedSoundexAlgorithm.compute("jj").get should equal ("j4") - - // k - RefinedSoundexAlgorithm.compute("k").get should equal ("k3") - RefinedSoundexAlgorithm.compute("kk").get should equal ("k3") - - // l - RefinedSoundexAlgorithm.compute("l").get should equal ("l7") - RefinedSoundexAlgorithm.compute("ll").get should equal ("l7") - - // m - RefinedSoundexAlgorithm.compute("m").get should equal ("m8") - RefinedSoundexAlgorithm.compute("mm").get should equal ("m8") - - // n - RefinedSoundexAlgorithm.compute("n").get should equal ("n8") - RefinedSoundexAlgorithm.compute("nn").get should equal ("n8") - - // o - RefinedSoundexAlgorithm.compute("o").get should equal ("o0") - RefinedSoundexAlgorithm.compute("oo").get should equal ("o0") - - // p - RefinedSoundexAlgorithm.compute("p").get should equal ("p1") - RefinedSoundexAlgorithm.compute("pp").get should equal ("p1") - - // q - RefinedSoundexAlgorithm.compute("q").get should equal ("q5") - RefinedSoundexAlgorithm.compute("qq").get should equal ("q5") - - // r - RefinedSoundexAlgorithm.compute("r").get should equal ("r9") - RefinedSoundexAlgorithm.compute("rr").get should equal ("r9") - - // s - RefinedSoundexAlgorithm.compute("s").get should equal ("s3") - RefinedSoundexAlgorithm.compute("ss").get should equal ("s3") - - // t - RefinedSoundexAlgorithm.compute("t").get should equal ("t6") - RefinedSoundexAlgorithm.compute("tt").get should equal ("t6") - - // u - RefinedSoundexAlgorithm.compute("u").get should equal ("u0") - RefinedSoundexAlgorithm.compute("uu").get should equal ("u0") - - // v - 
RefinedSoundexAlgorithm.compute("v").get should equal ("v2") - RefinedSoundexAlgorithm.compute("vv").get should equal ("v2") - - // w - RefinedSoundexAlgorithm.compute("w").get should equal ("w0") - RefinedSoundexAlgorithm.compute("ww").get should equal ("w0") - - // x - RefinedSoundexAlgorithm.compute("x").get should equal ("x5") - RefinedSoundexAlgorithm.compute("xx").get should equal ("x5") - - // y - RefinedSoundexAlgorithm.compute("y").get should equal ("y0") - RefinedSoundexAlgorithm.compute("yy").get should equal ("y0") - - // z - RefinedSoundexAlgorithm.compute("z").get should equal ("z5") - RefinedSoundexAlgorithm.compute("zz").get should equal ("z5") - - // Starting with letter then numbers. - RefinedSoundexAlgorithm.compute("x123456").get should equal ("x5") - RefinedSoundexAlgorithm.compute("a123456").get should equal ("a0") - RefinedSoundexAlgorithm.compute("f123456").get should equal ("f2") - - // Miscellaneous. - RefinedSoundexAlgorithm.compute("braz").get should equal ("b1905") - RefinedSoundexAlgorithm.compute("broz").get should equal ("b1905") - RefinedSoundexAlgorithm.compute("caren").get should equal ("c30908") - RefinedSoundexAlgorithm.compute("carren").get should equal ("c30908") - RefinedSoundexAlgorithm.compute("coram").get should equal ("c30908") - RefinedSoundexAlgorithm.compute("corran").get should equal ("c30908") - RefinedSoundexAlgorithm.compute("curreen").get should equal ("c30908") - RefinedSoundexAlgorithm.compute("curwen").get should equal ("c30908") - RefinedSoundexAlgorithm.compute("hairs").get should equal ("h093") - RefinedSoundexAlgorithm.compute("hark").get should equal ("h093") - RefinedSoundexAlgorithm.compute("hars").get should equal ("h093") - RefinedSoundexAlgorithm.compute("hayers").get should equal ("h093") - RefinedSoundexAlgorithm.compute("heers").get should equal ("h093") - RefinedSoundexAlgorithm.compute("hiers").get should equal ("h093") - RefinedSoundexAlgorithm.compute("lambard").get should equal ("l7081096") - 
RefinedSoundexAlgorithm.compute("lambart").get should equal ("l7081096") - RefinedSoundexAlgorithm.compute("lambert").get should equal ("l7081096") - RefinedSoundexAlgorithm.compute("lambird").get should equal ("l7081096") - RefinedSoundexAlgorithm.compute("lampaert").get should equal ("l7081096") - RefinedSoundexAlgorithm.compute("lampart").get should equal ("l7081096") - RefinedSoundexAlgorithm.compute("lamport").get should equal ("l7081096") - RefinedSoundexAlgorithm.compute("limbert").get should equal ("l7081096") - RefinedSoundexAlgorithm.compute("lombard").get should equal ("l7081096") - RefinedSoundexAlgorithm.compute("nolton").get should equal ("n807608") - RefinedSoundexAlgorithm.compute("noulton").get should equal ("n807608") - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala deleted file mode 100755 index 84f547a..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala +++ /dev/null @@ -1,35 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class RefinedSoundexMetricSpec extends ScalaTest { "RefinedSoundexMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - RefinedSoundexMetric.compare("", "").isDefined should be (false) - RefinedSoundexMetric.compare("abc", "").isDefined should be (false) - RefinedSoundexMetric.compare("", "xyz").isDefined should be (false) - } - } - "non-phonetic arguments" should returns { - "None" in { - RefinedSoundexMetric.compare("123", "123").isDefined should be (false) - RefinedSoundexMetric.compare("123", "").isDefined should be (false) - RefinedSoundexMetric.compare("", "123").isDefined should 
be (false) - } - } - "phonetically similar arguments" should returns { - "Boolean indicating true" in { - RefinedSoundexMetric.compare("robert", "rupert").get should be (true) - } - } - "phonetically dissimilar arguments" should returns { - "Boolean indicating false" in { - RefinedSoundexMetric.compare("robert", "rubin").get should be (false) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala deleted file mode 100755 index 157a24b..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala +++ /dev/null @@ -1,159 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class SoundexAlgorithmSpec extends ScalaTest { "SoundexAlgorithm" should provide { - "compute method" when passed { - "empty argument" should returns { - "None" in { - SoundexAlgorithm.compute("").isDefined should be (false) - } - } - "non-phonetic argument" should returns { - "None" in { - SoundexAlgorithm.compute("123").isDefined should be (false) - } - } - "phonetic argument" should returns { - "Some" in { - // a - SoundexAlgorithm.compute("a").get should equal ("a000") - SoundexAlgorithm.compute("aa").get should equal ("a000") - - // b - SoundexAlgorithm.compute("b").get should equal ("b000") - SoundexAlgorithm.compute("bb").get should equal ("b000") - - // c - SoundexAlgorithm.compute("c").get should equal ("c000") - SoundexAlgorithm.compute("cc").get should equal ("c000") - - // d - SoundexAlgorithm.compute("d").get should equal ("d000") - SoundexAlgorithm.compute("dd").get should equal ("d000") - - // e - SoundexAlgorithm.compute("e").get should equal ("e000") - SoundexAlgorithm.compute("ee").get should equal ("e000") - - // f - 
SoundexAlgorithm.compute("f").get should equal ("f000") - SoundexAlgorithm.compute("ff").get should equal ("f000") - - // g - SoundexAlgorithm.compute("g").get should equal ("g000") - SoundexAlgorithm.compute("gg").get should equal ("g000") - - // h - SoundexAlgorithm.compute("h").get should equal ("h000") - SoundexAlgorithm.compute("hh").get should equal ("h000") - - // i - SoundexAlgorithm.compute("i").get should equal ("i000") - SoundexAlgorithm.compute("ii").get should equal ("i000") - - // j - SoundexAlgorithm.compute("j").get should equal ("j000") - SoundexAlgorithm.compute("jj").get should equal ("j000") - - // k - SoundexAlgorithm.compute("k").get should equal ("k000") - SoundexAlgorithm.compute("kk").get should equal ("k000") - - // l - SoundexAlgorithm.compute("l").get should equal ("l000") - SoundexAlgorithm.compute("ll").get should equal ("l000") - - // m - SoundexAlgorithm.compute("m").get should equal ("m000") - SoundexAlgorithm.compute("mm").get should equal ("m000") - - // n - SoundexAlgorithm.compute("n").get should equal ("n000") - SoundexAlgorithm.compute("nn").get should equal ("n000") - - // o - SoundexAlgorithm.compute("o").get should equal ("o000") - SoundexAlgorithm.compute("oo").get should equal ("o000") - - // p - SoundexAlgorithm.compute("p").get should equal ("p000") - SoundexAlgorithm.compute("pp").get should equal ("p000") - - // q - SoundexAlgorithm.compute("q").get should equal ("q000") - SoundexAlgorithm.compute("qq").get should equal ("q000") - - // r - SoundexAlgorithm.compute("r").get should equal ("r000") - SoundexAlgorithm.compute("rr").get should equal ("r000") - - // s - SoundexAlgorithm.compute("s").get should equal ("s000") - SoundexAlgorithm.compute("ss").get should equal ("s000") - - // t - SoundexAlgorithm.compute("t").get should equal ("t000") - SoundexAlgorithm.compute("tt").get should equal ("t000") - - // u - SoundexAlgorithm.compute("u").get should equal ("u000") - SoundexAlgorithm.compute("uu").get should equal 
("u000") - - // v - SoundexAlgorithm.compute("v").get should equal ("v000") - SoundexAlgorithm.compute("vv").get should equal ("v000") - - // w - SoundexAlgorithm.compute("w").get should equal ("w000") - SoundexAlgorithm.compute("ww").get should equal ("w000") - - // x - SoundexAlgorithm.compute("x").get should equal ("x000") - SoundexAlgorithm.compute("xx").get should equal ("x000") - - // y - SoundexAlgorithm.compute("y").get should equal ("y000") - SoundexAlgorithm.compute("yy").get should equal ("y000") - - // z - SoundexAlgorithm.compute("z").get should equal ("z000") - SoundexAlgorithm.compute("zz").get should equal ("z000") - - // Starting with letter then numbers. - SoundexAlgorithm.compute("x123456").get should equal ("x000") - SoundexAlgorithm.compute("a123456").get should equal ("a000") - SoundexAlgorithm.compute("f123456").get should equal ("f000") - - // Miscellaneous. - SoundexAlgorithm.compute("abc").get should equal ("a120") - SoundexAlgorithm.compute("xyz").get should equal ("x200") - SoundexAlgorithm.compute("robert").get should equal ("r163") - SoundexAlgorithm.compute("rupert").get should equal ("r163") - SoundexAlgorithm.compute("rubin").get should equal ("r150") - SoundexAlgorithm.compute("ashcraft").get should equal ("a261") - SoundexAlgorithm.compute("tymczak").get should equal ("t522") - SoundexAlgorithm.compute("pfister").get should equal ("p236") - SoundexAlgorithm.compute("euler").get should equal ("e460") - SoundexAlgorithm.compute("gauss").get should equal ("g200") - SoundexAlgorithm.compute("hilbert").get should equal ("h416") - SoundexAlgorithm.compute("knuth").get should equal ("k530") - SoundexAlgorithm.compute("lloyd").get should equal ("l300") - SoundexAlgorithm.compute("lukasiewicz").get should equal ("l222") - SoundexAlgorithm.compute("ashcroft").get should equal ("a261") - SoundexAlgorithm.compute("tymczak").get should equal ("t522") - SoundexAlgorithm.compute("pfister").get should equal ("p236") - 
SoundexAlgorithm.compute("ellery").get should equal ("e460") - SoundexAlgorithm.compute("ghosh").get should equal ("g200") - SoundexAlgorithm.compute("heilbronn").get should equal ("h416") - SoundexAlgorithm.compute("kant").get should equal ("k530") - SoundexAlgorithm.compute("ladd").get should equal ("l300") - SoundexAlgorithm.compute("lissajous").get should equal ("l222") - SoundexAlgorithm.compute("fusedale").get should equal ("f234") - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala deleted file mode 100755 index b903add..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala +++ /dev/null @@ -1,35 +0,0 @@ -package com.rockymadden.stringmetric.phonetic - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class SoundexMetricSpec extends ScalaTest { "SoundexMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - SoundexMetric.compare("", "").isDefined should be (false) - SoundexMetric.compare("abc", "").isDefined should be (false) - SoundexMetric.compare("", "xyz").isDefined should be (false) - } - } - "non-phonetic arguments" should returns { - "None" in { - SoundexMetric.compare("123", "123").isDefined should be (false) - SoundexMetric.compare("123", "").isDefined should be (false) - SoundexMetric.compare("", "123").isDefined should be (false) - } - } - "phonetically similar arguments" should returns { - "Boolean indicating true" in { - SoundexMetric.compare("robert", "rupert").get should be (true) - } - } - "phonetically dissimilar arguments" should returns { - "Boolean indicating false" in { - SoundexMetric.compare("robert", "rubin").get should be (false) - } - } - } -}} diff --git 
a/core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala deleted file mode 100755 index b7a3b58..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala +++ /dev/null @@ -1,60 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class DiceSorensenMetricSpec extends ScalaTest { "DiceSorensenMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - DiceSorensenMetric(1).compare("", "").isDefined should be (false) - DiceSorensenMetric(1).compare("abc", "").isDefined should be (false) - DiceSorensenMetric(1).compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "1" in { - DiceSorensenMetric(1).compare("abc", "abc").get should be (1) - DiceSorensenMetric(2).compare("abc", "abc").get should be (1) - DiceSorensenMetric(2).compare("abc", "abc").get should be (1) - } - } - "unequal arguments" should returns { - "0" in { - DiceSorensenMetric(1).compare("abc", "xyz").get should be (0) - DiceSorensenMetric(2).compare("abc", "xyz").get should be (0) - DiceSorensenMetric(3).compare("abc", "xyz").get should be (0) - } - } - "invalid arguments" should returns { - "None" in { - DiceSorensenMetric(2).compare("n", "naght").isDefined should be (false) - DiceSorensenMetric(2).compare("night", "n").isDefined should be (false) - DiceSorensenMetric(3).compare("ni", "naght").isDefined should be (false) - DiceSorensenMetric(3).compare("night", "na").isDefined should be (false) - } - } - "valid arguments" should returns { - "Double indicating distance" in { - DiceSorensenMetric(1).compare("night", "nacht").get should be (0.6) - 
DiceSorensenMetric(1).compare("night", "naght").get should be (0.8) - DiceSorensenMetric(1).compare("context", "contact").get should be (0.7142857142857143) - - DiceSorensenMetric(2).compare("night", "nacht").get should be (0.25) - DiceSorensenMetric(2).compare("night", "naght").get should be (0.5) - DiceSorensenMetric(2).compare("context", "contact").get should be (0.5) - DiceSorensenMetric(2).compare("contextcontext", "contact").get should be (0.3157894736842105) - DiceSorensenMetric(2).compare("context", "contactcontact").get should be (0.3157894736842105) - DiceSorensenMetric(2).compare("ht", "nacht").get should be (0.4) - DiceSorensenMetric(2).compare("xp", "nacht").get should be (0) - DiceSorensenMetric(2).compare("ht", "hththt").get should be (0.3333333333333333) - - DiceSorensenMetric(3).compare("night", "nacht").get should be (0) - DiceSorensenMetric(3).compare("night", "naght").get should be (0.3333333333333333) - DiceSorensenMetric(3).compare("context", "contact").get should be (0.4) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala deleted file mode 100755 index abb56db..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class HammingMetricSpec extends ScalaTest { "HammingMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - HammingMetric.compare("", "").isDefined should be (false) - HammingMetric.compare("abc", "").isDefined should be (false) - HammingMetric.compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "0" in 
{ - HammingMetric.compare("abc", "abc").get should be (0) - HammingMetric.compare("123", "123").get should be (0) - } - } - "unequal arguments" should returns { - "Int indicating distance" in { - HammingMetric.compare("abc", "xyz").get should be (3) - HammingMetric.compare("123", "456").get should be (3) - } - } - "valid arguments" should returns { - "Int indicating distance" in { - HammingMetric.compare("toned", "roses").get should be (3) - HammingMetric.compare("1011101", "1001001").get should be (2) - HammingMetric.compare("2173896", "2233796").get should be (3) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala deleted file mode 100755 index 4c04193..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala +++ /dev/null @@ -1,60 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class JaccardMetricSpec extends ScalaTest { "JaccardMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - JaccardMetric(1).compare("", "").isDefined should be (false) - JaccardMetric(1).compare("abc", "").isDefined should be (false) - JaccardMetric(1).compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "1" in { - JaccardMetric(1).compare("abc", "abc").get should be (1) - JaccardMetric(2).compare("abc", "abc").get should be (1) - JaccardMetric(3).compare("abc", "abc").get should be (1) - } - } - "unequal arguments" should returns { - "0" in { - JaccardMetric(1).compare("abc", "xyz").get should be (0) - JaccardMetric(2).compare("abc", "xyz").get should be (0) - JaccardMetric(3).compare("abc", "xyz").get should be (0) - } - } - 
"invalid arguments" should returns { - "None" in { - JaccardMetric(2).compare("n", "naght").isDefined should be (false) - JaccardMetric(2).compare("night", "n").isDefined should be (false) - JaccardMetric(3).compare("ni", "naght").isDefined should be (false) - JaccardMetric(3).compare("night", "na").isDefined should be (false) - } - } - "valid arguments" should returns { - "Double indicating distance" in { - JaccardMetric(1).compare("night", "nacht").get should be (0.42857142857142855) - JaccardMetric(1).compare("night", "naght").get should be (0.6666666666666666) - JaccardMetric(1).compare("context", "contact").get should be (0.5555555555555556) - - JaccardMetric(2).compare("night", "nacht").get should be (0.14285714285714285) - JaccardMetric(2).compare("night", "naght").get should be (0.3333333333333333) - JaccardMetric(2).compare("context", "contact").get should be (0.3333333333333333) - JaccardMetric(2).compare("contextcontext", "contact").get should be (0.1875) - JaccardMetric(2).compare("context", "contactcontact").get should be (0.1875) - JaccardMetric(2).compare("ht", "nacht").get should be (0.25) - JaccardMetric(2).compare("xp", "nacht").get should be (0) - JaccardMetric(2).compare("ht", "hththt").get should be (0.2) - - JaccardMetric(3).compare("night", "nacht").get should be (0) - JaccardMetric(3).compare("night", "naght").get should be (0.2) - JaccardMetric(3).compare("context", "contact").get should be (0.25) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala deleted file mode 100755 index 26ae38e..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala +++ /dev/null @@ -1,51 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - 
-@RunWith(classOf[JUnitRunner]) -final class JaroMetricSpec extends ScalaTest { "JaroMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - JaroMetric.compare("", "").isDefined should be (false) - JaroMetric.compare("abc", "").isDefined should be (false) - JaroMetric.compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "1" in { - JaroMetric.compare("a", "a").get should be (1) - JaroMetric.compare("abc", "abc").get should be (1) - JaroMetric.compare("123", "123").get should be (1) - } - } - "unequal arguments" should returns { - "0" in { - JaroMetric.compare("abc", "xyz").get should be (0) - JaroMetric.compare("123", "456").get should be (0) - } - } - "valid arguments" should returns { - "Double indicating distance" in { - JaroMetric.compare("aa", "a").get should be (0.8333333333333334) - JaroMetric.compare("a", "aa").get should be (0.8333333333333334) - JaroMetric.compare("veryveryverylong", "v").get should be (0.6875) - JaroMetric.compare("v", "veryveryverylong").get should be (0.6875) - JaroMetric.compare("martha", "marhta").get should be (0.9444444444444445) - JaroMetric.compare("dwayne", "duane").get should be (0.8222222222222223) - JaroMetric.compare("dixon", "dicksonx").get should be (0.7666666666666666) - JaroMetric.compare("abcvwxyz", "cabvwxyz").get should be (0.9583333333333334) - JaroMetric.compare("jones", "johnson").get should be (0.7904761904761904) - JaroMetric.compare("henka", "henkan").get should be (0.9444444444444445) - JaroMetric.compare("fvie", "ten").get should be (0) - - JaroMetric.compare("zac ephron", "zac efron").get should be > - JaroMetric.compare("zac ephron", "kai ephron").get - JaroMetric.compare("brittney spears", "britney spears").get should be > - JaroMetric.compare("brittney spears", "brittney startzman").get - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala 
b/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala deleted file mode 100755 index d645456..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala +++ /dev/null @@ -1,51 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class JaroWinklerMetricSpec extends ScalaTest { "JaroWinklerMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - JaroWinklerMetric.compare("", "").isDefined should be (false) - JaroWinklerMetric.compare("abc", "").isDefined should be (false) - JaroWinklerMetric.compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "1" in { - JaroWinklerMetric.compare("a", "a").get should be (1) - JaroWinklerMetric.compare("abc", "abc").get should be (1) - JaroWinklerMetric.compare("123", "123").get should be (1) - } - } - "unequal arguments" should returns { - "0" in { - JaroWinklerMetric.compare("abc", "xyz").get should be (0) - JaroWinklerMetric.compare("123", "456").get should be (0) - } - } - "valid arguments" should returns { - "Double indicating distance" in { - JaroWinklerMetric.compare("aa", "a").get should be (0.8500000000000001) - JaroWinklerMetric.compare("a", "aa").get should be (0.8500000000000001) - JaroWinklerMetric.compare("veryveryverylong", "v").get should be (0.71875) - JaroWinklerMetric.compare("v", "veryveryverylong").get should be (0.71875) - JaroWinklerMetric.compare("martha", "marhta").get should be (0.9611111111111111) - JaroWinklerMetric.compare("dwayne", "duane").get should be (0.8400000000000001) - JaroWinklerMetric.compare("dixon", "dicksonx").get should be (0.8133333333333332) - JaroWinklerMetric.compare("abcvwxyz", "cabvwxyz").get should be (0.9583333333333334) - 
JaroWinklerMetric.compare("jones", "johnson").get should be (0.8323809523809523) - JaroWinklerMetric.compare("henka", "henkan").get should be (0.9666666666666667) - JaroWinklerMetric.compare("fvie", "ten").get should be (0) - - JaroWinklerMetric.compare("zac ephron", "zac efron").get should be > - JaroWinklerMetric.compare("zac ephron", "kai ephron").get - JaroWinklerMetric.compare("brittney spears", "britney spears").get should be > - JaroWinklerMetric.compare("brittney spears", "brittney startzman").get - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala deleted file mode 100755 index 8e1538b..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala +++ /dev/null @@ -1,50 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class LevenshteinMetricSpec extends ScalaTest { "LevenshteinMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - LevenshteinMetric.compare("", "").isDefined should be (false) - LevenshteinMetric.compare("abc", "").isDefined should be (false) - LevenshteinMetric.compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "0" in { - LevenshteinMetric.compare("abc", "abc").get should be (0) - LevenshteinMetric.compare("123", "123").get should be (0) - } - } - "unequal arguments" should returns { - "Int indicating distance" in { - LevenshteinMetric.compare("abc", "xyz").get should be (3) - LevenshteinMetric.compare("123", "456").get should be (3) - } - } - "valid arguments" should returns { - "Int indicating distance" in { - LevenshteinMetric.compare("abc", "a").get should be (2) - 
LevenshteinMetric.compare("a", "abc").get should be (2) - LevenshteinMetric.compare("abc", "c").get should be (2) - LevenshteinMetric.compare("c", "abc").get should be (2) - LevenshteinMetric.compare("sitting", "kitten").get should be (3) - LevenshteinMetric.compare("kitten", "sitting").get should be (3) - LevenshteinMetric.compare("cake", "drake").get should be (2) - LevenshteinMetric.compare("drake", "cake").get should be (2) - LevenshteinMetric.compare("saturday", "sunday").get should be (3) - LevenshteinMetric.compare("sunday", "saturday").get should be (3) - LevenshteinMetric.compare("book", "back").get should be (2) - LevenshteinMetric.compare("dog", "fog").get should be (1) - LevenshteinMetric.compare("foq", "fog").get should be (1) - LevenshteinMetric.compare("fvg", "fog").get should be (1) - LevenshteinMetric.compare("encyclopedia", "encyclopediaz").get should be (1) - LevenshteinMetric.compare("encyclopediz", "encyclopediaz").get should be (1) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala deleted file mode 100755 index 5e1abb8..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala +++ /dev/null @@ -1,60 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class NGramMetricSpec extends ScalaTest { "NGramMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - NGramMetric(1).compare("", "").isDefined should be (false) - NGramMetric(1).compare("abc", "").isDefined should be (false) - NGramMetric(1).compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "1" in { - NGramMetric(1).compare("abc", "abc").get 
should be (1) - NGramMetric(2).compare("abc", "abc").get should be (1) - NGramMetric(3).compare("abc", "abc").get should be (1) - } - } - "unequal arguments" should returns { - "0" in { - NGramMetric(1).compare("abc", "xyz").get should be (0) - NGramMetric(2).compare("abc", "xyz").get should be (0) - NGramMetric(3).compare("abc", "xyz").get should be (0) - } - } - "invalid arguments" should returns { - "None" in { - NGramMetric(2).compare("n", "naght").isDefined should be (false) - NGramMetric(2).compare("night", "n").isDefined should be (false) - NGramMetric(3).compare("ni", "naght").isDefined should be (false) - NGramMetric(3).compare("night", "na").isDefined should be (false) - } - } - "valid arguments" should returns { - "Double indicating distance" in { - NGramMetric(1).compare("night", "nacht").get should be (0.6) - NGramMetric(1).compare("night", "naght").get should be (0.8) - NGramMetric(1).compare("context", "contact").get should be (0.7142857142857143) - - NGramMetric(2).compare("night", "nacht").get should be (0.25) - NGramMetric(2).compare("night", "naght").get should be (0.5) - NGramMetric(2).compare("context", "contact").get should be (0.5) - NGramMetric(2).compare("contextcontext", "contact").get should be (0.23076923076923078) - NGramMetric(2).compare("context", "contactcontact").get should be (0.23076923076923078) - NGramMetric(2).compare("ht", "nacht").get should be (0.25) - NGramMetric(2).compare("xp", "nacht").get should be (0) - NGramMetric(2).compare("ht", "hththt").get should be (0.2) - - NGramMetric(3).compare("night", "nacht").get should be (0) - NGramMetric(3).compare("night", "naght").get should be (0.3333333333333333) - NGramMetric(3).compare("context", "contact").get should be (0.4) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala deleted file mode 100755 index ec1e7b2..0000000 --- 
a/core/source/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala +++ /dev/null @@ -1,62 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class OverlapMetricSpec extends ScalaTest { "OverlapMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - OverlapMetric(1).compare("", "").isDefined should be (false) - OverlapMetric(1).compare("abc", "").isDefined should be (false) - OverlapMetric(1).compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "1" in { - OverlapMetric(1).compare("abc", "abc").get should be (1) - OverlapMetric(2).compare("abc", "abc").get should be (1) - OverlapMetric(3).compare("abc", "abc").get should be (1) - } - } - "unequal arguments" should returns { - "0" in { - OverlapMetric(1).compare("abc", "xyz").get should be (0) - OverlapMetric(2).compare("abc", "xyz").get should be (0) - OverlapMetric(3).compare("abc", "xyz").get should be (0) - } - } - "invalid arguments" should returns { - "None" in { - OverlapMetric(2).compare("n", "naght").isDefined should be (false) - OverlapMetric(2).compare("night", "n").isDefined should be (false) - OverlapMetric(3).compare("ni", "naght").isDefined should be (false) - OverlapMetric(3).compare("night", "na").isDefined should be (false) - } - } - "valid arguments" should returns { - "Double indicating distance" in { - OverlapMetric(1).compare("bob", "bobman").get should be (1) - OverlapMetric(1).compare("bob", "manbobman").get should be (1) - OverlapMetric(1).compare("night", "nacht").get should be (0.6) - OverlapMetric(1).compare("night", "naght").get should be (0.8) - OverlapMetric(1).compare("context", "contact").get should be (0.7142857142857143) - - OverlapMetric(2).compare("night", "nacht").get should be (0.25) - 
OverlapMetric(2).compare("night", "naght").get should be (0.5) - OverlapMetric(2).compare("context", "contact").get should be (0.5) - OverlapMetric(2).compare("contextcontext", "contact").get should be (0.5) - OverlapMetric(2).compare("context", "contactcontact").get should be (0.5) - OverlapMetric(2).compare("ht", "nacht").get should be (1) - OverlapMetric(2).compare("xp", "nacht").get should be (0) - OverlapMetric(2).compare("ht", "hththt").get should be (1) - - OverlapMetric(3).compare("night", "nacht").get should be (0) - OverlapMetric(3).compare("night", "naght").get should be (0.3333333333333333) - OverlapMetric(3).compare("context", "contact").get should be (0.4) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala deleted file mode 100755 index 30bbf49..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala +++ /dev/null @@ -1,40 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class RatcliffObershelpMetricSpec extends ScalaTest { "RatcliffObershelpMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - RatcliffObershelpMetric.compare("", "").isDefined should be (false) - RatcliffObershelpMetric.compare("abc", "").isDefined should be (false) - RatcliffObershelpMetric.compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "0" in { - RatcliffObershelpMetric.compare("abc", "abc").get should be (1) - RatcliffObershelpMetric.compare("123", "123").get should be (1) - } - } - "unequal arguments" should returns { - "Double indicating distance" in { - 
RatcliffObershelpMetric.compare("abc", "xyz").get should be (0) - RatcliffObershelpMetric.compare("123", "456").get should be (0) - } - } - "valid arguments" should returns { - "Double indicating distance" in { - RatcliffObershelpMetric.compare("aleksander", "alexandre").get should be (0.7368421052631579) - RatcliffObershelpMetric.compare("alexandre", "aleksander").get should be (0.7368421052631579) - RatcliffObershelpMetric.compare("pennsylvania", "pencilvaneya").get should be (0.6666666666666666) - RatcliffObershelpMetric.compare("pencilvaneya", "pennsylvania").get should be (0.6666666666666666) - RatcliffObershelpMetric.compare("abcefglmn", "abefglmo").get should be (0.8235294117647058) - RatcliffObershelpMetric.compare("abefglmo", "abcefglmn").get should be (0.8235294117647058) - } - } - } -}} diff --git a/core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala b/core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala deleted file mode 100755 index 070458a..0000000 --- a/core/source/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.rockymadden.stringmetric.similarity - -import com.rockymadden.stringmetric.ScalaTest -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner - -@RunWith(classOf[JUnitRunner]) -final class WeightedLevenshteinMetricSpec extends ScalaTest { "WeightedLevenshteinMetric" should provide { - "compare method" when passed { - "empty arguments" should returns { - "None" in { - WeightedLevenshteinMetric(10, 0.1, 1).compare("", "").isDefined should be (false) - WeightedLevenshteinMetric(10, 0.1, 1).compare("abc", "").isDefined should be (false) - WeightedLevenshteinMetric(10, 0.1, 1).compare("", "xyz").isDefined should be (false) - } - } - "equal arguments" should returns { - "0" in { - WeightedLevenshteinMetric(10, 0.1, 1).compare("abc", "abc").get should be (0) - 
WeightedLevenshteinMetric(10, 0.1, 1).compare("123", "123").get should be (0) - } - } - "unequal arguments" should returns { - "Double indicating distance" in { - WeightedLevenshteinMetric(10, 0.1, 1).compare("abc", "xyz").get should be (3) - WeightedLevenshteinMetric(10, 0.1, 1).compare("123", "456").get should be (3) - } - } - "valid arguments" should returns { - "Double indicating distance" in { - WeightedLevenshteinMetric(10, 0.1, 1).compare("az", "z").get should be (10) - WeightedLevenshteinMetric(10, 0.1, 1).compare("z", "az").get should be (0.1) - WeightedLevenshteinMetric(10, 0.1, 1).compare("a", "z").get should be (1) - WeightedLevenshteinMetric(10, 0.1, 1).compare("z", "a").get should be (1) - WeightedLevenshteinMetric(10, 0.1, 1).compare("ab", "yz").get should be (2) - WeightedLevenshteinMetric(10, 0.1, 1).compare("yz", "ab").get should be (2) - WeightedLevenshteinMetric(10, 0.1, 1).compare("0", "0123456789").get should be (0.9) - WeightedLevenshteinMetric(10, 0.1, 1).compare("0123456789", "0").get should be (90) - WeightedLevenshteinMetric(10, 0.1, 1).compare("book", "back").get should be (2) - WeightedLevenshteinMetric(10, 0.1, 1).compare("back", "book").get should be (2) - WeightedLevenshteinMetric(10, 0.1, 1).compare("hosp", "hospital").get should be (0.4) - WeightedLevenshteinMetric(10, 0.1, 1).compare("hospital", "hosp").get should be (40) - WeightedLevenshteinMetric(10, 0.1, 1).compare("clmbs blvd", "columbus boulevard").get should be (0.8) - WeightedLevenshteinMetric(10, 0.1, 1).compare("columbus boulevard", "clmbs blvd").get should be (80) - } - } - } -}} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala new file mode 100755 index 0000000..55a6238 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/CaliperBenchmark.scala @@ -0,0 +1,7 @@ +package com.rockymadden.stringmetric + +import 
com.google.caliper.SimpleBenchmark + +trait CaliperBenchmark extends SimpleBenchmark { + def run(reps: Int)(code: => Unit) = (0 until reps).foreach(i => code) +} \ No newline at end of file diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala new file mode 100755 index 0000000..4474a8d --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/CaliperRunner.scala @@ -0,0 +1,7 @@ +package com.rockymadden.stringmetric + +import com.google.caliper.{Benchmark, Runner} + +abstract class CaliperRunner(private[this] val suite: java.lang.Class[_ <: Benchmark]) { + def main(args: Array[String]): Unit = Runner.main(suite, args) +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala new file mode 100755 index 0000000..fe5c80d --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala @@ -0,0 +1,26 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class MetaphoneAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + MetaphoneAlgorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + MetaphoneAlgorithm.compute(string) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala 
b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala new file mode 100755 index 0000000..c9b59fc --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricBenchmark.scala @@ -0,0 +1,49 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class MetaphoneMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + MetaphoneMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + MetaphoneMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + MetaphoneMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + MetaphoneMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala new file mode 100755 index 0000000..ac04cd7 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala @@ -0,0 +1,26 @@ +package 
com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class NysiisAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + NysiisAlgorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + NysiisAlgorithm.compute(string) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala new file mode 100755 index 0000000..f975d29 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricBenchmark.scala @@ -0,0 +1,49 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class NysiisMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + 
NysiisMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + NysiisMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + NysiisMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + NysiisMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala new file mode 100755 index 0000000..4141b37 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmBenchmark.scala @@ -0,0 +1,26 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class RefinedNysiisAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + RefinedNysiisAlgorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + RefinedNysiisAlgorithm.compute(string) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala new file mode 100755 index 0000000..d927f18 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricBenchmark.scala @@ -0,0 +1,49 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param 
+import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class RefinedNysiisMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + RefinedNysiisMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + RefinedNysiisMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + RefinedNysiisMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + RefinedNysiisMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala new file mode 100755 index 0000000..ec8d53c --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala @@ -0,0 +1,26 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class RefinedSoundexAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var 
string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + RefinedSoundexAlgorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + RefinedSoundexAlgorithm.compute(string) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala new file mode 100755 index 0000000..6c52e8d --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala @@ -0,0 +1,49 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class RefinedSoundexMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + RefinedSoundexMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + RefinedSoundexMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + 
RefinedSoundexMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + RefinedSoundexMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala new file mode 100755 index 0000000..9dc67de --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala @@ -0,0 +1,26 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class SoundexAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + SoundexAlgorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + SoundexAlgorithm.compute(string) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala new file mode 100755 index 0000000..7707019 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricBenchmark.scala @@ -0,0 +1,49 @@ +package com.rockymadden.stringmetric.phonetic + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.annotation.tailrec +import scala.util.Random + +final class SoundexMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + 
var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + SoundexMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + SoundexMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + SoundexMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + SoundexMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala new file mode 100755 index 0000000..5df27fb --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricBenchmark.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class DiceSorensenMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == 
null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + DiceSorensenMetric(2).compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + DiceSorensenMetric(2).compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + DiceSorensenMetric(2).compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + DiceSorensenMetric(2).compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala new file mode 100755 index 0000000..3d04074 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/HammingMetricBenchmark.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class HammingMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + HammingMetric.compare(charArray1, charArray2) + } + + 
def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + HammingMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + HammingMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + HammingMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala new file mode 100755 index 0000000..ddeef7a --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaccardMetricBenchmark.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class JaccardMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + JaccardMetric(2).compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + JaccardMetric(2).compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + JaccardMetric(2).compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + JaccardMetric(2).compare(string1, 
string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala new file mode 100755 index 0000000..47baa6d --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroMetricBenchmark.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class JaroMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + JaroMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + JaroMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + JaroMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + JaroMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala new file mode 100755 index 0000000..c11a01c --- /dev/null +++ 
b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricBenchmark.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class JaroWinklerMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + JaroWinklerMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + JaroWinklerMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + JaroWinklerMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + JaroWinklerMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala new file mode 100755 index 0000000..a8460e2 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricBenchmark.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final 
class LevenshteinMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + LevenshteinMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + LevenshteinMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + LevenshteinMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + LevenshteinMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala new file mode 100755 index 0000000..8ba1bb0 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/NGramMetricBenchmark.scala @@ -0,0 +1,51 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class NGramMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + @Param(Array("2", "3")) + var n: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { 
+ @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + NGramMetric(n).compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + NGramMetric(n).compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + NGramMetric(n).compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + NGramMetric(n).compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala new file mode 100755 index 0000000..89207f2 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/OverlapMetricBenchmark.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class OverlapMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = 
string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + OverlapMetric(2).compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + OverlapMetric(2).compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + OverlapMetric(2).compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + OverlapMetric(2).compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala new file mode 100755 index 0000000..86196ab --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricBenchmark.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class RatcliffObershelpMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + RatcliffObershelpMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + RatcliffObershelpMetric.compare(string1, string2) + } + + def 
timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + RatcliffObershelpMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + RatcliffObershelpMetric.compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala new file mode 100755 index 0000000..837ce01 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class WeightedLevenshteinMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + override protected def setUp() { + @annotation.tailrec + def random(l: Int, ps: String = null): String = + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + WeightedLevenshteinMetric(1, 1, 1).compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + WeightedLevenshteinMetric(1, 1, 1).compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + WeightedLevenshteinMetric(1, 1, 1).compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + WeightedLevenshteinMetric(1, 
1, 1).compare(string1, string1) + } +} diff --git a/core/src/benchmark/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerBenchmark.scala b/core/src/benchmark/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerBenchmark.scala new file mode 100755 index 0000000..d66bf88 --- /dev/null +++ b/core/src/benchmark/scala/com/rockymadden/stringmetric/tokenize/NGramTokenizerBenchmark.scala @@ -0,0 +1,29 @@ +package com.rockymadden.stringmetric.tokenize + +import com.google.caliper.Param +import com.rockymadden.stringmetric.{CaliperBenchmark, CaliperRunner} +import scala.util.Random + +final class NGramTokenizerBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + @Param(Array("2", "3")) + var n: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + override protected def setUp() { + string = Random.alphanumeric.take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + NGramTokenizer(n).tokenize(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + NGramTokenizer(n).tokenize(string) + } +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Algorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/Algorithm.scala new file mode 100755 index 0000000..84f136d --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/Algorithm.scala @@ -0,0 +1,46 @@ +package com.rockymadden.stringmetric + +object Algorithm { + import Transform.StringTransform + + + trait Algorithm[A] { + def compute(a: A): Option[A] + } + + + trait StringAlgorithm extends Algorithm[Array[Char]] { + def compute(a: String): Option[String] + } + + + object StringAlgorithm { + final val Metaphone = phonetic.MetaphoneAlgorithm + final val Nysiis = phonetic.NysiisAlgorithm + final val RefinedNysiis = phonetic.RefinedNysiisAlgorithm + final val RefinedSoundex = phonetic.RefinedSoundexAlgorithm + final val Soundex = 
phonetic.SoundexAlgorithm
+
+    def computeWithMetaphone(a: Array[Char]) = Metaphone.compute(a)
+
+    def computeWithNysiis(a: Array[Char]) = Nysiis.compute(a)
+
+    def computeWithRefinedNysiis(a: Array[Char]) = RefinedNysiis.compute(a)
+
+    def computeWithRefinedSoundex(a: Array[Char]) = RefinedSoundex.compute(a)
+
+    def computeWithSoundex(a: Array[Char]) = Soundex.compute(a)
+  }
+
+
+  final class StringAlgorithmDecorator(val sa: StringAlgorithm) {
+    val withTransform: (StringTransform => StringAlgorithm) = (st) => new StringAlgorithm {
+      private[this] val self: StringAlgorithm = sa
+      private[this] val transform: StringTransform = st
+
+      override def compute(a: Array[Char]): Option[Array[Char]] = self.compute(transform(a))
+
+      override def compute(a: String): Option[String] = self.compute(transform(a.toCharArray)).map(_.mkString)
+    }
+  }
+}
diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Alphabet.scala b/core/src/main/scala/com/rockymadden/stringmetric/Alphabet.scala
new file mode 100755
index 0000000..5e666d2
--- /dev/null
+++ b/core/src/main/scala/com/rockymadden/stringmetric/Alphabet.scala
@@ -0,0 +1,51 @@
+package com.rockymadden.stringmetric
+
+import scala.collection.immutable.Set
+
+/** Named sets of unaccented Latin letters plus membership tests against them. */
+object Alphabet {
+  /** A fixed set of characters.
+    *
+    * @param chars the characters that belong to this alphabet
+    */
+  sealed abstract class AlphabetSet(val chars: Set[Char]) {
+    /** @return true when `a` belongs to this alphabet */
+    def isSuperset(a: Char): Boolean = chars.contains(a)
+
+    /** @return true when `a` is non-empty and every element belongs to this alphabet */
+    def isSuperset(a: Array[Char]): Boolean = a.length > 0 && a.forall(chars.contains)
+
+    /** @return the array-overload result for the characters of `a`; false for the empty string */
+    def isSuperset(a: String): Boolean = isSuperset(a.toCharArray)
+  }
+
+
+  // 'y' and 'Y' are left out of the consonant and vowel sets and get their own sets below.
+  case object LowercaseConsonant extends AlphabetSet(
+    Set('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x' ,'z')
+  )
+
+  case object UppercaseConsonant extends AlphabetSet(
+    Set('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X' ,'Z')
+  )
+
+  case object Consonant extends AlphabetSet(LowercaseConsonant.chars ++ UppercaseConsonant.chars)
+
+  case object LowercaseVowel extends AlphabetSet(Set('a', 'e', 'i', 'o', 'u'))
+
+  case object UppercaseVowel extends AlphabetSet(Set('A', 'E', 'I', 'O', 'U'))
+
+  case object Vowel extends AlphabetSet(LowercaseVowel.chars ++ UppercaseVowel.chars)
+
+  case object LowercaseY extends AlphabetSet(Set('y'))
+
+  case object UppercaseY extends AlphabetSet(Set('Y'))
+
+  case object Y extends AlphabetSet(LowercaseY.chars ++ UppercaseY.chars)
+
+  case object LowercaseAlpha extends AlphabetSet(LowercaseConsonant.chars ++ LowercaseVowel.chars ++ LowercaseY.chars)
+
+  case object UppercaseAlpha extends AlphabetSet(UppercaseConsonant.chars ++ UppercaseVowel.chars ++ UppercaseY.chars)
+
+  case object Alpha extends AlphabetSet(LowercaseAlpha.chars ++ UppercaseAlpha.chars)
+}
diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Metric.scala b/core/src/main/scala/com/rockymadden/stringmetric/Metric.scala
new file mode 100755
index 0000000..f45dd14
--- /dev/null
+++ b/core/src/main/scala/com/rockymadden/stringmetric/Metric.scala
@@ -0,0 +1,77 @@
+package com.rockymadden.stringmetric
+
+object Metric {
+  import Transform.StringTransform
+
+
+  trait Metric[A, B] {
+    def compare(a: A, b: A): Option[B]
+  }
+
+
+  trait StringMetric[A] extends Metric[Array[Char], A] {
+    def compare(a: String, b: String): Option[A]
+  }
+
+
+  object StringMetric {
+    final val DiceSorensen = similarity.DiceSorensenMetric
+    final val Hamming = similarity.HammingMetric
+    final val Jaccard = similarity.JaccardMetric
+    final val Jaro = similarity.JaroMetric
+    final val JaroWinkler = similarity.JaroWinklerMetric
+    final val Levenshtein = similarity.LevenshteinMetric
+    final val Metaphone = phonetic.MetaphoneMetric
+    final val NGram = similarity.NGramMetric
+    final val Nysiis = phonetic.NysiisMetric
+    final val Overlap = similarity.OverlapMetric
+    final val RefinedNysiis = phonetic.RefinedNysiisMetric
+    final val RefinedSoundex = phonetic.RefinedSoundexMetric
+    final val Soundex = phonetic.SoundexMetric
+    final val
WeightedLevenshtein = similarity.WeightedLevenshteinMetric + + def compareWithDiceSorensen(n: Int)(a: Array[Char], b: Array[Char]) = DiceSorensen(n).compare(a, b) + + def compareWithHamming(a: Array[Char], b: Array[Char]) = Hamming.compare(a, b) + + def compareWithJaccard(n: Int)(a: Array[Char], b: Array[Char]) = Jaccard(n).compare(a, b) + + def compareWithJaro(a: Array[Char], b: Array[Char]) = Jaro.compare(a, b) + + def compareWithJaroWinkler(a: Array[Char], b: Array[Char]) = JaroWinkler.compare(a, b) + + def compareWithLevenshtein(a: Array[Char], b: Array[Char]) = Levenshtein.compare(a, b) + + def compareWithMetaphone(a: Array[Char], b: Array[Char]) = Metaphone.compare(a, b) + + def compareWithNGram(n: Int)(a: Array[Char], b: Array[Char]) = NGram(n).compare(a, b) + + def compareWithNysiis(a: Array[Char], b: Array[Char]) = Nysiis.compare(a, b) + + def compareWithOverlap(n: Int)(a: Array[Char], b: Array[Char]) = Overlap(n).compare(a, b) + + def compareWithRefinedNysiis(a: Array[Char], b: Array[Char]) = RefinedNysiis.compare(a, b) + + def compareWithRefinedSoundex(a: Array[Char], b: Array[Char]) = RefinedSoundex.compare(a, b) + + def compareWithSoundex(a: Array[Char], b: Array[Char]) = Soundex.compare(a, b) + + def compareWithWeightedLevenshtein(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal) + (a: Array[Char], b: Array[Char]) = + + WeightedLevenshtein(delete, insert, substitute).compare(a, b) + } + + final class StringMetricDecorator[A](val sm: StringMetric[A]) { + val withTransform: (StringTransform => StringMetric[A]) = (st) => new StringMetric[A] { + private[this] val self: StringMetric[A] = sm + private[this] val transform: StringTransform = st + + override def compare(a: Array[Char], b: Array[Char]): Option[A] = + self.compare(transform(a), transform(b)) + + override def compare(a: String, b: String): Option[A] = + self.compare(transform(a.toCharArray), transform(b.toCharArray)) + } + } +} diff --git 
a/core/src/main/scala/com/rockymadden/stringmetric/Tokenize.scala b/core/src/main/scala/com/rockymadden/stringmetric/Tokenize.scala
new file mode 100755
index 0000000..a011c96
--- /dev/null
+++ b/core/src/main/scala/com/rockymadden/stringmetric/Tokenize.scala
@@ -0,0 +1,36 @@
+package com.rockymadden.stringmetric
+
+object Tokenize {
+  /** Splits a value of type `A` into an array of values of the same type. */
+  sealed trait Tokenizer[A] {
+    def tokenize(a: A): Option[Array[A]]
+  }
+
+
+  /** A character-array tokenizer with a string-based overload. */
+  sealed trait StringTokenizer extends Tokenizer[Array[Char]] {
+    def tokenize(a: String): Option[Array[String]]
+  }
+
+
+  object StringTokenizer {
+    val NGram = NGramTokenizer
+
+    def tokenizeWithNGram(n: Int)(charArray: Array[Char]) = NGram(n).tokenize(charArray)
+  }
+
+
+  /** Yields every contiguous run of `n` characters, in input order.
+    *
+    * @param n n-gram size; when it is not positive every call returns `None`
+    */
+  final case class NGramTokenizer(n: Int) extends StringTokenizer {
+    /** @return `None` when `n <= 0` or `a` has fewer than `n` characters, else all n-grams of `a` */
+    override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] =
+      if (n <= 0 || a.length < n) None
+      else Some(a.sliding(n).toArray) // iterative: no per-character recursion or array re-copying
+
+    /** String overload: same rule as the array overload, with each n-gram returned as a `String`. */
+    override def tokenize(a: String): Option[Array[String]] = tokenize(a.toCharArray).map(_.map(_.mkString))
+  }
+}
diff --git a/core/src/main/scala/com/rockymadden/stringmetric/Transform.scala b/core/src/main/scala/com/rockymadden/stringmetric/Transform.scala
new file mode 100644
index 0000000..c2cdace
--- /dev/null
+++ b/core/src/main/scala/com/rockymadden/stringmetric/Transform.scala
@@ -0,0 +1,77 @@
+package com.rockymadden.stringmetric
+
+object Transform {
+  import scala.collection.immutable.NumericRange
+
+
+  type Transform[A] = (A => A)
+  type StringTransform = Transform[Array[Char]]
+
+
+  /** Ready-made transforms: `filterX` keeps the characters of class X, `filterNotX` drops them. */
+  object StringTransform {
+    // NumericRange.apply excludes its end value; `inclusive` keeps the last code point
+    // of every class ('z', 'Z', '9', 0x7F, ...). Extended ASCII is widened to the 8-bit range.
+    private final val Ascii = NumericRange.inclusive(0x00, 0x7F, 1)
+    private final val ExtendedAscii = NumericRange.inclusive(0x00, 0xFF, 1)
+    private final val Latin = NumericRange.inclusive(0x00, 0x24F, 1)
+    private final val LowerCase = NumericRange.inclusive(0x61, 0x7A, 1)
+    private final val Numbers = NumericRange.inclusive(0x30, 0x39, 1)
+    private final val UpperCase = NumericRange.inclusive(0x41, 0x5A, 1)
+
+    // Keeps the characters that satisfy `f`.
+    private final val filter: ((Array[Char], (Char => Boolean)) => Array[Char]) = (ca, f) =>
+      ca.filter(f)
+
+    // Drops the characters that satisfy `f`.
+    private final val filterNot: ((Array[Char], (Char => Boolean)) => Array[Char]) = (ca, f) =>
+      ca.filterNot(f)
+
+    val filterAlpha: StringTransform = (ca) => filter(ca, c => {
+      val ci = c.toInt
+      LowerCase.contains(ci) || UpperCase.contains(ci)
+    })
+
+    val filterNotAlpha: StringTransform = (ca) => filterNot(ca, c => {
+      val ci = c.toInt
+      LowerCase.contains(ci) || UpperCase.contains(ci)
+    })
+
+    val filterAlphaNumeric: StringTransform = (ca) => filter(ca, c => {
+      val ci = c.toInt
+      LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci)
+    })
+
+    val filterNotAlphaNumeric: StringTransform = (ca) => filterNot(ca, c => {
+      val ci = c.toInt
+      LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci)
+    })
+
+    val filterAscii: StringTransform = (ca) => filter(ca, c => Ascii.contains(c.toInt))
+
+    val filterNotAscii: StringTransform = (ca) => filterNot(ca, c => Ascii.contains(c.toInt))
+
+    val filterExtendedAscii: StringTransform = (ca) => filter(ca, c => ExtendedAscii.contains(c.toInt))
+
+    val filterNotExtendedAscii: StringTransform = (ca) => filterNot(ca, c => ExtendedAscii.contains(c.toInt))
+
+    val filterLatin: StringTransform = (ca) => filter(ca, c => Latin.contains(c.toInt))
+
+    val filterNotLatin: StringTransform = (ca) => filterNot(ca, c => Latin.contains(c.toInt))
+
+    val filterLowerCase: StringTransform = (ca) => filter(ca, c => LowerCase.contains(c.toInt))
+
+    val filterNotLowerCase: StringTransform = (ca) => filterNot(ca, c => LowerCase.contains(c.toInt))
+
+    val filterNumeric: StringTransform = (ca) => filter(ca, c => Numbers.contains(c.toInt))
+
+    val filterNotNumeric: StringTransform = (ca) => filterNot(ca, c => Numbers.contains(c.toInt))
+
+    val filterUpperCase: StringTransform = (ca) => filter(ca, c => UpperCase.contains(c.toInt))
+
+    val filterNotUpperCase: StringTransform = (ca) => filterNot(ca, c => UpperCase.contains(c.toInt))
+
+    // Lower-cases 'A'-'Z' (code points 65-90); every other character passes through unchanged.
+    val ignoreAlphaCase: StringTransform = (ca) => ca.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c)
+  }
+}
diff --git a/core/src/main/scala/com/rockymadden/stringmetric/package.scala b/core/src/main/scala/com/rockymadden/stringmetric/package.scala
new file mode 100755
index 0000000..e5bc19d
--- /dev/null
+++ b/core/src/main/scala/com/rockymadden/stringmetric/package.scala
@@ -0,0 +1,17 @@
+package com.rockymadden
+
+package object stringmetric {
+  import scala.language.implicitConversions
+  import Algorithm._
+  import Metric._
+
+  type CompareTuple[T] = (Array[T], Array[T])
+  type MatchTuple[T] = (Array[T], Array[T])
+
+  implicit def stringToCharArray(s: String): Array[Char] =
+    s.toCharArray
+  implicit def stringAlgorithmToDecoratedStringAlgorithm(sa: StringAlgorithm): StringAlgorithmDecorator =
+    new StringAlgorithmDecorator(sa)
+  implicit def stringMetricToDecoratedStringMetric[A](sa: StringMetric[A]): StringMetricDecorator[A] =
+    new StringMetricDecorator[A](sa)
+}
diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
new file mode 100755
index 0000000..3abe7cc
--- /dev/null
+++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithm.scala
@@ -0,0 +1,105 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.Algorithm.StringAlgorithm
+
+case object MetaphoneAlgorithm extends StringAlgorithm {
+  import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
+
+  override def compute(a: Array[Char]): Option[Array[Char]] =
+    if (a.length == 0 || !(Alpha isSuperset a.head)) None
+    else {
+      val th = (transcodeHead andThen deduplicate)(a.map(_.toLower))
+      val t = transcode(Array.empty[Char], th.head, th.tail,
Array.empty[Char]) + + if (t.length == 0) None else Some(t) // Single Y or W would have 0 length. + } + + override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) + + private val deduplicate: (Array[Char] => Array[Char]) = (ca) => + if (ca.length <= 1) ca + else ca.sliding(2).withFilter(a => a(0) == 'c' || a(0) != a(1)).map(_(0)).toArray[Char] :+ ca.last + + @annotation.tailrec + private val transcode: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) => + if (c == '\0' && r.length == 0) o + else { + def shift(d: Int, ca: Array[Char]) = { + val sca = r.splitAt(d - 1) + + ( + if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, + if (sca._2.length > 0) sca._2.head else '\0', + if (sca._2.length > 1) sca._2.tail else Array.empty[Char], + ca + ) + } + + val t = { + (c: @annotation.switch) match { + case 'a' | 'e' | 'i' | 'o' | 'u' => if (l.length == 0) shift(1, o:+ c) else shift(1, o) + case 'f' | 'j' | 'l' | 'm' | 'n' | 'r' => shift(1, o :+ c) + case 'b' => if (l.length >= 1 && l.last == 'm' && r.length == 0) shift(1, o) else shift(1, o :+ 'b') + case 'c' => + if (r.length >= 1 && r.head == 'h' && l.length >= 1 && l.last == 's') shift(1, o :+ 'k') + else if (r.length >= 2 && r.head == 'i' && r(1) == 'a') shift(3, o :+ 'x') + else if ((r.length >= 1 && r.head == 'h') + || (l.length >= 1 && r.length >= 1 && l.last == 's' && r.head == 'h')) shift(2, o :+ 'x') + else if (l.length >= 1 && r.length >= 1 && l.last == 's' + && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o) + else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(1, o :+ 's') + else shift(1, o :+ 'k') + case 'd' => + if (r.length >= 2 && r.head == 'g' + && (r(1) == 'e' || r(1) == 'y' || r(1) == 'i')) shift(1, o :+ 'j') + else shift(1, o :+ 't') + case 'g' => + if ((r.length > 1 && r.head == 'h') + || (r.length == 1 && r.head == 'n') + || (r.length == 3 && r.head == 'n' && r(1) == 'e' && r(2) == 'd')) 
shift(1, o) + else if (r.length >= 1 && (r.head == 'i' || r.head == 'e' || r.head == 'y')) shift(2, o :+ 'j') + else shift(1, o :+ 'k') + case 'h' => + if ((l.length >= 1 && (LowercaseVowel isSuperset l.last) && (r.length == 0 || !(LowercaseVowel isSuperset r.head))) + || (l.length >= 2 && l.last == 'h' + && (l(l.length - 2) == 'c' || l(l.length - 2) == 's' || l(l.length - 2) == 'p' + || l(l.length - 2) == 't' || l(l.length - 2) == 'g'))) shift(1, o) + else shift(1, o :+ 'h') + case 'k' => if (l.length >= 1 && l.last == 'c') shift(1, o) else shift(1, o :+ 'k') + case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ 'p') + case 'q' => shift(1, o :+ 'k') + case 's' => + if (r.length >= 2 && r.head == 'i' && (r(1) == 'o' || r(1) == 'a')) shift(3, o :+ 'x') + else if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'x') + else shift(1, o :+ 's') + case 't' => + if (r.length >= 2 && r.head == 'i' && (r(1) == 'a' || r(1) == 'o')) shift(3, o :+ 'x') + else if (r.length >= 1 && r.head == 'h') shift(2, o :+ '0') + else if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(1, o) + else shift(1, o :+ 't') + case 'v' => shift(1, o :+ 'f') + case 'w' | 'y' => if (r.length == 0 || !(LowercaseVowel isSuperset r.head)) shift(1, o) else shift(1, o :+ c) + case 'x' => shift(1, (o :+ 'k') :+ 's') + case 'z' => shift(1, o :+ 's') + case _ => shift(1, o) + } + } + + transcode(t._1, t._2, t._3, t._4) + } + + private val transcodeHead: (Array[Char] => Array[Char]) = (ca) => + (ca.length: @annotation.switch) match { + case 0 => ca + case 1 => if (ca.head == 'x') Array('s') else ca + case _ => + (ca.head: @annotation.switch) match { + case 'a' if ca(1) == 'e' => ca.tail + case 'g' | 'k' | 'p' if ca(1) == 'n' => ca.tail + case 'w' if ca(1) == 'r' => ca.tail + case 'w' if ca(1) == 'h' => 'w' +: ca.drop(2) + case 'x' => 's' +: ca.tail + case _ => ca + } + } +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala 
b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
new file mode 100755
index 0000000..d06f774
--- /dev/null
+++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala
@@ -0,0 +1,25 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.Metric.StringMetric
+
+/** Compares two inputs by the equality of their Metaphone codes. */
+case object MetaphoneMetric extends StringMetric[Boolean] {
+  import com.rockymadden.stringmetric.Alphabet.Alpha
+
+  /** @return `None` unless both inputs are non-empty, start with a member of `Alpha`, and
+    *         yield a non-empty code from `MetaphoneAlgorithm`; otherwise whether the codes match
+    */
+  override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = {
+    def startsWithLetter(ca: Array[Char]): Boolean = ca.length > 0 && (Alpha isSuperset ca.head)
+
+    if (startsWithLetter(a) && startsWithLetter(b))
+      for {
+        codeA <- MetaphoneAlgorithm.compute(a) if codeA.length > 0
+        codeB <- MetaphoneAlgorithm.compute(b) if codeB.length > 0
+      } yield codeA.sameElements(codeB)
+    else None
+  }
+
+  /** String overload; delegates to the character-array comparison. */
+  override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray)
+}
diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
new file mode 100755
index 0000000..3e46675
--- /dev/null
+++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithm.scala
@@ -0,0 +1,115 @@
+package com.rockymadden.stringmetric.phonetic
+
+import com.rockymadden.stringmetric.Algorithm.StringAlgorithm
+
+case object NysiisAlgorithm extends StringAlgorithm {
+  import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
+
+  override def compute(a: Array[Char]): Option[Array[Char]] =
+    if (a.length == 0 || !(Alpha isSuperset a.head)) None
+    else {
+      val tr = transcodeRight(a.map(_.toLower))
+      val tl = transcodeLeft(tr._1)
+      val t =
+        if (tl._2.length == 0) tl._1 ++ tr._2
+        else tl._1 ++ transcodeCenter(
+          Array.empty[Char],
+          tl._2.head,
+          if (tl._2.length > 1) tl._2.tail else Array.empty[Char],
+          Array.empty[Char]
+        ) ++ tr._2
+
+      if (t.length == 1) Some(t)
+      else Some(t.head +: (cleanLast andThen cleanTerminal andThen
deduplicate)(t.tail)) + } + + override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) + + private val cleanLast: (Array[Char] => Array[Char]) = (ca) => + if (ca.length == 0) ca + else if(ca.last == 'a' || ca.last == 's') + ca.dropRight(ca.reverseIterator.takeWhile(c => c == 'a' || c == 's').length) + else ca + + private val cleanTerminal: (Array[Char] => Array[Char]) = (ca) => + if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y' + else ca + + private val deduplicate: (Array[Char] => Array[Char]) = (ca) => + if (ca.length <= 1) ca + else ca.sliding(2).withFilter(a => a(0) != a(1)).map(_(0)).toArray[Char] :+ ca.last + + @annotation.tailrec + private val transcodeCenter: ((Array[Char], Char, Array[Char], Array[Char]) => Array[Char]) = (l, c, r, o) => + if (c == '\0' && r.length == 0) o + else { + def shift(d: Int, ca: Array[Char]) = { + val sca = r.splitAt(d - 1) + + ( + if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, + if (sca._2.length > 0) sca._2.head else '\0', + if (sca._2.length > 1) sca._2.tail else Array.empty[Char], + ca + ) + } + + val t = { + (c: @annotation.switch) match { + case 'a' | 'i' | 'o' | 'u' => shift(1, o :+ 'a') + case 'b' | 'c' | 'd' | 'f' | 'g' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' | 'y' => shift(1, o :+ c) + case 'e' => + if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f')) + else shift(1, o :+ 'a') + case 'h' => + if (l.length >= 1 && (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head)))) + shift(1, o) + else shift(1, o :+ c) + case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c') + case 'm' => shift(1, o :+ 'n') + case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c) + case 'q' => shift(1, o :+ 'g') + case 's' => + if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(3, o :+ c) + else shift(1, o :+ c) + case 'w' => + if (l.length 
>= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o) + else shift(1, o :+ c) + case 'z' => shift(1, o :+ 's') + case _ => shift(1, o) + } + } + + transcodeCenter(t._1, t._2, t._3, t._4) + } + + private val transcodeLeft: (Array[Char] => (Array[Char], Array[Char])) = (ca) => + if (ca.length == 0) (Array.empty[Char], ca) + else { + lazy val tr2 = ca.takeRight(ca.length - 2) + lazy val tr3 = ca.takeRight(ca.length - 3) + + (ca.head: @annotation.switch) match { + case 'k' if ca.length >= 2 && ca(1) == 'n' => (Array('n', 'n'), tr2) + case 'k' => (Array('c'), ca.tail) + case 'm' if ca.length >= 3 && (ca(1) == 'a' && ca(2) == 'c') => (Array('m', 'c'), tr3) + case 'p' if ca.length >= 2 && (ca(1) == 'h' || ca(1) == 'f') => (Array('f', 'f'), tr2) + case 's' if ca.length >= 3 && (ca(1) == 'c' && ca(2) == 'h') => (Array('s', 's'), tr3) + case _ => (Array(ca.head), ca.tail) + } + } + + private val transcodeRight: (Array[Char] => (Array[Char], Array[Char])) = (ca) => + if (ca.length >= 2) { + val lc = ca(ca.length - 1) + val lcm1 = ca(ca.length - 2) + lazy val t2 = ca.take(ca.length - 2) + + (lc: @annotation.switch) match { + case 'd' if lcm1 == 'n' || lcm1 == 'r' => (t2, Array('d')) + case 'e' if lcm1 == 'e' || lcm1 == 'i' => (t2, Array('y')) + case 't' if lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r' => (t2, Array('d')) + case _ => (ca, Array.empty[Char]) + } + } else (ca, Array.empty[Char]) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala new file mode 100755 index 0000000..c9a0914 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala @@ -0,0 +1,24 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.Metric.StringMetric + +case object NysiisMetric extends StringMetric[Boolean] { + import com.rockymadden.stringmetric.Alphabet.Alpha + + override def compare(a: Array[Char], b: 
Array[Char]): Option[Boolean] = { + val unequal: ((Char, Char) => Boolean) = (c1, c2) => { + val lc1 = c1.toLower + val lc2 = c2.toLower + + (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2) + } + + if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None + else if (unequal(a.head, b.head)) Some(false) + else NysiisAlgorithm.compute(a).filter(_.length > 0).flatMap { ny1 => + NysiisAlgorithm.compute(b).filter(_.length > 0).map(ny1.sameElements(_)) + } + } + + override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala new file mode 100755 index 0000000..9976847 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithm.scala @@ -0,0 +1,121 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.Algorithm.StringAlgorithm + +case object RefinedNysiisAlgorithm extends StringAlgorithm { + import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel} + + override def compute(a: Array[Char]): Option[Array[Char]] = + if (a.length == 0 || !(Alpha isSuperset a.head)) None + else { + val lca = a.map(_.toLower) + val tlh = (transcodeHead andThen transcodeLast)(lca.head +: cleanLast(lca.tail, Set('s', 'z'))) + val t = transcode(Array.empty[Char], tlh.head, tlh.tail, Array.empty[Char]) + + if (t.length == 1) Some(t) + else Some(deduplicate( + t.head +: (cleanLast.tupled andThen cleanTerminal)(t.tail, Set('a')) + )) + } + + override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString) + + private val cleanLast: ((Array[Char], Set[Char]) => Array[Char]) = (ca, s) => + if (ca.length == 0) ca + else if(s.contains(ca.last)) ca.dropRight(ca.reverseIterator.takeWhile(c => 
s.contains(c)).length) + else ca + + private val cleanTerminal: (Array[Char] => Array[Char]) = (ca) => + if (ca.length >= 2 && ca.last == 'y' && ca(ca.length - 2) == 'a') ca.dropRight(2) :+ 'y' + else ca + + private val deduplicate: (Array[Char] => Array[Char]) = (ca) => + if (ca.length <= 1) ca + else ca.sliding(2).withFilter(a => a(0) != a(1)).map(a => a(0)).toArray[Char] :+ ca.last + + /** Consumes the current character c and the remaining input r (l is the already-consumed prefix), appending transcoded output to o; a c of '\0' with an empty r ends the walk. Declared as a method because @tailrec only applies to methods: the self-call now compiles to a loop instead of nesting one function apply frame per step. */ @annotation.tailrec + private def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = + if (c == '\0' && r.length == 0) o + else { + def shift(d: Int, ca: Array[Char]) = { + val sca = r.splitAt(d - 1) + + ( + if (sca._1.length > 0) (l :+ c) ++ sca._1 else l :+ c, + if (sca._2.length > 0) sca._2.head else '\0', + if (sca._2.length > 1) sca._2.tail else Array.empty[Char], + ca + ) + } + + val t = { + (c: @annotation.switch) match { + case 'a' | 'i' | 'o' | 'u' => + if (l.length == 0) shift(1, o :+ c) + else shift(1, o :+ 'a') + case 'b' | 'c' | 'f' | 'j' | 'l' | 'n' | 'r' | 't' | 'v' | 'x' => shift(1, o :+ c) + case 'd' => + if (r.length >= 1 && r.head == 'g') shift(2, o :+ 'g') else shift(1, o :+ c) + case 'e' => + if (l.length == 0) shift(1, o :+ c) + else if (r.length >= 1 && r.head == 'v') shift(2, o ++ Array('a', 'f')) + else shift(1, o :+ 'a') + case 'g' => + if (r.length >= 2 && r.head == 'h' && r(1) == 't') shift(3, o ++ Array('g', 't')) + else shift(1, o :+ c) + case 'h' => + if (l.length == 0) shift(1, o :+ c) + else if (!(LowercaseVowel isSuperset l.last) || (r.length >= 1 && !(LowercaseVowel isSuperset r.head))) + shift(1, o) + else shift(1, o :+ c) + case 'k' => if (r.length >= 1 && r.head == 'n') shift(2, o :+ 'n') else shift(1, o :+ 'c') + case 'm' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'n') + case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ c) + case 'q' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 'g') + case 's' => + if (r.length >= 2 && r.head == 'c' && 
r(1) == 'h') shift(3, o :+ c) + else if (r.length >= 1 && r.head == 'h') shift(2, o :+ c) + else shift(1, o :+ c) + case 'w' => + if (l.length >= 1 && (LowercaseVowel isSuperset l.last)) shift(1, o) + else if (r.length >= 1 && r.head == 'r') shift(2, o :+ 'r') + else shift(1, o :+ c) + case 'y' => + if (l.length >= 1 && r.length >= 2 && r.head == 'w') shift(2, o :+ 'a') + else if (r.length >= 1 && r.head == 'w') shift(2, o :+ c) + else if (l.length >= 1 && r.length >= 1) shift(1, o :+ 'a') + else shift(1, o :+ c) + case 'z' => if (l.length == 0) shift(1, o :+ c) else shift(1, o :+ 's') + case _ => shift(1, o) + } + } + + transcode(t._1, t._2, t._3, t._4) + } + + private val transcodeHead: (Array[Char] => Array[Char]) = (ca) => + if (ca.length == 0) ca + else + (ca.head: @annotation.switch) match { + case 'm' if ca.length >= 3 && ca(1) == 'a' && ca(2) == 'c' => + Array('m', 'c') ++ ca.takeRight(ca.length - 3) + case 'p' if ca.length >= 2 && ca(1) == 'f' =>'f' +: ca.takeRight(ca.length - 2) + case _ => ca + } + + private val transcodeLast: (Array[Char] => Array[Char]) = (ca) => + if (ca.length >= 2) { + val lc = ca(ca.length - 1) + val lcm1 = ca(ca.length - 2) + lazy val t2 = ca.take(ca.length - 2) + + (lc: @annotation.switch) match { + case 'd' if lcm1 == 'n' || lcm1 == 'r' => t2 :+ 'd' + case 'e' if lcm1 == 'e' || lcm1 == 'i' || lcm1 =='y' => t2 :+ 'y' + case 't' if lcm1 == 'd' || lcm1 == 'n' || lcm1 == 'r' => t2 :+ 'd' + case 'x' if lcm1 == 'e' => t2 ++ Array('e', 'c') + case 'x' if lcm1 == 'i' => t2 ++ Array('i', 'c') + case _ => ca + } + } else ca +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala new file mode 100755 index 0000000..488f261 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala @@ -0,0 +1,24 @@ +package com.rockymadden.stringmetric.phonetic + +import 
com.rockymadden.stringmetric.Metric.StringMetric + +case object RefinedNysiisMetric extends StringMetric[Boolean] { + import com.rockymadden.stringmetric.Alphabet.Alpha + + override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = { + val unequal = (c1: Char, c2: Char) => { + val lc1 = c1.toLower + val lc2 = c2.toLower + + (if (lc1 == 'k') 'c' else lc1) != (if (lc2 == 'k') 'c' else lc2) + } + + if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None + else if (unequal(a.head, b.head)) Some(false) + else RefinedNysiisAlgorithm.compute(a).filter(_.length > 0).flatMap { rny1 => + RefinedNysiisAlgorithm.compute(b).filter(_.length > 0).map(rny1.sameElements(_)) + } + } + + override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala new file mode 100755 index 0000000..e8f3af6 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithm.scala @@ -0,0 +1,59 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.Algorithm.StringAlgorithm + +case object RefinedSoundexAlgorithm extends StringAlgorithm { + import com.rockymadden.stringmetric.Alphabet.Alpha + + override def compute(a: Array[Char]): Option[Array[Char]] = + if (a.length == 0 || !(Alpha isSuperset a.head)) None + else Some(transcode(a, Array(a.head.toLower))) + + override def compute(a: String): Option[String] = compute(a.toCharArray).map(_.mkString) + + /** Walks the characters of i, appending to o the digit chosen for each one (nothing is appended when the mapping yields '\0'), and returns o once i is empty. Declared as a method because @tailrec only applies to methods: the self-call now compiles to a loop instead of nesting one function apply frame per input character. */ @annotation.tailrec + private def transcode(i: Array[Char], o: Array[Char]): Array[Char] = + if (i.length == 0) o + else { + val c = i.head.toLower + val m2 = (mc: Char) => (mc: @annotation.switch) match { + case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' => '0' + case 'b' | 'p' => '1' + 
case 'f' | 'v' => '2' + case 'c' | 'k' | 's' => '3' + case 'g' | 'j' => '4' + case 'q' | 'x' | 'z' => '5' + case 'd' | 't' => '6' + case 'l' => '7' + case 'm' | 'n' => '8' + case 'r' => '9' + case _ => '\0' + } + val m1 = (mc: Char, pc: Char) => (mc: @annotation.switch) match { + case 'a' | 'e' | 'h' | 'i' | 'o' | 'u' | 'w' | 'y' if pc != '0' => '0' + case 'b' | 'p' if pc != '1' => '1' + case 'f' | 'v' if pc != '2' => '2' + case 'c' | 'k' | 's' if pc != '3' => '3' + case 'g' | 'j' if pc != '4' => '4' + case 'q' | 'x' | 'z' if pc != '5' => '5' + case 'd' | 't' if pc != '6' => '6' + case 'l' if pc != '7' => '7' + case 'm' | 'n' if pc != '8' => '8' + case 'r' if pc != '9' => '9' + case _ => '\0' + } + val a = + // Code twice. + if (o.length == 1) m2(c) + // Code once. + else m1( + c, + (o.last: @annotation.switch) match { + case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => o.last + case _ => m2(o.last) + } + ) + + transcode(i.tail, if (a != '\0') o :+ a else o) + } +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala new file mode 100755 index 0000000..289fe29 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala @@ -0,0 +1,16 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.Metric.StringMetric + +case object RefinedSoundexMetric extends StringMetric[Boolean] { + import com.rockymadden.stringmetric.Alphabet.Alpha + + override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = + if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None + else if (a.head.toLower != b.head.toLower) Some(false) + else RefinedSoundexAlgorithm.compute(a).filter(_.length > 0).flatMap { rse1 => + RefinedSoundexAlgorithm.compute(b).filter(_.length > 0).map(rse1.sameElements(_)) + } + + override def 
compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala new file mode 100755 index 0000000..b211908 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala @@ -0,0 +1,57 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.Algorithm.StringAlgorithm + +case object SoundexAlgorithm extends StringAlgorithm { + import com.rockymadden.stringmetric.Alphabet.Alpha + + override def compute(a: Array[Char]): Option[Array[Char]] = + if (a.length == 0 || !(Alpha isSuperset a.head)) None + else { + val fc = a.head.toLower + + Some(transcode(a.tail, fc, Array(fc)).padTo(4, '0')) + } + + override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString) + + /** Walks the characters of i, appending digits to o until o holds four characters or i is empty; pc is the previously visited (lower-cased) input character. Declared as a method because @tailrec only applies to methods: the self-call now compiles to a loop instead of nesting one function apply frame per input character. */ @annotation.tailrec + private def transcode(i: Array[Char], pc: Char, o: Array[Char]): Array[Char] = + if (i.length == 0) o + else { + val c = i.head.toLower + val m2 = (mc: Char) => (mc: @annotation.switch) match { + case 'b' | 'f' | 'p' | 'v' => '1' + case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2' + case 'd' | 't' => '3' + case 'l' => '4' + case 'm' | 'n' => '5' + case 'r' => '6' + case _ => '\0' + } + val m1 = (mc: Char, pc: Char) => (mc: @annotation.switch) match { + case 'b' | 'f' | 'p' | 'v' if pc != '1' => '1' + case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' if pc != '2' => '2' + case 'd' | 't' if pc != '3' => '3' + case 'l' if pc != '4' => '4' + case 'm' | 'n' if pc != '5' => '5' + case 'r' if pc != '6' => '6' + case _ => '\0' + } + val a = pc match { + // Code twice. + case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => m2(c) + // Code once. 
+ case _ => m1( + c, + (o.last: @annotation.switch) match { + case '1' | '2' | '3' | '4' | '5' | '6' => o.last + case _ => m2(o.last) + } + ) + } + + if (o.length == 3 && a != '\0') o :+ a + else transcode(i.tail, c, if (a != '\0') o :+ a else o) + } +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala new file mode 100755 index 0000000..eca32db --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala @@ -0,0 +1,16 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.Metric.StringMetric + +case object SoundexMetric extends StringMetric[Boolean] { + import com.rockymadden.stringmetric.Alphabet.Alpha + + override def compare(a: Array[Char], b: Array[Char]): Option[Boolean] = + if (a.length == 0 || !(Alpha isSuperset a.head) || b.length == 0 || !(Alpha isSuperset b.head)) None + else if (a.head.toLower != b.head.toLower) Some(false) + else SoundexAlgorithm.compute(a).filter(_.length > 0).flatMap { se1 => + SoundexAlgorithm.compute(b).filter(_.length > 0).map(se1.sameElements(_)) + } + + final override def compare(a: String, b: String): Option[Boolean] = compare(a.toCharArray, b.toCharArray) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala new file mode 100755 index 0000000..0ad3915 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala @@ -0,0 +1,27 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric + +/** + * An implementation of the Dice/Sorensen metric. This implementation differs in that n-gram size is required. + * Traditionally, the algorithm uses bigrams. 
+ */ +final case class DiceSorensenMetric(n: Int) extends StringMetric[Double] { + import com.rockymadden.stringmetric.Tokenize.NGramTokenizer + import com.rockymadden.stringmetric.MatchTuple + + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + else if (a.sameElements(b)) Some(1d) + else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => + NGramTokenizer(n).tokenize(b).map { ca2bg => + val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) + + (2d * ms) / (ca1bg.length + ca2bg.length) + } + } + + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) + + private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala new file mode 100755 index 0000000..4a90f32 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/HammingMetric.scala @@ -0,0 +1,18 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric + +case object HammingMetric extends StringMetric[Int] { + import com.rockymadden.stringmetric.CompareTuple + + override def compare(a: Array[Char], b: Array[Char]): Option[Int] = + if (a.length == 0 || b.length == 0 || a.length != b.length) None + else if (a.sameElements(b)) Some(0) + else Some(hamming(a, b)) + + override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray) + + private val hamming: (CompareTuple[Char] => Int) = (ct) => + if (ct._1.length == 0) 0 + else ct._1.zip(ct._2).count(t => t._1 != t._2) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala 
b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala new file mode 100755 index 0000000..6ec5db4 --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala @@ -0,0 +1,20 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric + +final case class JaccardMetric(n: Int) extends StringMetric[Double] { + import com.rockymadden.stringmetric.Tokenize.NGramTokenizer + + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. + else if (a.sameElements(b)) Some(1d) + else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => + NGramTokenizer(n).tokenize(b).map { ca2bg => + val i = (ca1bg.map(_.mkString) intersect ca2bg.map(_.mkString)).length + + i.toDouble / (ca1bg.length + ca2bg.length - i) + } + } + + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala new file mode 100755 index 0000000..575d67a --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroMetric.scala @@ -0,0 +1,66 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric +import scala.Some + +/** + * An implementation of the Jaro metric. One differing detail in this implementation is that if a character is matched + * in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios. 
+ */ +case object JaroMetric extends StringMetric[Double] { + import com.rockymadden.stringmetric.{CompareTuple, MatchTuple} + import scala.collection.mutable.{ArrayBuffer, HashSet} + + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (a.length == 0 || b.length == 0) None + else if (a.sameElements(b)) Some(1d) + else { + val mt = `match`(a, b) + val ms = scoreMatches(mt._1, mt._2) + + if (ms == 0) Some(0d) + else { + val ts = scoreTranspositions(mt._1, mt._2) + + Some(((ms.toDouble / a.length) + (ms.toDouble / b.length) + ((ms.toDouble - ts) / ms)) / 3) + } + } + + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) + + private val `match`: (CompareTuple[Char] => MatchTuple[Char]) = (ct) => { + lazy val window = math.abs((math.max(ct._1.length, ct._2.length) / 2d).floor.toInt - 1) + val one = ArrayBuffer.empty[Int] + val two = HashSet.empty[Int] + var i = 0 + var bi = false + + while (i < ct._1.length && !bi) { + val start = if (i - window <= 0) 0 else i - window + val end = if (i + window >= ct._2.length - 1) ct._2.length - 1 else i + window + + if (start > ct._2.length - 1) bi = !bi + else { + var ii = start + var bii = false + + while (ii <= end && !bii) { + if (!two.contains(ii) && ct._1(i) == ct._2(ii)) { + one += i + two += ii + bii = !bii + } else ii += 1 + } + + i += 1 + } + } + + (one.toArray.map(ct._1(_)), two.toArray.sortWith(_ < _).map(ct._2(_))) + } + + private val scoreMatches: (MatchTuple[Char] => Int) = (mt) => mt._1.length + + private val scoreTranspositions: (MatchTuple[Char] => Int) = (mt) => + (mt._1.zip(mt._2).count(t => t._1 != t._2) / 2d).floor.toInt +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala new file mode 100755 index 0000000..e83f73f --- /dev/null +++ 
b/core/src/main/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala @@ -0,0 +1,23 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric + +/** + * An implementation of the Jaro-Winkler metric. One differing detail in this implementation is that if a character is + * matched in string2, it cannot be matched upon again. This results in a more penalized distance in these scenarios + * (e.g. comparing henka and henkan distance is 0.9666 versus the typical 0.9722). + */ +case object JaroWinklerMetric extends StringMetric[Double] { + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + JaroMetric.compare(a, b).map { + case 0d => 0d + case 1d => 1d + case jaro => { + val prefix = a.zip(b).takeWhile(t => t._1 == t._2) + + jaro + ((if (prefix.length <= 4) prefix.length else 4) * 0.1d * (1 - jaro)) + } + } + + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala new file mode 100755 index 0000000..fb90cdc --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetric.scala @@ -0,0 +1,40 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric + +case object LevenshteinMetric extends StringMetric[Int] { + import com.rockymadden.stringmetric.CompareTuple + + override def compare(a: Array[Char], b: Array[Char]): Option[Int] = + if (a.length == 0 || b.length == 0) None + else if (a.sameElements(b)) Some(0) + else Some(levenshtein(a, b)) + + override def compare(a: String, b: String): Option[Int] = compare(a.toCharArray, b.toCharArray) + + private val levenshtein: (CompareTuple[Char] => Int) = (ct) => { + val m = Array.fill[Int](ct._1.length + 1, ct._2.length + 1)(-1) + + 
def distance(t: (Int, Int)): Int = t match { + case (r, 0) => r + case (0, c) => c + case (r, c) if m(r)(c) != -1 => m(r)(c) + case (r, c) => { + val min = + if (ct._1(r - 1) == ct._2(c - 1)) distance(r - 1, c - 1) + else math.min( + math.min( + distance(r - 1, c) + 1, // Delete (left). + distance(r, c - 1) + 1 // Insert (up). + ), + distance(r - 1, c - 1) + 1 // Substitute (left-up). + ) + + m(r)(c) = min + min + } + } + + distance(ct._1.length, ct._2.length) + } +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala new file mode 100755 index 0000000..8c194ce --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala @@ -0,0 +1,24 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric + +final case class NGramMetric(n: Int) extends StringMetric[Double] { + import com.rockymadden.stringmetric.MatchTuple + import com.rockymadden.stringmetric.Tokenize.NGramTokenizer + import scala.math + + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. 
+ else if (a.sameElements(b)) Some(1d) + else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => + NGramTokenizer(n).tokenize(b).map { ca2bg => + val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString))) + + ms.toDouble / math.max(ca1bg.length, ca2bg.length) + } + } + + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) + + private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala new file mode 100755 index 0000000..8f0418b --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala @@ -0,0 +1,24 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric + +final case class OverlapMetric(n: Int) extends StringMetric[Double] { + import com.rockymadden.stringmetric.MatchTuple + import com.rockymadden.stringmetric.Tokenize.NGramTokenizer + import scala.math + + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (n <= 0 || a.length < n || b.length < n) None // Because length is less than n, it is not possible to compare. 
+ else if (a.sameElements(b)) Some(1d) + else NGramTokenizer(n).tokenize(a).flatMap { ca1bg => + NGramTokenizer(n).tokenize(b).map { ca2bg => + val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) + + ms.toDouble / math.min(ca1bg.length, ca2bg.length) + } + } + + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) + + private val scoreMatches: (MatchTuple[String] => Int) = (mt) => mt._1.intersect(mt._2).length +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala new file mode 100755 index 0000000..fa113bc --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetric.scala @@ -0,0 +1,43 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric + +case object RatcliffObershelpMetric extends StringMetric[Double] { + import com.rockymadden.stringmetric.CompareTuple + + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (a.length == 0 || b.length == 0) None + else if (a.sameElements(b)) Some(1d) + else Some(2d * commonSequences(a, b).foldLeft(0)(_ + _.length) / (a.length + b.length)) + + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) + + private def longestCommonSubsequence(ct: CompareTuple[Char]) = { + val m = Array.ofDim[Int](ct._1.length + 1, ct._2.length + 1) + var lrc = (0, 0, 0) // Length, row, column. 
+ + for (r <- 0 to ct._1.length - 1; c <- 0 to ct._2.length - 1) { + if (ct._1(r) == ct._2(c)) { + val l = m(r)(c) + 1 + m(r + 1)(c + 1) = l + if (l > lrc._1) lrc = (l, r + 1, c + 1) + } + } + + lrc + } + + private val commonSequences: (CompareTuple[Char] => Array[Array[Char]]) = (ct) => { + val lcs = longestCommonSubsequence(ct) + + if (lcs._1 == 0) Array.empty + else { + val sct1 = (ct._1.take(lcs._2 - lcs._1), ct._1.takeRight(ct._1.length - lcs._2)) + val sct2 = (ct._2.take(lcs._3 - lcs._1), ct._2.takeRight(ct._2.length - lcs._3)) + + Array(ct._1.slice(lcs._2 - lcs._1, lcs._2)) ++ + commonSequences(sct1._1, sct2._1) ++ + commonSequences(sct1._2, sct2._2) + } + } +} diff --git a/core/src/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala b/core/src/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala new file mode 100755 index 0000000..ae6f49c --- /dev/null +++ b/core/src/main/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetric.scala @@ -0,0 +1,36 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.Metric.StringMetric + +final case class WeightedLevenshteinMetric(delete: BigDecimal, insert: BigDecimal, substitute: BigDecimal) + extends StringMetric[Double] { + + import com.rockymadden.stringmetric.CompareTuple + + override def compare(a: Array[Char], b: Array[Char]): Option[Double] = + if (a.length == 0 || b.length == 0) None + else if (a.sameElements(b)) Some(0d) + else Some(weightedLevenshtein((a, b), (delete, insert, substitute)).toDouble) + + override def compare(a: String, b: String): Option[Double] = compare(a.toCharArray, b.toCharArray) + + private val weightedLevenshtein: ((CompareTuple[Char], (BigDecimal, BigDecimal, BigDecimal)) => BigDecimal) = + (ct, w) => { + val m = Array.ofDim[BigDecimal](ct._1.length + 1, ct._2.length + 1) + + for (r <- 0 to ct._1.length) m(r)(0) = w._1 * r + for (c <- 0 to ct._2.length) m(0)(c) = w._2 * c + + 
for (r <- 1 to ct._1.length; c <- 1 to ct._2.length) { + m(r)(c) = + if (ct._1(r - 1) == ct._2(c - 1)) m(r - 1)(c - 1) + else (m(r - 1)(c) + w._1).min( // Delete (left). + (m(r)(c - 1) + w._2).min( // Insert (up). + m(r - 1)(c - 1) + w._3 // Substitute (left-up). + ) + ) + } + + m(ct._1.length)(ct._2.length) + } +} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala new file mode 100644 index 0000000..d727145 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/AlgorithmSpec.scala @@ -0,0 +1,34 @@ +package com.rockymadden.stringmetric + +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AlgorithmSpec extends ScalaTest { + import phonetic._ + import Algorithm._ + import Transform.StringTransform + + "StringAlgorithm" should provide { + "compute method and companion object pass through" in { + StringAlgorithm.computeWithMetaphone("testone").get should + equal (MetaphoneAlgorithm.compute("testone".toCharArray).get) + StringAlgorithm.computeWithNysiis("testone").get should + equal (NysiisAlgorithm.compute("testone".toCharArray).get) + StringAlgorithm.computeWithRefinedNysiis("testone").get should + equal (RefinedNysiisAlgorithm.compute("testone".toCharArray).get) + StringAlgorithm.computeWithRefinedSoundex("testone").get should + equal (RefinedSoundexAlgorithm.compute("testone".toCharArray).get) + StringAlgorithm.computeWithSoundex("testone").get should + equal (SoundexAlgorithm.compute("testone".toCharArray).get) + } + } + + "StringAlgorithmDecorator" should provide { + "withTransform()" in { + (MetaphoneAlgorithm withTransform StringTransform.filterAlpha).compute("abc123").get should + equal (MetaphoneAlgorithm.compute("abc").get) + } + } +} + diff --git a/core/src/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala 
b/core/src/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala new file mode 100755 index 0000000..5a6a1dc --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/AlphabetSpec.scala @@ -0,0 +1,95 @@ +package com.rockymadden.stringmetric + +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class AlphabetSpec extends ScalaTest { "Alphabet" should provide { + import Alphabet.{Alpha, Vowel} + + "an overloaded isSuperset method which accepts Char" when passed { + "non-alphabet argument" should returns { + "false" in { + Alpha isSuperset '0' should be (false) + } + } + "alphabet argument" should returns { + "true" in { + Alpha isSuperset 'a' should be (true) + Alpha isSuperset 'A' should be (true) + } + } + "non-vowel argument" should returns { + "false" in { + Vowel isSuperset 'y' should be (false) + } + } + "vowel argument" should returns { + "true" in { + Vowel isSuperset 'a' should be (true) + Vowel isSuperset 'A' should be (true) + } + } + } + "an overloaded isSuperset method which accepts Array[Char]" when passed { + "empty argument" should returns { + "false" in { + Alpha isSuperset Array.empty[Char] should be (false) + } + } + "non-alphabet argument" should returns { + "false" in { + Alpha isSuperset "hi!".toCharArray should be (false) + Alpha isSuperset "helloworld!".toCharArray should be (false) + } + } + "alphabet argument" should returns { + "true" in { + Alpha isSuperset "hi".toCharArray should be (true) + Alpha isSuperset "helloworld".toCharArray should be (true) + Alpha isSuperset "HI".toCharArray should be (true) + Alpha isSuperset "HELLOWORLD".toCharArray should be (true) + } + } + "non-vowel argument" should returns { + "false" in { + Vowel isSuperset "y".toCharArray should be (false) + } + } + "vowel argument" should returns { + "true" in { + Vowel isSuperset "a".toCharArray should be (true) + Vowel isSuperset "A".toCharArray should be (true) + } + } + } + "an overloaded 
isSuperset method which accepts String" when passed { + "empty argument" should returns { + "false" in { + Alpha isSuperset "" should be (false) + } + } + "non-alphabet argument" should returns { + "false" in { + Alpha isSuperset "helloworld!" should be (false) + } + } + "alphabet argument" should returns { + "true" in { + Alpha isSuperset "helloworld" should be (true) + Alpha isSuperset "HELLOWORLD" should be (true) + } + } + "non-vowel argument" should returns { + "false" in { + Vowel isSuperset "y" should be (false) + } + } + "vowel argument" should returns { + "true" in { + Vowel isSuperset "a" should be (true) + Vowel isSuperset "A" should be (true) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/MetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/MetricSpec.scala new file mode 100644 index 0000000..3b9021d --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/MetricSpec.scala @@ -0,0 +1,56 @@ +package com.rockymadden.stringmetric + +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class MetricSpec extends ScalaTest { + import phonetic._ + import similarity._ + import Metric._ + import Transform.StringTransform + + "StringMetric standalone object" should provide { + "compare method and companion object pass through" in { + StringMetric.compareWithDiceSorensen(1)("testone", "testtwo").get should + equal (DiceSorensenMetric(1).compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithHamming("testone", "testtwo").get should + equal (HammingMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithJaccard(1)("testone", "testtwo").get should + equal (JaccardMetric(1).compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithJaro("testone", "testtwo").get should + equal (JaroMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + 
StringMetric.compareWithJaroWinkler("testone", "testtwo").get should + equal (JaroWinklerMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithLevenshtein("testone", "testtwo").get should + equal (LevenshteinMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithMetaphone("testone", "testtwo").get should + equal (MetaphoneMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithNGram(1)("testone", "testtwo").get should + equal (NGramMetric(1).compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithNysiis("testone", "testtwo").get should + equal (NysiisMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithOverlap(1)("testone", "testtwo").get should + equal (OverlapMetric(1).compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithRefinedNysiis("testone", "testtwo").get should + equal (RefinedNysiisMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithRefinedSoundex("testone", "testtwo").get should + equal (RefinedSoundexMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithSoundex("testone", "testtwo").get should + equal (SoundexMetric.compare("testone".toCharArray, "testtwo".toCharArray).get) + StringMetric.compareWithWeightedLevenshtein(1, 2, 3)("testone", "testtwo").get should + equal (WeightedLevenshteinMetric(1, 2, 3).compare("testone".toCharArray, "testtwo".toCharArray).get) + } + } + + "StringMetricDecorator" should provide { + "withTransform()" in { + (MetaphoneMetric withTransform StringTransform.filterAlpha).compare("abc123", "abc456").get should + equal (true) + (DiceSorensenMetric(1) withTransform StringTransform.filterAlpha).compare("abc123", "abc456").get should + equal (1.0) + + (MetaphoneMetric withTransform (StringTransform.filterAlpha andThen 
StringTransform.filterUpperCase)).compare("abc123", "abc456").isDefined should be (false) + } + } +} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/ScalaTest.scala b/core/src/test/scala/com/rockymadden/stringmetric/ScalaTest.scala new file mode 100755 index 0000000..5f4ab62 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/ScalaTest.scala @@ -0,0 +1,18 @@ +package com.rockymadden.stringmetric + +import org.scalatest.{BeforeAndAfter, ParallelTestExecution, WordSpec} +import org.scalatest.matchers.ShouldMatchers + +trait ScalaTest extends WordSpec with ShouldMatchers with BeforeAndAfter with ParallelTestExecution { + def allows = afterWord("allow") + + def executes = afterWord("execute") + + def passed = afterWord("passed") + + def provide = afterWord("provide") + + def returns = afterWord("return") + + def throws = afterWord("throw") +} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala new file mode 100755 index 0000000..cfba0f7 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala @@ -0,0 +1,45 @@ +package com.rockymadden.stringmetric + +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class TokenizeSpec extends ScalaTest { "NGramTokenizer" should provide { + import Tokenize._ + + "tokenize method" when passed { + "empty argument" should returns { + "None" in { + NGramTokenizer(1).tokenize("").isDefined should be (false) + } + } + "invalid n argument" should returns { + "None" in { + NGramTokenizer(0).tokenize("").isDefined should be (false) + NGramTokenizer(-1).tokenize("").isDefined should be (false) + } + } + "valid argument" should returns { + "Array[String]" in { + NGramTokenizer(1).tokenize("abcdefghijklmnopqrstuvwxyz").get should equal ( + Array( + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", + "s", "t", "u", 
"v", "w", "x", "y", "z" + ) + ) + NGramTokenizer(2).tokenize("abcdefghijklmnopqrstuvwxyz").get should equal ( + Array( + "ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl", "lm", "mn", "no", "op", + "pq", "qr", "rs", "st", "tu", "uv", "vw", "wx", "xy", "yz" + ) + ) + NGramTokenizer(3).tokenize("abcdefghijklmnopqrstuvwxyz").get should equal ( + Array( + "abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl", "klm", "lmn", "mno", + "nop", "opq", "pqr", "qrs", "rst", "stu", "tuv", "uvw", "vwx", "wxy", "xyz" + ) + ) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/TransformSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/TransformSpec.scala new file mode 100644 index 0000000..5e79c62 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/TransformSpec.scala @@ -0,0 +1,181 @@ +package com.rockymadden.stringmetric + +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class TransformSpec extends ScalaTest { "StringTransform" should provide { + import Transform._ + + "filterAlpha()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterAlpha( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ("aBc".toCharArray) + } + } + } + "filterNotAlpha()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterNotAlpha( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ( + ("123" + 0x250.toChar).toCharArray + ) + } + } + } + "filterAlphaNumeric()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterAlphaNumeric( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ("aBc123".toCharArray) + } + } + } + "filterNotAlphaNumeric()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterNotAlphaNumeric( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ( + ("" + 
0x250.toChar).toCharArray + ) + } + } + } + "filterAscii()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterAscii( + ("aBc" + 0x80.toChar).toCharArray + ) should equal ("aBc".toCharArray) + } + } + } + "filterNotAscii()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterNotAscii( + ("aBc" + 0x100.toChar).toCharArray + ) should equal ( + ("" + 0x100.toChar).toCharArray + ) + } + } + } + "filterExtendedAscii()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterExtendedAscii( + ("aBc" + 0x100.toChar).toCharArray + ) should equal ("aBc".toCharArray) + } + } + } + "filterNotExtendedAscii()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterNotExtendedAscii( + ("aBc" + 0x250.toChar).toCharArray + ) should equal ( + ("" + 0x250.toChar).toCharArray + ) + } + } + } + "filterLatin()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterLatin( + ("aBc" + 0x250.toChar).toCharArray + ) should equal ("aBc".toCharArray) + } + } + } + "filterNotLatin()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterNotLatin( + ("aBc" + 0x300.toChar).toCharArray + ) should equal ( + ("" + 0x300.toChar).toCharArray + ) + } + } + } + "filterLowerCase()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterLowerCase( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ("ac".toCharArray) + } + } + } + "filterNotLowerCase()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterNotLowerCase( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ( + ("B123" + 0x250.toChar).toCharArray + ) + } + } + } + "filterNumeric()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterNumeric( + ("aBc123" + 0x250.toChar).toCharArray + ) 
should equal ("123".toCharArray) + } + } + } + "filterNotNumeric()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterNotNumeric( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ( + ("aBc" + 0x250.toChar).toCharArray + ) + } + } + } + "filterUpperCase()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterUpperCase( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ("B".toCharArray) + } + } + } + "filterNotUpperCase()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.filterNotUpperCase( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ( + ("ac123" + 0x250.toChar).toCharArray + ) + } + } + } + "ignoreAlphaCase()" when passed { + "String" should returns { + "transformed String" in { + StringTransform.ignoreAlphaCase( + ("aBc123" + 0x250.toChar).toCharArray + ) should equal ( + ("abc123" + 0x250.toChar).toCharArray + ) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala new file mode 100755 index 0000000..6f56b6d --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneAlgorithmSpec.scala @@ -0,0 +1,211 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class MetaphoneAlgorithmSpec extends ScalaTest { "MetaphoneAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + MetaphoneAlgorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + MetaphoneAlgorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // z + 
MetaphoneAlgorithm.compute("z").get should equal ("s") + MetaphoneAlgorithm.compute("zz").get should equal ("s") + + // y + MetaphoneAlgorithm.compute("y").isDefined should be (false) + MetaphoneAlgorithm.compute("zy").get should equal ("s") + MetaphoneAlgorithm.compute("zyz").get should equal ("ss") + MetaphoneAlgorithm.compute("zya").get should equal ("sy") + + // x + MetaphoneAlgorithm.compute("x").get should equal ("s") + MetaphoneAlgorithm.compute("zx").get should equal ("sks") + MetaphoneAlgorithm.compute("zxz").get should equal ("skss") + + // w + MetaphoneAlgorithm.compute("w").isDefined should be (false) + MetaphoneAlgorithm.compute("zw").get should equal ("s") + MetaphoneAlgorithm.compute("zwz").get should equal ("ss") + MetaphoneAlgorithm.compute("zwa").get should equal ("sw") + + // v + MetaphoneAlgorithm.compute("v").get should equal ("f") + MetaphoneAlgorithm.compute("zv").get should equal ("sf") + MetaphoneAlgorithm.compute("zvz").get should equal ("sfs") + + // u + MetaphoneAlgorithm.compute("u").get should equal ("u") + MetaphoneAlgorithm.compute("zu").get should equal ("s") + + // t + MetaphoneAlgorithm.compute("t").get should equal ("t") + MetaphoneAlgorithm.compute("ztiaz").get should equal ("sxs") + MetaphoneAlgorithm.compute("ztioz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zthz").get should equal ("s0s") + MetaphoneAlgorithm.compute("ztchz").get should equal ("sxs") + MetaphoneAlgorithm.compute("ztz").get should equal ("sts") + + // s + MetaphoneAlgorithm.compute("s").get should equal ("s") + MetaphoneAlgorithm.compute("zshz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zsioz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zsiaz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zs").get should equal ("ss") + MetaphoneAlgorithm.compute("zsz").get should equal ("sss") + + // r + MetaphoneAlgorithm.compute("r").get should equal ("r") + MetaphoneAlgorithm.compute("zr").get should equal ("sr") + 
MetaphoneAlgorithm.compute("zrz").get should equal ("srs") + + // q + MetaphoneAlgorithm.compute("q").get should equal ("k") + MetaphoneAlgorithm.compute("zq").get should equal ("sk") + MetaphoneAlgorithm.compute("zqz").get should equal ("sks") + + // p + MetaphoneAlgorithm.compute("p").get should equal ("p") + MetaphoneAlgorithm.compute("zp").get should equal ("sp") + MetaphoneAlgorithm.compute("zph").get should equal ("sf") + MetaphoneAlgorithm.compute("zpz").get should equal ("sps") + + // o + MetaphoneAlgorithm.compute("o").get should equal ("o") + MetaphoneAlgorithm.compute("zo").get should equal ("s") + + // n + MetaphoneAlgorithm.compute("n").get should equal ("n") + MetaphoneAlgorithm.compute("zn").get should equal ("sn") + MetaphoneAlgorithm.compute("znz").get should equal ("sns") + + // m + MetaphoneAlgorithm.compute("m").get should equal ("m") + MetaphoneAlgorithm.compute("zm").get should equal ("sm") + MetaphoneAlgorithm.compute("zmz").get should equal ("sms") + + // l + MetaphoneAlgorithm.compute("l").get should equal ("l") + MetaphoneAlgorithm.compute("zl").get should equal ("sl") + MetaphoneAlgorithm.compute("zlz").get should equal ("sls") + + // k + MetaphoneAlgorithm.compute("k").get should equal ("k") + MetaphoneAlgorithm.compute("zk").get should equal ("sk") + MetaphoneAlgorithm.compute("zck").get should equal ("sk") + + // j + MetaphoneAlgorithm.compute("j").get should equal ("j") + MetaphoneAlgorithm.compute("zj").get should equal ("sj") + MetaphoneAlgorithm.compute("zjz").get should equal ("sjs") + + // i + MetaphoneAlgorithm.compute("i").get should equal ("i") + MetaphoneAlgorithm.compute("zi").get should equal ("s") + + // h + MetaphoneAlgorithm.compute("h").get should equal ("h") // php wrongly says nothing + MetaphoneAlgorithm.compute("zh").get should equal ("sh") // php wrongly says s + MetaphoneAlgorithm.compute("zah").get should equal ("s") + MetaphoneAlgorithm.compute("zchh").get should equal ("sx") + 
MetaphoneAlgorithm.compute("ha").get should equal ("h") + + // g + MetaphoneAlgorithm.compute("g").get should equal ("k") + MetaphoneAlgorithm.compute("zg").get should equal ("sk") + MetaphoneAlgorithm.compute("zgh").get should equal ("skh") // php wrongly says sf + MetaphoneAlgorithm.compute("zghz").get should equal ("shs") // php wrongly says sfs + MetaphoneAlgorithm.compute("zgha").get should equal ("sh") // php wrongly says sf; others wrongly say skh + MetaphoneAlgorithm.compute("zgn").get should equal ("sn") + MetaphoneAlgorithm.compute("zgns").get should equal ("skns") + MetaphoneAlgorithm.compute("zgned").get should equal ("snt") // others wrongly say sknt + MetaphoneAlgorithm.compute("zgneds").get should equal ("sknts") // php wrongly says snts + MetaphoneAlgorithm.compute("zgi").get should equal ("sj") + MetaphoneAlgorithm.compute("zgiz").get should equal ("sjs") + MetaphoneAlgorithm.compute("zge").get should equal ("sj") + MetaphoneAlgorithm.compute("zgez").get should equal ("sjs") + MetaphoneAlgorithm.compute("zgy").get should equal ("sj") + MetaphoneAlgorithm.compute("zgyz").get should equal ("sjs") + MetaphoneAlgorithm.compute("zgz").get should equal ("sks") + + // f + MetaphoneAlgorithm.compute("f").get should equal ("f") + MetaphoneAlgorithm.compute("zf").get should equal ("sf") + MetaphoneAlgorithm.compute("zfz").get should equal ("sfs") + + // e + MetaphoneAlgorithm.compute("e").get should equal ("e") + MetaphoneAlgorithm.compute("ze").get should equal ("s") + + // d + MetaphoneAlgorithm.compute("d").get should equal ("t") + MetaphoneAlgorithm.compute("fudge").get should equal ("fjj") // php wrongly says fj + MetaphoneAlgorithm.compute("dodgy").get should equal ("tjj") // php wrongly says tj; others wrongly say tjjy + MetaphoneAlgorithm.compute("dodgi").get should equal ("tjj") // php wrongly says tj + MetaphoneAlgorithm.compute("zd").get should equal ("st") + MetaphoneAlgorithm.compute("zdz").get should equal ("sts") + + // c + 
MetaphoneAlgorithm.compute("c").get should equal ("k") + MetaphoneAlgorithm.compute("zcia").get should equal ("sx") + MetaphoneAlgorithm.compute("zciaz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zch").get should equal ("sx") + MetaphoneAlgorithm.compute("zchz").get should equal ("sxs") + MetaphoneAlgorithm.compute("zci").get should equal ("ss") + MetaphoneAlgorithm.compute("zciz").get should equal ("sss") + MetaphoneAlgorithm.compute("zce").get should equal ("ss") + MetaphoneAlgorithm.compute("zcez").get should equal ("sss") + MetaphoneAlgorithm.compute("zcy").get should equal ("ss") + MetaphoneAlgorithm.compute("zcyz").get should equal ("sss") + MetaphoneAlgorithm.compute("zsci").get should equal ("ss") + MetaphoneAlgorithm.compute("zsciz").get should equal ("sss") + MetaphoneAlgorithm.compute("zsce").get should equal ("ss") + MetaphoneAlgorithm.compute("zscez").get should equal ("sss") + MetaphoneAlgorithm.compute("zscy").get should equal ("ss") + MetaphoneAlgorithm.compute("zscyz").get should equal ("sss") + MetaphoneAlgorithm.compute("zsch").get should equal ("sskh") // php wrongly says ssx + MetaphoneAlgorithm.compute("zc").get should equal ("sk") + MetaphoneAlgorithm.compute("zcz").get should equal ("sks") + + // b + MetaphoneAlgorithm.compute("b").get should equal ("b") + MetaphoneAlgorithm.compute("zb").get should equal ("sb") + MetaphoneAlgorithm.compute("zbz").get should equal ("sbs") + MetaphoneAlgorithm.compute("zmb").get should equal ("sm") + + // a + MetaphoneAlgorithm.compute("a").get should equal ("a") + MetaphoneAlgorithm.compute("za").get should equal ("s") + + // Miscellaneous. 
+ MetaphoneAlgorithm.compute("dumb").get should equal ("tm") + MetaphoneAlgorithm.compute("smith").get should equal ("sm0") + MetaphoneAlgorithm.compute("school").get should equal ("skhl") // php wrongly says sxl + MetaphoneAlgorithm.compute("merci").get should equal ("mrs") + MetaphoneAlgorithm.compute("cool").get should equal ("kl") + MetaphoneAlgorithm.compute("aebersold").get should equal ("ebrslt") + MetaphoneAlgorithm.compute("gnagy").get should equal ("nj") + MetaphoneAlgorithm.compute("knuth").get should equal ("n0") + MetaphoneAlgorithm.compute("pniewski").get should equal ("nsk") + MetaphoneAlgorithm.compute("wright").get should equal ("rht") // php wrongly says rft + MetaphoneAlgorithm.compute("phone").get should equal ("fn") + MetaphoneAlgorithm.compute("aggregate").get should equal ("akrkt") + MetaphoneAlgorithm.compute("accuracy").get should equal ("akkrs") + MetaphoneAlgorithm.compute("encyclopedia").get should equal ("ensklpt") + MetaphoneAlgorithm.compute("honorificabilitudinitatibus").get should equal ("hnrfkblttnttbs") + MetaphoneAlgorithm.compute("antidisestablishmentarianism").get should equal ("anttsstblxmntrnsm") + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala new file mode 100755 index 0000000..05401b0 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetricSpec.scala @@ -0,0 +1,39 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class MetaphoneMetricSpec extends ScalaTest { "MetaphoneMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + MetaphoneMetric.compare("", "").isDefined should be (false) + MetaphoneMetric.compare("abc", 
"").isDefined should be (false) + MetaphoneMetric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + MetaphoneMetric.compare("123", "123").isDefined should be (false) + MetaphoneMetric.compare("123", "").isDefined should be (false) + MetaphoneMetric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + "Boolean indicating true" in { + MetaphoneMetric.compare("dumb", "dum").get should be (true) + MetaphoneMetric.compare("smith", "smeth").get should be (true) + MetaphoneMetric.compare("merci", "mercy").get should be (true) + } + } + "phonetically dissimilar arguments" should returns { + "Boolean indicating false" in { + MetaphoneMetric.compare("dumb", "gum").get should be (false) + MetaphoneMetric.compare("smith", "kiss").get should be (false) + MetaphoneMetric.compare("merci", "burpy").get should be (false) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala new file mode 100755 index 0000000..db8e073 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/NysiisAlgorithmSpec.scala @@ -0,0 +1,189 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class NysiisAlgorithmSpec extends ScalaTest { "NysiisAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + NysiisAlgorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + NysiisAlgorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // a + NysiisAlgorithm.compute("a").get should equal ("a") + 
NysiisAlgorithm.compute("aa").get should equal ("a") + + // b + NysiisAlgorithm.compute("b").get should equal ("b") + NysiisAlgorithm.compute("bb").get should equal ("bb") + + // c + NysiisAlgorithm.compute("c").get should equal ("c") + NysiisAlgorithm.compute("cc").get should equal ("cc") + + // d + NysiisAlgorithm.compute("d").get should equal ("d") + NysiisAlgorithm.compute("dd").get should equal ("dd") + + // e + NysiisAlgorithm.compute("e").get should equal ("e") + NysiisAlgorithm.compute("ee").get should equal ("y") + + // f + NysiisAlgorithm.compute("f").get should equal ("f") + NysiisAlgorithm.compute("ff").get should equal ("ff") + + // g + NysiisAlgorithm.compute("g").get should equal ("g") + NysiisAlgorithm.compute("gg").get should equal ("gg") + + // h + NysiisAlgorithm.compute("h").get should equal ("h") + NysiisAlgorithm.compute("hh").get should equal ("hh") + + // i + NysiisAlgorithm.compute("i").get should equal ("i") + NysiisAlgorithm.compute("ii").get should equal ("i") + + // j + NysiisAlgorithm.compute("j").get should equal ("j") + NysiisAlgorithm.compute("jj").get should equal ("jj") + + // k + NysiisAlgorithm.compute("k").get should equal ("c") + NysiisAlgorithm.compute("kk").get should equal ("cc") + + // l + NysiisAlgorithm.compute("l").get should equal ("l") + NysiisAlgorithm.compute("ll").get should equal ("ll") + + // m + NysiisAlgorithm.compute("m").get should equal ("m") + NysiisAlgorithm.compute("mm").get should equal ("mn") + + // n + NysiisAlgorithm.compute("n").get should equal ("n") + NysiisAlgorithm.compute("nn").get should equal ("nn") + + // o + NysiisAlgorithm.compute("o").get should equal ("o") + NysiisAlgorithm.compute("oo").get should equal ("o") + + // p + NysiisAlgorithm.compute("p").get should equal ("p") + NysiisAlgorithm.compute("pp").get should equal ("pp") + + // q + NysiisAlgorithm.compute("q").get should equal ("q") + NysiisAlgorithm.compute("qq").get should equal ("qg") + + // r + NysiisAlgorithm.compute("r").get 
should equal ("r") + NysiisAlgorithm.compute("rr").get should equal ("rr") + + // s + NysiisAlgorithm.compute("s").get should equal ("s") + NysiisAlgorithm.compute("ss").get should equal ("s") + + // t + NysiisAlgorithm.compute("t").get should equal ("t") + NysiisAlgorithm.compute("tt").get should equal ("tt") + + // u + NysiisAlgorithm.compute("u").get should equal ("u") + NysiisAlgorithm.compute("uu").get should equal ("u") + + // v + NysiisAlgorithm.compute("v").get should equal ("v") + NysiisAlgorithm.compute("vv").get should equal ("vv") + + // w + NysiisAlgorithm.compute("w").get should equal ("w") + NysiisAlgorithm.compute("ww").get should equal ("ww") + + // x + NysiisAlgorithm.compute("x").get should equal ("x") + NysiisAlgorithm.compute("xx").get should equal ("xx") + + // y + NysiisAlgorithm.compute("y").get should equal ("y") + NysiisAlgorithm.compute("yy").get should equal ("yy") + + // z + NysiisAlgorithm.compute("z").get should equal ("z") + NysiisAlgorithm.compute("zz").get should equal ("z") + + // Head cases. + NysiisAlgorithm.compute("mac").get should equal ("mc") + NysiisAlgorithm.compute("kn").get should equal ("nn") + NysiisAlgorithm.compute("k").get should equal ("c") + NysiisAlgorithm.compute("ph").get should equal ("ff") + NysiisAlgorithm.compute("pf").get should equal ("ff") + NysiisAlgorithm.compute("sch").get should equal ("s") // dropby wrongly says ss + + // Last cases. + NysiisAlgorithm.compute("ee").get should equal ("y") + NysiisAlgorithm.compute("ie").get should equal ("y") + NysiisAlgorithm.compute("dt").get should equal ("d") + NysiisAlgorithm.compute("rt").get should equal ("d") + NysiisAlgorithm.compute("rd").get should equal ("d") + NysiisAlgorithm.compute("nt").get should equal ("d") + NysiisAlgorithm.compute("nd").get should equal ("d") + + // Core cases. 
+ NysiisAlgorithm.compute("eev").get should equal ("eaf") + NysiisAlgorithm.compute("zev").get should equal ("zaf") + NysiisAlgorithm.compute("kkn").get should equal ("cn") + NysiisAlgorithm.compute("sschn").get should equal ("ssn") + NysiisAlgorithm.compute("pph").get should equal ("pf") + + // Miscellaneous. + NysiisAlgorithm.compute("macdonald").get should equal ("mcdanald") + NysiisAlgorithm.compute("phone").get should equal ("ffan") + NysiisAlgorithm.compute("aggregate").get should equal ("agragat") + NysiisAlgorithm.compute("accuracy").get should equal ("acaracy") + NysiisAlgorithm.compute("encyclopedia").get should equal ("encyclapad") + NysiisAlgorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab") + NysiisAlgorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn") + + // Dropby. + NysiisAlgorithm.compute("macintosh").get should equal ("mcant") + NysiisAlgorithm.compute("knuth").get should equal ("nnat") + NysiisAlgorithm.compute("koehn").get should equal ("can") // dropby wrongly says c + NysiisAlgorithm.compute("phillipson").get should equal ("ffalapsan") + NysiisAlgorithm.compute("pfeister").get should equal ("ffastar") + NysiisAlgorithm.compute("schoenhoeft").get should equal ("ssanaft") + NysiisAlgorithm.compute("mckee").get should equal ("mcy") + NysiisAlgorithm.compute("heitschmedt").get should equal ("hatsnad") + NysiisAlgorithm.compute("bart").get should equal ("bad") + NysiisAlgorithm.compute("hurd").get should equal ("had") + NysiisAlgorithm.compute("hunt").get should equal ("had") + NysiisAlgorithm.compute("westerlund").get should equal ("wastarlad") + NysiisAlgorithm.compute("casstevens").get should equal ("castafan") + NysiisAlgorithm.compute("vasquez").get should equal ("vasg") + NysiisAlgorithm.compute("frazier").get should equal ("frasar") + NysiisAlgorithm.compute("bowman").get should equal ("banan") + NysiisAlgorithm.compute("mcknight").get should equal 
("mcnagt") + NysiisAlgorithm.compute("rickert").get should equal ("racad") + NysiisAlgorithm.compute("deutsch").get should equal ("dat") // dropby wrongly says dats + NysiisAlgorithm.compute("westphal").get should equal ("wastfal") + NysiisAlgorithm.compute("shriver").get should equal ("shravar") + NysiisAlgorithm.compute("kuhl").get should equal ("cal") // dropby wrongly says c + NysiisAlgorithm.compute("rawson").get should equal ("rasan") + NysiisAlgorithm.compute("jiles").get should equal ("jal") + NysiisAlgorithm.compute("carraway").get should equal ("caray") + NysiisAlgorithm.compute("yamada").get should equal ("yanad") + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala new file mode 100755 index 0000000..7e52101 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/NysiisMetricSpec.scala @@ -0,0 +1,37 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class NysiisMetricSpec extends ScalaTest { + "NysiisMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + NysiisMetric.compare("", "").isDefined should be (false) + NysiisMetric.compare("abc", "").isDefined should be (false) + NysiisMetric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + NysiisMetric.compare("123", "123").isDefined should be (false) + NysiisMetric.compare("123", "").isDefined should be (false) + NysiisMetric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + "Boolean indicating true" in { + NysiisMetric.compare("ham", "hum").get should be (true) + } + } + "phonetically dissimilar arguments" 
should returns { + "Boolean indicating false" in { + NysiisMetric.compare("dumb", "gum").get should be (false) + } + } + } + } +} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala new file mode 100755 index 0000000..9602f16 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisAlgorithmSpec.scala @@ -0,0 +1,206 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RefinedNysiisAlgorithmSpec extends ScalaTest { "RefinedNysiisAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + RefinedNysiisAlgorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + RefinedNysiisAlgorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // a + RefinedNysiisAlgorithm.compute("a").get should equal ("a") + RefinedNysiisAlgorithm.compute("aa").get should equal ("a") + + // b + RefinedNysiisAlgorithm.compute("b").get should equal ("b") + RefinedNysiisAlgorithm.compute("bb").get should equal ("b") + + // c + RefinedNysiisAlgorithm.compute("c").get should equal ("c") + RefinedNysiisAlgorithm.compute("cc").get should equal ("c") + + // d + RefinedNysiisAlgorithm.compute("d").get should equal ("d") + RefinedNysiisAlgorithm.compute("dd").get should equal ("d") + + // e + RefinedNysiisAlgorithm.compute("e").get should equal ("e") + RefinedNysiisAlgorithm.compute("ee").get should equal ("y") + + // f + RefinedNysiisAlgorithm.compute("f").get should equal ("f") + RefinedNysiisAlgorithm.compute("ff").get should equal ("f") + + // g + RefinedNysiisAlgorithm.compute("g").get should 
equal ("g") + RefinedNysiisAlgorithm.compute("gg").get should equal ("g") + + // h + RefinedNysiisAlgorithm.compute("h").get should equal ("h") + RefinedNysiisAlgorithm.compute("hh").get should equal ("h") + + // i + RefinedNysiisAlgorithm.compute("i").get should equal ("i") + RefinedNysiisAlgorithm.compute("ii").get should equal ("i") + + // j + RefinedNysiisAlgorithm.compute("j").get should equal ("j") + RefinedNysiisAlgorithm.compute("jj").get should equal ("j") + + // k + RefinedNysiisAlgorithm.compute("k").get should equal ("c") + RefinedNysiisAlgorithm.compute("kk").get should equal ("c") + + // l + RefinedNysiisAlgorithm.compute("l").get should equal ("l") + RefinedNysiisAlgorithm.compute("ll").get should equal ("l") + + // m + RefinedNysiisAlgorithm.compute("m").get should equal ("m") + RefinedNysiisAlgorithm.compute("mm").get should equal ("mn") + + // n + RefinedNysiisAlgorithm.compute("n").get should equal ("n") + RefinedNysiisAlgorithm.compute("nn").get should equal ("n") + + // o + RefinedNysiisAlgorithm.compute("o").get should equal ("o") + RefinedNysiisAlgorithm.compute("oo").get should equal ("o") + + // p + RefinedNysiisAlgorithm.compute("p").get should equal ("p") + RefinedNysiisAlgorithm.compute("pp").get should equal ("p") + + // q + RefinedNysiisAlgorithm.compute("q").get should equal ("q") + RefinedNysiisAlgorithm.compute("qq").get should equal ("qg") + + // r + RefinedNysiisAlgorithm.compute("r").get should equal ("r") + RefinedNysiisAlgorithm.compute("rr").get should equal ("r") + + // s + RefinedNysiisAlgorithm.compute("s").get should equal ("s") + RefinedNysiisAlgorithm.compute("ss").get should equal ("s") + + // t + RefinedNysiisAlgorithm.compute("t").get should equal ("t") + RefinedNysiisAlgorithm.compute("tt").get should equal ("t") + + // u + RefinedNysiisAlgorithm.compute("u").get should equal ("u") + RefinedNysiisAlgorithm.compute("uu").get should equal ("u") + + // v + RefinedNysiisAlgorithm.compute("v").get should equal ("v") + 
RefinedNysiisAlgorithm.compute("vv").get should equal ("v") + + // w + RefinedNysiisAlgorithm.compute("w").get should equal ("w") + RefinedNysiisAlgorithm.compute("ww").get should equal ("w") + + // x + RefinedNysiisAlgorithm.compute("x").get should equal ("x") + RefinedNysiisAlgorithm.compute("xx").get should equal ("x") + + // y + RefinedNysiisAlgorithm.compute("y").get should equal ("y") + RefinedNysiisAlgorithm.compute("yy").get should equal ("y") + RefinedNysiisAlgorithm.compute("ybyb").get should equal ("ybab") + + // z + RefinedNysiisAlgorithm.compute("z").get should equal ("z") + RefinedNysiisAlgorithm.compute("zz").get should equal ("z") + + // Head cases. + RefinedNysiisAlgorithm.compute("mac").get should equal ("mc") + RefinedNysiisAlgorithm.compute("pf").get should equal ("f") + + // Last cases. + RefinedNysiisAlgorithm.compute("ix").get should equal ("ic") + RefinedNysiisAlgorithm.compute("ex").get should equal ("ec") + RefinedNysiisAlgorithm.compute("ye").get should equal ("y") + RefinedNysiisAlgorithm.compute("ee").get should equal ("y") + RefinedNysiisAlgorithm.compute("ie").get should equal ("y") + RefinedNysiisAlgorithm.compute("dt").get should equal ("d") + RefinedNysiisAlgorithm.compute("rt").get should equal ("d") + RefinedNysiisAlgorithm.compute("rd").get should equal ("d") + RefinedNysiisAlgorithm.compute("nt").get should equal ("d") + RefinedNysiisAlgorithm.compute("nd").get should equal ("d") + + // Core cases. 
+ RefinedNysiisAlgorithm.compute("bevb").get should equal ("bafb") + RefinedNysiisAlgorithm.compute("bghtb").get should equal ("bgtb") + RefinedNysiisAlgorithm.compute("bdgb").get should equal ("bgb") + RefinedNysiisAlgorithm.compute("bphb").get should equal ("bfb") + RefinedNysiisAlgorithm.compute("bknb").get should equal ("bnb") + RefinedNysiisAlgorithm.compute("bshb").get should equal ("bsb") + RefinedNysiisAlgorithm.compute("bschb").get should equal ("bsb") + RefinedNysiisAlgorithm.compute("bywb").get should equal ("bab") + RefinedNysiisAlgorithm.compute("byw").get should equal ("by") + RefinedNysiisAlgorithm.compute("ywb").get should equal ("yb") + RefinedNysiisAlgorithm.compute("bwrb").get should equal ("brb") + + // Transcode cases. + RefinedNysiisAlgorithm.compute("bay").get should equal ("by") + + // Miscellaneous. + RefinedNysiisAlgorithm.compute("macdonald").get should equal ("mcdanald") + RefinedNysiisAlgorithm.compute("phone").get should equal ("fan") + RefinedNysiisAlgorithm.compute("aggregate").get should equal ("agragat") + RefinedNysiisAlgorithm.compute("accuracy").get should equal ("acaracy") + RefinedNysiisAlgorithm.compute("encyclopedia").get should equal ("encaclapad") + RefinedNysiisAlgorithm.compute("honorificabilitudinitatibus").get should equal ("hanarafacabalatadanatatab") + RefinedNysiisAlgorithm.compute("antidisestablishmentarianism").get should equal ("antadasastablasnantaranasn") + + // Dropby. 
+ RefinedNysiisAlgorithm.compute("edwards").get should equal ("edwad") + RefinedNysiisAlgorithm.compute("parez").get should equal ("par") + RefinedNysiisAlgorithm.compute("macintosh").get should equal ("mcantas") + RefinedNysiisAlgorithm.compute("phillipson").get should equal ("falapsan") + RefinedNysiisAlgorithm.compute("haddix").get should equal ("hadac") + RefinedNysiisAlgorithm.compute("essex").get should equal ("esac") + RefinedNysiisAlgorithm.compute("moye").get should equal ("my") + RefinedNysiisAlgorithm.compute("mckee").get should equal ("mcy") + RefinedNysiisAlgorithm.compute("mackie").get should equal ("mcy") + RefinedNysiisAlgorithm.compute("heitschmidt").get should equal ("hatsnad") + RefinedNysiisAlgorithm.compute("bart").get should equal ("bad") + RefinedNysiisAlgorithm.compute("hurd").get should equal ("had") + RefinedNysiisAlgorithm.compute("hunt").get should equal ("had") + RefinedNysiisAlgorithm.compute("westerlund").get should equal ("wastarlad") + RefinedNysiisAlgorithm.compute("evers").get should equal ("evar") + RefinedNysiisAlgorithm.compute("devito").get should equal ("dafat") + RefinedNysiisAlgorithm.compute("rawson").get should equal ("rasan") + RefinedNysiisAlgorithm.compute("shoulders").get should equal ("saldar") + RefinedNysiisAlgorithm.compute("leighton").get should equal ("lagtan") + RefinedNysiisAlgorithm.compute("wooldridge").get should equal ("waldrag") + RefinedNysiisAlgorithm.compute("oliphant").get should equal ("olafad") + RefinedNysiisAlgorithm.compute("hatchett").get should equal ("hatcat") + RefinedNysiisAlgorithm.compute("mcknight").get should equal ("mcnagt") + RefinedNysiisAlgorithm.compute("rickert").get should equal ("racad") + RefinedNysiisAlgorithm.compute("bowman").get should equal ("banan") + RefinedNysiisAlgorithm.compute("vasquez").get should equal ("vasg") + RefinedNysiisAlgorithm.compute("bashaw").get should equal ("bas") + RefinedNysiisAlgorithm.compute("schoenhoeft").get should equal ("sanaft") // dropby 
wrongly says scanaft + RefinedNysiisAlgorithm.compute("heywood").get should equal ("had") + RefinedNysiisAlgorithm.compute("hayman").get should equal ("hanan") + RefinedNysiisAlgorithm.compute("seawright").get should equal ("saragt") + RefinedNysiisAlgorithm.compute("kratzer").get should equal ("cratsar") + RefinedNysiisAlgorithm.compute("canaday").get should equal ("canady") + RefinedNysiisAlgorithm.compute("crepeau").get should equal ("crap") + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala new file mode 100755 index 0000000..8c6d9c1 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetricSpec.scala @@ -0,0 +1,35 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RefinedNysiisMetricSpec extends ScalaTest { "RefinedNysiisMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + RefinedNysiisMetric.compare("", "").isDefined should be (false) + RefinedNysiisMetric.compare("abc", "").isDefined should be (false) + RefinedNysiisMetric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + RefinedNysiisMetric.compare("123", "123").isDefined should be (false) + RefinedNysiisMetric.compare("123", "").isDefined should be (false) + RefinedNysiisMetric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + "Boolean indicating true" in { + RefinedNysiisMetric.compare("ham", "hum").get should be (true) + } + } + "phonetically dissimilar arguments" should returns { + "Boolean indicating false" in { + RefinedNysiisMetric.compare("dumb", "gum").get should 
be (false) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala new file mode 100755 index 0000000..ca39da8 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexAlgorithmSpec.scala @@ -0,0 +1,160 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RefinedSoundexAlgorithmSpec extends ScalaTest { "RefinedSoundexAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + RefinedSoundexAlgorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + RefinedSoundexAlgorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // a + RefinedSoundexAlgorithm.compute("a").get should equal ("a0") + RefinedSoundexAlgorithm.compute("aa").get should equal ("a0") + + // b + RefinedSoundexAlgorithm.compute("b").get should equal ("b1") + RefinedSoundexAlgorithm.compute("bb").get should equal ("b1") + + // c + RefinedSoundexAlgorithm.compute("c").get should equal ("c3") + RefinedSoundexAlgorithm.compute("cc").get should equal ("c3") + + // d + RefinedSoundexAlgorithm.compute("d").get should equal ("d6") + RefinedSoundexAlgorithm.compute("dd").get should equal ("d6") + + // e + RefinedSoundexAlgorithm.compute("e").get should equal ("e0") + RefinedSoundexAlgorithm.compute("ee").get should equal ("e0") + + // f + RefinedSoundexAlgorithm.compute("f").get should equal ("f2") + RefinedSoundexAlgorithm.compute("ff").get should equal ("f2") + + // g + RefinedSoundexAlgorithm.compute("g").get should equal ("g4") + RefinedSoundexAlgorithm.compute("gg").get should equal 
("g4") + + // h + RefinedSoundexAlgorithm.compute("h").get should equal ("h0") + RefinedSoundexAlgorithm.compute("hh").get should equal ("h0") + + // i + RefinedSoundexAlgorithm.compute("i").get should equal ("i0") + RefinedSoundexAlgorithm.compute("ii").get should equal ("i0") + + // j + RefinedSoundexAlgorithm.compute("j").get should equal ("j4") + RefinedSoundexAlgorithm.compute("jj").get should equal ("j4") + + // k + RefinedSoundexAlgorithm.compute("k").get should equal ("k3") + RefinedSoundexAlgorithm.compute("kk").get should equal ("k3") + + // l + RefinedSoundexAlgorithm.compute("l").get should equal ("l7") + RefinedSoundexAlgorithm.compute("ll").get should equal ("l7") + + // m + RefinedSoundexAlgorithm.compute("m").get should equal ("m8") + RefinedSoundexAlgorithm.compute("mm").get should equal ("m8") + + // n + RefinedSoundexAlgorithm.compute("n").get should equal ("n8") + RefinedSoundexAlgorithm.compute("nn").get should equal ("n8") + + // o + RefinedSoundexAlgorithm.compute("o").get should equal ("o0") + RefinedSoundexAlgorithm.compute("oo").get should equal ("o0") + + // p + RefinedSoundexAlgorithm.compute("p").get should equal ("p1") + RefinedSoundexAlgorithm.compute("pp").get should equal ("p1") + + // q + RefinedSoundexAlgorithm.compute("q").get should equal ("q5") + RefinedSoundexAlgorithm.compute("qq").get should equal ("q5") + + // r + RefinedSoundexAlgorithm.compute("r").get should equal ("r9") + RefinedSoundexAlgorithm.compute("rr").get should equal ("r9") + + // s + RefinedSoundexAlgorithm.compute("s").get should equal ("s3") + RefinedSoundexAlgorithm.compute("ss").get should equal ("s3") + + // t + RefinedSoundexAlgorithm.compute("t").get should equal ("t6") + RefinedSoundexAlgorithm.compute("tt").get should equal ("t6") + + // u + RefinedSoundexAlgorithm.compute("u").get should equal ("u0") + RefinedSoundexAlgorithm.compute("uu").get should equal ("u0") + + // v + RefinedSoundexAlgorithm.compute("v").get should equal ("v2") + 
RefinedSoundexAlgorithm.compute("vv").get should equal ("v2") + + // w + RefinedSoundexAlgorithm.compute("w").get should equal ("w0") + RefinedSoundexAlgorithm.compute("ww").get should equal ("w0") + + // x + RefinedSoundexAlgorithm.compute("x").get should equal ("x5") + RefinedSoundexAlgorithm.compute("xx").get should equal ("x5") + + // y + RefinedSoundexAlgorithm.compute("y").get should equal ("y0") + RefinedSoundexAlgorithm.compute("yy").get should equal ("y0") + + // z + RefinedSoundexAlgorithm.compute("z").get should equal ("z5") + RefinedSoundexAlgorithm.compute("zz").get should equal ("z5") + + // Starting with letter then numbers. + RefinedSoundexAlgorithm.compute("x123456").get should equal ("x5") + RefinedSoundexAlgorithm.compute("a123456").get should equal ("a0") + RefinedSoundexAlgorithm.compute("f123456").get should equal ("f2") + + // Miscellaneous. + RefinedSoundexAlgorithm.compute("braz").get should equal ("b1905") + RefinedSoundexAlgorithm.compute("broz").get should equal ("b1905") + RefinedSoundexAlgorithm.compute("caren").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("carren").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("coram").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("corran").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("curreen").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("curwen").get should equal ("c30908") + RefinedSoundexAlgorithm.compute("hairs").get should equal ("h093") + RefinedSoundexAlgorithm.compute("hark").get should equal ("h093") + RefinedSoundexAlgorithm.compute("hars").get should equal ("h093") + RefinedSoundexAlgorithm.compute("hayers").get should equal ("h093") + RefinedSoundexAlgorithm.compute("heers").get should equal ("h093") + RefinedSoundexAlgorithm.compute("hiers").get should equal ("h093") + RefinedSoundexAlgorithm.compute("lambard").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lambart").get should equal 
("l7081096") + RefinedSoundexAlgorithm.compute("lambert").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lambird").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lampaert").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lampart").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lamport").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("limbert").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("lombard").get should equal ("l7081096") + RefinedSoundexAlgorithm.compute("nolton").get should equal ("n807608") + RefinedSoundexAlgorithm.compute("noulton").get should equal ("n807608") + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala new file mode 100755 index 0000000..84f547a --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetricSpec.scala @@ -0,0 +1,35 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RefinedSoundexMetricSpec extends ScalaTest { "RefinedSoundexMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + RefinedSoundexMetric.compare("", "").isDefined should be (false) + RefinedSoundexMetric.compare("abc", "").isDefined should be (false) + RefinedSoundexMetric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + RefinedSoundexMetric.compare("123", "123").isDefined should be (false) + RefinedSoundexMetric.compare("123", "").isDefined should be (false) + RefinedSoundexMetric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + 
"Boolean indicating true" in { + RefinedSoundexMetric.compare("robert", "rupert").get should be (true) + } + } + "phonetically dissimilar arguments" should returns { + "Boolean indicating false" in { + RefinedSoundexMetric.compare("robert", "rubin").get should be (false) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala new file mode 100755 index 0000000..157a24b --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithmSpec.scala @@ -0,0 +1,159 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class SoundexAlgorithmSpec extends ScalaTest { "SoundexAlgorithm" should provide { + "compute method" when passed { + "empty argument" should returns { + "None" in { + SoundexAlgorithm.compute("").isDefined should be (false) + } + } + "non-phonetic argument" should returns { + "None" in { + SoundexAlgorithm.compute("123").isDefined should be (false) + } + } + "phonetic argument" should returns { + "Some" in { + // a + SoundexAlgorithm.compute("a").get should equal ("a000") + SoundexAlgorithm.compute("aa").get should equal ("a000") + + // b + SoundexAlgorithm.compute("b").get should equal ("b000") + SoundexAlgorithm.compute("bb").get should equal ("b000") + + // c + SoundexAlgorithm.compute("c").get should equal ("c000") + SoundexAlgorithm.compute("cc").get should equal ("c000") + + // d + SoundexAlgorithm.compute("d").get should equal ("d000") + SoundexAlgorithm.compute("dd").get should equal ("d000") + + // e + SoundexAlgorithm.compute("e").get should equal ("e000") + SoundexAlgorithm.compute("ee").get should equal ("e000") + + // f + SoundexAlgorithm.compute("f").get should equal ("f000") + SoundexAlgorithm.compute("ff").get 
should equal ("f000") + + // g + SoundexAlgorithm.compute("g").get should equal ("g000") + SoundexAlgorithm.compute("gg").get should equal ("g000") + + // h + SoundexAlgorithm.compute("h").get should equal ("h000") + SoundexAlgorithm.compute("hh").get should equal ("h000") + + // i + SoundexAlgorithm.compute("i").get should equal ("i000") + SoundexAlgorithm.compute("ii").get should equal ("i000") + + // j + SoundexAlgorithm.compute("j").get should equal ("j000") + SoundexAlgorithm.compute("jj").get should equal ("j000") + + // k + SoundexAlgorithm.compute("k").get should equal ("k000") + SoundexAlgorithm.compute("kk").get should equal ("k000") + + // l + SoundexAlgorithm.compute("l").get should equal ("l000") + SoundexAlgorithm.compute("ll").get should equal ("l000") + + // m + SoundexAlgorithm.compute("m").get should equal ("m000") + SoundexAlgorithm.compute("mm").get should equal ("m000") + + // n + SoundexAlgorithm.compute("n").get should equal ("n000") + SoundexAlgorithm.compute("nn").get should equal ("n000") + + // o + SoundexAlgorithm.compute("o").get should equal ("o000") + SoundexAlgorithm.compute("oo").get should equal ("o000") + + // p + SoundexAlgorithm.compute("p").get should equal ("p000") + SoundexAlgorithm.compute("pp").get should equal ("p000") + + // q + SoundexAlgorithm.compute("q").get should equal ("q000") + SoundexAlgorithm.compute("qq").get should equal ("q000") + + // r + SoundexAlgorithm.compute("r").get should equal ("r000") + SoundexAlgorithm.compute("rr").get should equal ("r000") + + // s + SoundexAlgorithm.compute("s").get should equal ("s000") + SoundexAlgorithm.compute("ss").get should equal ("s000") + + // t + SoundexAlgorithm.compute("t").get should equal ("t000") + SoundexAlgorithm.compute("tt").get should equal ("t000") + + // u + SoundexAlgorithm.compute("u").get should equal ("u000") + SoundexAlgorithm.compute("uu").get should equal ("u000") + + // v + SoundexAlgorithm.compute("v").get should equal ("v000") + 
SoundexAlgorithm.compute("vv").get should equal ("v000") + + // w + SoundexAlgorithm.compute("w").get should equal ("w000") + SoundexAlgorithm.compute("ww").get should equal ("w000") + + // x + SoundexAlgorithm.compute("x").get should equal ("x000") + SoundexAlgorithm.compute("xx").get should equal ("x000") + + // y + SoundexAlgorithm.compute("y").get should equal ("y000") + SoundexAlgorithm.compute("yy").get should equal ("y000") + + // z + SoundexAlgorithm.compute("z").get should equal ("z000") + SoundexAlgorithm.compute("zz").get should equal ("z000") + + // Starting with letter then numbers. + SoundexAlgorithm.compute("x123456").get should equal ("x000") + SoundexAlgorithm.compute("a123456").get should equal ("a000") + SoundexAlgorithm.compute("f123456").get should equal ("f000") + + // Miscellaneous. + SoundexAlgorithm.compute("abc").get should equal ("a120") + SoundexAlgorithm.compute("xyz").get should equal ("x200") + SoundexAlgorithm.compute("robert").get should equal ("r163") + SoundexAlgorithm.compute("rupert").get should equal ("r163") + SoundexAlgorithm.compute("rubin").get should equal ("r150") + SoundexAlgorithm.compute("ashcraft").get should equal ("a261") + SoundexAlgorithm.compute("tymczak").get should equal ("t522") + SoundexAlgorithm.compute("pfister").get should equal ("p236") + SoundexAlgorithm.compute("euler").get should equal ("e460") + SoundexAlgorithm.compute("gauss").get should equal ("g200") + SoundexAlgorithm.compute("hilbert").get should equal ("h416") + SoundexAlgorithm.compute("knuth").get should equal ("k530") + SoundexAlgorithm.compute("lloyd").get should equal ("l300") + SoundexAlgorithm.compute("lukasiewicz").get should equal ("l222") + SoundexAlgorithm.compute("ashcroft").get should equal ("a261") + SoundexAlgorithm.compute("tymczak").get should equal ("t522") + SoundexAlgorithm.compute("pfister").get should equal ("p236") + SoundexAlgorithm.compute("ellery").get should equal ("e460") + SoundexAlgorithm.compute("ghosh").get 
should equal ("g200") + SoundexAlgorithm.compute("heilbronn").get should equal ("h416") + SoundexAlgorithm.compute("kant").get should equal ("k530") + SoundexAlgorithm.compute("ladd").get should equal ("l300") + SoundexAlgorithm.compute("lissajous").get should equal ("l222") + SoundexAlgorithm.compute("fusedale").get should equal ("f234") + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala new file mode 100755 index 0000000..b903add --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/phonetic/SoundexMetricSpec.scala @@ -0,0 +1,35 @@ +package com.rockymadden.stringmetric.phonetic + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class SoundexMetricSpec extends ScalaTest { "SoundexMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + SoundexMetric.compare("", "").isDefined should be (false) + SoundexMetric.compare("abc", "").isDefined should be (false) + SoundexMetric.compare("", "xyz").isDefined should be (false) + } + } + "non-phonetic arguments" should returns { + "None" in { + SoundexMetric.compare("123", "123").isDefined should be (false) + SoundexMetric.compare("123", "").isDefined should be (false) + SoundexMetric.compare("", "123").isDefined should be (false) + } + } + "phonetically similar arguments" should returns { + "Boolean indicating true" in { + SoundexMetric.compare("robert", "rupert").get should be (true) + } + } + "phonetically dissimilar arguments" should returns { + "Boolean indicating false" in { + SoundexMetric.compare("robert", "rubin").get should be (false) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala 
b/core/src/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala
new file mode 100755
index 0000000..b7a3b58
--- /dev/null
+++ b/core/src/test/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetricSpec.scala
@@ -0,0 +1,64 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Specification for `DiceSorensenMetric(n).compare`: empty input, equal and disjoint strings,
+ * strings shorter than the n-gram size, and reference scores, each for several n-gram sizes.
+ */
+@RunWith(classOf[JUnitRunner])
+final class DiceSorensenMetricSpec extends ScalaTest { "DiceSorensenMetric" should provide {
+  "compare method" when passed {
+    "empty arguments" should returns {
+      "None" in {
+        DiceSorensenMetric(1).compare("", "").isDefined should be (false)
+        DiceSorensenMetric(1).compare("abc", "").isDefined should be (false)
+        DiceSorensenMetric(1).compare("", "xyz").isDefined should be (false)
+      }
+    }
+    "equal arguments" should returns {
+      "1" in {
+        DiceSorensenMetric(1).compare("abc", "abc").get should be (1)
+        DiceSorensenMetric(2).compare("abc", "abc").get should be (1)
+        DiceSorensenMetric(3).compare("abc", "abc").get should be (1)
+      }
+    }
+    "unequal arguments" should returns {
+      "0" in {
+        DiceSorensenMetric(1).compare("abc", "xyz").get should be (0)
+        DiceSorensenMetric(2).compare("abc", "xyz").get should be (0)
+        DiceSorensenMetric(3).compare("abc", "xyz").get should be (0)
+      }
+    }
+    "invalid arguments" should returns {
+      "None" in {
+        DiceSorensenMetric(2).compare("n", "naght").isDefined should be (false)
+        DiceSorensenMetric(2).compare("night", "n").isDefined should be (false)
+        DiceSorensenMetric(3).compare("ni", "naght").isDefined should be (false)
+        DiceSorensenMetric(3).compare("night", "na").isDefined should be (false)
+      }
+    }
+    "valid arguments" should returns {
+      "Double indicating distance" in {
+        DiceSorensenMetric(1).compare("night", "nacht").get should be (0.6)
+        DiceSorensenMetric(1).compare("night", "naght").get should be (0.8)
+        DiceSorensenMetric(1).compare("context", "contact").get should be (0.7142857142857143)
+
+        DiceSorensenMetric(2).compare("night", "nacht").get should be (0.25)
+        DiceSorensenMetric(2).compare("night", "naght").get should be (0.5)
+        DiceSorensenMetric(2).compare("context", "contact").get should be (0.5)
+        DiceSorensenMetric(2).compare("contextcontext", "contact").get should be (0.3157894736842105)
+        DiceSorensenMetric(2).compare("context", "contactcontact").get should be (0.3157894736842105)
+        DiceSorensenMetric(2).compare("ht", "nacht").get should be (0.4)
+        DiceSorensenMetric(2).compare("xp", "nacht").get should be (0)
+        DiceSorensenMetric(2).compare("ht", "hththt").get should be (0.3333333333333333)
+
+        DiceSorensenMetric(3).compare("night", "nacht").get should be (0)
+        DiceSorensenMetric(3).compare("night", "naght").get should be (0.3333333333333333)
+        DiceSorensenMetric(3).compare("context", "contact").get should be (0.4)
+      }
+    }
+  }
+}}
diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala
new file mode 100755
index 0000000..abb56db
--- /dev/null
+++ b/core/src/test/scala/com/rockymadden/stringmetric/similarity/HammingMetricSpec.scala
@@ -0,0 +1,41 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Specification for `HammingMetric.compare`: empty input is expected to yield None, equal
+ * strings 0, and sample equal-length pairs are checked against their expected Int results.
+ */
+@RunWith(classOf[JUnitRunner])
+final class HammingMetricSpec extends ScalaTest { "HammingMetric" should provide {
+  "compare method" when passed {
+    "empty arguments" should returns {
+      "None" in {
+        HammingMetric.compare("", "").isDefined should be (false)
+        HammingMetric.compare("abc", "").isDefined should be (false)
+        HammingMetric.compare("", "xyz").isDefined should be (false)
+      }
+    }
+    "equal arguments" should returns {
+      "0" in {
+        HammingMetric.compare("abc", "abc").get should be (0)
+        HammingMetric.compare("123", "123").get should be (0)
+      }
+    }
+    "unequal arguments" should returns {
+      "Int indicating distance" in {
+        HammingMetric.compare("abc", "xyz").get should be (3)
+        HammingMetric.compare("123", "456").get should be (3)
+      }
+    }
+    "valid arguments" should returns {
+      "Int indicating distance" in {
+        HammingMetric.compare("toned", "roses").get should be (3)
+        HammingMetric.compare("1011101", "1001001").get should be (2)
+        HammingMetric.compare("2173896", "2233796").get should be (3)
+      }
+    }
+  }
+}}
diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala
new file mode 100755
index 0000000..4c04193
--- /dev/null
+++ b/core/src/test/scala/com/rockymadden/stringmetric/similarity/JaccardMetricSpec.scala
@@ -0,0 +1,64 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Specification for `JaccardMetric(n).compare`: empty input, equal and disjoint strings,
+ * strings shorter than the n-gram size, and reference scores, each for several n-gram sizes.
+ */
+@RunWith(classOf[JUnitRunner])
+final class JaccardMetricSpec extends ScalaTest { "JaccardMetric" should provide {
+  "compare method" when passed {
+    "empty arguments" should returns {
+      "None" in {
+        JaccardMetric(1).compare("", "").isDefined should be (false)
+        JaccardMetric(1).compare("abc", "").isDefined should be (false)
+        JaccardMetric(1).compare("", "xyz").isDefined should be (false)
+      }
+    }
+    "equal arguments" should returns {
+      "1" in {
+        JaccardMetric(1).compare("abc", "abc").get should be (1)
+        JaccardMetric(2).compare("abc", "abc").get should be (1)
+        JaccardMetric(3).compare("abc", "abc").get should be (1)
+      }
+    }
+    "unequal arguments" should returns {
+      "0" in {
+        JaccardMetric(1).compare("abc", "xyz").get should be (0)
+        JaccardMetric(2).compare("abc", "xyz").get should be (0)
+        JaccardMetric(3).compare("abc", "xyz").get should be (0)
+      }
+    }
+    "invalid arguments" should returns {
+      "None" in {
+        JaccardMetric(2).compare("n", "naght").isDefined should be (false)
+        JaccardMetric(2).compare("night", "n").isDefined should be (false)
+        JaccardMetric(3).compare("ni", "naght").isDefined should be (false)
+        JaccardMetric(3).compare("night", "na").isDefined should be (false)
+      }
+    }
+    "valid arguments" should returns {
+      "Double indicating distance" in {
+        JaccardMetric(1).compare("night", "nacht").get should be (0.42857142857142855)
+        JaccardMetric(1).compare("night", "naght").get should be (0.6666666666666666)
+        JaccardMetric(1).compare("context", "contact").get should be (0.5555555555555556)
+
+        JaccardMetric(2).compare("night", "nacht").get should be (0.14285714285714285)
+        JaccardMetric(2).compare("night", "naght").get should be (0.3333333333333333)
+        JaccardMetric(2).compare("context", "contact").get should be (0.3333333333333333)
+        JaccardMetric(2).compare("contextcontext", "contact").get should be (0.1875)
+        JaccardMetric(2).compare("context", "contactcontact").get should be (0.1875)
+        JaccardMetric(2).compare("ht", "nacht").get should be (0.25)
+        JaccardMetric(2).compare("xp", "nacht").get should be (0)
+        JaccardMetric(2).compare("ht", "hththt").get should be (0.2)
+
+        JaccardMetric(3).compare("night", "nacht").get should be (0)
+        JaccardMetric(3).compare("night", "naght").get should be (0.2)
+        JaccardMetric(3).compare("context", "contact").get should be (0.25)
+      }
+    }
+  }
+}}
diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala
new file mode 100755
index 0000000..26ae38e
--- /dev/null
+++ b/core/src/test/scala/com/rockymadden/stringmetric/similarity/JaroMetricSpec.scala
@@ -0,0 +1,55 @@
+package com.rockymadden.stringmetric.similarity
+
+import com.rockymadden.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/**
+ * Specification for `JaroMetric.compare`: empty input is expected to yield None, equal strings 1,
+ * fully dissimilar strings 0, plus reference scores and two relative-ordering checks.
+ */
+@RunWith(classOf[JUnitRunner])
+final class JaroMetricSpec extends ScalaTest { "JaroMetric" should provide {
+  "compare method" when passed {
+    "empty arguments" should returns {
+      "None" in {
+        JaroMetric.compare("", "").isDefined should be (false)
+        JaroMetric.compare("abc", "").isDefined should be (false)
+        JaroMetric.compare("", "xyz").isDefined should be (false)
+      }
+    }
+    "equal arguments" should returns {
+      "1" in {
+        JaroMetric.compare("a", "a").get should be (1)
+        JaroMetric.compare("abc", "abc").get should be (1)
+        JaroMetric.compare("123", "123").get should be (1)
+      }
+    }
+    "unequal arguments" should returns {
+      "0" in {
+        JaroMetric.compare("abc", "xyz").get should be (0)
+        JaroMetric.compare("123", "456").get should be (0)
+      }
+    }
+    "valid arguments" should returns {
+      "Double indicating distance" in {
+        JaroMetric.compare("aa", "a").get should be (0.8333333333333334)
+        JaroMetric.compare("a", "aa").get should be (0.8333333333333334)
+        JaroMetric.compare("veryveryverylong", "v").get should be (0.6875)
+        JaroMetric.compare("v", "veryveryverylong").get should be (0.6875)
+        JaroMetric.compare("martha", "marhta").get should be (0.9444444444444445)
+        JaroMetric.compare("dwayne", "duane").get should be (0.8222222222222223)
+        JaroMetric.compare("dixon", "dicksonx").get should be (0.7666666666666666)
+        JaroMetric.compare("abcvwxyz", "cabvwxyz").get should be (0.9583333333333334)
+        JaroMetric.compare("jones", "johnson").get should be (0.7904761904761904)
+        JaroMetric.compare("henka", "henkan").get should be (0.9444444444444445)
+        JaroMetric.compare("fvie", "ten").get should be (0)
+
+        JaroMetric.compare("zac ephron", "zac efron").get should be >
+          JaroMetric.compare("zac ephron", "kai ephron").get
+        JaroMetric.compare("brittney spears", "britney spears").get should be >
+          JaroMetric.compare("brittney spears", "brittney startzman").get
+      }
+    }
+  }
+}}
diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala
new file mode 100755
index 0000000..d645456
--- /dev/null
+++ 
b/core/src/test/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetricSpec.scala @@ -0,0 +1,51 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class JaroWinklerMetricSpec extends ScalaTest { "JaroWinklerMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + JaroWinklerMetric.compare("", "").isDefined should be (false) + JaroWinklerMetric.compare("abc", "").isDefined should be (false) + JaroWinklerMetric.compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + JaroWinklerMetric.compare("a", "a").get should be (1) + JaroWinklerMetric.compare("abc", "abc").get should be (1) + JaroWinklerMetric.compare("123", "123").get should be (1) + } + } + "unequal arguments" should returns { + "0" in { + JaroWinklerMetric.compare("abc", "xyz").get should be (0) + JaroWinklerMetric.compare("123", "456").get should be (0) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + JaroWinklerMetric.compare("aa", "a").get should be (0.8500000000000001) + JaroWinklerMetric.compare("a", "aa").get should be (0.8500000000000001) + JaroWinklerMetric.compare("veryveryverylong", "v").get should be (0.71875) + JaroWinklerMetric.compare("v", "veryveryverylong").get should be (0.71875) + JaroWinklerMetric.compare("martha", "marhta").get should be (0.9611111111111111) + JaroWinklerMetric.compare("dwayne", "duane").get should be (0.8400000000000001) + JaroWinklerMetric.compare("dixon", "dicksonx").get should be (0.8133333333333332) + JaroWinklerMetric.compare("abcvwxyz", "cabvwxyz").get should be (0.9583333333333334) + JaroWinklerMetric.compare("jones", "johnson").get should be (0.8323809523809523) + JaroWinklerMetric.compare("henka", "henkan").get should be (0.9666666666666667) + 
JaroWinklerMetric.compare("fvie", "ten").get should be (0) + + JaroWinklerMetric.compare("zac ephron", "zac efron").get should be > + JaroWinklerMetric.compare("zac ephron", "kai ephron").get + JaroWinklerMetric.compare("brittney spears", "britney spears").get should be > + JaroWinklerMetric.compare("brittney spears", "brittney startzman").get + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala new file mode 100755 index 0000000..8e1538b --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/similarity/LevenshteinMetricSpec.scala @@ -0,0 +1,50 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class LevenshteinMetricSpec extends ScalaTest { "LevenshteinMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + LevenshteinMetric.compare("", "").isDefined should be (false) + LevenshteinMetric.compare("abc", "").isDefined should be (false) + LevenshteinMetric.compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "0" in { + LevenshteinMetric.compare("abc", "abc").get should be (0) + LevenshteinMetric.compare("123", "123").get should be (0) + } + } + "unequal arguments" should returns { + "Int indicating distance" in { + LevenshteinMetric.compare("abc", "xyz").get should be (3) + LevenshteinMetric.compare("123", "456").get should be (3) + } + } + "valid arguments" should returns { + "Int indicating distance" in { + LevenshteinMetric.compare("abc", "a").get should be (2) + LevenshteinMetric.compare("a", "abc").get should be (2) + LevenshteinMetric.compare("abc", "c").get should be (2) + LevenshteinMetric.compare("c", "abc").get should be (2) + 
LevenshteinMetric.compare("sitting", "kitten").get should be (3) + LevenshteinMetric.compare("kitten", "sitting").get should be (3) + LevenshteinMetric.compare("cake", "drake").get should be (2) + LevenshteinMetric.compare("drake", "cake").get should be (2) + LevenshteinMetric.compare("saturday", "sunday").get should be (3) + LevenshteinMetric.compare("sunday", "saturday").get should be (3) + LevenshteinMetric.compare("book", "back").get should be (2) + LevenshteinMetric.compare("dog", "fog").get should be (1) + LevenshteinMetric.compare("foq", "fog").get should be (1) + LevenshteinMetric.compare("fvg", "fog").get should be (1) + LevenshteinMetric.compare("encyclopedia", "encyclopediaz").get should be (1) + LevenshteinMetric.compare("encyclopediz", "encyclopediaz").get should be (1) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala new file mode 100755 index 0000000..5e1abb8 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/similarity/NGramMetricSpec.scala @@ -0,0 +1,60 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class NGramMetricSpec extends ScalaTest { "NGramMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + NGramMetric(1).compare("", "").isDefined should be (false) + NGramMetric(1).compare("abc", "").isDefined should be (false) + NGramMetric(1).compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + NGramMetric(1).compare("abc", "abc").get should be (1) + NGramMetric(2).compare("abc", "abc").get should be (1) + NGramMetric(3).compare("abc", "abc").get should be (1) + } + } + "unequal arguments" should returns { + "0" in { + 
NGramMetric(1).compare("abc", "xyz").get should be (0) + NGramMetric(2).compare("abc", "xyz").get should be (0) + NGramMetric(3).compare("abc", "xyz").get should be (0) + } + } + "invalid arguments" should returns { + "None" in { + NGramMetric(2).compare("n", "naght").isDefined should be (false) + NGramMetric(2).compare("night", "n").isDefined should be (false) + NGramMetric(3).compare("ni", "naght").isDefined should be (false) + NGramMetric(3).compare("night", "na").isDefined should be (false) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + NGramMetric(1).compare("night", "nacht").get should be (0.6) + NGramMetric(1).compare("night", "naght").get should be (0.8) + NGramMetric(1).compare("context", "contact").get should be (0.7142857142857143) + + NGramMetric(2).compare("night", "nacht").get should be (0.25) + NGramMetric(2).compare("night", "naght").get should be (0.5) + NGramMetric(2).compare("context", "contact").get should be (0.5) + NGramMetric(2).compare("contextcontext", "contact").get should be (0.23076923076923078) + NGramMetric(2).compare("context", "contactcontact").get should be (0.23076923076923078) + NGramMetric(2).compare("ht", "nacht").get should be (0.25) + NGramMetric(2).compare("xp", "nacht").get should be (0) + NGramMetric(2).compare("ht", "hththt").get should be (0.2) + + NGramMetric(3).compare("night", "nacht").get should be (0) + NGramMetric(3).compare("night", "naght").get should be (0.3333333333333333) + NGramMetric(3).compare("context", "contact").get should be (0.4) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala new file mode 100755 index 0000000..ec1e7b2 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/similarity/OverlapMetricSpec.scala @@ -0,0 +1,62 @@ +package com.rockymadden.stringmetric.similarity + +import 
com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class OverlapMetricSpec extends ScalaTest { "OverlapMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + OverlapMetric(1).compare("", "").isDefined should be (false) + OverlapMetric(1).compare("abc", "").isDefined should be (false) + OverlapMetric(1).compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "1" in { + OverlapMetric(1).compare("abc", "abc").get should be (1) + OverlapMetric(2).compare("abc", "abc").get should be (1) + OverlapMetric(3).compare("abc", "abc").get should be (1) + } + } + "unequal arguments" should returns { + "0" in { + OverlapMetric(1).compare("abc", "xyz").get should be (0) + OverlapMetric(2).compare("abc", "xyz").get should be (0) + OverlapMetric(3).compare("abc", "xyz").get should be (0) + } + } + "invalid arguments" should returns { + "None" in { + OverlapMetric(2).compare("n", "naght").isDefined should be (false) + OverlapMetric(2).compare("night", "n").isDefined should be (false) + OverlapMetric(3).compare("ni", "naght").isDefined should be (false) + OverlapMetric(3).compare("night", "na").isDefined should be (false) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + OverlapMetric(1).compare("bob", "bobman").get should be (1) + OverlapMetric(1).compare("bob", "manbobman").get should be (1) + OverlapMetric(1).compare("night", "nacht").get should be (0.6) + OverlapMetric(1).compare("night", "naght").get should be (0.8) + OverlapMetric(1).compare("context", "contact").get should be (0.7142857142857143) + + OverlapMetric(2).compare("night", "nacht").get should be (0.25) + OverlapMetric(2).compare("night", "naght").get should be (0.5) + OverlapMetric(2).compare("context", "contact").get should be (0.5) + OverlapMetric(2).compare("contextcontext", "contact").get 
should be (0.5) + OverlapMetric(2).compare("context", "contactcontact").get should be (0.5) + OverlapMetric(2).compare("ht", "nacht").get should be (1) + OverlapMetric(2).compare("xp", "nacht").get should be (0) + OverlapMetric(2).compare("ht", "hththt").get should be (1) + + OverlapMetric(3).compare("night", "nacht").get should be (0) + OverlapMetric(3).compare("night", "naght").get should be (0.3333333333333333) + OverlapMetric(3).compare("context", "contact").get should be (0.4) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala new file mode 100755 index 0000000..30bbf49 --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala @@ -0,0 +1,40 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class RatcliffObershelpMetricSpec extends ScalaTest { "RatcliffObershelpMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + RatcliffObershelpMetric.compare("", "").isDefined should be (false) + RatcliffObershelpMetric.compare("abc", "").isDefined should be (false) + RatcliffObershelpMetric.compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "0" in { + RatcliffObershelpMetric.compare("abc", "abc").get should be (1) + RatcliffObershelpMetric.compare("123", "123").get should be (1) + } + } + "unequal arguments" should returns { + "Double indicating distance" in { + RatcliffObershelpMetric.compare("abc", "xyz").get should be (0) + RatcliffObershelpMetric.compare("123", "456").get should be (0) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + 
RatcliffObershelpMetric.compare("aleksander", "alexandre").get should be (0.7368421052631579) + RatcliffObershelpMetric.compare("alexandre", "aleksander").get should be (0.7368421052631579) + RatcliffObershelpMetric.compare("pennsylvania", "pencilvaneya").get should be (0.6666666666666666) + RatcliffObershelpMetric.compare("pencilvaneya", "pennsylvania").get should be (0.6666666666666666) + RatcliffObershelpMetric.compare("abcefglmn", "abefglmo").get should be (0.8235294117647058) + RatcliffObershelpMetric.compare("abefglmo", "abcefglmn").get should be (0.8235294117647058) + } + } + } +}} diff --git a/core/src/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala new file mode 100755 index 0000000..070458a --- /dev/null +++ b/core/src/test/scala/com/rockymadden/stringmetric/similarity/WeightedLevenshteinMetricSpec.scala @@ -0,0 +1,48 @@ +package com.rockymadden.stringmetric.similarity + +import com.rockymadden.stringmetric.ScalaTest +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +final class WeightedLevenshteinMetricSpec extends ScalaTest { "WeightedLevenshteinMetric" should provide { + "compare method" when passed { + "empty arguments" should returns { + "None" in { + WeightedLevenshteinMetric(10, 0.1, 1).compare("", "").isDefined should be (false) + WeightedLevenshteinMetric(10, 0.1, 1).compare("abc", "").isDefined should be (false) + WeightedLevenshteinMetric(10, 0.1, 1).compare("", "xyz").isDefined should be (false) + } + } + "equal arguments" should returns { + "0" in { + WeightedLevenshteinMetric(10, 0.1, 1).compare("abc", "abc").get should be (0) + WeightedLevenshteinMetric(10, 0.1, 1).compare("123", "123").get should be (0) + } + } + "unequal arguments" should returns { + "Double indicating distance" in { + WeightedLevenshteinMetric(10, 0.1, 1).compare("abc", "xyz").get 
should be (3) + WeightedLevenshteinMetric(10, 0.1, 1).compare("123", "456").get should be (3) + } + } + "valid arguments" should returns { + "Double indicating distance" in { + WeightedLevenshteinMetric(10, 0.1, 1).compare("az", "z").get should be (10) + WeightedLevenshteinMetric(10, 0.1, 1).compare("z", "az").get should be (0.1) + WeightedLevenshteinMetric(10, 0.1, 1).compare("a", "z").get should be (1) + WeightedLevenshteinMetric(10, 0.1, 1).compare("z", "a").get should be (1) + WeightedLevenshteinMetric(10, 0.1, 1).compare("ab", "yz").get should be (2) + WeightedLevenshteinMetric(10, 0.1, 1).compare("yz", "ab").get should be (2) + WeightedLevenshteinMetric(10, 0.1, 1).compare("0", "0123456789").get should be (0.9) + WeightedLevenshteinMetric(10, 0.1, 1).compare("0123456789", "0").get should be (90) + WeightedLevenshteinMetric(10, 0.1, 1).compare("book", "back").get should be (2) + WeightedLevenshteinMetric(10, 0.1, 1).compare("back", "book").get should be (2) + WeightedLevenshteinMetric(10, 0.1, 1).compare("hosp", "hospital").get should be (0.4) + WeightedLevenshteinMetric(10, 0.1, 1).compare("hospital", "hosp").get should be (40) + WeightedLevenshteinMetric(10, 0.1, 1).compare("clmbs blvd", "columbus boulevard").get should be (0.8) + WeightedLevenshteinMetric(10, 0.1, 1).compare("columbus boulevard", "clmbs blvd").get should be (80) + } + } + } +}} diff --git a/deploy.gradle b/deploy.gradle deleted file mode 100755 index e298309..0000000 --- a/deploy.gradle +++ /dev/null @@ -1,68 +0,0 @@ -apply plugin: 'maven' -apply plugin: 'scala' -apply plugin: 'signing' - -def isMavenDeployable = hasProperty('mavenRepositoryUrl') && - hasProperty('mavenRepositoryUsername') && - hasProperty('mavenRepositoryPassword') - -if (isMavenDeployable) { - signing { sign configurations.archives } - - task scaladocJar(type: Jar, dependsOn: scaladoc) { - classifier = 'javadoc' - from "${project.buildDir}/docs/scaladoc" - } - - task sourceJar(type: Jar, dependsOn: classes) { - classifier 
= 'sources' - from sourceSets.main.allSource - } - - artifacts { - archives jar - archives scaladocJar - archives sourceJar - } - - uploadArchives { - repositories { - mavenDeployer { - beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } - - repository(url: mavenRepositoryUrl) { - authentication(userName: mavenRepositoryUsername, password: mavenRepositoryPassword) - } - - pom.project { - description "${parent.project.description}" - groupId "${parent.project.group}" - name "${project.name}" - packaging 'jar' - url "${parent.project.url}" - version "${parent.project.version}" - - developers { - developer { - id 'rockymadden' - name 'Rocky Madden' - } - } - - licenses { - license { - name 'MIT' - url 'http://opensource.org/licenses/MIT' - distribution 'repo' - } - } - - scm { - url "${parent.project.scm}" - connection "${parent.project.scm}" - } - } - } - } - } -} diff --git a/gradle.properties b/gradle.properties deleted file mode 100755 index 6aa5ef5..0000000 --- a/gradle.properties +++ /dev/null @@ -1,2 +0,0 @@ -org.gradle.daemon=true -org.gradle.jvmargs=-XX:MaxPermSize=1024m -XX:+CMSClassUnloadingEnabled -XX:+UseConcMarkSweepGC -XX:+HeapDumpOnOutOfMemoryError -Xmx2048m \ No newline at end of file diff --git a/project/build.properties b/project/build.properties new file mode 100644 index 0000000..37b489c --- /dev/null +++ b/project/build.properties @@ -0,0 +1 @@ +sbt.version=0.13.1 diff --git a/project/build.scala b/project/build.scala new file mode 100644 index 0000000..06f9040 --- /dev/null +++ b/project/build.scala @@ -0,0 +1,41 @@ +import sbt._ +import Keys._ + +object Common { + def name = "stringmetric" + def organization = "com.rockymadden.stringmetric" + def scalaVersion = "2.10.3" + def version = "0.26.1" +} + +object CoreBuild extends Build { + lazy val root = Project(Common.name, file(".")).aggregate(core, cli) + + lazy val core: Project = Project("core", file("core"), + settings = Defaults.defaultSettings ++ Seq( + 
organization := Common.organization, + name := Common.name + "-core", + version := Common.version, + scalaVersion := Common.scalaVersion, + resolvers ++= Seq(DefaultMavenRepository), + libraryDependencies ++= Seq( + "junit" % "junit" % "4.11" % "test", + "org.scalatest" %% "scalatest" % "2.0.M5b" % "test" + ) + ) + ) + + lazy val cli: Project = Project("cli", file("cli"), + settings = Defaults.defaultSettings ++ Seq( + organization := Common.organization, + name := Common.name + "-cli", + version := Common.version, + scalaVersion := Common.scalaVersion, + resolvers ++= Seq(DefaultMavenRepository), + libraryDependencies ++= Seq( + "junit" % "junit" % "4.11" % "test", + "org.scalatest" %% "scalatest" % "2.0.M5b" % "test" + ) + ) + ).dependsOn(core) +} diff --git a/settings.gradle b/settings.gradle deleted file mode 100755 index 31c6743..0000000 --- a/settings.gradle +++ /dev/null @@ -1,6 +0,0 @@ -include 'cli' -include 'core' - -rootProject.name = 'stringmetric' -project(':cli').name = 'stringmetric-cli' -project(':core').name = 'stringmetric-core' -- cgit v1.2.3