From beb0ef90024ab651267b9fd3c708e2cf27743c60 Mon Sep 17 00:00:00 2001 From: Rocky Madden Date: Sat, 9 Feb 2013 22:13:05 -0700 Subject: Refactored traits so that metrics and algorithms can be decorated with additional functionality. --- readme.md | 111 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 55 insertions(+), 56 deletions(-) (limited to 'readme.md') diff --git a/readme.md b/readme.md index f96eb88..2ff6088 100755 --- a/readme.md +++ b/readme.md @@ -23,13 +23,16 @@ Available on the [Maven Central Repository](http://search.maven.org/#search%7Cga * __artifactId__: stringmetric-core * __artifactId__: stringmetric-cli +## Scaladoc +[Scaladoc](http://rockymadden.com/stringmetric/scaladoc/) is available on the project website. + ## Similarity package Useful for approximate string matching and measurement of string distance. Most metrics calculate the similarity of two strings as a double with a value between 0 and 1. A value of 0 being completely different and a value of 1 being completely similar. 
__Dice / Sorensen Metric:__ ```scala -println(DiceSorensenMetric.compare("night", "nacht")) -println(DiceSorensenMetric.compare("context", "contact")) +println(DiceSorensenMetric().compare("night", "nacht")) +println(DiceSorensenMetric().compare("context", "contact")) ``` Output: @@ -40,8 +43,8 @@ Output: __Hamming Metric:__ ```scala -println(HammingMetric.compare("toned", "roses")) -println(HammingMetric.compare("1011101", "1001001")) +println(HammingMetric().compare("toned", "roses")) +println(HammingMetric().compare("1011101", "1001001")) ``` Output: _(Note the exception of integers, rather than doubles, being returned.)_ @@ -52,9 +55,9 @@ Output: _(Note the exception of integers, rather than doubles, being returned.)_ __Jaro Metric:__ ```scala -println(JaroMetric.compare("dwayne", "duane")) -println(JaroMetric.compare("jones", "johnson")) -println(JaroMetric.compare("fvie", "ten")) +println(JaroMetric().compare("dwayne", "duane")) +println(JaroMetric().compare("jones", "johnson")) +println(JaroMetric().compare("fvie", "ten")) ``` Output: @@ -66,9 +69,9 @@ Output: __Jaro-Winkler Metric:__ ```scala -println(JaroWinklerMetric.compare("dwayne", "duane")) -println(JaroWinklerMetric.compare("jones", "johnson")) -println(JaroWinklerMetric.compare("fvie", "ten")) +println(JaroWinklerMetric().compare("dwayne", "duane")) +println(JaroWinklerMetric().compare("jones", "johnson")) +println(JaroWinklerMetric().compare("fvie", "ten")) ``` Output: @@ -80,8 +83,8 @@ Output: __Levenshtein Metric:__ ```scala -println(LevenshteinMetric.compare("sitting", "kitten")) -println(LevenshteinMetric.compare("cake", "drake")) +println(LevenshteinMetric().compare("sitting", "kitten")) +println(LevenshteinMetric().compare("cake", "drake")) ``` Output: _(Note the exception of integers, rather than doubles, being returned.)_ @@ -92,9 +95,9 @@ Output: _(Note the exception of integers, rather than doubles, being returned.)_ __N-Gram Metric:__ _(Note you must specify the size of the n-gram you 
wish to use. This can be done implicitly.)_ ```scala -println(NGramMetric.compare("night", "nacht")(1)) -println(NGramMetric.compare("night", "nacht")(2)) -println(NGramMetric.compare("context", "contact")(2)) +println(NGramMetric().compare("night", "nacht")(1)) +println(NGramMetric().compare("night", "nacht")(2)) +println(NGramMetric().compare("context", "contact")(2)) ``` Output: @@ -106,9 +109,9 @@ Output: __N-Gram Algorithm:__ _(Note you must specify the size of the n-gram you wish to use. This can be done implicitly.)_ ```scala -println(NGramAlgorithm.compute("abcdefghijklmnopqrstuvwxyz")(1)) -println(NGramAlgorithm.compute("abcdefghijklmnopqrstuvwxyz")(2)) -println(NGramAlgorithm.compute("abcdefghijklmnopqrstuvwxyz")(3)) +println(NGramAlgorithm().compute("abcdefghijklmnopqrstuvwxyz")(1)) +println(NGramAlgorithm().compute("abcdefghijklmnopqrstuvwxyz")(2)) +println(NGramAlgorithm().compute("abcdefghijklmnopqrstuvwxyz")(3)) ``` Output: @@ -120,8 +123,8 @@ Array("abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl", "klm __Ratcliff/Obershelp Metric:__ ```scala -println(RatcliffObershelpMetric.compare("aleksander", "alexandre")) -println(RatcliffObershelpMetric.compare("pennsylvania", "pencilvaneya")) +println(RatcliffObershelpMetric().compare("aleksander", "alexandre")) +println(RatcliffObershelpMetric().compare("pennsylvania", "pencilvaneya")) ``` Output: @@ -132,9 +135,9 @@ Output: __Weighted Levenshtein Metric:__ _(Note you must specify the weight of each operation. Delete, insert, and then substitute. 
This can be done implicitly.)_ ```scala -println(WeightedLevenshteinMetric.compare("book", "back")(10, 0.1, 1)) -println(WeightedLevenshteinMetric.compare("hosp", "hospital")(10, 0.1, 1)) -println(WeightedLevenshteinMetric.compare("hospital", "hosp")(10, 0.1, 1)) +println(WeightedLevenshteinMetric().compare("book", "back")(10, 0.1, 1)) +println(WeightedLevenshteinMetric().compare("hosp", "hospital")(10, 0.1, 1)) +println(WeightedLevenshteinMetric().compare("hospital", "hosp")(10, 0.1, 1)) ``` Output: _(Note that while a double is returned, it can be outside the range of 0 to 1, based upon the weights used.)_ @@ -145,12 +148,12 @@ Output: _(Note that while a double is returned, it can be outside the range of 0 ``` ## Phonetic package -Useful for indexing by word pronunciation and performing sounds-like comparisons. All metrics return a boolean value indicating if the two strings sound the same, per the algorithm used. All metrics have a algorithm counterpart which provide the means to perform indexing by word pronunciation. +Useful for indexing by word pronunciation and performing sounds-like comparisons. All metrics return a boolean value indicating if the two strings sound the same, per the algorithm used. All metrics have an algorithm counterpart which provides the means to perform indexing by word pronunciation. 
__Metaphone Metric:__ ```scala -println(MetaphoneMetric.compare("merci", "mercy")) -println(MetaphoneMetric.compare("dumb", "gum")) +println(MetaphoneMetric().compare("merci", "mercy")) +println(MetaphoneMetric().compare("dumb", "gum")) ``` Output: @@ -161,8 +164,8 @@ false __Metaphone Algorithm:__ ```scala -println(MetaphoneAlgorithm.compute("dumb")) -println(MetaphoneAlgorithm.compute("knuth")) +println(MetaphoneAlgorithm().compute("dumb")) +println(MetaphoneAlgorithm().compute("knuth")) ``` Output: @@ -173,8 +176,8 @@ n0 __NYSIIS Metric:__ ```scala -println(NysiisMetric.compare("ham", "hum")) -println(NysiisMetric.compare("dumb", "gum")) +println(NysiisMetric().compare("ham", "hum")) +println(NysiisMetric().compare("dumb", "gum")) ``` Output: @@ -185,8 +188,8 @@ false __NYSIIS Algorithm:__ ```scala -println(NysiisAlgorithm.compute("macintosh")) -println(NysiisAlgorithm.compute("knuth")) +println(NysiisAlgorithm().compute("macintosh")) +println(NysiisAlgorithm().compute("knuth")) ``` Output: @@ -197,8 +200,8 @@ nnat __Refined NYSIIS Metric:__ ```scala -println(RefinedNysiisMetric.compare("ham", "hum")) -println(RefinedNysiisMetric.compare("dumb", "gum")) +println(RefinedNysiisMetric().compare("ham", "hum")) +println(RefinedNysiisMetric().compare("dumb", "gum")) ``` Output: @@ -209,8 +212,8 @@ false __Refined NYSIIS Algorithm:__ ```scala -println(RefinedNysiisAlgorithm.compute("macintosh")) -println(RefinedNysiisAlgorithm.compute("westerlund")) +println(RefinedNysiisAlgorithm().compute("macintosh")) +println(RefinedNysiisAlgorithm().compute("westerlund")) ``` Output: @@ -221,8 +224,8 @@ wastarlad __Refined Soundex Metric:__ ```scala -println(RefinedSoundexMetric.compare("robert", "rupert")) -println(RefinedSoundexMetric.compare("robert", "rubin")) +println(RefinedSoundexMetric().compare("robert", "rupert")) +println(RefinedSoundexMetric().compare("robert", "rubin")) ``` Output: @@ -233,8 +236,8 @@ false __Refined Soundex Algorithm:__ ```scala 
-println(RefinedSoundexAlgorithm.compute("hairs")) -println(RefinedSoundexAlgorithm.compute("lambert")) +println(RefinedSoundexAlgorithm().compute("hairs")) +println(RefinedSoundexAlgorithm().compute("lambert")) ``` Output: @@ -245,8 +248,8 @@ l7081096 __Soundex Metric:__ ```scala -println(SoundexMetric.compare("robert", "rupert")) -println(SoundexMetric.compare("robert", "rubin")) +println(SoundexMetric().compare("robert", "rupert")) +println(SoundexMetric().compare("robert", "rubin")) ``` Output: @@ -257,8 +260,8 @@ false __Soundex Algorithm:__ ```scala -println(SoundexAlgorithm.compute("rupert")) -println(SoundexAlgorithm.compute("lukasiewicz")) +println(SoundexAlgorithm().compute("rupert")) +println(SoundexAlgorithm().compute("lukasiewicz")) ``` Output: @@ -267,30 +270,29 @@ r163 l222 ``` -## Filter package -Useful for filtering strings prior to evaluation (e.g. ignore case, ignore non-alpha, ignore spaces). Filters can be used implicitly. +## Decorating +It is possible to decorate algorithms and metrics with additional functionality. The most common decorations are filters, which are useful for filtering strings prior to evaluation (e.g. ignore case, ignore non-alpha, ignore spaces). Basic example with no filtering: ```scala -JaroWinklerMetric.compare("string1", "string2") +JaroWinklerMetric().compare("string1", "string2") ``` Basic example with single filter: ```scala -JaroWinklerMetric.compare("string1", "string2")(new StringFilterDelegate with AsciiLetterCaseStringFilter) +(new JaroWinklerMetric with AsciiLetterCaseStringFilter).compare("string1", "string2") ``` Basic example with stacked filter. 
Filters are applied in reverse order: ```scala -JaroWinklerMetric.compare("string1", "string2")(new StringFilterDelegate with AsciiLetterCaseStringFilter with AsciiLetterOnlyStringFilter) +(new JaroWinklerMetric with AsciiLetterCaseStringFilter with AsciiLetterOnlyStringFilter).compare("string1", "string2") ``` ## Convenience objects -The StringMetric, StringAlgorithm, and StringFilter convenience objects are available to make interactions with the library easier: +The StringMetricLike, StringAlgorithmLike, and StringFilterLike standalone convenience objects are available to make interactions with the library easier: ```scala -StringMetric.compareWithJaroWinkler("string1", "string2") -StringMetric.compareWithJaroWinkler("string1", "string2")(StringFilter.asciiLetterCase) -StringAlgorithm.computeWithMetaphone("string1", "string2") +StringMetricLike.compareWithJaroWinkler("string1", "string2") +StringAlgorithmLike.computeWithMetaphone("string1", "string2") ``` ## Command line interfaces @@ -356,9 +358,6 @@ tk ## License [Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0) -## Documentation -[Scaladoc](http://rockymadden.com/stringmetric/scaladoc/) is available on the project website. Further documentation may also be found via the [GitHub wiki](https://github.com/rockymadden/stringmetric/wiki). - ## Bugs and Issues Please submit bugs and issues via [GitHub issues](https://github.com/rockymadden/stringmetric/issues). -- cgit v1.2.3