diff options
author | Rocky Madden <git@rockymadden.com> | 2012-10-06 23:19:20 -0600 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2012-10-06 23:19:20 -0600 |
commit | dadd1221ec7c1301b3cc2dfc178dba2091e1f9b8 (patch) | |
tree | ce698016721cdc2636d66742d705b232ad1c9fe9 | |
download | stringmetric-dadd1221ec7c1301b3cc2dfc178dba2091e1f9b8.tar.gz stringmetric-dadd1221ec7c1301b3cc2dfc178dba2091e1f9b8.tar.bz2 stringmetric-dadd1221ec7c1301b3cc2dfc178dba2091e1f9b8.zip |
Created repository.v0.0.0
18 files changed, 723 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..7f0a79e --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +# Core ignores +*.diff +*.err +*.orig +*.log +*.rej +*.swo +*.swp +*.vi +*~ +*# +.git/ + +# OS ignores +.DS_Store +thumbs.db + +# Editor ignores +.cache +.project +.settings +.settings/ +bin/ + +# Build ignores +build/ + +# Project ignores +.classpath
\ No newline at end of file
diff --git a/build.gradle b/build.gradle
new file mode 100755
index 0000000..3275a88
--- /dev/null
+++ b/build.gradle
@@ -0,0 +1,8 @@
+group = 'org.hashtree.stringmetric' // Group of the root project.
+version = '0.0.0' // Version of the root project.
+
+allprojects { // Applied to the root project and to every subproject.
+  repositories {
+    mavenCentral() // Dependencies are resolved from Maven Central.
+  }
+}
\ No newline at end of file
diff --git a/cli/build.gradle b/cli/build.gradle
new file mode 100755
index 0000000..6cd8aea
--- /dev/null
+++ b/cli/build.gradle
@@ -0,0 +1,106 @@
+apply plugin: 'eclipse'
+apply plugin: 'scala'
+
+dependencies {
+  compile project(':stringmetric-core') // The CLI is built on top of the core project.
+  compile 'org.scala-lang:scala-compiler:2.9.2'
+  compile 'org.scala-lang:scala-library:2.9.2'
+
+  scalaTools 'org.scala-lang:scala-compiler:2.9.2'
+  scalaTools 'org.scala-lang:scala-library:2.9.2'
+
+  testCompile 'junit:junit:4.10'
+  testCompile 'org.scalatest:scalatest_2.9.2:1.8'
+}
+
+sourceSets { // Sources live under source/core and source/test, see the srcDir entries below.
+  main {
+    output.resourcesDir "${project.buildDir}/classes/main"
+
+    java {
+      srcDir 'source/core/java'
+    }
+    resources {
+      srcDir 'source/core/resource'
+    }
+    scala {
+      srcDir 'source/core/scala'
+    }
+  }
+  test {
+    output.resourcesDir "${project.buildDir}/classes/test"
+
+    java {
+      srcDir 'source/test/java'
+    }
+    resources {
+      srcDir 'source/test/resource'
+    }
+    scala {
+      srcDir 'source/test/scala'
+    }
+  }
+}
+
+task tar(description: 'Assembles a compressed tar archive of all core source files.', dependsOn: [':stringmetric-cli:jar', ':stringmetric-core:jar']) {
+  ext.sourcePath = "${project.projectDir}/source/core/scala" // Scala sources that become the scripts.
+  ext.outputPath = "${project.buildDir}"
+  ext.workingPath = "${project.buildDir}/${project.name}" // Staging directory, also the prefix inside the tar.
+
+  inputs.dir new File(sourcePath)
+  outputs.dir new File(outputPath, 'generated') // NOTE(review): nothing below visibly writes to 'generated' - confirm.
+  outputs.upToDateWhen {
+    new File(workingPath).isDirectory() // Up to date only while the staging directory exists.
+  }
+
+  doLast {
+    // Clean up working directory and tar from last execution, should they exist.
+    ant.delete(dir: workingPath, failOnError: false)
+    ant.delete(file: "${project.buildDir}/${project.name}.tar.gz", failOnError: false)
+
+    // Create project working directory.
+    ant.mkdir(dir: workingPath)
+
+    // Create scalascript header file. The header ends with "//", so the line concatenated directly after it is commented out.
+    ant.echo(file: "${workingPath}/scalascript.sh", message: '#!/bin/bash\ndir="`dirname \\"$0\\"`"\ndir="`( cd \\"$dir\\" && pwd )`"\ncp=`echo $dir/*.jar|sed \'s/ /:/g\'`\nexec scala -classpath "$cp" -savecompiled "$0" "$@"\n!#\n//')
+
+    // Copy source files to working directory, prepending the scalascript header to each copied file.
+    ant.copy(toDir: workingPath, force: true, overwrite: true) {
+      fileset(dir: sourcePath) {
+        exclude(name: '**/cli/*.scala')
+        exclude(name: '**/package.scala')
+      }
+      filterchain {
+        concatfilter(prepend: "${workingPath}/scalascript.sh")
+      }
+    }
+
+    // Delete scalascript header file.
+    ant.delete(file: "${workingPath}/scalascript.sh")
+
+    // Flatten and remove file extension.
+    ant.move(toDir: workingPath) {
+      fileset(dir: workingPath)
+      chainedmapper {
+        mapper(type: 'flatten')
+        mapper(from: '*.scala', to: '*', type: 'glob')
+      }
+    }
+
+    // Clean up empty folder(s) from flatten.
+    ant.delete(dir: "${workingPath}/org", includeEmptyDirs: true)
+
+    // Copy project jars into place.
+    ant.copy(toDir: workingPath, force: true, overwrite: true) {
+      fileset(dir: "${project.buildDir}/libs")
+    }
+    ant.copy(toDir: workingPath, force: true, overwrite: true) {
+      fileset(dir: "${project(':stringmetric-core').buildDir}/libs")
+    }
+
+    // Assemble compressed tar.
+    ant.tar(compression: 'gzip', destFile: "${project.buildDir}/${project.name}.tar.gz") {
+      tarfileset(dir: workingPath, fileMode: 755, prefix: project.name)
+    }
+  }
+}
\ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/OptionMapUtility.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/OptionMapUtility.scala
new file mode 100755
index 0000000..926ba8b
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/OptionMapUtility.scala
@@ -0,0 +1,53 @@
+package org.hashtree.stringmetric.cli
+
+import scala.annotation.tailrec
+import scala.collection.immutable.HashMap
+
+/**
+ * Utility standalone for OptionMap based operations.
+ *
+ * Command line arguments are collected into an OptionMap as follows:
+ *  - `--name` and `--name=value` are stored under 'name, with "" or the value.
+ *  - `-name` and `-name=value` are stored under 'name with "", any value is discarded.
+ *  - Dashless arguments are trimmed and accumulated, space separated and in order, under 'dashless.
+ *  - Anything else is ignored.
+ */
+object OptionMapUtility {
+  // Compiled once, rather than on every recursive step of next.
+  private[this] val double = """^(--[a-zA-Z0-9]+)(\=[a-zA-Z0-9\.\-\_]+)?""".r
+  private[this] val single = """^(-[a-zA-Z0-9]+)(\=[a-zA-Z0-9\.\-\_]+)?""".r
+  private[this] val less = """([a-zA-Z0-9\/\-\_\$\.]+)""".r
+
+  /** Parses an Array of raw command line arguments into an OptionMap. */
+  def toOptionMap(arguments: Array[String]): OptionMap = {
+    toOptionMap(arguments.toList)
+  }
+
+  /** Parses a List of raw command line arguments into an OptionMap. */
+  def toOptionMap(arguments: List[String]): OptionMap = {
+    next(new HashMap[Symbol, String](), arguments)
+  }
+
+  /** Consumes the head of arguments, adds it to optionMap when it is recognized, and recurses on the tail. */
+  @tailrec
+  private[this] def next(optionMap: OptionMap, arguments: List[String]): OptionMap = {
+    arguments match {
+      // Empty List, return OptionMap.
+      case Nil => optionMap
+      // Double dash options, without value. The unmatched optional group is extracted as null.
+      case double(name, null) :: tail => next(optionMap + (Symbol(name.drop(2)) -> ""), tail)
+      // Double dash options, with value. The leading '=' is dropped from the value.
+      case double(name, value) :: tail => next(optionMap + (Symbol(name.drop(2)) -> value.tail), tail)
+      // Single dash options, with or without value. Value is discarded.
+      case single(name, _) :: tail => next(optionMap + (Symbol(name.tail) -> ""), tail)
+      // Dashless options, appended to any dashless values seen so far.
+      case less(value) :: tail if value.head != '-' => {
+        val dashless = optionMap.get('dashless).map(_ + " " + value.trim).getOrElse(value.trim)
+
+        next(optionMap + ('dashless -> dashless), tail)
+      }
+      // Invalid option, ignore.
+      case _ :: tail => next(optionMap, tail)
+    }
+  }
+}
\ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/Command.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/Command.scala
new file mode 100755
index 0000000..bac7786
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/Command.scala
@@ -0,0 +1,51 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.cli.OptionMap
+
+/** Defines the traits and provides basic implementations of a command. Commands are always implemented as objects. */
+trait Command {
+  /** Entry point, receives the raw command line arguments. */
+  def main(args: Array[String]): Unit
+
+  /** Outputs help for the command. */
+  def help(): Unit
+
+  /**
+   * Reports a failure. Outside of unit tests the message is printed and the JVM exits with status 1, during unit
+   * tests the error is rethrown instead.
+   */
+  final def error(error: Throwable)(implicit options: OptionMap): Unit = {
+    if (isUnitTest(options)) throw error
+    else {
+      println(error.getMessage)
+      sys.exit(1)
+    }
+  }
+
+  /** Runs the command with already parsed options. */
+  def execute(options: OptionMap): Unit
+
+  /** Exits the JVM with status 0, unless running as a unit test. */
+  final def exit(implicit options: OptionMap): Unit = {
+    if (! isUnitTest(options)) sys.exit(0)
+  }
+
+  /** True when -ut is present, or when --unitTest is present with any value other than "false". */
+  protected[this] def isUnitTest(options: OptionMap): Boolean = {
+    isEnabled(options, 'ut, 'unitTest)
+  }
+
+  /** True when -d is present, or when --debug is present with any value other than "false". */
+  protected[this] def isDebug(options: OptionMap): Boolean = {
+    isEnabled(options, 'd, 'debug)
+  }
+
+  /**
+   * Shared flag test. The value is inspected through Option.exists, because comparing the Option returned by Map.get
+   * directly against a String is always unequal and would treat --flag=false as enabled.
+   */
+  private[this] def isEnabled(options: OptionMap, shortName: Symbol, longName: Symbol): Boolean = {
+    options.contains(shortName) || options.get(longName).exists(_ != "false")
+  }
+}
+
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala
new file mode 100755
index 0000000..63ab69c
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetric.scala
@@ -0,0 +1,55 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.JaroWinklerMetric
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.cli.command._
+
+/**
+ * The jaroWinklerMetric [[org.hashtree.stringmetric.cli.command.Command]]. Compares two strings to calculate the
+ * Jaro-Winkler distance.
+ */
+object jaroWinklerMetric extends Command {
+  /** Parses args, then outputs help, executes the comparison, or reports invalid syntax through error. */
+  override def main(args: Array[String]): Unit = {
+    val options = OptionMapUtility.toOptionMap(args)
+
+    try {
+      // Help.
+      if (options.contains('h) || options.contains('help)) {
+        help()
+        exit(options)
+      // Execute. Requires exactly one space in the dashless value.
+      } else if (options.contains('dashless) && options('dashless).count(_ == ' ') == 1) {
+        execute(options)
+        exit(options)
+      // Invalid syntax.
+      } else {
+        throw new IllegalArgumentException("Expected valid syntax. See --help.")
+      }
+    } catch {
+      case e: Throwable => error(e)(options)
+    }
+  }
+
+  /** Prints the description, syntax, and options. */
+  override def help(): Unit = {
+    val ls = sys.props("line.separator")
+    val tab = " "
+
+    println(
+      "Compares two strings to calculate the Jaro-Winkler distance." + ls + ls +
+      "Syntax:" + ls +
+      tab + "jaroWinklerMetric [Options] string1 string2..." + ls + ls +
+      "Options:" + ls +
+      tab + "-h, --help" + ls +
+      tab + tab + "Outputs description, syntax, and options."
+    )
+  }
+
+  /** Prints the JaroWinklerMetric comparison of the first two space separated dashless values. */
+  override def execute(options: OptionMap): Unit = {
+    val strings = options('dashless).split(" ")
+
+    println(JaroWinklerMetric.compare(strings(0), strings(1)).toString)
+  }
+}
\ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/command/package.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/package.scala
new file mode 100755
index 0000000..b0610ba
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/command/package.scala
@@ -0,0 +1,6 @@
+package org.hashtree.stringmetric.cli
+
+/** Provides core command functionality: an implicit default OptionMap, parsed from "--unitTest=false". */
+package object command {
+  implicit val optionMap: OptionMap = OptionMapUtility.toOptionMap(Array("--unitTest=false"))
+}
\ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/package.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/package.scala
new file mode 100755
index 0000000..a8c1c01
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/package.scala
@@ -0,0 +1,6 @@
+package org.hashtree.stringmetric
+
+/** Provides core CLI functionality: the OptionMap alias used for parsed command line options. */
+package object cli {
+  type OptionMap = Map[Symbol, String] // Option name to option value.
+}
\ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/OptionMapUtilitySpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/OptionMapUtilitySpec.scala
new file mode 100755
index 0000000..a5a8eb1
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/OptionMapUtilitySpec.scala
@@ -0,0 +1,151 @@
+package org.hashtree.stringmetric.cli
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class OptionMapUtilitySpec extends ScalaTest { // Every scenario is checked for both the Array and the List overload.
+  "OptionMapUtility" should provide {
+    "overloaded toOptionMap method" when passed {
+      "Array with a single valid double dashed option" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(Array("--help"))
+
+          options('help) should equal ("")
+        }
+      }
+      "List with a single valid double dashed option" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(List("--help"))
+
+          options('help) should equal ("")
+        }
+      }
+      "Array with a multiple valid double dashed options" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(Array("--help", "--test=test"))
+
+          options('help) should equal ("")
+          options('test) should equal ("test")
+        }
+      }
+      "List with a multiple valid double dashed options" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(List("--help", "--test=test"))
+
+          options('help) should equal ("")
+          options('test) should equal ("test")
+        }
+      }
+      "Array with invalid double dashed options" should returns {
+        "empty Map" in {
+          val options = OptionMapUtility.toOptionMap(Array("--help#", "--test%=test"))
+
+          options.keysIterator.length should be (0)
+        }
+      }
+      "List with invalid double dashed options" should returns {
+        "empty Map" in {
+          val options = OptionMapUtility.toOptionMap(List("--help#", "--test%=test"))
+
+          options.keysIterator.length should be (0)
+        }
+      }
+      "Array with a single valid single dashed option" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(Array("-h"))
+
+          options('h) should equal ("")
+        }
+      }
+      "List with a single valid single dashed option" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(List("-h"))
+
+          options('h) should equal ("")
+        }
+      }
+      "Array with multiple valid single dashed options" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(Array("-h", "-i"))
+
+          options('h) should equal ("")
+          options('i) should equal ("")
+        }
+      }
+      "List with multiple valid single dashed options" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(List("-h", "-i"))
+
+          options('h) should equal ("")
+          options('i) should equal ("")
+        }
+      }
+      "Array with an invalid single dashed options" should returns {
+        "empty Map" in {
+          val options = OptionMapUtility.toOptionMap(Array("-h-i", "-i#gloo"))
+
+          options.keysIterator.length should be (0)
+        }
+      }
+      "List with an invalid single dashed options" should returns {
+        "empty Map" in {
+          val options = OptionMapUtility.toOptionMap(List("-h-i", "-i#gloo"))
+
+          options.keysIterator.length should be (0)
+        }
+      }
+      "Array with a single nameless option" should returns {
+        "single key populated Map" in {
+          val options = OptionMapUtility.toOptionMap(Array("filename0"))
+
+          options('dashless).count(_ == ' ') should be (0) // Spaces counted in the joined 'dashless value.
+        }
+      }
+      "List with a single nameless option" should returns {
+        "single key populated Map" in {
+          val options = OptionMapUtility.toOptionMap(List("filename0"))
+
+          options('dashless).count(_ == ' ') should be (0)
+        }
+      }
+      "Array with multiple single nameless options" should returns {
+        "single key populated Map" in {
+          val options = OptionMapUtility.toOptionMap(Array("filename0", "filename1", "filename2"))
+
+          options('dashless).count(_ == ' ') should be (2)
+        }
+      }
+      "List with multiple single nameless options" should returns {
+        "single key populated Map" in {
+          val options = OptionMapUtility.toOptionMap(List("filename0", "filename1", "filename2"))
+
+          options('dashless).count(_ == ' ') should be (2)
+        }
+      }
+      "Array with mixed options" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(Array("-q", "--help", "--test=test", "-go", "filename0", "filename1", "filename2"))
+
+          options('q) should equal ("")
+          options('help) should equal ("")
+          options('test) should equal ("test")
+          options('go) should equal ("")
+          options('dashless).count(_ == ' ') should be (2)
+        }
+      }
+      "List with mixed options" should returns {
+        "populated Map" in {
+          val options = OptionMapUtility.toOptionMap(List("-q", "--help", "--test=test", "-go", "filename0", "filename1", "filename2"))
+
+          options('q) should equal ("")
+          options('help) should equal ("")
+          options('test) should equal ("test")
+          options('go) should equal ("")
+          options('dashless).count(_ == ' ') should be (2)
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetricSpec.scala
new file mode 100755
index 0000000..071454a
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/command/jaroWinklerMetricSpec.scala
@@ -0,0 +1,42 @@
+package org.hashtree.stringmetric.cli.command
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Exercises the jaroWinklerMetric command through its main method, with output captured from Console. */
+@RunWith(classOf[JUnitRunner])
+final class jaroWinklerMetricSpec extends ScalaTest {
+  "jaroWinklerMetric" should provide {
+    "main method" when passed {
+      "valid dashless arguments" should executes {
+        "print the distance" in {
+          // println ends lines with the platform line separator, which is not "\n" on every platform.
+          val ls = sys.props("line.separator")
+          val out = new java.io.ByteArrayOutputStream()
+
+          Console.withOut(out)(
+            jaroWinklerMetric.main(Array("--unitTest", "--debug", "abc", "abc"))
+          )
+
+          out.toString should equal ("1.0" + ls)
+          out.reset()
+
+          Console.withOut(out)(
+            jaroWinklerMetric.main(Array("--unitTest", "--debug", "abc", "xyz"))
+          )
+
+          out.toString should equal ("0.0" + ls)
+          out.reset()
+        }
+      }
+      "no dashless arguments" should throws {
+        "IllegalArgumentException" in {
+          evaluating {
+            jaroWinklerMetric.main(Array("--unitTest", "--debug"))
+          } should produce [IllegalArgumentException]
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/core/build.gradle b/core/build.gradle
new file mode 100755
index 0000000..55fc38f
--- /dev/null
+++ b/core/build.gradle
@@ -0,0 +1,42 @@
+apply plugin: 'eclipse'
+apply plugin: 'scala'
+
+dependencies {
+  compile 'org.scala-lang:scala-compiler:2.9.2'
+  compile 'org.scala-lang:scala-library:2.9.2'
+
+  scalaTools 'org.scala-lang:scala-compiler:2.9.2'
+  scalaTools 'org.scala-lang:scala-library:2.9.2'
+
+  testCompile 'junit:junit:4.10'
+  testCompile 'org.scalatest:scalatest_2.9.2:1.8'
+}
+
+sourceSets { // Sources live under source/core and source/test, see the srcDir entries below.
+  main {
+    output.resourcesDir "${project.buildDir}/classes/main"
+
+    java {
+      srcDir 'source/core/java'
+    }
+    resources {
+      srcDir 'source/core/resource'
+    }
+    scala {
+      srcDir 'source/core/scala'
+    }
+  }
+  test {
+    output.resourcesDir "${project.buildDir}/classes/test"
+
+    java {
+      srcDir 'source/test/java'
+    }
+    resources {
+      srcDir 'source/test/resource'
+    }
+    scala {
+      srcDir 'source/test/scala'
+    }
+  }
+}
\ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
new file mode 100755
index 0000000..4311379
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/JaroWinklerMetric.scala
@@ -0,0 +1,80 @@
+package org.hashtree.stringmetric
+
+/**
+ * An implementation of the Jaro-Winkler string metric. One differing detail in this implementation is that if a
+ * character is matched in string2, it cannot be matched upon again. This results in a more penalized distance in these
+ * scenarios (e.g. comparing henka and henkan distance is 0.9666 versus the typical 0.9722).
+ */
+object JaroWinklerMetric extends StringMetric {
+  /**
+   * Compares two strings, ignoring spaces and case.
+   *
+   * @return 0 when either string is empty once spaces are removed or when no characters match, otherwise the Jaro
+   * score boosted by up to four characters of common prefix.
+   */
+  override def compare(s1: String, s2: String): Float = {
+    val ca1 = s1.replaceAllLiterally(" ", "").toLowerCase.toCharArray
+    val ca2 = s2.replaceAllLiterally(" ", "").toLowerCase.toCharArray
+
+    // 0 if either character array lacks length.
+    if (ca1.length == 0 || ca2.length == 0) 0f
+    else {
+      val (m1, m2) = matchChars(ca1, ca2)
+      val matchesScore = scoreMatches(m1, m2)
+
+      // 0 if matches score is 0.
+      if (matchesScore == 0) 0f
+      else {
+        val transpositionsScore = scoreTranspositions(m1, m2)
+        // Only the first four characters of a common prefix contribute to the boost.
+        val prefixLength = math.min(ca1.zip(ca2).takeWhile(t => t._1 == t._2).length, 4)
+        val jaro = (
+          (matchesScore.toFloat / ca1.length) +
+          (matchesScore.toFloat / ca2.length) +
+          ((matchesScore.toFloat - transpositionsScore) / matchesScore.toFloat)
+        ) / 3
+
+        jaro + (prefixLength * (.1f * (1 - jaro)))
+      }
+    }
+  }
+
+  /**
+   * Pairs each character of ca1, in order, with the first equal and not yet paired character of ca2 found inside the
+   * match window around the same index.
+   *
+   * @return the matched characters of ca1 in ca1 order, and the matched characters of ca2 in ca2 order.
+   */
+  private[this] def matchChars(ca1: Array[Char], ca2: Array[Char]): Tuple2[Array[Char], Array[Char]] = {
+    val window = math.abs((math.max(ca1.length, ca2.length) / 2f).floor.toInt - 1)
+    val last = ca2.length - 1
+    // Index pairs (ca1 index, ca2 index), most recent match first.
+    val matched = ca1.indices.foldLeft(List.empty[(Int, Int)]) { (pairs, i) =>
+      val start = math.max(i - window, 0)
+      val end = math.min(i + window, last)
+
+      // An empty range (start beyond end) simply finds nothing.
+      (start to end).find(ii => ! pairs.exists(_._2 == ii) && ca1(i) == ca2(ii)) match {
+        case Some(ii) => (i, ii) :: pairs
+        case None => pairs
+      }
+    }
+    val ordered = matched.reverse
+
+    (ordered.map(p => ca1(p._1)).toArray, ordered.map(_._2).sortWith(_ < _).map(ii => ca2(ii)).toArray)
+  }
+
+  /** The number of matched characters. Both arrays must be of equal length. */
+  private[this] def scoreMatches(mca1: Array[Char], mca2: Array[Char]): Int = {
+    require(mca1.length == mca2.length)
+
+    mca1.length
+  }
+
+  /** Half, rounded down, of the number of positions at which the matched characters differ. */
+  private[this] def scoreTranspositions(mca1: Array[Char], mca2: Array[Char]): Int = {
+    require(mca1.length == mca2.length)
+
+    mca1.zip(mca2).count(t => t._1 != t._2) / 2
+  }
+}
\ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/Metric.scala b/core/source/core/scala/org/hashtree/stringmetric/Metric.scala
new file mode 100755
index 0000000..2d570c2
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/Metric.scala
@@ -0,0 +1,6 @@
+package org.hashtree.stringmetric
+
+/** Marks those which leverage traits of a metric: two values of type T are compared, yielding an AnyVal. */
+trait Metric[T] {
+  def compare(t1: T, t2: T): AnyVal
+}
\ No newline at end of file
diff --git a/core/source/core/scala/org/hashtree/stringmetric/StringMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/StringMetric.scala
new file mode 100755
index 0000000..792aeba
--- /dev/null
+++ b/core/source/core/scala/org/hashtree/stringmetric/StringMetric.scala
@@ -0,0 +1,6 @@
+package org.hashtree.stringmetric
+
+/** Marks those which leverage traits of a string based Metric, i.e. a Metric fixed to String. Adds no members. */
+trait StringMetric extends Metric[String] {
+
+}
\ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/JaroWinklerMetricSpec.scala b/core/source/test/scala/org/hashtree/stringmetric/JaroWinklerMetricSpec.scala
new file mode 100755
index 0000000..6e044a0
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/JaroWinklerMetricSpec.scala
@@ -0,0 +1,54 @@
+package org.hashtree.stringmetric
+
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class JaroWinklerMetricSpec extends ScalaTest { // Pins exact Float results of JaroWinklerMetric.compare.
+  "JaroWinklerMetric" should provide {
+    "compare method" when passed {
+      "valid arguments" should returns {
+        "Float indicating distance" in {
+          JaroWinklerMetric.compare("abc", "abc") should be (1.0f) // Identical, disjoint, and empty inputs.
+          JaroWinklerMetric.compare("abc", "xyz") should be (0.0f)
+          JaroWinklerMetric.compare("abc", "") should be (0.0f)
+          JaroWinklerMetric.compare("", "xyz") should be (0.0f)
+          JaroWinklerMetric.compare("", "") should be (0.0f)
+          JaroWinklerMetric.compare("a", "a") should be (1.0f)
+
+          JaroWinklerMetric.compare("aa", "a") should be (0.84999996f) // Same result with the arguments swapped.
+          JaroWinklerMetric.compare("a", "aa") should be (0.84999996f)
+
+          JaroWinklerMetric.compare("veryveryverylong", "v") should be (0.71875f)
+          JaroWinklerMetric.compare("v", "veryveryverylong") should be (0.71875f)
+
+          JaroWinklerMetric.compare("martha", "marhta") should be (0.96111107f)
+          JaroWinklerMetric.compare("dwayne", "duane") should be (0.84000003f)
+          JaroWinklerMetric.compare("dixon", "dicksonx") should be (0.81333333f)
+          JaroWinklerMetric.compare("abcvwxyz", "cabvwxyz") should be (0.9583333f)
+          JaroWinklerMetric.compare("jones", "johnson") should be (0.8323809f)
+          JaroWinklerMetric.compare("henka", "henkan") should be (0.96666664f)
+          JaroWinklerMetric.compare("fvie", "ten") should be (0.0f)
+
+          JaroWinklerMetric.compare("zac ephron", "zac efron") should be > // Relative ordering of two comparisons.
+            JaroWinklerMetric.compare("zac ephron", "kai ephron")
+          JaroWinklerMetric.compare("brittney spears", "britney spears") should be >
+            JaroWinklerMetric.compare("brittney spears", "brittney startzman")
+
+          JaroWinklerMetric.compare("m a r t h a", "m a r h t a") should be (0.96111107f) // Same expectations as above, with spaces inserted.
+          JaroWinklerMetric.compare("d w a y n e", "d u a n e") should be (0.84000003f)
+          JaroWinklerMetric.compare("d i x o n", "d i c k s o n x") should be (0.81333333f)
+          JaroWinklerMetric.compare("a b c v w x y z", "c a b v w x y z") should be (0.9583333f)
+          JaroWinklerMetric.compare("j o n e s", "j o h n s o n") should be (0.8323809f)
+          JaroWinklerMetric.compare("h e n k a", "h e n k a n") should be (0.96666664f)
+          JaroWinklerMetric.compare("f v i e", "t e n") should be (0.0f)
+
+          JaroWinklerMetric.compare("z a c e p h r o n", "z a c e f r o n") should be >
+            JaroWinklerMetric.compare("z a c e p h r o n", "k a i e p h r o n")
+          JaroWinklerMetric.compare("b r i t t n e y s p e a r s", "b r i t n e y s p e a r s") should be >
+            JaroWinklerMetric.compare("b r i t t n e y s p e a r s", "b r i t t n e y s t a r t z m a n")
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/core/source/test/scala/org/hashtree/stringmetric/ScalaTest.scala b/core/source/test/scala/org/hashtree/stringmetric/ScalaTest.scala
new file mode 100755
index 0000000..a9cd5e1
--- /dev/null
+++ b/core/source/test/scala/org/hashtree/stringmetric/ScalaTest.scala
@@ -0,0 +1,18 @@
+package org.hashtree.stringmetric
+
+import org.scalatest.WordSpec
+import org.scalatest.matchers.ShouldMatchers
+
+trait ScalaTest extends WordSpec with ShouldMatchers { // Spec base: WordSpec with ShouldMatchers, plus the after words below.
+  def allows = afterWord("allow")
+
+  def executes = afterWord("execute")
+
+  def passed = afterWord("passed")
+
+  def provide = afterWord("provide")
+
+  def returns = afterWord("return")
+
+  def throws = afterWord("throw")
+}
\ No newline at end of file
diff --git a/readme.md b/readme.md
new file mode 100755
index 0000000..492a179
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,29 @@
+# stringmetric
+A collection of string metrics built with Scala. Includes a lightweight core API and a CLI-based interface for each string metric. The following string metrics are currently supported:
+
+* Jaro-Winkler
+
+## Building the API
+gradle jar
+
+## Building the CLI
+gradle tar
+
+## Using the API
+`// Import the metric of choice.`
+`import org.hashtree.stringmetric.JaroWinklerMetric`
+
+`// Invoke the compare method on the metric.`
+`val distance = JaroWinklerMetric.compare("string1", "string2")`
+
+`// Do something. In this case, distance is between 0.0f and 1.0f.`
+`if (distance >= 0.9) println("It's likely you're a match!")`
+
+## Using the CLI
+Uncompress the built tar and ensure you have permission to execute the commands. Then execute the metric of choice via the command line:
+
+`jaroWinklerMetric --help`
+`jaroWinklerMetric abc xyz`
+
+## Requirements
+* Scala 2.9.2
\ No newline at end of file
diff --git a/settings.gradle b/settings.gradle
new file mode 100755
index 0000000..e99d74b
--- /dev/null
+++ b/settings.gradle
@@ -0,0 +1,6 @@
+include 'cli'
+include 'core'
+
+rootProject.name = 'stringmetric'
+project(':cli').name = 'stringmetric-cli' // Renamed, the build scripts refer to ':stringmetric-cli'.
+project(':core').name = 'stringmetric-core' // Renamed, the build scripts refer to ':stringmetric-core'.
\ No newline at end of file |