From 46902b79562e7df92b05c73346be79b6e80d8c03 Mon Sep 17 00:00:00 2001
From: Rocky Madden
Date: Sun, 4 Nov 2012 19:30:17 -0700
Subject: Created WeightedLevenshtein metric, command, specs, and supporting code.

---
Review note: the hunks below carry review fixes (narrowed catch clauses, weight validation aligned with
execute, missing line separators in help, one added regression spec). The blob ids on the index lines
predate those fixes.

 .../hashtree/stringmetric/cli/ParseUtility.scala   |  17 +++
 .../cli/similarity/weightedLevenshteinMetric.scala |  81 ++++++++++++
 .../stringmetric/cli/ParseUtilitySpec.scala        |  60 +++++++++
 .../similarity/weightedLevenshteinMetricSpec.scala | 138 +++++++++++++++++++++
 4 files changed, 296 insertions(+)
 create mode 100755 cli/source/core/scala/org/hashtree/stringmetric/cli/ParseUtility.scala
 create mode 100755 cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/weightedLevenshteinMetric.scala
 create mode 100755 cli/source/test/scala/org/hashtree/stringmetric/cli/ParseUtilitySpec.scala
 create mode 100755 cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/weightedLevenshteinMetricSpec.scala

(limited to 'cli/source')

diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/ParseUtility.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/ParseUtility.scala
new file mode 100755
index 0000000..94f22c6
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/ParseUtility.scala
@@ -0,0 +1,17 @@
+package org.hashtree.stringmetric.cli
+
+import scala.math.BigDecimal
+
+/**
+ * Standalone utility for parse-based operations. Every method returns None when parsing the string throws an
+ * Exception; only Exception is caught so that fatal JVM errors (e.g. OutOfMemoryError) still propagate.
+ */
+object ParseUtility {
+  def parseBigDecimal(string: String): Option[BigDecimal] = try { Some(BigDecimal(string)) } catch { case _: Exception => None }
+
+  def parseDouble(string: String): Option[Double] = try { Some(string.toDouble) } catch { case _: Exception => None }
+
+  def parseFloat(string: String): Option[Float] = try { Some(string.toFloat) } catch { case _: Exception => None }
+
+  def parseInt(string: String): Option[Int] = try { Some(string.toInt) } catch { case _: Exception => None }
+}
\ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/weightedLevenshteinMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/weightedLevenshteinMetric.scala
new file mode 100755
index 0000000..ac10754
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/weightedLevenshteinMetric.scala
@@ -0,0 +1,81 @@
+package org.hashtree.stringmetric.cli.similarity
+
+import org.hashtree.stringmetric.StringFilterDelegate
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.filter.AsciiLetterCaseStringFilter
+import org.hashtree.stringmetric.similarity.WeightedLevenshteinMetric
+import scala.math.BigDecimal
+
+/**
+ * The weightedLevenshteinMetric [[org.hashtree.stringmetric.cli.Command]]. Compares the number of characters that two
+ * strings are different from one another via insertion, deletion, and substitution. Allows the invoker to indicate
+ * the weight each operation takes.
+ */
+object weightedLevenshteinMetric extends Command {
+  /** Prints help, or compares the two dashless strings when all three weights are present and parseable. */
+  override def main(args: Array[String]): Unit = {
+    val options = OptionMapUtility.toOptionMap(args)
+
+    try {
+      // Help.
+      if (options.contains('h) || options.contains('help)) {
+        help()
+        exit(options)
+      // Execute. Weights are validated with parseBigDecimal because execute parses them the same way; validating
+      // with parseDouble would accept values such as "NaN" that BigDecimal rejects.
+      } else if (options.contains('dashless) && options('dashless).count(_ == ' ') == 1 &&
+        options.contains('deleteWeight) && ParseUtility.parseBigDecimal(options('deleteWeight)).isDefined &&
+        options.contains('insertWeight) && ParseUtility.parseBigDecimal(options('insertWeight)).isDefined &&
+        options.contains('substituteWeight) && ParseUtility.parseBigDecimal(options('substituteWeight)).isDefined
+      ) {
+        execute(options)
+        exit(options)
+      // Invalid syntax.
+      } else throw new IllegalArgumentException("Expected valid syntax. See --help.")
+    } catch {
+      case e: Throwable => error(e, options)
+    }
+  }
+
+  /** Prints the description, syntax, and options of this command. */
+  override def help(): Unit = {
+    val ls = sys.props("line.separator")
+    val tab = " "
+
+    println(
+      "Compares the number of characters that two strings are different from one another via insertion, deletion, " +
+      "and substitution. Allows the invoker to indicate the weight each operation takes." + ls + ls +
+      "Syntax:" + ls +
+      tab + "weightedLevenshteinMetric [Options] --deleteWeight=[double] --insertWeight=[double] --substituteWeight=[double] string1 string2..." + ls + ls +
+      "Options:" + ls +
+      tab + "--deleteWeight" + ls +
+      tab + tab + "The weight given to delete operations." + ls +
+      tab + "-h, --help" + ls +
+      tab + tab + "Outputs description, syntax, and options." + ls +
+      tab + "--insertWeight" + ls +
+      tab + tab + "The weight given to insert operations." + ls +
+      tab + "--substituteWeight" + ls +
+      tab + tab + "The weight given to substitute operations."
+    )
+  }
+
+  /**
+   * Compares the two space-separated dashless strings using the given weights and prints the result, or
+   * "not comparable" when the comparison yields no value. Expects options already validated by main.
+   */
+  override def execute(options: OptionMap): Unit = {
+    val strings = options('dashless).split(" ")
+    val weights = Tuple3[BigDecimal, BigDecimal, BigDecimal](
+      ParseUtility.parseBigDecimal(options('deleteWeight)).get,
+      ParseUtility.parseBigDecimal(options('insertWeight)).get,
+      ParseUtility.parseBigDecimal(options('substituteWeight)).get
+    )
+
+    println(
+      WeightedLevenshteinMetric.compare(
+        strings(0),
+        strings(1)
+      )(weights)(new StringFilterDelegate with AsciiLetterCaseStringFilter).getOrElse("not comparable").toString
+    )
+  }
+}
\ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/ParseUtilitySpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/ParseUtilitySpec.scala
new file mode 100755
index 0000000..686f3da
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/ParseUtilitySpec.scala
@@ -0,0 +1,60 @@
+package org.hashtree.stringmetric.cli
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+import scala.math.BigDecimal
+
+@RunWith(classOf[JUnitRunner])
+final class ParseUtilitySpec extends ScalaTest {
+  "ParseUtility" should provide {
+    "parseBigDecimal method" when passed {
+      "invalid argument" should returns {
+        "None" in {
+          ParseUtility.parseBigDecimal("one").isDefined should be (false)
+        }
+      }
+      "valid argument" should returns {
+        "Some(BigDecimal)" in {
+          ParseUtility.parseBigDecimal("1").get should equal (BigDecimal(1))
+        }
+      }
+    }
+    "parseDouble method" when passed {
+      "invalid argument" should returns {
+        "None" in {
+          ParseUtility.parseDouble("one").isDefined should be (false)
+        }
+      }
+      "valid argument" should returns {
+        "Some(Double)" in {
+          ParseUtility.parseDouble("1").get should be (1d)
+        }
+      }
+    }
+    "parseFloat method" when passed {
+      "invalid argument" should returns {
+        "None" in {
+          ParseUtility.parseFloat("one").isDefined should be (false)
+        }
+      }
+      "valid argument" should returns {
+        "Some(Float)" in {
+          ParseUtility.parseFloat("1").get should be (1f)
+        }
+      }
+    }
+    "parseInt method" when passed {
+      "invalid argument" should returns {
+        "None" in {
+          ParseUtility.parseInt("one").isDefined should be (false)
+        }
+      }
+      "valid argument" should returns {
+        "Some(Int)" in {
+          ParseUtility.parseInt("1").get should be (1)
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/weightedLevenshteinMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/weightedLevenshteinMetricSpec.scala
new file mode 100755
index 0000000..43e22c4
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/weightedLevenshteinMetricSpec.scala
@@ -0,0 +1,138 @@
+package org.hashtree.stringmetric.cli.similarity
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+@RunWith(classOf[JUnitRunner])
+final class weightedLevenshteinMetricSpec extends ScalaTest {
+  "weightedLevenshteinMetric" should provide {
+    "main method" when passed {
+      "valid dashless arguments and valid weight arguments" should executes {
+        "print if they are a match" in {
+          val out = new java.io.ByteArrayOutputStream()
+
+          Console.withOut(out)(
+            weightedLevenshteinMetric.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--deleteWeight=1",
+                "--insertWeight=1",
+                "--substituteWeight=1",
+                "aBc",
+                "abc"
+              )
+            )
+          )
+
+          out.toString should equal ("0.0\n")
+          out.reset()
+
+          Console.withOut(out)(
+            weightedLevenshteinMetric.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--deleteWeight=2",
+                "--insertWeight=2",
+                "--substituteWeight=1",
+                "aBc",
+                "xyz"
+              )
+            )
+          )
+
+          out.toString should equal ("3.0\n")
+          out.reset()
+
+          Console.withOut(out)(
+            weightedLevenshteinMetric.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--deleteWeight=2",
+                "--insertWeight=1",
+                "--substituteWeight=2",
+                "xyz",
+                "xyzxyz"
+              )
+            )
+          )
+
+          out.toString should equal ("3.0\n")
+          out.reset()
+
+          Console.withOut(out)(
+            weightedLevenshteinMetric.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--deleteWeight=1",
+                "--insertWeight=2",
+                "--substituteWeight=2",
+                "xyzxyz",
+                "xyz"
+              )
+            )
+          )
+
+          out.toString should equal ("3.0\n")
+          out.reset()
+        }
+      }
+      "valid dashless arguments and invalid weight arguments" should throws {
+        "IllegalArgumentException" in {
+          evaluating {
+            weightedLevenshteinMetric.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--deleteWeight=1",
+                "--substituteWeight=1",
+                "aBc",
+                "abc"
+              )
+            )
+          } should produce [IllegalArgumentException]
+
+          evaluating {
+            weightedLevenshteinMetric.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--deleteWeight=1",
+                "--insertWeight=q",
+                "--substituteWeight=1",
+                "aBc",
+                "abc"
+              )
+            )
+          } should produce [IllegalArgumentException]
+
+          // "NaN" parses as a Double but not as a BigDecimal, so it must be rejected as invalid syntax.
+          evaluating {
+            weightedLevenshteinMetric.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--deleteWeight=1",
+                "--insertWeight=NaN",
+                "--substituteWeight=1",
+                "aBc",
+                "abc"
+              )
+            )
+          } should produce [IllegalArgumentException]
+        }
+      }
+      "no dashless arguments" should throws {
+        "IllegalArgumentException" in {
+          evaluating {
+            weightedLevenshteinMetric.main(Array("--unitTest", "--debug"))
+          } should produce [IllegalArgumentException]
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
-- 
cgit v1.2.3