From c9b9f06a7e1cdb9d661f210565fd91117fbcbf21 Mon Sep 17 00:00:00 2001
From: Rocky Madden
Date: Fri, 9 Nov 2012 14:26:36 -0700
Subject: Created N-Gram metric and algorithm commands and spec.

---
 .../cli/similarity/nGramAlgorithm.scala            | 71 ++++++++++++++++++++++
 .../stringmetric/cli/similarity/nGramMetric.scala  | 71 ++++++++++++++++++++++
 .../cli/similarity/nGramAlgorithmSpec.scala        | 67 ++++++++++++++++++++
 .../cli/similarity/nGramMetricSpec.scala           | 70 +++++++++++++++++++++
 4 files changed, 279 insertions(+)
 create mode 100755 cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/nGramAlgorithm.scala
 create mode 100755 cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/nGramMetric.scala
 create mode 100755 cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/nGramAlgorithmSpec.scala
 create mode 100755 cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/nGramMetricSpec.scala

(limited to 'cli')

diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/nGramAlgorithm.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/nGramAlgorithm.scala
new file mode 100755
index 0000000..7bdd9aa
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/nGramAlgorithm.scala
@@ -0,0 +1,71 @@
+package org.hashtree.stringmetric.cli.similarity
+
+import org.hashtree.stringmetric.StringFilterDelegate
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.filter.AsciiLetterCaseStringFilter
+import org.hashtree.stringmetric.similarity.NGramAlgorithm
+
+/**
+ * The nGramAlgorithm [[org.hashtree.stringmetric.cli.Command]]. Returns the N-Gram representation of the passed string.
+ */
+object nGramAlgorithm extends Command {
+  /**
+   * Dispatches on the parsed options: prints help for -h/--help, executes when the dashless value contains no space
+   * and ParseUtility.parseInt yields a value for --n, and otherwise raises an IllegalArgumentException.
+   */
+  override def main(args: Array[String]): Unit = {
+    val options = OptionMapUtility.toOptionMap(args)
+
+    try {
+      // Help.
+      if (options.contains('h) || options.contains('help)) {
+        help()
+        exit(options)
+      // Execute.
+      } else if (options.contains('dashless) && options('dashless).count(_ == ' ') == 0 &&
+        options.contains('n) && ParseUtility.parseInt(options('n)).isDefined
+      ) {
+        execute(options)
+        exit(options)
+      // Invalid syntax.
+      } else throw new IllegalArgumentException("Expected valid syntax. See --help.")
+    } catch {
+      // Matches every Throwable, as the untyped pattern did, and hands it to error.
+      case e: Throwable => error(e, options)
+    }
+  }
+
+  /** Prints the description, syntax, and options of this command, one option per line. */
+  override def help(): Unit = {
+    val ls = sys.props("line.separator")
+    val tab = " "
+
+    println(
+      "Returns the N-Gram representation of the passed string." + ls + ls +
+      "Syntax:" + ls +
+      tab + "nGramAlgorithm [Options] string..." + ls + ls +
+      "Options:" + ls +
+      tab + "-h, --help" + ls +
+      tab + tab + "Outputs description, syntax, and options." + ls +
+      tab + "--n" + ls +
+      tab + tab + "The n."
+    )
+  }
+
+  /**
+   * Computes the N-Gram representation of the dashless string for the passed n and prints the grams joined by "|",
+   * or "not computable" when no representation is returned.
+   */
+  override def execute(options: OptionMap): Unit = {
+    val n = ParseUtility.parseInt(options('n)).get
+    // NOTE(review): the parenthesized filter sits on its own line, so Scala's newline rules likely parse it as a
+    // separate, discarded expression rather than a third argument list. Confirm before joining the two lines.
+    val ngram = NGramAlgorithm.compute(options('dashless))(n)
+      (new StringFilterDelegate with AsciiLetterCaseStringFilter)
+
+    ngram match {
+      case Some(a) => println(a.mkString("|"))
+      case None => println("not computable")
+    }
+  }
+}
\ No newline at end of file
diff --git a/cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/nGramMetric.scala b/cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/nGramMetric.scala
new file mode 100755
index 0000000..39ca1b6
--- /dev/null
+++ b/cli/source/core/scala/org/hashtree/stringmetric/cli/similarity/nGramMetric.scala
@@ -0,0 +1,71 @@
+package org.hashtree.stringmetric.cli.similarity
+
+import org.hashtree.stringmetric.StringFilterDelegate
+import org.hashtree.stringmetric.cli._
+import org.hashtree.stringmetric.filter.AsciiLetterCaseStringFilter
+import org.hashtree.stringmetric.similarity.NGramMetric
+
+/**
+ * The nGramMetric [[org.hashtree.stringmetric.cli.Command]]. Compares the similarity of two strings using an N-Gram
+ * similarity index.
+ */
+object nGramMetric extends Command {
+  /**
+   * Dispatches on the parsed options: prints help for -h/--help, executes when the dashless value contains exactly
+   * one space and ParseUtility.parseInt yields a value for --n, and otherwise raises an IllegalArgumentException.
+   */
+  override def main(args: Array[String]): Unit = {
+    val options = OptionMapUtility.toOptionMap(args)
+
+    try {
+      // Help.
+      if (options.contains('h) || options.contains('help)) {
+        help()
+        exit(options)
+      // Execute.
+      } else if (options.contains('dashless) && options('dashless).count(_ == ' ') == 1 &&
+        options.contains('n) && ParseUtility.parseInt(options('n)).isDefined
+      ) {
+        execute(options)
+        exit(options)
+      // Invalid syntax.
+      } else throw new IllegalArgumentException("Expected valid syntax. See --help.")
+    } catch {
+      // Matches every Throwable, as the untyped pattern did, and hands it to error.
+      case e: Throwable => error(e, options)
+    }
+  }
+
+  /** Prints the description, syntax, and options of this command, one option per line. */
+  override def help(): Unit = {
+    val ls = sys.props("line.separator")
+    val tab = " "
+
+    println(
+      "Compares the similarity of two strings using an N-Gram similarity index." + ls + ls +
+      "Syntax:" + ls +
+      tab + "nGramMetric [Options] string1 string2..." + ls + ls +
+      "Options:" + ls +
+      tab + "-h, --help" + ls +
+      tab + tab + "Outputs description, syntax, and options." + ls +
+      tab + "--n" + ls +
+      tab + tab + "The n."
+    )
+  }
+
+  /**
+   * Splits the dashless value on the space into the two strings, compares them with NGramMetric for the passed n
+   * and an AsciiLetterCaseStringFilter delegate, and prints the result or "not comparable" when there is none.
+   */
+  override def execute(options: OptionMap): Unit = {
+    val strings = options('dashless).split(" ")
+    val n = ParseUtility.parseInt(options('n)).get
+
+    println(
+      NGramMetric.compare(
+        strings(0),
+        strings(1)
+      )(n)(new StringFilterDelegate with AsciiLetterCaseStringFilter).getOrElse("not comparable").toString
+    )
+  }
+}
\ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/nGramAlgorithmSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/nGramAlgorithmSpec.scala
new file mode 100755
index 0000000..5997a9c
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/nGramAlgorithmSpec.scala
@@ -0,0 +1,67 @@
+package org.hashtree.stringmetric.cli.similarity
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Specification of the nGramAlgorithm command's main method. */
+@RunWith(classOf[JUnitRunner])
+final class nGramAlgorithmSpec extends ScalaTest {
+  "nGramAlgorithm" should provide {
+    "main method" when passed {
+      "valid dashless argument and valid n argument" should executes {
+        "print N-Gram representation" in {
+          val out = new java.io.ByteArrayOutputStream()
+
+          Console.withOut(out)(
+            nGramAlgorithm.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--n=1",
+                "aBc"
+              )
+            )
+          )
+
+          out.toString should equal ("a|B|c\n")
+          out.reset()
+
+          Console.withOut(out)(
+            nGramAlgorithm.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--n=2",
+                "aBc"
+              )
+            )
+          )
+
+          out.toString should equal ("aB|Bc\n")
+          out.reset()
+        }
+      }
+      "valid dashless argument and invalid n argument" should throws {
+        "IllegalArgumentException" in {
+          // A single dashless argument keeps the string itself valid, so the missing --n is what gets rejected.
+          evaluating {
+            nGramAlgorithm.main(
+              Array(
+                "--unitTest",
+                "aBc"
+              )
+            )
+          } should produce [IllegalArgumentException]
+        }
+      }
+      "no dashless argument" should throws {
+        "IllegalArgumentException" in {
+          evaluating {
+            nGramAlgorithm.main(Array("--unitTest", "--debug"))
+          } should produce [IllegalArgumentException]
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/nGramMetricSpec.scala b/cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/nGramMetricSpec.scala
new file mode 100755
index 0000000..18e8a50
--- /dev/null
+++ b/cli/source/test/scala/org/hashtree/stringmetric/cli/similarity/nGramMetricSpec.scala
@@ -0,0 +1,70 @@
+package org.hashtree.stringmetric.cli.similarity
+
+import org.hashtree.stringmetric.ScalaTest
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+
+/** Specification of the nGramMetric command's main method. */
+@RunWith(classOf[JUnitRunner])
+final class nGramMetricSpec extends ScalaTest {
+  "nGramMetric" should provide {
+    "main method" when passed {
+      "valid dashless arguments and valid n argument" should executes {
+        "print if they are a match" in {
+          val out = new java.io.ByteArrayOutputStream()
+
+          Console.withOut(out)(
+            nGramMetric.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--n=1",
+                "aBc",
+                "abc"
+              )
+            )
+          )
+
+          out.toString should equal ("1.0\n")
+          out.reset()
+
+          Console.withOut(out)(
+            nGramMetric.main(
+              Array(
+                "--unitTest",
+                "--debug",
+                "--n=1",
+                "aBc",
+                "xyz"
+              )
+            )
+          )
+
+          out.toString should equal ("0.0\n")
+          out.reset()
+        }
+      }
+      "valid dashless arguments and invalid n argument" should throws {
+        "IllegalArgumentException" in {
+          // Both strings are supplied but --n is not, so the missing n is what gets rejected.
+          evaluating {
+            nGramMetric.main(
+              Array(
+                "--unitTest",
+                "aBc",
+                "abc"
+              )
+            )
+          } should produce [IllegalArgumentException]
+        }
+      }
+      "no dashless arguments" should throws {
+        "IllegalArgumentException" in {
+          evaluating {
+            nGramMetric.main(Array("--unitTest", "--debug"))
+          } should produce [IllegalArgumentException]
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
-- 
cgit v1.2.3