diff options
Diffstat (limited to 'core/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala')
-rwxr-xr-x | core/source/test/scala/com/rockymadden/stringmetric/tokenization/NGramTokenizerSpec.scala | 69 |
1 files changed, 69 insertions, 0 deletions
package com.rockymadden.stringmetric.tokenization

import com.rockymadden.stringmetric.ScalaTest
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

/**
 * Specification for `NGramTokenizer.tokenize(input)(n)`.
 *
 * Covered behaviour:
 *  - an empty input yields an undefined (empty) `Option`;
 *  - an `n` of 0 or a negative `n` is rejected with `IllegalArgumentException`;
 *  - a non-empty input yields the array of consecutive, overlapping n-grams;
 *  - the companion object's `tokenize` agrees with an instance's `tokenize`.
 */
@RunWith(classOf[JUnitRunner])
final class NGramTokenizerSpec extends ScalaTest {
  import NGramTokenizerSpec.{ Alphabet, Tokenizer }

  "NGramTokenizer" should provide {
    "tokenize method" when passed {
      "empty argument" should returns {
        "None" in {
          Tokenizer.tokenize("")(1).isDefined should be (false)
        }
      }
      "invalid n argument" should throws {
        "IllegalArgumentException" in {
          // Only the call itself is evaluated: any matcher chained after it could never
          // run once the expected exception is thrown, so none is attached.
          evaluating {
            Tokenizer.tokenize("")(0)
          } should produce [IllegalArgumentException]

          evaluating {
            Tokenizer.tokenize("")(-1)
          } should produce [IllegalArgumentException]
        }
      }
      "valid argument" should returns {
        "Array[String]" in {
          // n = 1: one token per character.
          Tokenizer.tokenize(Alphabet)(1).get should equal (
            Array(
              "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
              "s", "t", "u", "v", "w", "x", "y", "z"
            )
          )
          // n = 2: overlapping pairs, one fewer token than characters.
          Tokenizer.tokenize(Alphabet)(2).get should equal (
            Array(
              "ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl", "lm", "mn", "no", "op",
              "pq", "qr", "rs", "st", "tu", "uv", "vw", "wx", "xy", "yz"
            )
          )
          // n = 3: overlapping triples.
          Tokenizer.tokenize(Alphabet)(3).get should equal (
            Array(
              "abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl", "klm", "lmn", "mno",
              "nop", "opq", "pqr", "qrs", "rst", "stu", "tuv", "uvw", "vwx", "wxy", "xyz"
            )
          )
        }
      }
    }
  }
  "NGramTokenizer companion object" should provide {
    "pass-through tokenize method" should returns {
      "same value as class" in {
        // Compare against the instance result directly, as the test name promises;
        // the instance result itself is pinned to literals in the spec above.
        NGramTokenizer.tokenize(Alphabet)(1).get should equal (
          Tokenizer.tokenize(Alphabet)(1).get
        )
      }
    }
  }
}

/** Fixtures shared by the tests in [[NGramTokenizerSpec]]. */
object NGramTokenizerSpec {
  /** Tokenizer instance under test. */
  private final val Tokenizer = NGramTokenizer()

  /** Input string used by every positive tokenization test. */
  private final val Alphabet = "abcdefghijklmnopqrstuvwxyz"
}