path: root/core/source/test/scala/com/rockymadden/stringmetric/TokenizeSpec.scala
package com.rockymadden.stringmetric

import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
final class TokenizeSpec extends ScalaTest { "NGramTokenizer" should provide {
	import Tokenize._

	"tokenize method" when passed {
		"empty argument" should returns {
			"None" in {
				NGramTokenizer(1).tokenize("").isDefined should be (false)
			}
		}
		"invalid n argument" should returns {
			"None" in {
				NGramTokenizer(0).tokenize("").isDefined should be (false)
				NGramTokenizer(-1).tokenize("").isDefined should be (false)
			}
		}
		"valid argument" should returns {
			"Array[String]" in {
				NGramTokenizer(1).tokenize("abcdefghijklmnopqrstuvwxyz").get should equal (
					Array(
						"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
						"s", "t", "u", "v", "w", "x", "y", "z"
					)
				)
				NGramTokenizer(2).tokenize("abcdefghijklmnopqrstuvwxyz").get should equal (
					Array(
						"ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl", "lm", "mn", "no", "op",
						"pq", "qr", "rs", "st", "tu", "uv", "vw", "wx", "xy", "yz"
					)
				)
				NGramTokenizer(3).tokenize("abcdefghijklmnopqrstuvwxyz").get should equal (
					Array(
						"abc", "bcd", "cde", "def", "efg", "fgh", "ghi", "hij", "ijk", "jkl", "klm", "lmn", "mno",
						"nop", "opq", "pqr", "qrs", "rst", "stu", "tuv", "uvw", "vwx", "wxy", "xyz"
					)
				)
			}
		}
	}
}}
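
For readers unfamiliar with the project's spec DSL (provide, passed, returns), the following standalone sketch illustrates the same NGramTokenizer behaviour the spec exercises. It assumes only what the spec itself shows: that Tokenize exposes NGramTokenizer, and that NGramTokenizer(n).tokenize(string) returns an Option[Array[String]]. The NGramExample object name is hypothetical.

import com.rockymadden.stringmetric.Tokenize._

// Hypothetical entry point mirroring the cases asserted in TokenizeSpec.
object NGramExample extends App {
	// Valid n and non-empty input: prints Some([ni, ig, gh, ht]).
	println(NGramTokenizer(2).tokenize("night").map(_.mkString("[", ", ", "]")))

	// Empty input: prints None, matching the "empty argument" case above.
	println(NGramTokenizer(2).tokenize(""))

	// Non-positive n: prints None, matching the "invalid n argument" case above.
	println(NGramTokenizer(0).tokenize(""))
	println(NGramTokenizer(-1).tokenize(""))
}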