summaryrefslogtreecommitdiff
path: root/cli/source/main/scala/com/rockymadden/stringmetric/cli/tokenize/ngramtokenizer.scala
blob: cbd33d7e75b40b59cbf75cb9addeae18889c7a9e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
package com.rockymadden.stringmetric.cli.tokenize

import com.rockymadden.stringmetric.cli._
import com.rockymadden.stringmetric.tokenize.NGramTokenizer

/**
 * Command-line command that prints the N-Gram tokens of a single string.
 *
 * Three functions over the parsed options are handed to `Command`, in this order:
 *  1. a builder for the help text (description, syntax, and options),
 *  2. a validator that accepts exactly one dashless argument together with an `n` of at least 1,
 *  3. the executor, which tokenizes the dashless argument and returns the tokens joined by "|",
 *     or "not computable" when the tokenizer yields `None`.
 */
case object ngramtokenizer extends Command(
	// Help text. Each option name and each option description sits on its own line.
	(opts) =>
		"Returns the N-Gram representation of the passed string." + Ls + Ls +
		"Syntax:" + Ls +
		Tab + "ngramtokenizer [Options] string..." + Ls + Ls +
		"Options:" + Ls +
		Tab + "-h, --help" + Ls +
		// The line separator after this description keeps "--n" from being glued onto the same line.
		Tab + Tab + "Outputs description, syntax, and opts." + Ls +
		Tab + "--n" + Ls +
		Tab + Tab + "The n.",
	// Validation: exactly one dashless argument, and an n that is present and >= 1.
	(opts) => opts.contains('dashless) && (opts('dashless): Array[String]).length == 1 &&
		opts.contains('n) && (opts('n): Int) >= 1,
	// Execution: tokenize and render the tokens as a single "|"-delimited string.
	(opts) => NGramTokenizer(opts('n)).tokenize(opts('dashless)) match {
		case Some(tokens) => tokens.mkString("|")
		case None => "not computable"
	}
)