blob: c62f17cc35e03bd07025680968b67f6c37092e09 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
/* NSC -- new Scala compiler
* Copyright 2005-2011 LAMP/EPFL
* @author Paul Phillips
*/
package scala.tools
package cmd
package program
import nsc._
import scala.reflect.Chars.char2uescape
import io._
import ast.parser.Tokens._
/** Given paths on the command line, tokenizes any scala files found
* and prints one token per line.
*/
object Tokens {
  // Command line specification: a usage banner, the flags that take no
  // argument ("unary") and the options that take exactly one ("binary").
  private val tokensUsage = "Usage: tokens [options] <path1 path2 ...>\n\nOptions:"
  private val tokensUnary = List(
    "verbose" -> "be more verbose",
    "freq"    -> "combine token lists and sort by frequency",
    "stats"   -> "output some stats"
  )
  private val tokensBinary = List(
    "sliding" -> "print tokens in groups of given size"
  )
  private val tokensInfo = Simple.scalaProgramInfo("tokens", tokensUsage)
  // Lazy, so the spec is only assembled the first time main needs it.
  private lazy val TokensSpec = Simple(tokensInfo, tokensUnary, tokensBinary, null)

  /** Renders any value via `toString` and sanitizes the resulting string. */
  def sanitize(x: Any): String = sanitize(x.toString)

  /** Returns `str` with every control character replaced by the result of
   *  `char2uescape`; all other characters are passed through unchanged.
   */
  def sanitize(str: String): String =
    str flatMap (ch => if (ch.isControl) char2uescape(ch) else ch.toString)

  /** Interprets the argument given to `--sliding`.
   *
   *  @param arg the raw option value as typed by the user
   *  @return    the window size, or None when `arg` is not a whole
   *             number or is smaller than 1 (`sliding` rejects such sizes)
   */
  private def slidingSize(arg: String): Option[Int] =
    try { Some(arg.toInt) filter (_ >= 1) }
    catch { case _: NumberFormatException => None }

  /** Program entry point.
   *
   *  With no arguments the help message is printed.  Otherwise every
   *  residual argument is treated as a path, the scala files found under
   *  those paths are tokenized, and the result is printed one entry per
   *  line.  `--sliding n` prints windows of n consecutive tokens joined by
   *  spaces, `--freq` prints "count entry" lines ordered by descending
   *  count, and `--verbose` first lists the files being tokenized.
   */
  def main(args0: Array[String]): Unit = {
    if (args0.isEmpty) println(TokensSpec.helpMsg)
    else {
      val runner = TokensSpec instance args0
      import runner._

      // Check --sliding before doing any work, so a bad value produces a
      // readable message instead of an exception part way through.
      val slidingArg: Option[String] =
        if (parsed isSet "--sliding") Some(parsed("--sliding")) else None
      val window: Option[Int] = slidingArg flatMap slidingSize

      (slidingArg, window) match {
        case (Some(bad), None) =>
          Console.err.println("tokens: --sliding requires a positive integer, but got '" + bad + "'")
        case _ =>
          val files = (residualArgs flatMap walk).distinct

          if (parsed isSet "--verbose")
            println("Tokenizing: " + (files map (_.name) mkString " "))
          if (parsed isSet "--stats")
            println("Stats not yet implemented.")

          val raw = files flatMap fromScalaSource

          // Only the windowed form is sanitized; bare tokens are printed as is.
          val tokens: List[Any] = window match {
            case Some(size) => (raw sliding size map (_ map sanitize mkString " ")).toList
            case None       => raw
          }

          val output: List[Any] =
            if (parsed isSet "--freq")
              (tokens groupBy (x => x) mapValues (_.length)).toList sortBy (-_._2) map (x => x._2 + " " + x._1)
            else
              tokens

          output foreach println
      }
    }
  }

  /** Tokenizes every distinct scala file found under the given paths and
   *  returns all tokens as a single list.
   */
  def fromPaths(paths: String*): List[Any] =
    (paths.toList flatMap walk).distinct flatMap fromScalaSource

  /** Given a path, returns all .scala files underneath it.  A path which
   *  is not a directory is considered on its own.
   */
  private def walk(arg: String): List[File] = {
    def traverse = Path(arg) ifDirectory (_.deepList()) getOrElse Iterator(File(arg))

    (Path onlyFiles traverse filter (_ hasExtension "scala")).toList
  }

  /** Tokenizes scala source held in a string by writing it to a temporary
   *  file and tokenizing that file.
   */
  def fromScalaString(code: String): List[Any] = {
    val f = File.makeTemp("tokens")
    try {
      f writeAll code
      fromScalaSource(f)
    }
    // The token list is fully built by now, so the scratch file can go.
    finally f.delete()
  }

  /** Tokenizes a single scala file.
   */
  def fromScalaSource(file: Path): List[Any] = fromScalaSource(file.path)

  /** Tokenizes the scala file at the given path.
   *
   *  A new `Global` with default `Settings` is created on every call.
   *
   *  @param file path of the file to scan
   *  @return     one element per token up to (not including) EOF:
   *              the name for identifiers, the scanner's value for
   *              char/int/long and float/double literals, the quoted text
   *              for string literals, ";" for SEMI and NEWLINE, ";;" for
   *              NEWLINES, "," for COMMA and `token2string` for the rest
   */
  def fromScalaSource(file: String): List[Any] = {
    val global = new Global(new Settings())
    import global._
    import syntaxAnalyzer.{ UnitScanner, token2string }

    val in = new UnitScanner(new CompilationUnit(getSourceFile(file)))
    in.init()

    // Printable form of the token the scanner is currently positioned on.
    def describeCurrent: Any = in.token match {
      case IDENTIFIER | BACKQUOTED_IDENT => in.name
      case CHARLIT | INTLIT | LONGLIT    => in.intVal
      case DOUBLELIT | FLOATLIT          => in.floatVal
      case STRINGLIT                     => "\"" + in.strVal + "\""
      case SEMI | NEWLINE                => ";"
      case NEWLINES                      => ";;"
      case COMMA                         => ","
      case x                             => token2string(x)
    }

    // Read the token first, then advance: describeCurrent looks at the
    // scanner's current state, which nextToken() overwrites.
    val collected = new scala.collection.mutable.ListBuffer[Any]
    while (in.token != EOF) {
      collected += describeCurrent
      in.nextToken()
    }
    collected.toList
  }
}
|