summaryrefslogtreecommitdiff
path: root/src/compiler/scala/tools/cmd/program/Tokens.scala
blob: c62f17cc35e03bd07025680968b67f6c37092e09 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/* NSC -- new Scala compiler
 * Copyright 2005-2011 LAMP/EPFL
 * @author Paul Phillips
 */

package scala.tools
package cmd
package program

import nsc._
import scala.reflect.Chars.char2uescape
import io._
import ast.parser.Tokens._

/** Given paths on the command line, tokenizes any scala files found
 *  and prints one token per line.
 */
/** Given paths on the command line, tokenizes any scala files found
 *  and prints one token per line.
 */
object Tokens {
  private val tokensUsage = "Usage: tokens [options] <path1 path2 ...>\n\nOptions:"
  // Flags that take no argument.
  private val tokensUnary = List(
    "verbose" -> "be more verbose",
    "freq"    -> "combine token lists and sort by frequency",
    "stats"   -> "output some stats"
  )
  // Flags that take a value.
  private val tokensBinary = List(
    "sliding" -> "print tokens in groups of given size"
  )
  private val tokensInfo = Simple.scalaProgramInfo("tokens", tokensUsage)
  private lazy val TokensSpec = Simple(tokensInfo, tokensUnary, tokensBinary, null)

  /** Renders a token for display: control characters are replaced by their
   *  unicode escape so each token stays on a single output line.
   */
  def sanitize(x: Any): String = sanitize(x.toString)
  def sanitize(str: String): String =
    str flatMap (ch => if (ch.isControl) char2uescape(ch) else ch.toString)

  def main(args0: Array[String]): Unit = {
    // With no arguments there is nothing to tokenize: print usage and stop.
    if (args0.isEmpty) println(TokensSpec.helpMsg)
    else run(args0)
  }

  /** Parses the command line and prints the requested token output. */
  private def run(args0: Array[String]): Unit = {
    val runner = TokensSpec instance args0
    import runner._

    val files = (residualArgs flatMap walk).distinct
    if (parsed isSet "--verbose")
      println("Tokenizing: " + (files map (_.name) mkString " "))

    if (parsed isSet "--stats")
      println("Stats not yet implemented.")

    // Tokens from all files, concatenated in argument order.
    def raw = files flatMap fromScalaSource
    def tokens: List[Any] =
      if (parsed isSet "--sliding")
        raw.sliding(parsed("--sliding").toInt).map(_.map(sanitize).mkString(" ")).toList
      else raw

    def output =
      if (parsed isSet "--freq") {
        // Most frequent tokens first, each line formatted as "<count> <token>".
        val counted = tokens.groupBy(identity).map { case (tok, xs) => (tok, xs.length) }.toList
        counted.sortBy(-_._2).map { case (tok, count) => count + " " + tok }
      }
      else
        tokens

    output foreach println
  }

  /** Tokenizes all .scala files found beneath the given paths. */
  def fromPaths(paths: String*): List[Any] =
    (paths.toList flatMap walk).distinct flatMap fromScalaSource

  /** Given a path, returns all .scala files underneath it.
   */
  private def walk(arg: String): List[File] = {
    // A directory is traversed recursively; a plain file stands alone.
    def traverse = Path(arg) ifDirectory (_.deepList()) getOrElse Iterator(File(arg))

    Path.onlyFiles(traverse).filter(_ hasExtension "scala").toList
  }

  /** Tokenizes a string of scala code by way of a temporary file,
   *  which is deleted once scanning completes.
   */
  def fromScalaString(code: String): List[Any] = {
    val f = File.makeTemp("tokens")
    try {
      f writeAll code
      fromScalaSource(f)
    }
    finally f.delete()  // don't leave the temp file behind
  }

  /** Tokenizes a single scala file.
   */
  def fromScalaSource(file: Path): List[Any] = fromScalaSource(file.path)
  def fromScalaSource(file: String): List[Any] = {
    // A fresh compiler instance per file: expensive, but keeps scanner state isolated.
    val global = new Global(new Settings())
    import global._
    import syntaxAnalyzer.{ UnitScanner, token2string }

    val in = new UnitScanner(new CompilationUnit(getSourceFile(file)))
    in.init()

    // Drain the scanner until EOF (mapped to null below), rendering
    // identifiers and literals with their actual content rather than
    // just the token-type name.
    Iterator.continually {
      val token = in.token match {
        case IDENTIFIER | BACKQUOTED_IDENT  => in.name
        case CHARLIT | INTLIT | LONGLIT     => in.intVal
        case DOUBLELIT | FLOATLIT           => in.floatVal
        case STRINGLIT                      => "\"" + in.strVal + "\""
        case SEMI | NEWLINE                 => ";"
        case NEWLINES                       => ";;"
        case COMMA                          => ","
        case EOF                            => null
        case x                              => token2string(x)
      }
      in.nextToken()
      token
    }.takeWhile(_ != null).toList
  }
}