From c0d91d66fb7cf06fb7d2fff7ba3218b7a4dc5e49 Mon Sep 17 00:00:00 2001
From: Jakob Odersky
Date: Sun, 8 Apr 2018 13:12:34 -0700
Subject: Initial commit

---
 yamlesque/src/main/scala/parser.scala      | 297 +++++++++++++++++++++++++++++
 yamlesque/src/main/scala/printers.scala    |  52 +++++
 yamlesque/src/main/scala/yamlValues.scala  |  27 +++
 yamlesque/src/test/scala/ParserTests.scala |  87 ++++++++
 4 files changed, 463 insertions(+)
 create mode 100644 yamlesque/src/main/scala/parser.scala
 create mode 100644 yamlesque/src/main/scala/printers.scala
 create mode 100644 yamlesque/src/main/scala/yamlValues.scala
 create mode 100644 yamlesque/src/test/scala/ParserTests.scala

(limited to 'yamlesque')

diff --git a/yamlesque/src/main/scala/parser.scala b/yamlesque/src/main/scala/parser.scala
new file mode 100644
index 0000000..c912330
--- /dev/null
+++ b/yamlesque/src/main/scala/parser.scala
@@ -0,0 +1,297 @@
+package yamlesque
+
+import annotation.{switch, tailrec}
+import scala.collection.mutable.ListBuffer
+
+/** Tokenizer and recursive-descent parser for a small block-style subset of
+  * YAML. [[Parser.parse]] is the entry point.
+  */
+object Parser {
+
+  /** The kinds of token produced by the [[Scanner]]. */
+  sealed trait TokenKind
+  object TokenKind {
+    case object EOF extends TokenKind
+    case object BAD extends TokenKind
+    case object DOCSTART extends TokenKind
+    case object DOCEND extends TokenKind
+    case object MAPPING extends TokenKind
+    case object ITEM extends TokenKind
+    case object IDENTIFIER extends TokenKind
+    case object COMMENT extends TokenKind
+  }
+  import TokenKind._
+
+  /** A token and the zero-based line and column at which it starts.
+    *
+    * The position is mutable and takes no part in case class equality.
+    */
+  case class Token(kind: TokenKind, value: String = "") {
+    var line: Int = 0
+    var col: Int = 0
+    def setPos(line: Int, col: Int): this.type = {
+      this.col = col
+      this.line = line
+      this
+    }
+    // Overriding toString suppresses the one synthesized for case classes, so
+    // super.toString would be AnyRef's "Class@hash". Spell the fields out.
+    override def toString(): String =
+      s"($line, $col): Token($kind,$value)"
+  }
+
+  object Chars {
+    final val LF = '\u000A'
+    final val CR = '\u000D'
+    final val SU = '\u001A'
+
+    /** True for a space or a tab. */
+    @inline def isSpace(ch: Char): Boolean = ch match {
+      case ' ' | '\t' => true
+      case _ => false
+    }
+
+    /** True for a space, a tab, a line terminator or the end-of-input marker. */
+    @inline def isBlank(ch: Char): Boolean = ch match {
+      case ' ' | '\t' | CR | LF | SU => true
+      case _ => false
+    }
+  }
+
+  /** Splits a character stream into tokens using three characters of
+    * lookahead, ch0 being the current character.
+    *
+    * The iterator never ends: once the input is exhausted, next() keeps
+    * returning EOF tokens.
+    */
+  class Scanner(chars: Iterator[Char]) extends Iterator[Token] {
+    import Chars._
+
+    private var ch0: Char = 0
+    private var ch1: Char = 0
+    private var ch2: Char = 0
+    private var pos: Long = 0
+    private var line: Int = 0
+    private var col: Int = 0
+
+    /** Shifts the lookahead window by one, padding with SU at end of input. */
+    private def skipChar(): Unit = {
+      val ch: Char = if (chars.hasNext) chars.next() else SU
+      pos += 1
+      col += 1
+      ch0 = ch1
+      ch1 = ch2
+      ch2 = ch
+    }
+    private def skipChars(n: Int): Unit = {
+      var i = 0
+      while (i < n) { skipChar(); i += 1 }
+    }
+    /** Fills the lookahead window and resets the position counters. */
+    def init(): Unit = {
+      skipChars(3)
+      pos = 0
+      col = 0
+      line = 0
+    }
+
+    private val buffer = new StringBuilder()
+    /** Appends the current character to the token buffer and advances. */
+    private def putChar(): Unit = {
+      buffer.append(ch0)
+      skipChars(1)
+    }
+    /** Returns the buffered text and empties the buffer. */
+    private def tokenValue(): String = {
+      val str = buffer.result()
+      buffer.clear()
+      str
+    }
+
+    private var token: Token = Token(BAD, "not yet initialized")
+
+    /** Scans the next token into `token`, skipping spaces and line breaks. */
+    @tailrec private def fetchToken(): Unit = {
+      ch0 match {
+        case ':' if isBlank(ch1) =>
+          token = Token(MAPPING).setPos(line, col)
+          skipChars(1)
+        case '-' if isBlank(ch1) =>
+          token = Token(ITEM).setPos(line, col)
+          skipChars(1)
+        case '-' if ch1 == '-' && ch2 == '-' =>
+          token = Token(DOCSTART).setPos(line, col)
+          skipChars(3)
+        case '.' if ch1 == '.' && ch2 == '.' =>
+          token = Token(DOCEND).setPos(line, col)
+          skipChars(3)
+        case '#' =>
+          val l = line
+          val c = col
+          skipChars(1)
+          while (ch0 != LF && ch0 != CR && ch0 != SU) {
+            putChar()
+          }
+          token = Token(COMMENT, tokenValue()).setPos(l, c)
+        case c if isSpace(c) =>
+          skipChars(1)
+          fetchToken()
+        case LF =>
+          skipChars(1)
+          col = 0
+          line += 1
+          fetchToken()
+        case CR =>
+          skipChars(1)
+          if (ch0 == LF) {
+            skipChars(1)
+          }
+          col = 0
+          line += 1
+          fetchToken()
+        case SU =>
+          token = Token(EOF).setPos(line, col)
+          skipChars(1)
+        case _ => fetchScalar()
+      }
+    }
+
+    /** Scans a scalar ending at a colon followed by a blank, or at a line end. */
+    private def fetchScalar(): Unit = {
+      val l = line
+      val c = col
+      // CR has to end the scalar as well, otherwise values read from CRLF
+      // input keep a trailing carriage return.
+      while (!(ch0 == ':' && isBlank(ch1)) && ch0 != LF && ch0 != CR && ch0 != SU) {
+        putChar()
+      }
+      token = Token(IDENTIFIER, tokenValue()).setPos(l, c)
+    }
+
+    override def hasNext: Boolean = true
+    override def next(): Token = {
+      fetchToken()
+      token
+    }
+    init()
+  }
+
+  object Parser {
+
+    class ParseException(val message: String) extends Exception(message)
+
+    /** Builds a YAML value from a token stream.
+      *
+      * COMMENT tokens are skipped. The stream has to keep returning EOF tokens
+      * once it is exhausted, as [[Scanner]] does.
+      *
+      * @throws ParseException if a token is not allowed where it appears
+      */
+    def run(tokens: Iterator[Token]): YamlValue = {
+      // The scanner emits COMMENT tokens but the grammar below has no rule
+      // for them, so drop them before they reach the lookahead window.
+      @tailrec def nextSignificant(): Token = {
+        val t = tokens.next()
+        if (t.kind == COMMENT) nextSignificant() else t
+      }
+
+      var token0 = nextSignificant()
+      var token1 = nextSignificant()
+
+      def readNext(): Unit = {
+        token0 = token1
+        token1 = nextSignificant()
+      }
+
+      def fatal(message: String, token: Token): Nothing = {
+        val completeMessage =
+          s"parse error at line ${token.line}, column ${token.col}: $message"
+        throw new ParseException(completeMessage)
+      }
+
+      def wrongKind(found: Token, required: TokenKind*): Nothing = {
+        fatal(
+          s"token kind not allowed at this position\n" +
+            s" found: ${found.kind}\n" +
+            s" required: ${required.mkString(" or ")}\n" +
+            " " * found.col + found.value + "\n" +
+            " " * found.col + "^",
+          found
+        )
+      }
+
+      def nextSequence(): YamlValue = {
+        val startCol = token0.col
+        val items = new ListBuffer[YamlValue]
+        while (startCol <= token0.col && token0.kind != EOF) {
+          token0.kind match {
+            case ITEM =>
+              readNext()
+              // An item's content starts to the right of its dash. A token
+              // at the dash's column is the next item, so this one is empty.
+              items += nextBlock(startCol + 1)
+            case _ => wrongKind(token0, ITEM)
+          }
+        }
+        YamlSequence(items.toVector)
+      }
+
+      def nextMapping(): YamlValue = {
+        val startCol = token0.col
+        val fields = new ListBuffer[(String, YamlValue)]
+        while (startCol <= token0.col && token0.kind != EOF) {
+          token0.kind match {
+            case IDENTIFIER =>
+              val key = token0.value
+              readNext()
+              token0.kind match {
+                case MAPPING =>
+                  readNext()
+                  // A sequence may sit at the same column as its key. Any
+                  // other token at that column starts the next field, which
+                  // leaves this field with an empty value.
+                  val minCol = if (token0.kind == ITEM) startCol else startCol + 1
+                  fields += key -> nextBlock(minCol)
+                case _ => wrongKind(token0, MAPPING)
+              }
+            case _ => wrongKind(token0, IDENTIFIER)
+          }
+        }
+        YamlMapping(fields.toMap)
+      }
+
+      // Parses the value that starts at token0. A token to the left of
+      // startCol belongs to an enclosing block, so the value is empty.
+      def nextBlock(startCol: Int): YamlValue = {
+        if (token0.col < startCol) {
+          YamlScalar.Empty
+        } else {
+          token0.kind match {
+            case IDENTIFIER =>
+              if (token1.kind == MAPPING && token0.line == token1.line) {
+                nextMapping()
+              } else {
+                val y = YamlScalar(token0.value)
+                readNext()
+                y
+              }
+            case ITEM =>
+              nextSequence()
+            case EOF => YamlScalar.Empty
+            case _ => wrongKind(token0, IDENTIFIER, ITEM)
+          }
+        }
+      }
+
+      nextBlock(0)
+    }
+  }
+
+  /** Parses a YAML document held in a string.
+    *
+    * @throws Parser.ParseException if the document is malformed
+    */
+  def parse(data: String): YamlValue = {
+    Parser.run(new Scanner(data.iterator))
+  }
+}
diff --git a/yamlesque/src/main/scala/printers.scala b/yamlesque/src/main/scala/printers.scala
new file mode 100644
index 0000000..0a1c008
--- /dev/null
+++ b/yamlesque/src/main/scala/printers.scala
@@ -0,0 +1,52 @@
+package yamlesque
+
+import annotation.tailrec
+
+/** Renders a [[YamlValue]] as block-style YAML text, indenting each nesting
+  * level by one space.
+  *
+  * @param compact when true, scalar sequence items and scalar mapping values
+  *                are written on the same line as their dash or key
+  */
+class YamlPrinter(compact: Boolean = true) extends (YamlValue => String) {
+
+  def apply(value: YamlValue): String = {
+    val str = new StringBuilder()
+    def p(value: YamlValue, indentation: Int): Unit = value match {
+      case YamlScalar(value) =>
+        str ++= " " * indentation
+        str ++= value
+        str += '\n'
+      case YamlSequence(items) =>
+        for (item <- items) {
+          str ++= " " * indentation
+          item match {
+            case YamlScalar(v) if compact =>
+              str ++= "- "
+              str ++= v
+              str += '\n'
+            case _ =>
+              str ++= "-\n"
+              p(item, indentation + 1)
+          }
+        }
+      case YamlMapping(fields) =>
+        for ((key, value) <- fields) {
+          str ++= " " * indentation
+          str ++= key
+          value match {
+            case YamlScalar(v) if compact =>
+              str ++= ": "
+              str ++= v
+              str += '\n'
+            case _ =>
+              str ++= ":\n"
+              p(value, indentation + 1)
+          }
+        }
+    }
+    p(value, 0)
+    str.toString
+  }
+
+}
diff --git a/yamlesque/src/main/scala/yamlValues.scala b/yamlesque/src/main/scala/yamlValues.scala
new file mode 100644
index 0000000..3bc4f36
--- /dev/null
+++ b/yamlesque/src/main/scala/yamlValues.scala
@@ -0,0 +1,27 @@
+package yamlesque
+
+/** A node of a YAML document: a mapping, a sequence or a scalar. */
+sealed trait YamlValue {
+  /** Renders this value with [[YamlValue.DefaultPrinter]]. */
+  def print: String = YamlValue.DefaultPrinter(this)
+}
+object YamlValue {
+  val DefaultPrinter: YamlPrinter = new YamlPrinter(compact = true)
+}
+
+case class YamlMapping(fields: Map[String, YamlValue]) extends YamlValue
+object YamlMapping {
+  def apply(items: (String, YamlValue)*): YamlMapping =
+    new YamlMapping(Map(items: _*))
+}
+
+case class YamlSequence(items: Vector[YamlValue]) extends YamlValue
+object YamlSequence {
+  def apply(items: YamlValue*): YamlSequence = new YamlSequence(items.toVector)
+}
+
+case class YamlScalar(value: String) extends YamlValue
+object YamlScalar {
+  /** The scalar used for values that are absent from the document. */
+  final val Empty = YamlScalar("")
+}
diff --git a/yamlesque/src/test/scala/ParserTests.scala b/yamlesque/src/test/scala/ParserTests.scala
new file mode 100644
index 0000000..9965bcd
--- /dev/null
+++ b/yamlesque/src/test/scala/ParserTests.scala
@@ -0,0 +1,87 @@
+package yamlesque
+
+import utest._
+
+object ParserTests extends TestSuite {
+
+  val yaml = YamlMapping(
+    "key1" -> YamlScalar("value1"),
+    "key2" -> YamlMapping(
+      "key1" -> YamlScalar("value1"),
+      "key2" -> YamlScalar("value1"),
+      "key3" -> YamlSequence(
+        YamlScalar("a1"),
+        YamlSequence(
+          YamlScalar("a1"),
+          YamlScalar("a2"),
+          YamlScalar("a3")
+        ),
+        YamlScalar("a3"),
+        YamlMapping(
+          "a1" -> YamlScalar("b"),
+          "a2" -> YamlScalar("b"),
+          "a3" -> YamlScalar("b"),
+          "a4" -> YamlScalar("b")
+        ),
+        YamlScalar("a4"),
+        YamlScalar("a4")
+      ),
+      "key4" -> YamlScalar("value1"),
+      "key5" -> YamlScalar("value1"),
+      "key6" -> YamlScalar("value1")
+    ),
+    "key3" -> YamlScalar("value3")
+  )
+
+  val string =
+    s"""|
+        |key1: value1
+        |key2:
+        |  key4: value1
+        |  key5: value1
+        |  key1: value1
+        |  key2: value1
+        |  key6: value1
+        |  key3:
+        |    - a1
+        |    -
+        |      - a1
+        |      - a2
+        |      - a3
+        |    - a3
+        |    -
+        |      a1: b
+        |      a2: b
+        |      a3: b
+        |      a4: b
+        |    - a4
+        |    - a4
+        |key3: value3
+        |""".stripMargin
+
+  val tests = Tests{
+    "parse" - {
+      assert(Parser.parse(string) == yaml)
+    }
+    "printandparse" - {
+      assert(Parser.parse(yaml.print) == yaml)
+    }
+    "emptyvalues" - {
+      // A key with nothing after it gets an empty scalar, not the next field.
+      val expected = YamlMapping(
+        "a" -> YamlScalar.Empty,
+        "b" -> YamlScalar("1")
+      )
+      assert(Parser.parse("a:\nb: 1\n") == expected)
+    }
+    "comments" - {
+      val expected = YamlMapping("a" -> YamlScalar("1"), "b" -> YamlScalar("2"))
+      assert(Parser.parse("# header\na: 1\n# note\nb: 2\n") == expected)
+    }
+    "crlf" - {
+      val expected = YamlMapping("a" -> YamlScalar("1"), "b" -> YamlScalar("2"))
+      assert(Parser.parse("a: 1\r\nb: 2\r\n") == expected)
+    }
+  }
+
+}
-- 
cgit v1.2.3