From f1fe50cba0c07d27f109a727c480b02639d140e0 Mon Sep 17 00:00:00 2001 From: Jakob Odersky Date: Sun, 8 Apr 2018 14:31:06 -0700 Subject: Add tests --- yamlesque/src/main/scala/YamlParser.scala | 259 ++++++++++++++++++++++++++++ yamlesque/src/main/scala/package.scala | 7 + yamlesque/src/main/scala/parser.scala | 263 ----------------------------- yamlesque/src/main/scala/yamlValues.scala | 2 +- yamlesque/src/test/scala/ParserTests.scala | 262 ++++++++++++++++++++++------ 5 files changed, 473 insertions(+), 320 deletions(-) create mode 100644 yamlesque/src/main/scala/YamlParser.scala create mode 100644 yamlesque/src/main/scala/package.scala delete mode 100644 yamlesque/src/main/scala/parser.scala diff --git a/yamlesque/src/main/scala/YamlParser.scala b/yamlesque/src/main/scala/YamlParser.scala new file mode 100644 index 0000000..71849f1 --- /dev/null +++ b/yamlesque/src/main/scala/YamlParser.scala @@ -0,0 +1,259 @@ +package yamlesque + +import annotation.{switch, tailrec} +import scala.collection.mutable.ListBuffer + +object YamlParser extends (Iterator[Char] => YamlValue) { + + sealed trait TokenKind + object TokenKind { + case object EOF extends TokenKind + case object BAD extends TokenKind + case object DOCSTART extends TokenKind + case object DOCEND extends TokenKind + case object MAPPING extends TokenKind + case object ITEM extends TokenKind + case object IDENTIFIER extends TokenKind + case object COMMENT extends TokenKind + } + import TokenKind._ + + case class Token(val kind: TokenKind, value: String = "") { + var line: Int = 0 + var col: Int = 0 + def setPos(line: Int, col: Int): this.type = { + this.col = col + this.line = line + this + } + override def toString() = { + s"($line, $col): " + super.toString + } + } + + object Chars { + final val LF = '\u000A' + final val CR = '\u000D' + final val SU = '\u001A' + + @inline def isSpace(ch: Char): Boolean = ch match { + case ' ' | '\t' => true + case _ => false + } + + @inline def isBlank(ch: Char): Boolean = ch match { + case ' ' | '\t' | CR | LF | SU => true + case _ => false + } + } + + class Scanner(chars: Iterator[Char]) extends Iterator[Token] { + import Chars._ + + private var ch0: Char = 0 + private var ch1: Char = 0 + private var ch2: Char = 0 + private var pos: Long = 0 + private var line: Int = 0 + private var col: Int = 0 + + private def skipChar(): Unit = { + val ch: Char = if (chars.hasNext) { + chars.next() + } else { + SU + } + pos += 1 + col += 1 + ch0 = ch1 + ch1 = ch2 + ch2 = ch + } + private def skipChars(n: Int): Unit = { + var i = 0 + while (i < n) { skipChar(); i += 1 } + } + def init() = { + skipChars(3) + pos = 0 + col = 0 + line = 0 + } + + private var buffer = new StringBuilder() + private def putChar(): Unit = { + buffer.append(ch0) + skipChars(1) + } + private def tokenValue(): String = { + val str = buffer.result() + buffer.clear() + str + } + + private var token: Token = Token(BAD, "not yet initialized") + + @tailrec private def fetchToken(): Unit = { + ch0 match { + case ':' if isBlank(ch1) => + token = Token(MAPPING).setPos(line, col) + skipChars(1) + case '-' if isBlank(ch1) => + token = Token(ITEM).setPos(line, col) + skipChars(1) + case '-' if ch1 == '-' && ch2 == '-' => + token = Token(DOCSTART).setPos(line, col) + skipChars(3) + case '.' if ch1 == '.' && ch2 == '.' => + token = Token(DOCEND).setPos(line, col) + skipChars(3) + case '#' => + val l = line + val c = col + skipChars(1) + while (ch0 != LF && ch0 != SU) { + putChar() + } + token = Token(COMMENT, tokenValue()).setPos(l, c) + buffer.clear() + case c if isSpace(c) => + skipChars(1) + fetchToken() + case LF => + skipChars(1) + col = 0 + line += 1 + fetchToken() + case CR => + skipChars(1) + if (ch0 == LF) { + skipChars(1) + } + col = 0 + line += 1 + fetchToken() + case SU => + token = Token(EOF).setPos(line, col) + skipChars(1) + case _ => fetchScalar() + } + } + + private def fetchScalar(): Unit = { + def finishScalar() = token = Token(IDENTIFIER, tokenValue()) + @tailrec def fetchRest(): Unit = ch0 match { + case ':' if isBlank(ch1) => + finishScalar() + case LF => + finishScalar() + case SU => + finishScalar() + case c => + putChar() + fetchRest() + } + val l = line + val c = col + fetchRest() + token.setPos(l, c) + } + + override def hasNext: Boolean = true + override def next(): Token = { + fetchToken() + token + } + init() + } + + def parse(tokens: Iterator[Token]): YamlValue = { + var token0 = tokens.next() + var token1 = tokens.next() + + def readNext(): Unit = { + token0 = token1 + token1 = tokens.next() + } + + def fatal(message: String, token: Token) = { + val completeMessage = + s"parse error at line ${token.line}, column ${token.col}: $message" + throw new ParseException(completeMessage) + } + + def wrongKind(found: Token, required: TokenKind*) = { + fatal( + s"token kind not allowed at this position\n" + + s" found: ${found.kind}\n" + + s" required: ${required.mkString(" or ")}\n" + + " " * found.col + found.value + "\n" + + " " * found.col + "^", + found + ) + } + + def nextSequence() = { + val startCol = token0.col + val items = new ListBuffer[YamlValue] + while (startCol <= token0.col && token0.kind != EOF) { + token0.kind match { + case ITEM => + readNext() + items += nextBlock(startCol) + case _ => wrongKind(token0, ITEM) + } + } + YamlSequence(items.toVector) + } + + def nextMapping() = { + val startCol = token0.col + val fields = new ListBuffer[(String, YamlValue)] + while (startCol <= token0.col && token0.kind != EOF) { + token0.kind match { + case IDENTIFIER => + val key = token0.value + readNext() + token0.kind match { + case MAPPING => + readNext() + val value = nextBlock(startCol) + fields += key -> value + case _ => wrongKind(token0, MAPPING) + } + + case _ => wrongKind(token0, IDENTIFIER) + } + } + YamlMapping(fields.toMap) + } + + def nextBlock(startCol: Int): YamlValue = { + if (token0.col < startCol) { + YamlScalar.Empty + } else { + token0.kind match { + case IDENTIFIER => + if (token1.kind == MAPPING && token0.line == token1.line) { + nextMapping() + } else { + val y = YamlScalar(token0.value) + readNext() + y + } + case ITEM => + nextSequence() + case EOF => YamlScalar.Empty + case _ => wrongKind(token0, IDENTIFIER, ITEM) + } + } + } + + nextBlock(0) + } + + def apply(data: Iterator[Char]): YamlValue = parse(new Scanner(data)) + +} + +class ParseException(val message: String) extends Exception(message) diff --git a/yamlesque/src/main/scala/package.scala b/yamlesque/src/main/scala/package.scala new file mode 100644 index 0000000..fdb05d0 --- /dev/null +++ b/yamlesque/src/main/scala/package.scala @@ -0,0 +1,7 @@ +package yamlesque + +object `package` { + implicit class RichString(val str: String) extends AnyVal { + def parseYaml: YamlValue = YamlParser(str.toIterator) + } +} diff --git a/yamlesque/src/main/scala/parser.scala b/yamlesque/src/main/scala/parser.scala deleted file mode 100644 index c912330..0000000 --- a/yamlesque/src/main/scala/parser.scala +++ /dev/null @@ -1,263 +0,0 @@ -package yamlesque - -import annotation.{switch, tailrec} -import scala.collection.mutable.ListBuffer - -object Parser { - - sealed trait TokenKind - object TokenKind { - case object EOF extends TokenKind - case object BAD extends TokenKind - case object DOCSTART extends TokenKind - case object DOCEND extends TokenKind - case object MAPPING extends TokenKind - case object ITEM extends TokenKind - case object IDENTIFIER extends TokenKind - case object COMMENT extends TokenKind - } - import TokenKind._ - - case class Token(val kind: TokenKind, value: String = "") { - var line: Int = 0 - var col: Int = 0 - def setPos(line: Int, col: Int): this.type = { - this.col = col - this.line = line - this - } - override def toString() = { - s"($line, $col): " + super.toString - } - } - - object Chars { - final val LF = '\u000A' - final val CR = '\u000D' - final val SU = '\u001A' - - @inline def isSpace(ch: Char): Boolean = ch match { - case ' ' | '\t' => true - case _ => false - } - - @inline def isBlank(ch: Char): Boolean = ch match { - case ' ' | '\t' | CR | LF | SU => true - case _ => false - } - } - - class Scanner(chars: Iterator[Char]) extends Iterator[Token] { - import Chars._ - - private var ch0: Char = 0 - private var ch1: Char = 0 - private var ch2: Char = 0 - private var pos: Long = 0 - private var line: Int = 0 - private var col: Int = 0 - - private def skipChar(): Unit = { - val ch: Char = if (chars.hasNext) { - chars.next() - } else { - SU - } - pos += 1 - col += 1 - ch0 = ch1 - ch1 = ch2 - ch2 = ch - } - private def skipChars(n: Int): Unit = { - var i = 0 - while (i < n) { skipChar(); i += 1 } - } - def init() = { - skipChars(3) - pos = 0 - col = 0 - line = 0 - } - - private var buffer = new StringBuilder() - private def putChar(): Unit = { - buffer.append(ch0) - skipChars(1) - } - private def tokenValue(): String = { - val str = buffer.result() - buffer.clear() - str - } - - private var token: Token = Token(BAD, "not yet initialized") - - @tailrec private def fetchToken(): Unit = { - ch0 match { - case ':' if isBlank(ch1) => - token = Token(MAPPING).setPos(line, col) - skipChars(1) - case '-' if isBlank(ch1) => - token = Token(ITEM).setPos(line, col) - skipChars(1) - case '-' if ch1 == '-' && ch2 == '-' => - token = Token(DOCSTART).setPos(line, col) - skipChars(3) - case '.' if ch1 == '.' && ch2 == '.' => - token = Token(DOCEND).setPos(line, col) - skipChars(3) - case '#' => - val l = line - val c = col - skipChars(1) - while (ch0 != LF && ch0 != SU) { - putChar() - } - token = Token(COMMENT, tokenValue()).setPos(l, c) - buffer.clear() - case c if isSpace(c) => - skipChars(1) - fetchToken() - case LF => - skipChars(1) - col = 0 - line += 1 - fetchToken() - case CR => - skipChars(1) - if (ch0 == LF) { - skipChars(1) - } - col = 0 - line += 1 - fetchToken() - case SU => - token = Token(EOF).setPos(line, col) - skipChars(1) - case _ => fetchScalar() - } - } - - private def fetchScalar(): Unit = { - def finishScalar() = token = Token(IDENTIFIER, tokenValue()) - @tailrec def fetchRest(): Unit = ch0 match { - case ':' if isBlank(ch1) => - finishScalar() - case LF => - finishScalar() - case SU => - finishScalar() - case c => - putChar() - fetchRest() - } - val l = line - val c = col - fetchRest() - token.setPos(l, c) - } - - override def hasNext: Boolean = true - override def next(): Token = { - fetchToken() - token - } - init() - } - - object Parser { - - class ParseException(val message: String) extends Exception(message) - - def run(tokens: Iterator[Token]): YamlValue = { - var token0 = tokens.next() - var token1 = tokens.next() - - def readNext(): Unit = { - token0 = token1 - token1 = tokens.next() - } - - def fatal(message: String, token: Token) = { - val completeMessage = - s"parse error at line ${token.line}, column ${token.col}: $message" - throw new ParseException(completeMessage) - } - - def wrongKind(found: Token, required: TokenKind*) = { - fatal( - s"token kind not allowed at this position\n" + - s" found: ${found.kind}\n" + - s" required: ${required.mkString(" or ")}\n" + - " " * found.col + found.value + "\n" + - " " * found.col + "^", - found - ) - } - - def nextSequence() = { - val startCol = token0.col - val items = new ListBuffer[YamlValue] - while (startCol <= token0.col && token0.kind != EOF) { - token0.kind match { - case ITEM => - readNext() - items += nextBlock(startCol) - case _ => wrongKind(token0, ITEM) - } - } - YamlSequence(items.toVector) - } - - def nextMapping() = { - val startCol = token0.col - val fields = new ListBuffer[(String, YamlValue)] - while (startCol <= token0.col && token0.kind != EOF) { - token0.kind match { - case IDENTIFIER => - val key = token0.value - readNext() - token0.kind match { - case MAPPING => - readNext() - val value = nextBlock(startCol) - fields += key -> value - case _ => wrongKind(token0, MAPPING) - } - - case _ => wrongKind(token0, IDENTIFIER) - } - } - YamlMapping(fields.toMap) - } - - def nextBlock(startCol: Int): YamlValue = { - if (token0.col < startCol) { - YamlScalar.Empty - } else { - token0.kind match { - case IDENTIFIER => - if (token1.kind == MAPPING && token0.line == token1.line) { - nextMapping() - } else { - val y = YamlScalar(token0.value) - readNext() - y - } - case ITEM => - nextSequence() - case EOF => YamlScalar.Empty - case _ => wrongKind(token0, IDENTIFIER, ITEM) - } - } - } - - nextBlock(0) - } - } - - def parse(data: String): YamlValue = { - Parser.run(new Scanner(data.toIterator)) - } -} diff --git a/yamlesque/src/main/scala/yamlValues.scala b/yamlesque/src/main/scala/yamlValues.scala index 3bc4f36..afe8e0b 100644 --- a/yamlesque/src/main/scala/yamlValues.scala +++ b/yamlesque/src/main/scala/yamlValues.scala @@ -4,7 +4,7 @@ sealed trait YamlValue { def print: String = YamlValue.DefaultPrinter(this) } object YamlValue { - val DefaultPrinter = new YamlPrinter(true) + val DefaultPrinter = new YamlPrinter(compact = true) } case class YamlMapping(fields: Map[String, YamlValue]) extends YamlValue diff --git a/yamlesque/src/test/scala/ParserTests.scala b/yamlesque/src/test/scala/ParserTests.scala index 9965bcd..b93b159 100644 --- a/yamlesque/src/test/scala/ParserTests.scala +++ b/yamlesque/src/test/scala/ParserTests.scala @@ -4,67 +4,217 @@ import utest._ object ParserTests extends TestSuite { - val yaml = YamlMapping( - "key1" -> YamlScalar("value1"), - "key2" -> YamlMapping( - "key1" -> YamlScalar("value1"), - "key2" -> YamlScalar("value1"), - "key3" -> YamlSequence( - YamlScalar("a1"), + val tests = Tests { + "parse empty string" - { + "".parseYaml ==> YamlScalar.Empty + } + "parse simple scalar" - { + "hello".parseYaml ==> YamlScalar("hello") + } + "parse scalar with space" - { + "hello world".parseYaml ==> YamlScalar("hello world") + } + "parse scalar with a colon" - { + "hello:world".parseYaml ==> YamlScalar("hello:world") + } + "parse scalar with a minus" - { + "hello-world".parseYaml ==> YamlScalar("hello-world") + } + "parse scalar starting with a colon" - { + ":hello world".parseYaml ==> YamlScalar(":hello world") + } + "parse scalar starting with a minus" - { + "-hello world".parseYaml ==> YamlScalar("-hello world") + } + "parse empty list" - { + "-".parseYaml ==> YamlSequence(YamlScalar.Empty) + } + "parse a simple list" - { + "-\n a\n-\n b\n-\n c".parseYaml ==> YamlSequence(YamlScalar("a"), + YamlScalar("b"), + YamlScalar("c")) + } + "parse a simple compact list" - { + "- a\n- b\n - c".parseYaml ==> YamlSequence(YamlScalar("a"), + YamlScalar("b"), + YamlScalar("c")) + } + "fail to parse a list with a non-item token" - { + val e = intercept[ParseException] { + "- a\n- b\n -c".parseYaml // -c is missing a space between '-' and 'c' + } + assert(e.message.contains("token kind")) + } + "parse a nested list" - { + val ls = + s"""|- a0 + |- b0 + |- + | - a1 + | - b1 + | - + | - a2 + | - b2 + |- c0 + |- - a1 + | - b1 + |- - - - a4 + |""".stripMargin + val result = YamlSequence( + YamlScalar("a0"), + YamlScalar("b0"), YamlSequence( YamlScalar("a1"), - YamlScalar("a2"), - YamlScalar("a3") + YamlScalar("b1"), + YamlSequence( + YamlScalar("a2"), + YamlScalar("b2") + ) ), - YamlScalar("a3"), - YamlMapping( - "a1" -> YamlScalar("b"), - "a2" -> YamlScalar("b"), - "a3" -> YamlScalar("b"), - "a4" -> YamlScalar("b") + YamlScalar("c0"), + YamlSequence( + YamlScalar("a1"), + YamlScalar("b1") ), - YamlScalar("a4"), - YamlScalar("a4") - ), - "key4" -> YamlScalar("value1"), - "key5" -> YamlScalar("value1"), - "key6" -> YamlScalar("value1") - ), - "key3" -> YamlScalar("value3") - ) - - val string = - s"""| - |key1: value1 - |key2: - | key4: value1 - | key5: value1 - | key1: value1 - | key2: value1 - | key6: value1 - | key3: - | - a1 - | - - | - a1 - | - a2 - | - a3 - | - a3 - | - - | a1: b - | a2: b - | a3: b - | a4: b - | - a4 - | - a4 - |key3: value3 - |""".stripMargin - - val tests = Tests{ - "parse" - { - // assert(Parser.parse(string) == yaml) + YamlSequence( + YamlSequence( + YamlSequence( + YamlScalar("a4") + ) + ) + ) + ) + ls.parseYaml ==> result + } + "parse a simple mapping" - { + "a:\n b".parseYaml ==> YamlMapping("a" -> YamlScalar("b")) + } + "parse a double mapping" - { + "a:\n b\nc:\n d".parseYaml ==> YamlMapping( + "a" -> YamlScalar("b"), + "c" -> YamlScalar("d") + ) + } + "parse a simple compact mapping" - { + "a: b".parseYaml ==> YamlMapping("a" -> YamlScalar("b")) + } + "parse a double compact mapping" - { + "a: b\nc: d".parseYaml ==> YamlMapping( + "a" -> YamlScalar("b"), + "c" -> YamlScalar("d") + ) + } + "parse a simple mapping without a value" - { + "a:\n".parseYaml ==> YamlMapping( + "a" -> YamlScalar("") + ) } - "printandparse" - { - //assert(Parser.parse(yaml.print) == yaml) + "parse a mapping without a value" - { + "k1: v1\nk2:\nk3: v3".parseYaml ==> YamlMapping( + "k1" -> YamlScalar("v1"), + "k2" -> YamlScalar.Empty, + "k3" -> YamlScalar("v3") + ) + } + "parse a nested mapping" - { + val m = + s"""|k1: + | k11: a + | k12: b + |k2: + | k21: + | k31: + | k41: a + | k22: + | b + |k3: a + |k4: k41: k42: k43: a + |""".stripMargin + m.parseYaml ==> YamlMapping( + "k1" -> YamlMapping( + "k11" -> YamlScalar("a"), + "k12" -> YamlScalar("b") + ), + "k2" -> YamlMapping( + "k21" -> YamlMapping( + "k31" -> YamlMapping( + "k41" -> YamlScalar("a") + ) + ), + "k22" -> YamlScalar("b") + ), + "k3" -> YamlScalar("a"), + "k4" -> YamlMapping( + "k41" -> YamlMapping( + "k42" -> YamlMapping( + "k43" -> YamlScalar("a") + ) + ) + ) + ) + } + "maps and sequences" - { + val yaml = YamlMapping( + "key1" -> YamlScalar("value1"), + "key2" -> YamlMapping( + "key1" -> YamlScalar("value1"), + "key2" -> YamlScalar("value1"), + "key3" -> YamlSequence( + YamlScalar("a1"), + YamlSequence( + YamlScalar("a1"), + YamlScalar("a2"), + YamlScalar("a3") + ), + YamlScalar("a3"), + YamlMapping( + "a1" -> YamlScalar("b"), + "a2" -> YamlScalar("b"), + "a3" -> YamlScalar("b"), + "a4" -> YamlScalar("b") + ), + YamlScalar("a4"), + YamlScalar("a4") + ), + "key4" -> YamlScalar("value1"), + "key5" -> YamlScalar("value1"), + "key6" -> YamlScalar("value1") + ), + "key3" -> YamlScalar("value3") + ) + + val string = + s"""| + |key1: value1 + |key2: + | key4: + | value1 + | key5: value1 + | key1: value1 + | key2: value1 + | key6: value1 + | key3: + | - a1 + | - + | - a1 + | - a2 + | - a3 + | - a3 + | - + | a1: b + | a2: b + | a3: b + | a4: b + | - a4 + | - a4 + |key3: value3 + |""".stripMargin + "parse" - { + string.parseYaml ==> yaml + } + "print and parse" - { + yaml.print.parseYaml ==> yaml + } } } -- cgit v1.2.3