/* * Copyright (C) 2014 Mathias Doenitz * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package spray.json import scala.annotation.{switch, tailrec} import java.lang.{StringBuilder => JStringBuilder} import java.nio.{ByteBuffer, CharBuffer} import java.nio.charset.Charset import scala.collection.immutable.TreeMap /** * Fast, no-dependency parser for JSON as defined by http://tools.ietf.org/html/rfc4627. */ object JsonParser { def apply(input: ParserInput): JsValue = new JsonParser(input).parseJsValue() def apply(input: ParserInput, settings: JsonParserSettings): JsValue = new JsonParser(input, settings).parseJsValue() class ParsingException(val summary: String, val detail: String = "") extends RuntimeException(if (summary.isEmpty) detail else if (detail.isEmpty) summary else summary + ":" + detail) } class JsonParser(input: ParserInput, settings: JsonParserSettings = JsonParserSettings.default) { def this(input: ParserInput) = this(input, JsonParserSettings.default) import JsonParser.ParsingException private[this] val sb = new JStringBuilder private[this] var cursorChar: Char = input.nextChar() private[this] var jsValue: JsValue = _ def parseJsValue(): JsValue = parseJsValue(false) def parseJsValue(allowTrailingInput: Boolean): JsValue = { ws() `value`(settings.maxDepth) if (!allowTrailingInput) require(EOI) jsValue } ////////////////////// GRAMMAR //////////////////////// private final val EOI = '\uFFFF' // compile-time constant // http://tools.ietf.org/html/rfc4627#section-2.1 private def `value`(remainingNesting: Int): Unit = if (remainingNesting == 0) throw new ParsingException( "JSON input nested too deeply", s"JSON input was nested more deeply than the configured limit of maxNesting = ${settings.maxDepth}" ) else { val mark = input.cursor def simpleValue(matched: Boolean, value: JsValue) = if (matched) jsValue = value else fail("JSON Value", mark) (cursorChar: @switch) match { case 'f' => simpleValue(`false`(), JsFalse) case 'n' => simpleValue(`null`(), JsNull) case 't' => simpleValue(`true`(), JsTrue) case '{' => advance(); `object`(remainingNesting) case '[' => advance(); `array`(remainingNesting) case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '-' => `number`() case '"' => `string`(); jsValue = if (sb.length == 0) JsString.empty else JsString(sb.toString) case _ => fail("JSON Value") } } private def `false`() = advance() && ch('a') && ch('l') && ch('s') && ws('e') private def `null`() = advance() && ch('u') && ch('l') && ws('l') private def `true`() = advance() && ch('r') && ch('u') && ws('e') // http://tools.ietf.org/html/rfc4627#section-2.2 private def `object`(remainingNesting: Int): Unit = { ws() jsValue = if (cursorChar != '}') { @tailrec def members(map: Map[String, JsValue]): Map[String, JsValue] = { `string`() require(':') ws() val key = sb.toString `value`(remainingNesting - 1) val nextMap = map.updated(key, jsValue) if (ws(',')) members(nextMap) else nextMap } val map = members(TreeMap.empty[String, JsValue]) require('}') JsObject(map) } else { advance() JsObject.empty } ws() } // http://tools.ietf.org/html/rfc4627#section-2.3 private def `array`(remainingNesting: Int): Unit = { ws() jsValue = if (cursorChar != ']') { val list = Vector.newBuilder[JsValue] @tailrec def values(): Unit = { `value`(remainingNesting - 1) list += jsValue if (ws(',')) values() } values() require(']') JsArray(list.result()) } else { advance() JsArray.empty } ws() } // http://tools.ietf.org/html/rfc4627#section-2.4 private def `number`() = { val start = input.cursor val startChar = cursorChar ch('-') `int`() `frac`() `exp`() val numberLength = input.cursor - start jsValue = if (startChar == '0' && input.cursor - start == 1) JsNumber.zero else if (numberLength <= settings.maxNumberCharacters) JsNumber(input.sliceCharArray(start, input.cursor)) else { val numberSnippet = new String(input.sliceCharArray(start, math.min(input.cursor, start + 20))) throw new ParsingException("Number too long", s"The number starting with '$numberSnippet' had " + s"$numberLength characters which is more than the allowed limit maxNumberCharacters = ${settings.maxNumberCharacters}. If this is legit input " + s"consider increasing the limit." ) } ws() } private def `int`(): Unit = if (!ch('0')) oneOrMoreDigits() private def `frac`(): Unit = if (ch('.')) oneOrMoreDigits() private def `exp`(): Unit = if (ch('e') || ch('E')) { ch('-') || ch('+'); oneOrMoreDigits() } private def oneOrMoreDigits(): Unit = if (DIGIT()) zeroOrMoreDigits() else fail("DIGIT") @tailrec private def zeroOrMoreDigits(): Unit = if (DIGIT()) zeroOrMoreDigits() private def DIGIT(): Boolean = cursorChar >= '0' && cursorChar <= '9' && advance() // http://tools.ietf.org/html/rfc4627#section-2.5 private def `string`(): Unit = { if (cursorChar == '"') cursorChar = input.nextUtf8Char() else fail("'\"'") sb.setLength(0) while (`char`()) cursorChar = input.nextUtf8Char() require('"') ws() } private def `char`() = // simple bloom-filter that quick-matches the most frequent case of characters that are ok to append // (it doesn't match control chars, EOI, '"', '?', '\', 'b' and certain higher, non-ASCII chars) if (((1L << cursorChar) & ((31 - cursorChar) >> 31) & 0x7ffffffbefffffffL) != 0L) appendSB(cursorChar) else cursorChar match { case '"' | EOI => false case '\\' => advance(); `escaped`() case c => (c >= ' ') && appendSB(c) } private def `escaped`() = { def hexValue(c: Char): Int = if ('0' <= c && c <= '9') c - '0' else if ('a' <= c && c <= 'f') c - 87 else if ('A' <= c && c <= 'F') c - 55 else fail("hex digit") def unicode() = { var value = hexValue(cursorChar) advance() value = (value << 4) + hexValue(cursorChar) advance() value = (value << 4) + hexValue(cursorChar) advance() value = (value << 4) + hexValue(cursorChar) appendSB(value.toChar) } (cursorChar: @switch) match { case '"' | '/' | '\\' => appendSB(cursorChar) case 'b' => appendSB('\b') case 'f' => appendSB('\f') case 'n' => appendSB('\n') case 'r' => appendSB('\r') case 't' => appendSB('\t') case 'u' => advance(); unicode() case _ => fail("JSON escape sequence") } } @tailrec private def ws(): Unit = // fast test whether cursorChar is one of " \n\r\t" if (((1L << cursorChar) & ((cursorChar - 64) >> 31) & 0x100002600L) != 0L) { advance(); ws() } ////////////////////////// HELPERS ////////////////////////// private def ch(c: Char): Boolean = if (cursorChar == c) { advance(); true } else false private def ws(c: Char): Boolean = if (ch(c)) { ws(); true } else false private def advance(): Boolean = { cursorChar = input.nextChar(); true } private def appendSB(c: Char): Boolean = { sb.append(c); true } private def require(c: Char): Unit = if (!ch(c)) fail(s"'$c'") private def fail(target: String, cursor: Int = input.cursor, errorChar: Char = cursorChar): Nothing = { val ParserInput.Line(lineNr, col, text) = input.getLine(cursor) val summary = { val unexpected = if (errorChar != EOI) { val c = if (Character.isISOControl(errorChar)) "\\u%04x" format errorChar.toInt else errorChar.toString s"character '$c'" } else "end-of-input" val expected = if (target != "'\uFFFF'") target else "end-of-input" s"Unexpected $unexpected at input index $cursor (line $lineNr, position $col), expected $expected" } val detail = { val sanitizedText = text.map(c ⇒ if (Character.isISOControl(c)) '?' else c) s"\n$sanitizedText\n${" " * (col-1)}^\n" } throw new ParsingException(summary, detail) } } trait ParserInput { /** * Advance the cursor and get the next char. * Since the char is required to be a 7-Bit ASCII char no decoding is required. */ def nextChar(): Char /** * Advance the cursor and get the next char, which could potentially be outside * of the 7-Bit ASCII range. Therefore decoding might be required. */ def nextUtf8Char(): Char def cursor: Int def length: Int def sliceString(start: Int, end: Int): String def sliceCharArray(start: Int, end: Int): Array[Char] def getLine(index: Int): ParserInput.Line } object ParserInput { private final val EOI = '\uFFFF' // compile-time constant private final val ErrorChar = '\uFFFD' // compile-time constant, universal UTF-8 replacement character '�' implicit def apply(string: String): StringBasedParserInput = new StringBasedParserInput(string) implicit def apply(chars: Array[Char]): CharArrayBasedParserInput = new CharArrayBasedParserInput(chars) implicit def apply(bytes: Array[Byte]): ByteArrayBasedParserInput = new ByteArrayBasedParserInput(bytes) case class Line(lineNr: Int, column: Int, text: String) abstract class DefaultParserInput extends ParserInput { protected var _cursor: Int = -1 def cursor = _cursor def getLine(index: Int): Line = { val sb = new java.lang.StringBuilder @tailrec def rec(ix: Int, lineStartIx: Int, lineNr: Int): Line = nextUtf8Char() match { case '\n' if index > ix => sb.setLength(0); rec(ix + 1, ix + 1, lineNr + 1) case '\n' | EOI => Line(lineNr, index - lineStartIx + 1, sb.toString) case c => sb.append(c); rec(ix + 1, lineStartIx, lineNr) } val savedCursor = _cursor _cursor = -1 val line = rec(ix = 0, lineStartIx = 0, lineNr = 1) _cursor = savedCursor line } } private val UTF8 = Charset.forName("UTF-8") /** * ParserInput that allows to create a ParserInput from any randomly accessible indexed byte storage. */ abstract class IndexedBytesParserInput extends DefaultParserInput { def length: Int protected def byteAt(offset: Int): Byte private val byteBuffer = ByteBuffer.allocate(4) private val charBuffer = CharBuffer.allocate(2) private val decoder = UTF8.newDecoder() def nextChar() = { _cursor += 1 if (_cursor < length) (byteAt(_cursor) & 0xFF).toChar else EOI } def nextUtf8Char() = { @tailrec def decode(byte: Byte, remainingBytes: Int): Char = { byteBuffer.put(byte) if (remainingBytes > 0) { _cursor += 1 if (_cursor < length) decode(byteAt(_cursor), remainingBytes - 1) else ErrorChar } else { byteBuffer.flip() val coderResult = decoder.decode(byteBuffer, charBuffer, false) charBuffer.flip() val result = if (coderResult.isUnderflow & charBuffer.hasRemaining) charBuffer.get() else ErrorChar byteBuffer.clear() if (!charBuffer.hasRemaining) charBuffer.clear() result } } if (charBuffer.position() > 0) { val result = charBuffer.get() charBuffer.clear() result } else { _cursor += 1 if (_cursor < length) { val byte = byteAt(_cursor) if (byte >= 0) byte.toChar // 7-Bit ASCII else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence else ErrorChar } else EOI } } } /** * ParserInput reading directly off a byte array which is assumed to contain the UTF-8 encoded representation * of the JSON input, without requiring a separate decoding step. */ class ByteArrayBasedParserInput(bytes: Array[Byte]) extends IndexedBytesParserInput { protected def byteAt(offset: Int): Byte = bytes(offset) def length: Int = bytes.length def sliceString(start: Int, end: Int) = new String(bytes, start, end - start, UTF8) def sliceCharArray(start: Int, end: Int) = UTF8.decode(ByteBuffer.wrap(java.util.Arrays.copyOfRange(bytes, start, end))).array() } class StringBasedParserInput(string: String) extends DefaultParserInput { def nextChar(): Char = { _cursor += 1 if (_cursor < string.length) string.charAt(_cursor) else EOI } def nextUtf8Char() = nextChar() def length = string.length def sliceString(start: Int, end: Int) = string.substring(start, end) def sliceCharArray(start: Int, end: Int) = { val chars = new Array[Char](end - start) string.getChars(start, end, chars, 0) chars } } class CharArrayBasedParserInput(chars: Array[Char]) extends DefaultParserInput { def nextChar(): Char = { _cursor += 1 if (_cursor < chars.length) chars(_cursor) else EOI } def nextUtf8Char() = nextChar() def length = chars.length def sliceString(start: Int, end: Int) = new String(chars, start, end - start) def sliceCharArray(start: Int, end: Int) = java.util.Arrays.copyOfRange(chars, start, end) } }