diff options
author | Mark Hatton <mark.hatton@shazam.com> | 2015-06-11 19:14:11 +0100 |
---|---|---|
committer | Mark Hatton <mark.hatton@shazam.com> | 2015-06-11 19:14:11 +0100 |
commit | 0c9b2feed5d5d253181a49beb1e806ba9df3bd1b (patch) | |
tree | 4f31eeb3c7b2a70c19259e034499a2ae9cf89a1a /src | |
parent | 42757e9f27cc7b3253cdfcc330de209722e3eb63 (diff) | |
download | spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.tar.gz spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.tar.bz2 spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.zip |
= Fix decoding of 4-byte UTF-8 characters into UTF-16 surrogate pairs
Diffstat (limited to 'src')
-rw-r--r-- | src/main/scala/spray/json/JsonParser.scala | 28 | ||||
-rw-r--r-- | src/test/scala/spray/json/JsonParserSpec.scala | 3 |
2 files changed, 19 insertions, 12 deletions
```diff
diff --git a/src/main/scala/spray/json/JsonParser.scala b/src/main/scala/spray/json/JsonParser.scala
index 855ab21..b1e59d5 100644
--- a/src/main/scala/spray/json/JsonParser.scala
+++ b/src/main/scala/spray/json/JsonParser.scala
@@ -271,7 +271,7 @@ object ParserInput {
    */
   class ByteArrayBasedParserInput(bytes: Array[Byte]) extends DefaultParserInput {
     private val byteBuffer = ByteBuffer.allocate(4)
-    private val charBuffer = CharBuffer.allocate(1) // we currently don't support surrogate pairs!
+    private val charBuffer = CharBuffer.allocate(2)
     private val decoder = UTF8.newDecoder()
     def nextChar() = {
       _cursor += 1
@@ -289,20 +289,26 @@ object ParserInput {
           charBuffer.flip()
           val result = if (coderResult.isUnderflow & charBuffer.hasRemaining) charBuffer.get() else ErrorChar
           byteBuffer.clear()
-          charBuffer.clear()
+          if (!charBuffer.hasRemaining) charBuffer.clear()
           result
         }
       }
 
-      _cursor += 1
-      if (_cursor < bytes.length) {
-        val byte = bytes(_cursor)
-        if (byte >= 0) byte.toChar // 7-Bit ASCII
-        else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence
-        else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence
-        else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence, will probably produce an (unsupported) surrogate pair
-        else ErrorChar
-      } else EOI
+      if (charBuffer.position() > 0) {
+        val result = charBuffer.get()
+        charBuffer.clear()
+        result
+      } else {
+        _cursor += 1
+        if (_cursor < bytes.length) {
+          val byte = bytes(_cursor)
+          if (byte >= 0) byte.toChar // 7-Bit ASCII
+          else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence
+          else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence
+          else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence
+          else ErrorChar
+        } else EOI
+      }
     }
     def length = bytes.length
     def sliceString(start: Int, end: Int) = new String(bytes, start, end - start, UTF8)
diff --git a/src/test/scala/spray/json/JsonParserSpec.scala b/src/test/scala/spray/json/JsonParserSpec.scala
index 2bef24c..0f7ae7f 100644
--- a/src/test/scala/spray/json/JsonParserSpec.scala
+++ b/src/test/scala/spray/json/JsonParserSpec.scala
@@ -64,7 +64,8 @@ class JsonParserSpec extends Specification {
       val json = JsObject(
         "7-bit" -> JsString("This is regular 7-bit ASCII text."),
         "2-bytes" -> JsString("2-byte UTF-8 chars like £, æ or Ö"),
-        "3-bytes" -> JsString("3-byte UTF-8 chars like ヨ, ᄅ or ᐁ."))
+        "3-bytes" -> JsString("3-byte UTF-8 chars like ヨ, ᄅ or ᐁ."),
+        "4-bytes" -> JsString("4-byte UTF-8 chars like \uD801\uDC37, \uD852\uDF62 or \uD83D\uDE01."))
       JsonParser(json.prettyPrint.getBytes("UTF-8")) === json
     }
     "parse directly from UTF-8 encoded bytes when string starts with a multi-byte character" in {
```