diff options
author | Mark Hatton <mark.hatton@shazam.com> | 2015-06-11 19:14:11 +0100 |
---|---|---|
committer | Mark Hatton <mark.hatton@shazam.com> | 2015-06-11 19:14:11 +0100 |
commit | 0c9b2feed5d5d253181a49beb1e806ba9df3bd1b (patch) | |
tree | 4f31eeb3c7b2a70c19259e034499a2ae9cf89a1a /src/main/scala | |
parent | 42757e9f27cc7b3253cdfcc330de209722e3eb63 (diff) | |
download | spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.tar.gz spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.tar.bz2 spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.zip |
= Fix decoding of 4-byte UTF-8 characters into UTF-16 surrogate pairs
Diffstat (limited to 'src/main/scala')
-rw-r--r-- | src/main/scala/spray/json/JsonParser.scala | 28 |
1 file changed, 17 insertions, 11 deletions
```diff
diff --git a/src/main/scala/spray/json/JsonParser.scala b/src/main/scala/spray/json/JsonParser.scala
index 855ab21..b1e59d5 100644
--- a/src/main/scala/spray/json/JsonParser.scala
+++ b/src/main/scala/spray/json/JsonParser.scala
@@ -271,7 +271,7 @@ object ParserInput {
    */
   class ByteArrayBasedParserInput(bytes: Array[Byte]) extends DefaultParserInput {
     private val byteBuffer = ByteBuffer.allocate(4)
-    private val charBuffer = CharBuffer.allocate(1) // we currently don't support surrogate pairs!
+    private val charBuffer = CharBuffer.allocate(2)
     private val decoder = UTF8.newDecoder()
     def nextChar() = {
       _cursor += 1
@@ -289,20 +289,26 @@ object ParserInput {
           charBuffer.flip()
           val result = if (coderResult.isUnderflow & charBuffer.hasRemaining) charBuffer.get() else ErrorChar
           byteBuffer.clear()
-          charBuffer.clear()
+          if (!charBuffer.hasRemaining) charBuffer.clear()
           result
         }
       }
 
-      _cursor += 1
-      if (_cursor < bytes.length) {
-        val byte = bytes(_cursor)
-        if (byte >= 0) byte.toChar // 7-Bit ASCII
-        else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence
-        else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence
-        else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence, will probably produce an (unsupported) surrogate pair
-        else ErrorChar
-      } else EOI
+      if (charBuffer.position() > 0) {
+        val result = charBuffer.get()
+        charBuffer.clear()
+        result
+      } else {
+        _cursor += 1
+        if (_cursor < bytes.length) {
+          val byte = bytes(_cursor)
+          if (byte >= 0) byte.toChar // 7-Bit ASCII
+          else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence
+          else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence
+          else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence
+          else ErrorChar
+        } else EOI
+      }
     }
     def length = bytes.length
     def sliceString(start: Int, end: Int) = new String(bytes, start, end - start, UTF8)
```