diff options
author | Mathias <mathias@decodified.com> | 2015-06-16 14:15:09 +0200 |
---|---|---|
committer | Mathias <mathias@decodified.com> | 2015-06-16 14:15:09 +0200 |
commit | f185c5fb1d53f5399dbbb7229d7f7ae02971be20 (patch) | |
tree | 4f31eeb3c7b2a70c19259e034499a2ae9cf89a1a /src/main/scala | |
parent | 42757e9f27cc7b3253cdfcc330de209722e3eb63 (diff) | |
parent | 0c9b2feed5d5d253181a49beb1e806ba9df3bd1b (diff) | |
download | spray-json-f185c5fb1d53f5399dbbb7229d7f7ae02971be20.tar.gz spray-json-f185c5fb1d53f5399dbbb7229d7f7ae02971be20.tar.bz2 spray-json-f185c5fb1d53f5399dbbb7229d7f7ae02971be20.zip |
Merge pull request #157 from markhatton/master
Fix decoding of 4-byte UTF-8 characters into UTF-16 surrogate pairs
Diffstat (limited to 'src/main/scala')
-rw-r--r-- | src/main/scala/spray/json/JsonParser.scala | 28 |
1 file changed, 17 insertions, 11 deletions
diff --git a/src/main/scala/spray/json/JsonParser.scala b/src/main/scala/spray/json/JsonParser.scala index 855ab21..b1e59d5 100644 --- a/src/main/scala/spray/json/JsonParser.scala +++ b/src/main/scala/spray/json/JsonParser.scala @@ -271,7 +271,7 @@ object ParserInput { */ class ByteArrayBasedParserInput(bytes: Array[Byte]) extends DefaultParserInput { private val byteBuffer = ByteBuffer.allocate(4) - private val charBuffer = CharBuffer.allocate(1) // we currently don't support surrogate pairs! + private val charBuffer = CharBuffer.allocate(2) private val decoder = UTF8.newDecoder() def nextChar() = { _cursor += 1 @@ -289,20 +289,26 @@ object ParserInput { charBuffer.flip() val result = if (coderResult.isUnderflow & charBuffer.hasRemaining) charBuffer.get() else ErrorChar byteBuffer.clear() - charBuffer.clear() + if (!charBuffer.hasRemaining) charBuffer.clear() result } } - _cursor += 1 - if (_cursor < bytes.length) { - val byte = bytes(_cursor) - if (byte >= 0) byte.toChar // 7-Bit ASCII - else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence - else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence - else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence, will probably produce an (unsupported) surrogate pair - else ErrorChar - } else EOI + if (charBuffer.position() > 0) { + val result = charBuffer.get() + charBuffer.clear() + result + } else { + _cursor += 1 + if (_cursor < bytes.length) { + val byte = bytes(_cursor) + if (byte >= 0) byte.toChar // 7-Bit ASCII + else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence + else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence + else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence + else ErrorChar + } else EOI + } } def length = bytes.length def sliceString(start: Int, end: Int) = new String(bytes, start, end - start, UTF8) |