summary | refs | log | tree | commit | diff
path: root/src/main
diff options
context:
space:
mode:
author Mark Hatton <mark.hatton@shazam.com> 2015-06-11 19:14:11 +0100
committer Mark Hatton <mark.hatton@shazam.com> 2015-06-11 19:14:11 +0100
commit 0c9b2feed5d5d253181a49beb1e806ba9df3bd1b (patch)
tree 4f31eeb3c7b2a70c19259e034499a2ae9cf89a1a /src/main
parent 42757e9f27cc7b3253cdfcc330de209722e3eb63 (diff)
download spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.tar.gz
spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.tar.bz2
spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.zip
= Fix decoding of 4-byte UTF-8 characters into UTF-16 surrogate pairs
Diffstat (limited to 'src/main')
-rw-r--r-- src/main/scala/spray/json/JsonParser.scala 28
1 files changed, 17 insertions, 11 deletions
diff --git a/src/main/scala/spray/json/JsonParser.scala b/src/main/scala/spray/json/JsonParser.scala
index 855ab21..b1e59d5 100644
--- a/src/main/scala/spray/json/JsonParser.scala
+++ b/src/main/scala/spray/json/JsonParser.scala
@@ -271,7 +271,7 @@ object ParserInput {
*/
class ByteArrayBasedParserInput(bytes: Array[Byte]) extends DefaultParserInput {
private val byteBuffer = ByteBuffer.allocate(4)
- private val charBuffer = CharBuffer.allocate(1) // we currently don't support surrogate pairs!
+ private val charBuffer = CharBuffer.allocate(2)
private val decoder = UTF8.newDecoder()
def nextChar() = {
_cursor += 1
@@ -289,20 +289,26 @@ object ParserInput {
charBuffer.flip()
val result = if (coderResult.isUnderflow & charBuffer.hasRemaining) charBuffer.get() else ErrorChar
byteBuffer.clear()
- charBuffer.clear()
+ if (!charBuffer.hasRemaining) charBuffer.clear()
result
}
}
- _cursor += 1
- if (_cursor < bytes.length) {
- val byte = bytes(_cursor)
- if (byte >= 0) byte.toChar // 7-Bit ASCII
- else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence
- else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence
- else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence, will probably produce an (unsupported) surrogate pair
- else ErrorChar
- } else EOI
+ if (charBuffer.position() > 0) {
+ val result = charBuffer.get()
+ charBuffer.clear()
+ result
+ } else {
+ _cursor += 1
+ if (_cursor < bytes.length) {
+ val byte = bytes(_cursor)
+ if (byte >= 0) byte.toChar // 7-Bit ASCII
+ else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence
+ else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence
+ else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence
+ else ErrorChar
+ } else EOI
+ }
}
def length = bytes.length
def sliceString(start: Int, end: Int) = new String(bytes, start, end - start, UTF8)