summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Mark Hatton <mark.hatton@shazam.com> 2015-06-11 19:14:11 +0100
committer Mark Hatton <mark.hatton@shazam.com> 2015-06-11 19:14:11 +0100
commit 0c9b2feed5d5d253181a49beb1e806ba9df3bd1b (patch)
tree 4f31eeb3c7b2a70c19259e034499a2ae9cf89a1a /src
parent 42757e9f27cc7b3253cdfcc330de209722e3eb63 (diff)
downloadspray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.tar.gz
spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.tar.bz2
spray-json-0c9b2feed5d5d253181a49beb1e806ba9df3bd1b.zip
= Fix decoding of 4-byte UTF-8 characters into UTF-16 surrogate pairs
Diffstat (limited to 'src')
-rw-r--r-- src/main/scala/spray/json/JsonParser.scala 28
-rw-r--r-- src/test/scala/spray/json/JsonParserSpec.scala 3
2 files changed, 19 insertions, 12 deletions
diff --git a/src/main/scala/spray/json/JsonParser.scala b/src/main/scala/spray/json/JsonParser.scala
index 855ab21..b1e59d5 100644
--- a/src/main/scala/spray/json/JsonParser.scala
+++ b/src/main/scala/spray/json/JsonParser.scala
@@ -271,7 +271,7 @@ object ParserInput {
*/
class ByteArrayBasedParserInput(bytes: Array[Byte]) extends DefaultParserInput {
private val byteBuffer = ByteBuffer.allocate(4)
- private val charBuffer = CharBuffer.allocate(1) // we currently don't support surrogate pairs!
+ private val charBuffer = CharBuffer.allocate(2)
private val decoder = UTF8.newDecoder()
def nextChar() = {
_cursor += 1
@@ -289,20 +289,26 @@ object ParserInput {
charBuffer.flip()
val result = if (coderResult.isUnderflow & charBuffer.hasRemaining) charBuffer.get() else ErrorChar
byteBuffer.clear()
- charBuffer.clear()
+ if (!charBuffer.hasRemaining) charBuffer.clear()
result
}
}
- _cursor += 1
- if (_cursor < bytes.length) {
- val byte = bytes(_cursor)
- if (byte >= 0) byte.toChar // 7-Bit ASCII
- else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence
- else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence
- else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence, will probably produce an (unsupported) surrogate pair
- else ErrorChar
- } else EOI
+ if (charBuffer.position() > 0) {
+ val result = charBuffer.get()
+ charBuffer.clear()
+ result
+ } else {
+ _cursor += 1
+ if (_cursor < bytes.length) {
+ val byte = bytes(_cursor)
+ if (byte >= 0) byte.toChar // 7-Bit ASCII
+ else if ((byte & 0xE0) == 0xC0) decode(byte, 1) // 2-byte UTF-8 sequence
+ else if ((byte & 0xF0) == 0xE0) decode(byte, 2) // 3-byte UTF-8 sequence
+ else if ((byte & 0xF8) == 0xF0) decode(byte, 3) // 4-byte UTF-8 sequence
+ else ErrorChar
+ } else EOI
+ }
}
def length = bytes.length
def sliceString(start: Int, end: Int) = new String(bytes, start, end - start, UTF8)
diff --git a/src/test/scala/spray/json/JsonParserSpec.scala b/src/test/scala/spray/json/JsonParserSpec.scala
index 2bef24c..0f7ae7f 100644
--- a/src/test/scala/spray/json/JsonParserSpec.scala
+++ b/src/test/scala/spray/json/JsonParserSpec.scala
@@ -64,7 +64,8 @@ class JsonParserSpec extends Specification {
val json = JsObject(
"7-bit" -> JsString("This is regular 7-bit ASCII text."),
"2-bytes" -> JsString("2-byte UTF-8 chars like £, æ or Ö"),
- "3-bytes" -> JsString("3-byte UTF-8 chars like ヨ, ᄅ or ᐁ."))
+ "3-bytes" -> JsString("3-byte UTF-8 chars like ヨ, ᄅ or ᐁ."),
+ "4-bytes" -> JsString("4-byte UTF-8 chars like \uD801\uDC37, \uD852\uDF62 or \uD83D\uDE01."))
JsonParser(json.prettyPrint.getBytes("UTF-8")) === json
}
"parse directly from UTF-8 encoded bytes when string starts with a multi-byte character" in {