From 9f8daa47ffdddaab5afec04905fb68aed55942e4 Mon Sep 17 00:00:00 2001
From: Burak Emir
Date: Tue, 10 Jul 2007 15:22:37 +0000
Subject: added UTF8Codec.encode(ch:Int):Array[Byte] use it in json lexer and xml character-reference conversion, for java1.4

---
 src/library/scala/io/UTF8Codec.scala            | 36 +++++++++++++++++++++++++
 src/library/scala/util/parsing/json/Lexer.scala |  8 +++---
 src/library/scala/xml/Utility.scala             |  2 +-
 3 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'src/library')

Review notes (this commentary area is ignored when the patch is applied):
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch; apply with `git apply --ignore-whitespace` (or
   `patch -l`) if the context does not match exactly.
 * encode(ch1: Int) now maps negative values and UTF-16 surrogates to
   UNI_REPLACEMENT_CHAR instead of emitting ill-formed UTF-8.
 * Both call sites decode the bytes explicitly as "UTF-8" rather than with the
   platform default charset.

diff --git a/src/library/scala/io/UTF8Codec.scala b/src/library/scala/io/UTF8Codec.scala
index a1d688cb1e..2c2218766b 100644
--- a/src/library/scala/io/UTF8Codec.scala
+++ b/src/library/scala/io/UTF8Codec.scala
@@ -17,6 +17,42 @@ package scala.io
  */
 object UTF8Codec {
 
+  final val UNI_REPLACEMENT_CHAR: Int = 0x0000FFFD
+
+  /** Converts a Unicode code point to its UTF-8 byte sequence.
+   *
+   *  Code points that cannot be encoded (negative values, UTF-16
+   *  surrogates 0xD800-0xDFFF and values above 0x10FFFF) are replaced
+   *  by <code>UNI_REPLACEMENT_CHAR</code>.
+   *
+   *  @author buraq
+   *  @param  ch1 the code point to encode
+   *  @return the 1 to 4 bytes encoding <code>ch1</code>
+   */
+  def encode(ch1: Int): Array[Byte] = {
+    val invalid = ch1 < 0 || ch1 > 0x0010FFFF || (ch1 >= 0xD800 && ch1 <= 0xDFFF)
+    var ch = if (invalid) UNI_REPLACEMENT_CHAR else ch1
+    // number of bytes needed for this code point
+    val bytesToWrite =
+      if (ch < 0x80) 1
+      else if (ch < 0x800) 2
+      else if (ch < 0x10000) 3
+      else 4
+    // marker bits of the leading byte, indexed by sequence length
+    val firstByteMark = Array(0x00, 0x00, 0xC0, 0xE0, 0xF0)
+    val res = new Array[Byte](bytesToWrite)
+
+    // continuation bytes, last one first: 10xxxxxx with the low six bits
+    var i = bytesToWrite - 1
+    while (i > 0) {
+      res(i) = ((ch & 0x3F) | 0x80).asInstanceOf[Byte]
+      ch = ch >> 6
+      i = i - 1
+    }
+    res(0) = (ch | firstByteMark(bytesToWrite)).asInstanceOf[Byte]
+    res
+  }
+
   def encode(src: Array[Char], from: Int, dst: Array[Byte], to: Int, len: Int): Int = {
     var i = from
     var j = to
diff --git a/src/library/scala/util/parsing/json/Lexer.scala b/src/library/scala/util/parsing/json/Lexer.scala
index a9badbec83..c709b37900 100644
--- a/src/library/scala/util/parsing/json/Lexer.scala
+++ b/src/library/scala/util/parsing/json/Lexer.scala
@@ -75,10 +75,10 @@ class Lexer extends StdLexical with ImplicitConversions {
   val hexDigits = Set[Char]() ++ "0123456789abcdefABCDEF".toArray
   def hexDigit = elem("hex digit", hexDigits.contains(_))
 
-	def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ {
-		case a ~ b ~ c ~ d =>
-			new String(Character.toChars(Integer.parseInt(List(a,b,c,d).mkString(""),16)))
-	}
+  def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ {
+    case a ~ b ~ c ~ d =>
+      new String(io.UTF8Codec.encode(Integer.parseInt(List(a,b,c,d).mkString(""),16)), "UTF-8")
+  }
 
   private def lift[T](f: String => T)(xs: List[Any]): T = f(xs.mkString(""))
 }
diff --git a/src/library/scala/xml/Utility.scala b/src/library/scala/xml/Utility.scala
index fc7c024eca..53a974bca1 100644
--- a/src/library/scala/xml/Utility.scala
+++ b/src/library/scala/xml/Utility.scala
@@ -476,7 +476,7 @@ object Utility extends AnyRef with parsing.TokenTests {
         }
         nextch()
       }
-      i.asInstanceOf[Char].toString()
+      new String(io.UTF8Codec.encode(i), "UTF-8")
     }
 
 }
-- 
cgit v1.2.3