summaryrefslogtreecommitdiff
path: root/src/library
diff options
context:
space:
mode:
authorBurak Emir <emir@epfl.ch>2007-07-10 15:22:37 +0000
committerBurak Emir <emir@epfl.ch>2007-07-10 15:22:37 +0000
commit9f8daa47ffdddaab5afec04905fb68aed55942e4 (patch)
treea0de82d064bde54e53bd2761eb67cb166350f7d6 /src/library
parent212f89bcc6a31a2facb338e5dfb02d7b61c26891 (diff)
downloadscala-9f8daa47ffdddaab5afec04905fb68aed55942e4.tar.gz
scala-9f8daa47ffdddaab5afec04905fb68aed55942e4.tar.bz2
scala-9f8daa47ffdddaab5afec04905fb68aed55942e4.zip
Added UTF8Codec.encode(ch: Int): Array[Byte].
Use it in the JSON lexer and in the XML character-reference conversion, for Java 1.4 compatibility.
Diffstat (limited to 'src/library')
-rw-r--r--src/library/scala/io/UTF8Codec.scala36
-rw-r--r--src/library/scala/util/parsing/json/Lexer.scala8
-rw-r--r--src/library/scala/xml/Utility.scala2
3 files changed, 41 insertions, 5 deletions
diff --git a/src/library/scala/io/UTF8Codec.scala b/src/library/scala/io/UTF8Codec.scala
index a1d688cb1e..2c2218766b 100644
--- a/src/library/scala/io/UTF8Codec.scala
+++ b/src/library/scala/io/UTF8Codec.scala
@@ -17,6 +17,42 @@ package scala.io
*/
object UTF8Codec {
+ final val UNI_REPLACEMENT_CHAR: Int = 0x0000FFFD
+
+ /** Converts a Unicode code point to its UTF-8 byte sequence.
+  *
+  *  Values that have no valid UTF-8 form -- negative numbers, UTF-16 surrogate
+  *  code points (0xD800 to 0xDFFF) and anything above 0x10FFFF -- are encoded
+  *  as `UNI_REPLACEMENT_CHAR` instead.
+  *
+  *  @author buraq
+  *  @param  ch1 the code point to encode
+  *  @return a newly allocated array of 1 to 4 bytes holding the UTF-8 encoding
+  */
+ def encode(ch1: Int): Array[Byte] = {
+   // Only Unicode scalar values can be encoded; everything else is substituted.
+   val isScalarValue =
+     ch1 >= 0 && ch1 <= 0x0010FFFF && (ch1 < 0xD800 || ch1 > 0xDFFF)
+   var ch = if (isScalarValue) ch1 else UNI_REPLACEMENT_CHAR
+
+   val bytesToWrite =
+     if (ch < 0x80) 1
+     else if (ch < 0x800) 2
+     else if (ch < 0x10000) 3
+     else 4
+
+   val res = new Array[Byte](bytesToWrite)
+
+   // Continuation bytes are filled back to front; each one carries the next
+   // six low-order bits in the form 10xxxxxx.
+   var i = bytesToWrite - 1
+   while (i > 0) {
+     res(i) = (0x80 | (ch & 0x3F)).asInstanceOf[Byte]
+     ch = ch >> 6
+     i = i - 1
+   }
+
+   // The lead byte combines the length marker (0xxxxxxx, 110xxxxx, 1110xxxx
+   // or 11110xxx) with the bits that remain after the shifts above.
+   val leadMark = bytesToWrite match {
+     case 1 => 0x00
+     case 2 => 0xC0
+     case 3 => 0xE0
+     case _ => 0xF0
+   }
+   res(0) = (leadMark | ch).asInstanceOf[Byte]
+   res
+ }
+
+
def encode(src: Array[Char], from: Int, dst: Array[Byte], to: Int, len: Int): Int = {
var i = from
var j = to
diff --git a/src/library/scala/util/parsing/json/Lexer.scala b/src/library/scala/util/parsing/json/Lexer.scala
index a9badbec83..c709b37900 100644
--- a/src/library/scala/util/parsing/json/Lexer.scala
+++ b/src/library/scala/util/parsing/json/Lexer.scala
@@ -75,10 +75,10 @@ class Lexer extends StdLexical with ImplicitConversions {
val hexDigits = Set[Char]() ++ "0123456789abcdefABCDEF".toArray
def hexDigit = elem("hex digit", hexDigits.contains(_))
- def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ {
- case a ~ b ~ c ~ d =>
- new String(Character.toChars(Integer.parseInt(List(a,b,c,d).mkString(""),16)))
- }
+ // Reads four hex digits as a single code point value, UTF-8 encodes it and
+ // turns the bytes back into a String. The bytes are decoded explicitly as
+ // UTF-8: new String(bytes) without a charset name uses the platform default
+ // charset, which garbles non-ASCII characters on non-UTF-8 platforms.
+ def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ {
+   case a ~ b ~ c ~ d =>
+     val codePoint = Integer.parseInt(List(a,b,c,d).mkString(""),16)
+     new String(io.UTF8Codec.encode(codePoint), "UTF-8")
+ }
private def lift[T](f: String => T)(xs: List[Any]): T = f(xs.mkString(""))
}
diff --git a/src/library/scala/xml/Utility.scala b/src/library/scala/xml/Utility.scala
index fc7c024eca..53a974bca1 100644
--- a/src/library/scala/xml/Utility.scala
+++ b/src/library/scala/xml/Utility.scala
@@ -476,7 +476,7 @@ object Utility extends AnyRef with parsing.TokenTests {
}
nextch()
}
- i.asInstanceOf[Char].toString()
+ new String(io.UTF8Codec.encode(i))
}
}