added UTF8Codec.encode(ch:Int):Array[Byte]

Use it in the JSON lexer and in the XML character-reference conversion, for Java 1.4 compatibility.
author: Burak Emir <emir@epfl.ch> 2007-07-10 15:22:37 +0000
committer: Burak Emir <emir@epfl.ch> 2007-07-10 15:22:37 +0000
commit: 9f8daa47ffdddaab5afec04905fb68aed55942e4 (patch)
tree: a0de82d064bde54e53bd2761eb67cb166350f7d6 /src/library
parent: 212f89bcc6a31a2facb338e5dfb02d7b61c26891 (diff)
download: scala-9f8daa47ffdddaab5afec04905fb68aed55942e4.tar.gz
scala-9f8daa47ffdddaab5afec04905fb68aed55942e4.tar.bz2
scala-9f8daa47ffdddaab5afec04905fb68aed55942e4.zip
3 files changed, 41 insertions, 5 deletions
diff --git a/src/library/scala/io/UTF8Codec.scala b/src/library/scala/io/UTF8Codec.scala
index a1d688cb1e..2c2218766b 100644
--- a/src/library/scala/io/UTF8Codec.scala
+++ b/src/library/scala/io/UTF8Codec.scala
@@ -17,6 +17,42 @@ package scala.io
  */
 object UTF8Codec {
 
+  final val UNI_REPLACEMENT_CHAR: Int = 0x0000FFFD // U+FFFD; encode(ch1: Int) substitutes it for code points it cannot represent
+  /** Converts a Unicode code point to its UTF-8 byte sequence.
+   *
+   *  Code points below 0 or above 0x10FFFF cannot be represented and are
+   *  encoded as UNI_REPLACEMENT_CHAR instead. Surrogate code points
+   *  (0xD800-0xDFFF) are not rejected; they are encoded as three bytes.
+   *
+   *  @author buraq
+   *  @param ch1 the code point to encode
+   *  @return    a freshly allocated array of 1 to 4 bytes
+   */
+  def encode(ch1: Int): Array[Byte] = {
+    // Out-of-range input (including negative values, which would otherwise
+    // be emitted as one invalid byte) falls back to the replacement character.
+    var ch = if (ch1 < 0 || ch1 > 0x0010FFFF) UNI_REPLACEMENT_CHAR else ch1
+    val bytesToWrite =
+      if      (ch < 0x80)    1
+      else if (ch < 0x800)   2
+      else if (ch < 0x10000) 3
+      else                   4
+    // Marker bits of the lead byte; they depend on the sequence length.
+    val firstByteMark = bytesToWrite match { case 1 => 0x00; case 2 => 0xC0; case 3 => 0xE0; case _ => 0xF0 }
+
+    val res = new Array[Byte](bytesToWrite)
+    // Fill continuation bytes from the end: 10xxxxxx, six payload bits each.
+    var i = bytesToWrite - 1
+    while (i > 0) {
+      res(i) = (0x80 | (ch & 0x3F)).toByte
+      ch = ch >> 6
+      i = i - 1
+    }
+    // The remaining high bits share the first byte with the length marker.
+    res(0) = (ch | firstByteMark).toByte
+    res
+  }
+
   def encode(src: Array[Char], from: Int, dst: Array[Byte], to: Int, len: Int): Int = {
     var i = from
     var j = to
diff --git a/src/library/scala/util/parsing/json/Lexer.scala b/src/library/scala/util/parsing/json/Lexer.scala
index a9badbec83..c709b37900 100644
--- a/src/library/scala/util/parsing/json/Lexer.scala
+++ b/src/library/scala/util/parsing/json/Lexer.scala
@@ -75,10 +75,10 @@ class Lexer extends StdLexical with ImplicitConversions {
   val hexDigits = Set[Char]() ++ "0123456789abcdefABCDEF".toArray
   def hexDigit = elem("hex digit", hexDigits.contains(_))
 
-  def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ {
-    case a ~ b ~ c ~ d =>
-      new String(Character.toChars(Integer.parseInt(List(a,b,c,d).mkString(""),16)))
-  }
+  def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ {
+    case a ~ b ~ c ~ d => // decode as UTF-8 explicitly: the platform default charset may differ
+      new String(io.UTF8Codec.encode(Integer.parseInt(List(a,b,c,d).mkString(""),16)), "UTF-8")
+  }
 
   private def lift[T](f: String => T)(xs: List[Any]): T = f(xs.mkString(""))
 }
diff --git a/src/library/scala/xml/Utility.scala b/src/library/scala/xml/Utility.scala
index fc7c024eca..53a974bca1 100644
--- a/src/library/scala/xml/Utility.scala
+++ b/src/library/scala/xml/Utility.scala
@@ -476,7 +476,7 @@ object Utility extends AnyRef with parsing.TokenTests {
       }
       nextch()
     }
-    i.asInstanceOf[Char].toString()
+    new String(io.UTF8Codec.encode(i))
   }
 
 }
author	Burak Emir <emir@epfl.ch>	2007-07-10 15:22:37 +0000
committer	Burak Emir <emir@epfl.ch>	2007-07-10 15:22:37 +0000
commit	9f8daa47ffdddaab5afec04905fb68aed55942e4 (patch)
tree	a0de82d064bde54e53bd2761eb67cb166350f7d6 /src/library
parent	212f89bcc6a31a2facb338e5dfb02d7b61c26891 (diff)
download	scala-9f8daa47ffdddaab5afec04905fb68aed55942e4.tar.gz scala-9f8daa47ffdddaab5afec04905fb68aed55942e4.tar.bz2 scala-9f8daa47ffdddaab5afec04905fb68aed55942e4.zip