1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
/* __ *\
** ________ ___ / / ___ Scala API **
** / __/ __// _ | / / / _ | (c) 2003-2011, LAMP/EPFL **
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
** /____/\___/_/ |_/____/_/ | | **
** |/ **
\* */
package scala.io
/** Legacy helpers for converting between UTF-16 text and UTF-8 bytes.
 *
 *  Every conversion method in this object is deprecated. The array- and
 *  string-based ones delegate to `Codec.toUTF8` and `Codec.fromUTF8`.
 *
 *  @author  Martin Odersky
 *  @version 1.0, 04/10/2004
 */
object UTF8Codec {
  /** Code point of the Unicode replacement character, U+FFFD. */
  final val UNI_REPLACEMENT_CHAR: Int = 0x0000FFFD

  /** The bytes EF BF BD, i.e. U+FFFD encoded as UTF-8.
   *
   *  This array is mutable and publicly visible; treat it as read-only.
   */
  final val UNI_REPLACEMENT_BYTES: Array[Byte] = Array[Byte](-17, -65, -67)

  // Note, from http://unicode.org/faq/utf_bom.html#utf8-5
  //
  //   A different issue arises if an unpaired surrogate is encountered when converting
  //   ill-formed UTF-16 data. By representing such an unpaired surrogate on its own as a
  //   3-byte sequence, the resulting UTF-8 data stream would become ill-formed.
  //   While it faithfully reflects the nature of the input, Unicode conformance
  //   requires that encoding form conversion always results in valid data stream.
  //   Therefore a converter must treat this as an error.
  //
  // Some useful locations:
  //   http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt

  /** Encodes a single code point as UTF-8.
   *
   *  @param ch the code point to encode
   *  @return   the UTF-8 bytes of `ch`, or a fresh copy of
   *            [[UNI_REPLACEMENT_BYTES]] when `ch` is a surrogate or is
   *            rejected by the `String` constructor as an invalid code point
   */
  @deprecated("""Use new String(Array(ch), 0, 1).getBytes("UTF-8") instead""", "2.8.0")
  def encode(ch: Int): Array[Byte] =
    // A copy is returned so that a caller mutating the result cannot corrupt
    // the shared replacement constant for everybody else.
    if (Character.getType(ch) == Character.SURROGATE.toInt) UNI_REPLACEMENT_BYTES.clone()
    else
      try new String(Array(ch), 0, 1).getBytes("UTF-8")
      catch {
        case _: IllegalArgumentException => UNI_REPLACEMENT_BYTES.clone()
      }

  /** Encodes part of a character array as UTF-8 into an existing byte array.
   *
   *  The input range is taken with `slice`, so any part of it that lies
   *  outside `src` is dropped instead of raising an error.
   *
   *  @param src  the characters to encode
   *  @param from index of the first character of `src` to encode
   *  @param dst  the array receiving the encoded bytes
   *  @param to   index in `dst` at which the first byte is written
   *  @param len  number of characters to encode
   *  @return     the number of bytes written to `dst`
   */
  @deprecated("Use Codec.toUTF8 instead", "2.8.0")
  def encode(src: Array[Char], from: Int, dst: Array[Byte], to: Int, len: Int): Int = {
    // Wrap explicitly as a CharSequence rather than relying on an implicit view.
    val chars = java.nio.CharBuffer.wrap(src.slice(from, from + len))
    val bytes = Codec.toUTF8(chars)
    Array.copy(bytes, 0, dst, to, bytes.length)
    bytes.length
  }

  /** Encodes a whole string as UTF-8 into an existing byte array.
   *
   *  @param s   the string to encode
   *  @param dst the array receiving the encoded bytes
   *  @param to  index in `dst` at which the first byte is written
   *  @return    the number of bytes written to `dst`
   */
  @deprecated("Use Codec.toUTF8 instead", "2.8.0")
  def encode(s: String, dst: Array[Byte], to: Int): Int =
    encode(s.toCharArray, 0, dst, to, s.length)

  /** Encodes a whole string as UTF-8.
   *
   *  @param s the string to encode
   *  @return  a new array holding the result of `Codec.toUTF8(s)`
   */
  @deprecated("Use Codec.toUTF8 instead", "2.8.0")
  def encode(s: String): Array[Byte] = Codec.toUTF8(s)

  /** Decodes part of a UTF-8 byte array into an existing character array.
   *
   *  The input range is taken with `slice`, so any part of it that lies
   *  outside `src` is dropped instead of raising an error.
   *
   *  @param src  the UTF-8 bytes to decode
   *  @param from index of the first byte of `src` to decode
   *  @param dst  the array receiving the decoded characters
   *  @param to   index in `dst` at which the first character is written
   *  @param len  number of bytes to decode
   *  @return     the number of characters written to `dst`
   */
  @deprecated("Use Codec.fromUTF8 instead", "2.8.0")
  def decode(src: Array[Byte], from: Int, dst: Array[Char], to: Int, len: Int): Int = {
    val chars = Codec.fromUTF8(src.slice(from, from + len))
    Array.copy(chars, 0, dst, to, chars.length)
    chars.length
  }

  /** Decodes part of a UTF-8 byte array into a string.
   *
   *  @param src  the UTF-8 bytes to decode
   *  @param from index of the first byte of `src` to decode
   *  @param len  number of bytes to decode
   *  @return     the decoded characters as a string
   */
  @deprecated("Use Codec.fromUTF8 instead", "2.8.0")
  def decode(src: Array[Byte], from: Int, len: Int): String =
    new String(Codec.fromUTF8(src.slice(from, from + len)))
}
|