summaryrefslogtreecommitdiff
path: root/src/library/scala/io/UTF8Codec.scala
blob: 389d4bd719c60c5ee256ce7e1755db39d3bea318 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2011, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */


package scala.io

/** Legacy helpers for converting between characters (or single code points)
 *  and UTF-8 encoded bytes.
 *
 *  Every conversion method here is deprecated; the bulk conversions simply
 *  forward to [[scala.io.Codec]].
 *
 *  @author  Martin Odersky
 *  @version 1.0, 04/10/2004
 */
object UTF8Codec {
  /** The Unicode replacement character, U+FFFD, as a code point. */
  final val UNI_REPLACEMENT_CHAR: Int = 0x0000FFFD

  /** The bytes EF BF BD, i.e. the UTF-8 encoding of U+FFFD.
   *
   *  This array is mutable and shared: treat it as read-only.
   */
  final val UNI_REPLACEMENT_BYTES: Array[Byte] = Array[Byte](-17, -65, -67)

  // Note, from http://unicode.org/faq/utf_bom.html#utf8-5
  //
  // A different issue arises if an unpaired surrogate is encountered when converting
  // ill-formed UTF-16 data. By representing such an unpaired surrogate on its own as a
  // 3-byte sequence, the resulting UTF-8 data stream would become ill-formed.
  // While it faithfully reflects the nature of the input, Unicode conformance
  // requires that encoding form conversion always results in a valid data stream.
  // Therefore a converter must treat this as an error.
  //
  // Some useful locations:
  //    http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt

  /** A fresh copy of the replacement bytes, so that callers who modify the
   *  array they were handed cannot corrupt `UNI_REPLACEMENT_BYTES`.
   */
  private def replacementBytes: Array[Byte] = UNI_REPLACEMENT_BYTES.clone()

  /** Encodes a single code point as UTF-8.
   *
   *  @param ch the code point to encode
   *  @return   the UTF-8 bytes of `ch`; a copy of the replacement bytes when
   *            `ch` is a surrogate or is rejected by the `String` constructor
   */
  @deprecated("""Use new String(Array(ch), 0, 1).getBytes("UTF-8") instead""")
  def encode(ch: Int): Array[Byte] = {
    // A lone surrogate must not be encoded on its own (see the note above).
    val isSurrogate = Character.getType(ch) == Character.SURROGATE.toInt
    if (isSurrogate) replacementBytes
    else
      try new String(Array(ch), 0, 1).getBytes("UTF-8")
      catch {
        // Raised by the String constructor for values that are not code points.
        case _: IllegalArgumentException => replacementBytes
      }
  }

  /** Encodes `src.slice(from, from + len)` as UTF-8 and copies the result
   *  into `dst` starting at index `to`.
   *
   *  @param src  the characters to encode
   *  @param from index of the first character to encode
   *  @param dst  the array receiving the encoded bytes; it must have room for
   *              them starting at `to`
   *  @param to   index in `dst` at which the first byte is written
   *  @param len  number of characters to encode
   *  @return     the number of bytes written to `dst`
   */
  @deprecated("Use Codec.toUTF8 instead")
  def encode(src: Array[Char], from: Int, dst: Array[Byte], to: Int, len: Int): Int = {
    // Wrap the slice explicitly instead of leaning on an implicit
    // Array[Char] => CharSequence view; the characters seen by Codec are the same.
    val window  = java.nio.CharBuffer.wrap(src.slice(from, from + len))
    val encoded = Codec.toUTF8(window)
    Array.copy(encoded, 0, dst, to, encoded.length)
    encoded.length
  }

  /** Encodes all of `s` as UTF-8 into `dst` starting at index `to`.
   *
   *  @param s   the string to encode
   *  @param dst the array receiving the encoded bytes
   *  @param to  index in `dst` at which the first byte is written
   *  @return    the number of bytes written to `dst`
   */
  @deprecated("Use Codec.toUTF8 instead")
  def encode(s: String, dst: Array[Byte], to: Int): Int =
    encode(s.toCharArray, 0, dst, to, s.length)

  /** Encodes `s` as UTF-8.
   *
   *  @param s the string to encode
   *  @return  a new array holding the UTF-8 bytes of `s`
   */
  @deprecated("Use Codec.toUTF8 instead")
  def encode(s: String): Array[Byte] = Codec.toUTF8(s)

  /** Decodes `src.slice(from, from + len)` from UTF-8 and copies the
   *  resulting characters into `dst` starting at index `to`.
   *
   *  @param src  the UTF-8 bytes to decode
   *  @param from index of the first byte to decode
   *  @param dst  the array receiving the decoded characters; it must have room
   *              for them starting at `to`
   *  @param to   index in `dst` at which the first character is written
   *  @param len  number of bytes to decode
   *  @return     the number of characters written to `dst`
   */
  @deprecated("Use Codec.fromUTF8 instead")
  def decode(src: Array[Byte], from: Int, dst: Array[Char], to: Int, len: Int): Int = {
    val decoded = Codec.fromUTF8(src.slice(from, from + len))
    Array.copy(decoded, 0, dst, to, decoded.length)
    decoded.length
  }

  /** Decodes `src.slice(from, from + len)` from UTF-8 into a `String`.
   *
   *  @param src  the UTF-8 bytes to decode
   *  @param from index of the first byte to decode
   *  @param len  number of bytes to decode
   *  @return     the decoded characters as a string
   */
  @deprecated("Use Codec.fromUTF8 instead")
  def decode(src: Array[Byte], from: Int, len: Int): String =
    new String(Codec.fromUTF8(src.slice(from, from + len)))
}