diff options
Diffstat (limited to 'examples/scala-js/library/src/main/scala/scala/scalajs/niocharset/UTF_16_Common.scala')
-rw-r--r-- | examples/scala-js/library/src/main/scala/scala/scalajs/niocharset/UTF_16_Common.scala | 205 |
1 files changed, 205 insertions, 0 deletions
/*                     __                                               *\
**     ________ ___   / /  ___      __ ____  Scala.js API               **
**    / __/ __// _ | / /  / _ | __ / // __/  (c) 2013, LAMP/EPFL        **
**  __\ \/ /__/ __ |/ /__/ __ |/_// /_\ \    http://scala-lang.org/     **
** /____/\___/_/ |_/____/_/ | |__/ /____/                               **
**                          |/____/                                     **
\*                                                                      */


package scala.scalajs.niocharset

import scala.annotation.tailrec

import java.nio._
import java.nio.charset._

/** Shared implementation of the UTF-16 charsets.
 *
 *  The byte order is selected by `endianness`, which must be one of the
 *  constants of the companion object: `BigEndian`, `LittleEndian`, or
 *  `AutoEndian`. With `AutoEndian` the decoder looks for a byte order mark
 *  (BOM) in the first two bytes, and the encoder emits a big-endian BOM
 *  before the first character.
 *
 *  @param name       canonical charset name, forwarded to `Charset`
 *  @param aliases    charset aliases, forwarded to `Charset`
 *  @param endianness one of `AutoEndian`, `BigEndian`, `LittleEndian`
 */
private[niocharset] abstract class UTF_16_Common protected (
    name: String, aliases: Array[String],
    private val endianness: Int) extends Charset(name, aliases) {

  import UTF_16_Common._

  /** Always reports `true`, whatever `that` is. */
  def contains(that: Charset): Boolean = true

  def newDecoder(): CharsetDecoder = new Decoder
  def newEncoder(): CharsetEncoder = new Encoder

  /** Decoder producing one char per two input bytes (hence the 0.5 average
   *  and 1.0 maximum chars-per-byte ratios).
   */
  private class Decoder extends CharsetDecoder(
      UTF_16_Common.this, 0.5f, 1.0f) {

    /** Effective byte order. Starts as the charset's configured value and is
     *  replaced by `BigEndian` or `LittleEndian` once the first two bytes
     *  have been inspected in `AutoEndian` mode.
     */
    private var endianness = UTF_16_Common.this.endianness

    /** Restores the charset's configured byte order, so that a reused
     *  decoder looks for a BOM again.
     */
    override protected def implReset(): Unit = {
      super.implReset()
      endianness = UTF_16_Common.this.endianness
    }

    /** Decodes as many complete code units as possible from `in` to `out`.
     *
     *  Whenever a result other than "continue" is returned, the position of
     *  `in` is rewound to the start of the code unit (or surrogate pair) that
     *  could not be processed.
     *
     *  @return `UNDERFLOW` when fewer than 2 bytes remain (or a high
     *          surrogate is not yet followed by 2 more bytes), `OVERFLOW`
     *          when `out` has no room, or a malformed-input result of
     *          length 2 for an unpaired surrogate
     */
    def decodeLoop(in: ByteBuffer, out: CharBuffer): CoderResult = {
      @inline
      @tailrec
      def loop(): CoderResult = {
        if (in.remaining < 2) CoderResult.UNDERFLOW
        else {
          val b1 = in.get() & 0xff
          val b2 = in.get() & 0xff

          val wasBOM = if (endianness == AutoEndian) {
            // Read BOM
            if (b1 == 0xfe && b2 == 0xff) {
              endianness = BigEndian
              true
            } else if (b1 == 0xff && b2 == 0xfe) {
              endianness = LittleEndian
              true
            } else {
              // Not a valid BOM: default to BigEndian and start reading
              endianness = BigEndian
              false
            }
          } else false

          if (wasBOM) {
            // The BOM itself produces no output
            loop()
          } else {
            val bigEndian = endianness == BigEndian

            @inline def bytes2char(hi: Int, lo: Int): Char =
              (if (bigEndian) (hi << 8) | lo else (lo << 8) | hi).toChar

            val c1 = bytes2char(b1, b2)

            if (Character.isLowSurrogate(c1)) {
              // Low surrogate without a preceding high surrogate
              in.position(in.position - 2)
              CoderResult.malformedForLength(2)
            } else if (!Character.isHighSurrogate(c1)) {
              // Plain BMP character
              if (out.remaining == 0) {
                in.position(in.position - 2)
                CoderResult.OVERFLOW
              } else {
                out.put(c1)
                loop()
              }
            } else {
              // High surrogate: a low surrogate must follow
              if (in.remaining < 2) {
                in.position(in.position - 2)
                CoderResult.UNDERFLOW
              } else {
                val b3 = in.get() & 0xff
                val b4 = in.get() & 0xff
                val c2 = bytes2char(b3, b4)

                if (!Character.isLowSurrogate(c2)) {
                  in.position(in.position - 4)
                  CoderResult.malformedForLength(2)
                } else {
                  // Write both halves of the pair or neither
                  if (out.remaining < 2) {
                    in.position(in.position - 4)
                    CoderResult.OVERFLOW
                  } else {
                    out.put(c1)
                    out.put(c2)
                    loop()
                  }
                }
              }
            }
          }
        }
      }

      loop()
    }
  }

  /** Encoder producing two bytes per char.
   *
   *  In `AutoEndian` mode a 2-byte BOM precedes the first char, so a single
   *  input char can yield 4 bytes; `maxBytesPerChar` is therefore 4.0 in
   *  that mode and 2.0 for the fixed-endian charsets.
   */
  private class Encoder extends CharsetEncoder(
      UTF_16_Common.this, 2.0f,
      // The BOM adds 2 bytes to the output of the first char
      if (endianness == AutoEndian) 4.0f else 2.0f,
      // Character 0xfffd
      if (endianness == LittleEndian) Array[Byte](-3, -1)
      else Array[Byte](-1, -3)) {

    /** Whether a BOM must still be emitted before the next char. */
    private var needToWriteBOM: Boolean = endianness == AutoEndian

    /** Re-arms BOM emission for `AutoEndian` when the encoder is reused. */
    override protected def implReset(): Unit = {
      super.implReset()
      needToWriteBOM = endianness == AutoEndian
    }

    /** Encodes as many chars as possible from `in` to `out`.
     *
     *  Whenever a result other than "continue" is returned, the position of
     *  `in` is rewound to the start of the char (or surrogate pair) that
     *  could not be processed.
     *
     *  @return `UNDERFLOW` when `in` is exhausted (or ends with a high
     *          surrogate), `OVERFLOW` when `out` lacks room for the BOM, a
     *          char or a surrogate pair, or a malformed-input result of
     *          length 1 for an unpaired surrogate
     */
    def encodeLoop(in: CharBuffer, out: ByteBuffer): CoderResult = {
      if (needToWriteBOM && out.remaining < 2) {
        // No room for the BOM: nothing is consumed and it stays pending
        CoderResult.OVERFLOW
      } else {
        if (needToWriteBOM) {
          // Always encode in big endian
          out.put(0xfe.toByte)
          out.put(0xff.toByte)
          needToWriteBOM = false
        }

        // AutoEndian encodes as big endian, matching the BOM written above
        val bigEndian = endianness != LittleEndian

        @inline
        def putChar(c: Char): Unit = {
          if (bigEndian) {
            out.put((c >> 8).toByte)
            out.put(c.toByte)
          } else {
            out.put(c.toByte)
            out.put((c >> 8).toByte)
          }
        }

        @inline
        @tailrec
        def loop(): CoderResult = {
          if (in.remaining == 0) CoderResult.UNDERFLOW
          else {
            val c1 = in.get()

            if (Character.isLowSurrogate(c1)) {
              // Low surrogate without a preceding high surrogate
              in.position(in.position - 1)
              CoderResult.malformedForLength(1)
            } else if (!Character.isHighSurrogate(c1)) {
              // Plain BMP character
              if (out.remaining < 2) {
                in.position(in.position - 1)
                CoderResult.OVERFLOW
              } else {
                putChar(c1)
                loop()
              }
            } else {
              // High surrogate: a low surrogate must follow
              if (in.remaining < 1) {
                in.position(in.position - 1)
                CoderResult.UNDERFLOW
              } else {
                val c2 = in.get()

                if (!Character.isLowSurrogate(c2)) {
                  in.position(in.position - 2)
                  CoderResult.malformedForLength(1)
                } else {
                  // Write both halves of the pair or neither
                  if (out.remaining < 4) {
                    in.position(in.position - 2)
                    CoderResult.OVERFLOW
                  } else {
                    putChar(c1)
                    putChar(c2)
                    loop()
                  }
                }
              }
            }
          }
        }

        loop()
      }
    }
  }
}

/** Byte-order selectors accepted by the [[UTF_16_Common]] constructor. */
private[niocharset] object UTF_16_Common {
  /** Detect the byte order from a BOM when decoding; write a BOM when encoding. */
  final val AutoEndian = 0
  /** Most significant byte first. */
  final val BigEndian = 1
  /** Least significant byte first. */
  final val LittleEndian = 2
}