summaryrefslogtreecommitdiff
path: root/examples/scala-js/library/src/main/scala/scala/scalajs/niocharset/UTF_8.scala
diff options
context:
space:
mode:
Diffstat (limited to 'examples/scala-js/library/src/main/scala/scala/scalajs/niocharset/UTF_8.scala')
-rw-r--r--examples/scala-js/library/src/main/scala/scala/scalajs/niocharset/UTF_8.scala455
1 files changed, 0 insertions, 455 deletions
diff --git a/examples/scala-js/library/src/main/scala/scala/scalajs/niocharset/UTF_8.scala b/examples/scala-js/library/src/main/scala/scala/scalajs/niocharset/UTF_8.scala
deleted file mode 100644
index 57f4ad6..0000000
--- a/examples/scala-js/library/src/main/scala/scala/scalajs/niocharset/UTF_8.scala
+++ /dev/null
@@ -1,455 +0,0 @@
-/*                     __                                               *\
-**     ________ ___   / /  ___      __ ____  Scala.js API               **
-**    / __/ __// _ | / /  / _ | __ / // __/  (c) 2013, LAMP/EPFL        **
-**  __\ \/ /__/ __ |/ /__/ __ |/_// /_\ \    http://scala-lang.org/     **
-** /____/\___/_/ |_/____/_/ | |__/ /____/                               **
-**                          |/____/                                     **
-\*                                                                      */
-
-
-package scala.scalajs.niocharset
-
-import scala.annotation.{switch, tailrec}
-
-import java.nio._
-import java.nio.charset._
-
-private[niocharset] object UTF_8 extends Charset("UTF-8", Array(
- "UTF8", "unicode-1-1-utf-8")) {
-
- import java.lang.Character._
-
- def contains(that: Charset): Boolean = true
-
- def newDecoder(): CharsetDecoder = new Decoder
- def newEncoder(): CharsetEncoder = new Encoder
-
- /* The table below maps the leading byte of a UTF-8 sequence to the
- * length of that sequence and shows how the bits of each byte form the
- * decoded value. For details, see http://www.ietf.org/rfc/rfc3629.txt
- *
- * -------------------------------------------------------------------
- * 0         1         2         3          Value
- * -------------------------------------------------------------------
- * 0xxxxxxx                                 00000000 00000000 0xxxxxxx
- * 110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
- * 1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
- * 11110uuu  10zzzzzz  10yyyyyy  10xxxxxx   000uuuzz zzzzyyyy yyxxxxxx
- */
-
- private val lengthByLeading: Array[Int] = Array(
- // 10wwwwww
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- // 110yyyyy
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- // 1110zzzz
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- // 11110uuu
- 4, 4, 4, 4, 4, 4, 4, 4,
- // > 11110111
- -1, -1, -1, -1, -1, -1, -1, -1
- )
-
- @inline
- private class DecodedMultiByte(val failure: CoderResult,
- val high: Char, val low: Char)
-
- private object DecodedMultiByte {
- @inline def apply(failure: CoderResult): DecodedMultiByte =
- new DecodedMultiByte(failure, 0, 0)
-
- @inline def apply(single: Char): DecodedMultiByte =
- new DecodedMultiByte(null, single, 0)
-
- @inline def apply(high: Char, low: Char): DecodedMultiByte =
- new DecodedMultiByte(null, high, low)
- }
-
- private class Decoder extends CharsetDecoder(UTF_8, 1.0f, 1.0f) {
- def decodeLoop(in: ByteBuffer, out: CharBuffer): CoderResult = {
- if (in.hasArray && out.hasArray)
- decodeLoopArray(in, out)
- else
- decodeLoopNoArray(in, out)
- }
-
- private def decodeLoopArray(in: ByteBuffer, out: CharBuffer): CoderResult = {
- val inArray = in.array
- val inOffset = in.arrayOffset
- val inStart = in.position + inOffset
- val inEnd = in.limit + inOffset
-
- val outArray = out.array
- val outOffset = out.arrayOffset
- val outStart = out.position + outOffset
- val outEnd = out.limit + outOffset
-
- @inline
- @tailrec
- def loop(inPos: Int, outPos: Int): CoderResult = {
- @inline
- def finalize(result: CoderResult): CoderResult = {
- in.position(inPos - inOffset)
- out.position(outPos - outOffset)
- result
- }
-
- if (inPos == inEnd) {
- finalize(CoderResult.UNDERFLOW)
- } else {
- val leading = inArray(inPos).toInt
- if (leading >= 0) {
- // US-ASCII repertoire
- if (outPos == outEnd) {
- finalize(CoderResult.OVERFLOW)
- } else {
- outArray(outPos) = leading.toChar
- loop(inPos+1, outPos+1)
- }
- } else {
- // Multi-byte
- val length = lengthByLeading(leading & 0x7f)
- if (length == -1) {
- finalize(CoderResult.malformedForLength(1))
- } else if (inPos + length > inEnd) {
- finalize(CoderResult.UNDERFLOW)
- } else {
- val decoded = {
- val b2 = inArray(inPos+1)
- if (length == 2) decode2(leading, b2)
- else if (length == 3) decode3(leading, b2, inArray(inPos+2))
- else decode4(leading, b2, inArray(inPos+2), inArray(inPos+3))
- }
-
- if (decoded.failure != null) {
- finalize(decoded.failure)
- } else if (decoded.low == 0) {
- // not a surrogate pair
- if (outPos == outEnd)
- finalize(CoderResult.OVERFLOW)
- else {
- outArray(outPos) = decoded.high
- loop(inPos+length, outPos+1)
- }
- } else {
- // a surrogate pair
- if (outPos + 2 > outEnd)
- finalize(CoderResult.OVERFLOW)
- else {
- outArray(outPos) = decoded.high
- outArray(outPos+1) = decoded.low
- loop(inPos+length, outPos+2)
- }
- }
- }
- }
- }
- }
-
- loop(inStart, outStart)
- }
-
- private def decodeLoopNoArray(in: ByteBuffer, out: CharBuffer): CoderResult = {
- @inline
- @tailrec
- def loop(): CoderResult = {
- @inline
- def finalize(read: Int, result: CoderResult): CoderResult = {
- in.position(in.position - read)
- result
- }
-
- if (!in.hasRemaining) {
- CoderResult.UNDERFLOW
- } else {
- val leading = in.get().toInt
- if (leading >= 0) {
- // US-ASCII repertoire
- if (!out.hasRemaining) {
- finalize(1, CoderResult.OVERFLOW)
- } else {
- out.put(leading.toChar)
- loop()
- }
- } else {
- // Multi-byte
- val length = lengthByLeading(leading & 0x7f)
- if (length == -1) {
- finalize(1, CoderResult.malformedForLength(1))
- } else if (in.remaining < length-1) {
- finalize(1, CoderResult.UNDERFLOW)
- } else {
- val decoded = {
- if (length == 2) decode2(leading, in.get())
- else if (length == 3) decode3(leading, in.get(), in.get())
- else decode4(leading, in.get(), in.get(), in.get())
- }
-
- if (decoded.failure != null) {
- finalize(length, decoded.failure)
- } else if (decoded.low == 0) {
- // not a surrogate pair
- if (!out.hasRemaining)
- finalize(length, CoderResult.OVERFLOW)
- else {
- out.put(decoded.high)
- loop()
- }
- } else {
- // a surrogate pair
- if (out.remaining < 2)
- finalize(length, CoderResult.OVERFLOW)
- else {
- out.put(decoded.high)
- out.put(decoded.low)
- loop()
- }
- }
- }
- }
- }
- }
-
- loop()
- }
-
- @inline private def isInvalidNextByte(b: Int): Boolean =
- (b & 0xc0) != 0x80
-
- @inline private def decode2(b1: Int, b2: Int): DecodedMultiByte = {
- if (isInvalidNextByte(b2))
- DecodedMultiByte(CoderResult.malformedForLength(1))
- else {
- val codePoint = (((b1 & 0x1f) << 6) | (b2 & 0x3f))
- // By construction, 0 <= codePoint <= 0x7ff < MIN_SURROGATE
- if (codePoint < 0x80) {
- // Should have been encoded with only 1 byte
- DecodedMultiByte(CoderResult.malformedForLength(2))
- } else {
- DecodedMultiByte(codePoint.toChar)
- }
- }
- }
-
- @inline private def decode3(b1: Int, b2: Int, b3: Int): DecodedMultiByte = {
- if (isInvalidNextByte(b2))
- DecodedMultiByte(CoderResult.malformedForLength(1))
- else if (isInvalidNextByte(b3))
- DecodedMultiByte(CoderResult.malformedForLength(2))
- else {
- val codePoint = (((b1 & 0xf) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3f))
- // By construction, 0 <= codePoint <= 0xffff < MIN_SUPPLEMENTARY_CODE_POINT
- if ((codePoint < 0x800) ||
- (codePoint >= MIN_SURROGATE && codePoint <= MAX_SURROGATE)) {
- // Should have been encoded with only 1 or 2 bytes
- // or it is a surrogate, which is not a valid code point
- DecodedMultiByte(CoderResult.malformedForLength(3))
- } else {
- DecodedMultiByte(codePoint.toChar)
- }
- }
- }
-
- @inline private def decode4(b1: Int, b2: Int, b3: Int, b4: Int): DecodedMultiByte = {
- if (isInvalidNextByte(b2))
- DecodedMultiByte(CoderResult.malformedForLength(1))
- else if (isInvalidNextByte(b3))
- DecodedMultiByte(CoderResult.malformedForLength(2))
- else if (isInvalidNextByte(b4))
- DecodedMultiByte(CoderResult.malformedForLength(3))
- else {
- val codePoint = (((b1 & 0x7) << 18) | ((b2 & 0x3f) << 12) |
- ((b3 & 0x3f) << 6) | (b4 & 0x3f))
- // By construction, 0 <= codePoint <= 0x1fffff
- if (codePoint < 0x10000 || codePoint > MAX_CODE_POINT) {
- // It should have been encoded with 1, 2, or 3 bytes
- // or it is not a valid code point
- DecodedMultiByte(CoderResult.malformedForLength(4))
- } else {
- // Here, we need to encode the code point as a surrogate pair.
- // http://en.wikipedia.org/wiki/UTF-16
- val offsetCodePoint = codePoint - 0x10000
- DecodedMultiByte(
- ((offsetCodePoint >> 10) | 0xd800).toChar,
- ((offsetCodePoint & 0x3ff) | 0xdc00).toChar)
- }
- }
- }
- }
-
- private class Encoder extends CharsetEncoder(UTF_8, 1.1f, 4.0f) {
- def encodeLoop(in: CharBuffer, out: ByteBuffer): CoderResult = {
- if (in.hasArray && out.hasArray)
- encodeLoopArray(in, out)
- else
- encodeLoopNoArray(in, out)
- }
-
- private def encodeLoopArray(in: CharBuffer, out: ByteBuffer): CoderResult = {
- val inArray = in.array
- val inOffset = in.arrayOffset
- val inStart = in.position + inOffset
- val inEnd = in.limit + inOffset
-
- val outArray = out.array
- val outOffset = out.arrayOffset
- val outStart = out.position + outOffset
- val outEnd = out.limit + outOffset
-
- @inline
- @tailrec
- def loop(inPos: Int, outPos: Int): CoderResult = {
- @inline
- def finalize(result: CoderResult): CoderResult = {
- in.position(inPos - inOffset)
- out.position(outPos - outOffset)
- result
- }
-
- if (inPos == inEnd) {
- finalize(CoderResult.UNDERFLOW)
- } else {
- val c1 = inArray(inPos)
-
- if (c1 < 0x80) {
- // Encoding in one byte
- if (outPos == outEnd)
- finalize(CoderResult.OVERFLOW)
- else {
- outArray(outPos) = c1.toByte
- loop(inPos+1, outPos+1)
- }
- } else if (c1 < 0x800) {
- // Encoding in 2 bytes (by construction, not a surrogate)
- if (outPos + 2 > outEnd)
- finalize(CoderResult.OVERFLOW)
- else {
- outArray(outPos) = ((c1 >> 6) | 0xc0).toByte
- outArray(outPos+1) = ((c1 & 0x3f) | 0x80).toByte
- loop(inPos+1, outPos+2)
- }
- } else if (!isSurrogate(c1)) {
- // Not a surrogate, encoding in 3 bytes
- if (outPos + 3 > outEnd)
- finalize(CoderResult.OVERFLOW)
- else {
- outArray(outPos) = ((c1 >> 12) | 0xe0).toByte
- outArray(outPos+1) = (((c1 >> 6) & 0x3f) | 0x80).toByte
- outArray(outPos+2) = ((c1 & 0x3f) | 0x80).toByte
- loop(inPos+1, outPos+3)
- }
- } else if (isHighSurrogate(c1)) {
- // Should have a low surrogate that follows
- if (inPos + 1 == inEnd)
- finalize(CoderResult.UNDERFLOW)
- else {
- val c2 = inArray(inPos+1)
- if (!isLowSurrogate(c2)) {
- finalize(CoderResult.malformedForLength(1))
- } else {
- // Surrogate pair, encoding in 4 bytes
- if (outPos + 4 > outEnd)
- finalize(CoderResult.OVERFLOW)
- else {
- val cp = toCodePoint(c1, c2)
- outArray(outPos) = ((cp >> 18) | 0xf0).toByte
- outArray(outPos+1) = (((cp >> 12) & 0x3f) | 0x80).toByte
- outArray(outPos+2) = (((cp >> 6) & 0x3f) | 0x80).toByte
- outArray(outPos+3) = ((cp & 0x3f) | 0x80).toByte
- loop(inPos+2, outPos+4)
- }
- }
- }
- } else {
- finalize(CoderResult.malformedForLength(1))
- }
- }
- }
-
- loop(inStart, outStart)
- }
-
- private def encodeLoopNoArray(in: CharBuffer, out: ByteBuffer): CoderResult = {
- @inline
- @tailrec
- def loop(): CoderResult = {
- @inline
- def finalize(read: Int, result: CoderResult): CoderResult = {
- in.position(in.position - read)
- result
- }
-
- if (!in.hasRemaining) {
- CoderResult.UNDERFLOW
- } else {
- val c1 = in.get()
-
- if (c1 < 0x80) {
- // Encoding in one byte
- if (!out.hasRemaining)
- finalize(1, CoderResult.OVERFLOW)
- else {
- out.put(c1.toByte)
- loop()
- }
- } else if (c1 < 0x800) {
- // Encoding in 2 bytes (by construction, not a surrogate)
- if (out.remaining < 2)
- finalize(1, CoderResult.OVERFLOW)
- else {
- out.put(((c1 >> 6) | 0xc0).toByte)
- out.put(((c1 & 0x3f) | 0x80).toByte)
- loop()
- }
- } else if (!isSurrogate(c1)) {
- // Not a surrogate, encoding in 3 bytes
- if (out.remaining < 3)
- finalize(1, CoderResult.OVERFLOW)
- else {
- out.put(((c1 >> 12) | 0xe0).toByte)
- out.put((((c1 >> 6) & 0x3f) | 0x80).toByte)
- out.put(((c1 & 0x3f) | 0x80).toByte)
- loop()
- }
- } else if (isHighSurrogate(c1)) {
- // Should have a low surrogate that follows
- if (!in.hasRemaining)
- finalize(1, CoderResult.UNDERFLOW)
- else {
- val c2 = in.get()
- if (!isLowSurrogate(c2)) {
- finalize(2, CoderResult.malformedForLength(1))
- } else {
- // Surrogate pair, encoding in 4 bytes
- if (out.remaining < 4)
- finalize(2, CoderResult.OVERFLOW)
- else {
- val cp = toCodePoint(c1, c2)
- out.put(((cp >> 18) | 0xf0).toByte)
- out.put((((cp >> 12) & 0x3f) | 0x80).toByte)
- out.put((((cp >> 6) & 0x3f) | 0x80).toByte)
- out.put(((cp & 0x3f) | 0x80).toByte)
- loop()
- }
- }
- }
- } else {
- finalize(1, CoderResult.malformedForLength(1))
- }
- }
- }
-
- loop()
- }
- }
-
- private final val SurrogateMask = 0xf800 // 11111 0 00 00000000
- private final val SurrogateID = 0xd800 // 11011 0 00 00000000
-
- @inline private def isSurrogate(c: Char): Boolean =
- (c & SurrogateMask) == SurrogateID
-}