aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-07-07 17:57:17 -0700
committerReynold Xin <rxin@databricks.com>2015-07-07 17:57:17 -0700
commit4ca90935c5ff15ccca13452572e317b86814f238 (patch)
treea5b0eba4f52ca6fb0e481b464859f3fcae191271 /unsafe
parent770ff1025e751ad139b6284c1d7cc3de778b1d87 (diff)
downloadspark-4ca90935c5ff15ccca13452572e317b86814f238.tar.gz
spark-4ca90935c5ff15ccca13452572e317b86814f238.tar.bz2
spark-4ca90935c5ff15ccca13452572e317b86814f238.zip
[SPARK-7190] [SPARK-8804] [SPARK-7815] [SQL] unsafe UTF8String
Let UTF8String work with binary buffer. Before we have better idea on manage the lifecycle of UTF8String in Row, we still do the copy when calling `UnsafeRow.get()` for StringType. cc rxin JoshRosen Author: Davies Liu <davies@databricks.com> Closes #7197 from davies/unsafe_string and squashes the following commits: 51b0ea0 [Davies Liu] fix test 50c1ebf [Davies Liu] remove optimization for upper/lower case 315d491 [Davies Liu] Merge branch 'master' of github.com:apache/spark into unsafe_string 93fce17 [Davies Liu] address comment e9ff7ba [Davies Liu] clean up 67ec266 [Davies Liu] fix bug 7b74b1f [Davies Liu] fallback to String if local dependent ab7857c [Davies Liu] address comments 7da92f5 [Davies Liu] handle local in toUpperCase/toLowerCase 59dbb23 [Davies Liu] revert python change d1e0716 [Davies Liu] Merge branch 'master' of github.com:apache/spark into unsafe_string 002e35f [Davies Liu] rollback hashCode change a87b7a8 [Davies Liu] improve toLowerCase and toUpperCase 76e794a [Davies Liu] fix test 8b2d5ce [Davies Liu] fix tests fd3f0a6 [Davies Liu] bug fix c4e9c88 [Davies Liu] Merge branch 'master' of github.com:apache/spark into unsafe_string c45d921 [Davies Liu] address comments 175405f [Davies Liu] unsafe UTF8String
Diffstat (limited to 'unsafe')
-rw-r--r--unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java34
-rw-r--r--unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java2
-rw-r--r--unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java155
-rw-r--r--unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java2
-rw-r--r--unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java120
5 files changed, 205 insertions, 108 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
index 53eadf96a6..cf693d01a4 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
@@ -17,7 +17,7 @@
package org.apache.spark.unsafe.array;
-import org.apache.spark.unsafe.PlatformDependent;
+import static org.apache.spark.unsafe.PlatformDependent.*;
public class ByteArrayMethods {
@@ -35,21 +35,27 @@ public class ByteArrayMethods {
}
/**
- * Optimized byte array equality check for 8-byte-word-aligned byte arrays.
+ * Optimized byte array equality check for byte arrays.
* @return true if the arrays are equal, false otherwise
*/
- public static boolean wordAlignedArrayEquals(
- Object leftBaseObject,
- long leftBaseOffset,
- Object rightBaseObject,
- long rightBaseOffset,
- long arrayLengthInBytes) {
- for (int i = 0; i < arrayLengthInBytes; i += 8) {
- final long left =
- PlatformDependent.UNSAFE.getLong(leftBaseObject, leftBaseOffset + i);
- final long right =
- PlatformDependent.UNSAFE.getLong(rightBaseObject, rightBaseOffset + i);
- if (left != right) return false;
+ public static boolean arrayEquals(
+ Object leftBase,
+ long leftOffset,
+ Object rightBase,
+ long rightOffset,
+ final long length) {
+ int i = 0;
+ while (i <= length - 8) {
+ if (UNSAFE.getLong(leftBase, leftOffset + i) != UNSAFE.getLong(rightBase, rightOffset + i)) {
+ return false;
+ }
+ i += 8;
+ }
+ while (i < length) {
+ if (UNSAFE.getByte(leftBase, leftOffset + i) != UNSAFE.getByte(rightBase, rightOffset + i)) {
+ return false;
+ }
+ i += 1;
}
return true;
}
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
index 0b4d8d286f..8d8c08de52 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
@@ -277,7 +277,7 @@ public final class BytesToBytesMap {
final MemoryLocation keyAddress = loc.getKeyAddress();
final Object storedKeyBaseObject = keyAddress.getBaseObject();
final long storedKeyBaseOffset = keyAddress.getBaseOffset();
- final boolean areEqual = ByteArrayMethods.wordAlignedArrayEquals(
+ final boolean areEqual = ByteArrayMethods.arrayEquals(
keyBaseObject,
keyBaseOffset,
storedKeyBaseObject,
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 9302b47292..d2a25096a5 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -20,9 +20,10 @@ package org.apache.spark.unsafe.types;
import javax.annotation.Nonnull;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
-import java.util.Arrays;
-import org.apache.spark.unsafe.PlatformDependent;
+import org.apache.spark.unsafe.array.ByteArrayMethods;
+
+import static org.apache.spark.unsafe.PlatformDependent.*;
/**
* A UTF-8 String for internal Spark use.
@@ -35,7 +36,9 @@ import org.apache.spark.unsafe.PlatformDependent;
public final class UTF8String implements Comparable<UTF8String>, Serializable {
@Nonnull
- private byte[] bytes;
+ private final Object base;
+ private final long offset;
+ private final int numBytes;
private static int[] bytesOfCodePointInUTF8 = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -44,60 +47,82 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
5, 5, 5, 5,
6, 6, 6, 6};
+ /**
+ * Creates an UTF8String from byte array, which should be encoded in UTF-8.
+ *
+ * Note: `bytes` will be hold by returned UTF8String.
+ */
public static UTF8String fromBytes(byte[] bytes) {
- return (bytes != null) ? new UTF8String().set(bytes) : null;
- }
-
- public static UTF8String fromString(String str) {
- return (str != null) ? new UTF8String().set(str) : null;
+ if (bytes != null) {
+ return new UTF8String(bytes, BYTE_ARRAY_OFFSET, bytes.length);
+ } else {
+ return null;
+ }
}
/**
- * Updates the UTF8String with String.
+ * Creates an UTF8String from String.
*/
- protected UTF8String set(final String str) {
+ public static UTF8String fromString(String str) {
+ if (str == null) return null;
try {
- bytes = str.getBytes("utf-8");
+ return fromBytes(str.getBytes("utf-8"));
} catch (UnsupportedEncodingException e) {
// Turn the exception into unchecked so we can find out about it at runtime, but
// don't need to add lots of boilerplate code everywhere.
- PlatformDependent.throwException(e);
+ throwException(e);
+ return null;
}
- return this;
}
- /**
- * Updates the UTF8String with byte[], which should be encoded in UTF-8.
- */
- protected UTF8String set(final byte[] bytes) {
- this.bytes = bytes;
- return this;
+ protected UTF8String(Object base, long offset, int size) {
+ this.base = base;
+ this.offset = offset;
+ this.numBytes = size;
}
/**
* Returns the number of bytes for a code point with the first byte as `b`
* @param b The first byte of a code point
*/
- public int numBytes(final byte b) {
+ private static int numBytesForFirstByte(final byte b) {
final int offset = (b & 0xFF) - 192;
return (offset >= 0) ? bytesOfCodePointInUTF8[offset] : 1;
}
/**
+ * Returns the number of bytes
+ */
+ public int numBytes() {
+ return numBytes;
+ }
+
+ /**
* Returns the number of code points in it.
*
* This is only used by Substring() when `start` is negative.
*/
- public int length() {
+ public int numChars() {
int len = 0;
- for (int i = 0; i < bytes.length; i+= numBytes(bytes[i])) {
+ for (int i = 0; i < numBytes; i += numBytesForFirstByte(getByte(i))) {
len += 1;
}
return len;
}
+ /**
+ * Returns the underline bytes, will be a copy of it if it's part of another array.
+ */
public byte[] getBytes() {
- return bytes;
+ // avoid copy if `base` is `byte[]`
+ if (offset == BYTE_ARRAY_OFFSET && base instanceof byte[]
+ && ((byte[]) base).length == numBytes) {
+ return (byte[]) base;
+ } else {
+ byte[] bytes = new byte[numBytes];
+ copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, numBytes);
+ return bytes;
+ }
}
/**
@@ -106,92 +131,110 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
* @param until the position after last code point, exclusive.
*/
public UTF8String substring(final int start, final int until) {
- if (until <= start || start >= bytes.length) {
- return UTF8String.fromBytes(new byte[0]);
+ if (until <= start || start >= numBytes) {
+ return fromBytes(new byte[0]);
}
int i = 0;
int c = 0;
- for (; i < bytes.length && c < start; i += numBytes(bytes[i])) {
+ while (i < numBytes && c < start) {
+ i += numBytesForFirstByte(getByte(i));
c += 1;
}
int j = i;
- for (; j < bytes.length && c < until; j += numBytes(bytes[i])) {
+ while (i < numBytes && c < until) {
+ i += numBytesForFirstByte(getByte(i));
c += 1;
}
- return UTF8String.fromBytes(Arrays.copyOfRange(bytes, i, j));
+ byte[] bytes = new byte[i - j];
+ copyMemory(base, offset + j, bytes, BYTE_ARRAY_OFFSET, i - j);
+ return fromBytes(bytes);
}
+ /**
+ * Returns whether this contains `substring` or not.
+ */
public boolean contains(final UTF8String substring) {
- final byte[] b = substring.getBytes();
- if (b.length == 0) {
+ if (substring.numBytes == 0) {
return true;
}
- for (int i = 0; i <= bytes.length - b.length; i++) {
- if (bytes[i] == b[0] && startsWith(b, i)) {
+ byte first = substring.getByte(0);
+ for (int i = 0; i <= numBytes - substring.numBytes; i++) {
+ if (getByte(i) == first && matchAt(substring, i)) {
return true;
}
}
return false;
}
- private boolean startsWith(final byte[] prefix, int offsetInBytes) {
- if (prefix.length + offsetInBytes > bytes.length || offsetInBytes < 0) {
+ /**
+ * Returns the byte at position `i`.
+ */
+ private byte getByte(int i) {
+ return UNSAFE.getByte(base, offset + i);
+ }
+
+ private boolean matchAt(final UTF8String s, int pos) {
+ if (s.numBytes + pos > numBytes || pos < 0) {
return false;
}
- int i = 0;
- while (i < prefix.length && prefix[i] == bytes[i + offsetInBytes]) {
- i++;
- }
- return i == prefix.length;
+ return ByteArrayMethods.arrayEquals(base, offset + pos, s.base, s.offset, s.numBytes);
}
public boolean startsWith(final UTF8String prefix) {
- return startsWith(prefix.getBytes(), 0);
+ return matchAt(prefix, 0);
}
public boolean endsWith(final UTF8String suffix) {
- return startsWith(suffix.getBytes(), bytes.length - suffix.getBytes().length);
+ return matchAt(suffix, numBytes - suffix.numBytes);
}
+ /**
+ * Returns the upper case of this string
+ */
public UTF8String toUpperCase() {
- return UTF8String.fromString(toString().toUpperCase());
+ return fromString(toString().toUpperCase());
}
+ /**
+ * Returns the lower case of this string
+ */
public UTF8String toLowerCase() {
- return UTF8String.fromString(toString().toLowerCase());
+ return fromString(toString().toLowerCase());
}
@Override
public String toString() {
try {
- return new String(bytes, "utf-8");
+ return new String(getBytes(), "utf-8");
} catch (UnsupportedEncodingException e) {
// Turn the exception into unchecked so we can find out about it at runtime, but
// don't need to add lots of boilerplate code everywhere.
- PlatformDependent.throwException(e);
+ throwException(e);
return "unknown"; // we will never reach here.
}
}
@Override
public UTF8String clone() {
- return new UTF8String().set(bytes);
+ return fromBytes(getBytes());
}
@Override
public int compareTo(final UTF8String other) {
- final byte[] b = other.getBytes();
- for (int i = 0; i < bytes.length && i < b.length; i++) {
- int res = bytes[i] - b[i];
+ int len = Math.min(numBytes, other.numBytes);
+ // TODO: compare 8 bytes as unsigned long
+ for (int i = 0; i < len; i ++) {
+ // In UTF-8, the byte should be unsigned, so we should compare them as unsigned int.
+ int res = (getByte(i) & 0xFF) - (other.getByte(i) & 0xFF);
if (res != 0) {
return res;
}
}
- return bytes.length - b.length;
+ return numBytes - other.numBytes;
}
public int compare(final UTF8String other) {
@@ -201,7 +244,11 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
@Override
public boolean equals(final Object other) {
if (other instanceof UTF8String) {
- return Arrays.equals(bytes, ((UTF8String) other).getBytes());
+ UTF8String o = (UTF8String) other;
+ if (numBytes != o.numBytes){
+ return false;
+ }
+ return ByteArrayMethods.arrayEquals(base, offset, o.base, o.offset, numBytes);
} else {
return false;
}
@@ -209,6 +256,10 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
@Override
public int hashCode() {
- return Arrays.hashCode(bytes);
+ int result = 1;
+ for (int i = 0; i < numBytes; i ++) {
+ result = 31 * result + getByte(i);
+ }
+ return result;
}
}
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
index 81315f7c94..dae47e4bab 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
@@ -99,7 +99,7 @@ public abstract class AbstractBytesToBytesMapSuite {
byte[] expected,
MemoryLocation actualAddr,
long actualLengthBytes) {
- return (actualLengthBytes == expected.length) && ByteArrayMethods.wordAlignedArrayEquals(
+ return (actualLengthBytes == expected.length) && ByteArrayMethods.arrayEquals(
expected,
BYTE_ARRAY_OFFSET,
actualAddr.getBaseObject(),
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 796cdc9dbe..8ec69ebac8 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -19,73 +19,113 @@ package org.apache.spark.unsafe.types;
import java.io.UnsupportedEncodingException;
-import junit.framework.Assert;
import org.junit.Test;
+import static junit.framework.Assert.*;
+
+import static org.apache.spark.unsafe.types.UTF8String.*;
+
public class UTF8StringSuite {
private void checkBasic(String str, int len) throws UnsupportedEncodingException {
- Assert.assertEquals(UTF8String.fromString(str).length(), len);
- Assert.assertEquals(UTF8String.fromBytes(str.getBytes("utf8")).length(), len);
+ UTF8String s1 = fromString(str);
+ UTF8String s2 = fromBytes(str.getBytes("utf8"));
+ assertEquals(s1.numChars(), len);
+ assertEquals(s2.numChars(), len);
+
+ assertEquals(s1.toString(), str);
+ assertEquals(s2.toString(), str);
+ assertEquals(s1, s2);
+
+ assertEquals(s1.hashCode(), s2.hashCode());
- Assert.assertEquals(UTF8String.fromString(str).toString(), str);
- Assert.assertEquals(UTF8String.fromBytes(str.getBytes("utf8")).toString(), str);
- Assert.assertEquals(UTF8String.fromBytes(str.getBytes("utf8")), UTF8String.fromString(str));
+ assertEquals(s1.compareTo(s2), 0);
- Assert.assertEquals(UTF8String.fromString(str).hashCode(),
- UTF8String.fromBytes(str.getBytes("utf8")).hashCode());
+ assertEquals(s1.contains(s2), true);
+ assertEquals(s2.contains(s1), true);
+ assertEquals(s1.startsWith(s1), true);
+ assertEquals(s1.endsWith(s1), true);
}
@Test
public void basicTest() throws UnsupportedEncodingException {
+ checkBasic("", 0);
checkBasic("hello", 5);
- checkBasic("世 界", 3);
+ checkBasic("大 千 世 界", 7);
+ }
+
+ @Test
+ public void compareTo() {
+ assertTrue(fromString("abc").compareTo(fromString("ABC")) > 0);
+ assertTrue(fromString("abc0").compareTo(fromString("abc")) > 0);
+ assertTrue(fromString("abcabcabc").compareTo(fromString("abcabcabc")) == 0);
+ assertTrue(fromString("aBcabcabc").compareTo(fromString("Abcabcabc")) > 0);
+ assertTrue(fromString("Abcabcabc").compareTo(fromString("abcabcabC")) < 0);
+ assertTrue(fromString("abcabcabc").compareTo(fromString("abcabcabC")) > 0);
+
+ assertTrue(fromString("abc").compareTo(fromString("世界")) < 0);
+ assertTrue(fromString("你好").compareTo(fromString("世界")) > 0);
+ assertTrue(fromString("你好123").compareTo(fromString("你好122")) > 0);
+ }
+
+ protected void testUpperandLower(String upper, String lower) {
+ UTF8String us = fromString(upper);
+ UTF8String ls = fromString(lower);
+ assertEquals(ls, us.toLowerCase());
+ assertEquals(us, ls.toUpperCase());
+ assertEquals(us, us.toUpperCase());
+ assertEquals(ls, ls.toLowerCase());
+ }
+
+ @Test
+ public void upperAndLower() {
+ testUpperandLower("", "");
+ testUpperandLower("0123456", "0123456");
+ testUpperandLower("ABCXYZ", "abcxyz");
+ testUpperandLower("ЀЁЂѺΏỀ", "ѐёђѻώề");
+ testUpperandLower("大千世界 数据砖头", "大千世界 数据砖头");
}
@Test
public void contains() {
- Assert.assertTrue(UTF8String.fromString("hello").contains(UTF8String.fromString("ello")));
- Assert.assertFalse(UTF8String.fromString("hello").contains(UTF8String.fromString("vello")));
- Assert.assertFalse(UTF8String.fromString("hello").contains(UTF8String.fromString("hellooo")));
- Assert.assertTrue(UTF8String.fromString("大千世界").contains(UTF8String.fromString("千世")));
- Assert.assertFalse(UTF8String.fromString("大千世界").contains(UTF8String.fromString("世千")));
- Assert.assertFalse(
- UTF8String.fromString("大千世界").contains(UTF8String.fromString("大千世界好")));
+ assertTrue(fromString("").contains(fromString("")));
+ assertTrue(fromString("hello").contains(fromString("ello")));
+ assertFalse(fromString("hello").contains(fromString("vello")));
+ assertFalse(fromString("hello").contains(fromString("hellooo")));
+ assertTrue(fromString("大千世界").contains(fromString("千世界")));
+ assertFalse(fromString("大千世界").contains(fromString("世千")));
+ assertFalse(fromString("大千世界").contains(fromString("大千世界好")));
}
@Test
public void startsWith() {
- Assert.assertTrue(UTF8String.fromString("hello").startsWith(UTF8String.fromString("hell")));
- Assert.assertFalse(UTF8String.fromString("hello").startsWith(UTF8String.fromString("ell")));
- Assert.assertFalse(UTF8String.fromString("hello").startsWith(UTF8String.fromString("hellooo")));
- Assert.assertTrue(UTF8String.fromString("数据砖头").startsWith(UTF8String.fromString("数据")));
- Assert.assertFalse(UTF8String.fromString("大千世界").startsWith(UTF8String.fromString("千")));
- Assert.assertFalse(
- UTF8String.fromString("大千世界").startsWith(UTF8String.fromString("大千世界好")));
+ assertTrue(fromString("").startsWith(fromString("")));
+ assertTrue(fromString("hello").startsWith(fromString("hell")));
+ assertFalse(fromString("hello").startsWith(fromString("ell")));
+ assertFalse(fromString("hello").startsWith(fromString("hellooo")));
+ assertTrue(fromString("数据砖头").startsWith(fromString("数据")));
+ assertFalse(fromString("大千世界").startsWith(fromString("千")));
+ assertFalse(fromString("大千世界").startsWith(fromString("大千世界好")));
}
@Test
public void endsWith() {
- Assert.assertTrue(UTF8String.fromString("hello").endsWith(UTF8String.fromString("ello")));
- Assert.assertFalse(UTF8String.fromString("hello").endsWith(UTF8String.fromString("ellov")));
- Assert.assertFalse(UTF8String.fromString("hello").endsWith(UTF8String.fromString("hhhello")));
- Assert.assertTrue(UTF8String.fromString("大千世界").endsWith(UTF8String.fromString("世界")));
- Assert.assertFalse(UTF8String.fromString("大千世界").endsWith(UTF8String.fromString("世")));
- Assert.assertFalse(
- UTF8String.fromString("数据砖头").endsWith(UTF8String.fromString("我的数据砖头")));
+ assertTrue(fromString("").endsWith(fromString("")));
+ assertTrue(fromString("hello").endsWith(fromString("ello")));
+ assertFalse(fromString("hello").endsWith(fromString("ellov")));
+ assertFalse(fromString("hello").endsWith(fromString("hhhello")));
+ assertTrue(fromString("大千世界").endsWith(fromString("世界")));
+ assertFalse(fromString("大千世界").endsWith(fromString("世")));
+ assertFalse(fromString("数据砖头").endsWith(fromString("我的数据砖头")));
}
@Test
public void substring() {
- Assert.assertEquals(
- UTF8String.fromString("hello").substring(0, 0), UTF8String.fromString(""));
- Assert.assertEquals(
- UTF8String.fromString("hello").substring(1, 3), UTF8String.fromString("el"));
- Assert.assertEquals(
- UTF8String.fromString("数据砖头").substring(0, 1), UTF8String.fromString("数"));
- Assert.assertEquals(
- UTF8String.fromString("数据砖头").substring(1, 3), UTF8String.fromString("据砖"));
- Assert.assertEquals(
- UTF8String.fromString("数据砖头").substring(3, 5), UTF8String.fromString("头"));
+ assertEquals(fromString("hello").substring(0, 0), fromString(""));
+ assertEquals(fromString("hello").substring(1, 3), fromString("el"));
+ assertEquals(fromString("数据砖头").substring(0, 1), fromString("数"));
+ assertEquals(fromString("数据砖头").substring(1, 3), fromString("据砖"));
+ assertEquals(fromString("数据砖头").substring(3, 5), fromString("头"));
+ assertEquals(fromString("ߵ梷").substring(0, 2), fromString("ߵ梷"));
}
}