1 files changed, 9 insertions, 24 deletions
diff --git a/java/core/src/main/java/com/google/protobuf/Utf8.java b/java/core/src/main/java/com/google/protobuf/Utf8.java
index be7b746e..4cf79d7b 100644
--- a/java/core/src/main/java/com/google/protobuf/Utf8.java
+++ b/java/core/src/main/java/com/google/protobuf/Utf8.java
@@ -31,7 +31,6 @@
 package com.google.protobuf;
 
 import static com.google.protobuf.UnsafeUtil.addressOffset;
-import static com.google.protobuf.UnsafeUtil.getArrayBaseOffset;
 import static com.google.protobuf.UnsafeUtil.hasUnsafeArrayOperations;
 import static com.google.protobuf.UnsafeUtil.hasUnsafeByteBufferOperations;
 import static java.lang.Character.MAX_SURROGATE;
@@ -1001,8 +1000,8 @@ final class Utf8 {
         throw new ArrayIndexOutOfBoundsException(
             String.format("Array length=%d, index=%d, limit=%d", bytes.length, index, limit));
       }
-      long offset = getArrayBaseOffset() + index;
-      final long offsetLimit = getArrayBaseOffset() + limit;
+      long offset = index;
+      final long offsetLimit = limit;
       if (state != COMPLETE) {
         // The previous decoding operation was incomplete (or malformed).
         // We look for a well-formed sequence consisting of bytes from
@@ -1187,7 +1186,7 @@ final class Utf8 {
 
     @Override
     int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final int length) {
-      long outIx = getArrayBaseOffset() + offset;
+      long outIx = offset;
       final long outLimit = outIx + length;
       final int inLimit = in.length();
       if (inLimit > length || out.length - length < offset) {
@@ -1204,7 +1203,7 @@ final class Utf8 {
       }
       if (inIx == inLimit) {
         // We're done, it was ASCII encoded.
-        return (int) (outIx - getArrayBaseOffset());
+        return (int) outIx;
       }
 
       for (char c; inIx < inLimit; ++inIx) {
@@ -1243,7 +1242,7 @@ final class Utf8 {
       }
 
       // All bytes have been encoded.
-      return (int) (outIx - getArrayBaseOffset());
+      return (int) outIx;
     }
 
     @Override
@@ -1321,31 +1320,17 @@ final class Utf8 {
      */
     private static int unsafeEstimateConsecutiveAscii(
         byte[] bytes, long offset, final int maxChars) {
-      int remaining = maxChars;
-      if (remaining < UNSAFE_COUNT_ASCII_THRESHOLD) {
+      if (maxChars < UNSAFE_COUNT_ASCII_THRESHOLD) {
         // Don't bother with small strings.
         return 0;
       }
 
-      // Read bytes until 8-byte aligned so that we can read longs in the loop below.
-      // Byte arrays are already either 8 or 16-byte aligned, so we just need to make sure that
-      // the index (relative to the start of the array) is also 8-byte aligned. We do this by
-      // ANDing the index with 7 to determine the number of bytes that need to be read before
-      // we're 8-byte aligned.
-      final int unaligned = 8 - ((int) offset & 7);
-      for (int j = unaligned; j > 0; j--) {
+      for (int i = 0; i < maxChars; i++) {
         if (UnsafeUtil.getByte(bytes, offset++) < 0) {
-          return unaligned - j;
+          return i;
         }
       }
-
-      // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
-      // To speed things up further, we're reading longs instead of bytes so we use a mask to
-      // determine if any byte in the current long is non-ASCII.
-      remaining -= unaligned;
-      for (; remaining >= 8 && (UnsafeUtil.getLong(bytes, offset) & ASCII_MASK_LONG) == 0;
-          offset += 8, remaining -= 8) {}
-      return maxChars - remaining;
+      return maxChars;
     }
 
     /**