// Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. // https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.protobuf; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InvalidObjectException; import java.io.ObjectInputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; import java.util.Stack; /** * Class to represent {@code ByteStrings} formed by concatenation of other * ByteStrings, without copying the data in the pieces. The concatenation is * represented as a tree whose leaf nodes are each a * {@link com.google.protobuf.ByteString.LeafByteString}. * *
Most of the operation here is inspired by the now-famous paper * BAP95 Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and * michael plass * *
The algorithms described in the paper have been implemented for character * strings in {@code com.google.common.string.Rope} and in the c++ class {@code * cord.cc}. * *
Fundamentally the Rope algorithm represents the collection of pieces as a * binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum * sequence length, sequences that are too short relative to their depth cause a * tree rebalance. More precisely, a tree of depth d is "balanced" in the * terminology of BAP95 if its length is at least F(d+2), where F(n) is the * n-the Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum * lengths 1, 2, 3, 5, 8, 13,... * * @author carlanton@google.com (Carl Haverl) */ final class RopeByteString extends ByteString { /** * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of * depth n is "balanced", i.e flat enough, if its length is at least Fn+2, * e.g. a "balanced" {@link RopeByteString} of depth 1 must have length at * least 2, of depth 4 must have length >= 8, etc. * *
There's nothing special about using the Fibonacci numbers for this, but * they are a reasonable sequence for encapsulating the idea that we are OK * with longer strings being encoded in deeper binary trees. * *
For 32-bit integers, this array has length 46.
*/
private static final int[] minLengthByDepth;
static {
// Dynamically generate the list of Fibonacci numbers the first time this
// class is accessed.
List Small pieces of length less than {@link
* ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in
* BAP95. Large pieces are referenced without copy.
*
* @param left string on the left
* @param right string on the right
* @return concatenation representing the same sequence as the given strings
*/
static ByteString concatenate(ByteString left, ByteString right) {
if (right.size() == 0) {
return left;
}
if (left.size() == 0) {
return right;
}
final int newLength = left.size() + right.size();
if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
// Optimization from BAP95: For short (leaves in paper, but just short
// here) total length, do a copy of data to a new leaf.
return concatenateBytes(left, right);
}
if (left instanceof RopeByteString) {
final RopeByteString leftRope = (RopeByteString) left;
if (leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
// Optimization from BAP95: As an optimization of the case where the
// ByteString is constructed by repeated concatenate, recognize the case
// where a short string is concatenated to a left-hand node whose
// right-hand branch is short. In the paper this applies to leaves, but
// we just look at the length here. This has the advantage of shedding
// references to unneeded data when substrings have been taken.
//
// When we recognize this case, we do a copy of the data and create a
// new parent node so that the depth of the result is the same as the
// given left tree.
ByteString newRight = concatenateBytes(leftRope.right, right);
return new RopeByteString(leftRope.left, newRight);
}
if (leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
&& leftRope.getTreeDepth() > right.getTreeDepth()) {
// Typically for concatenate-built strings the left-side is deeper than
// the right. This is our final attempt to concatenate without
// increasing the tree depth. We'll redo the node on the RHS. This
// is yet another optimization for building the string by repeatedly
// concatenating on the right.
ByteString newRight = new RopeByteString(leftRope.right, right);
return new RopeByteString(leftRope.left, newRight);
}
}
// Fine, we'll add a node and increase the tree depth--unless we rebalance ;^)
int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
if (newLength >= minLengthByDepth[newDepth]) {
// The tree is shallow enough, so don't rebalance
return new RopeByteString(left, right);
}
return new Balancer().balance(left, right);
}
/**
* Concatenates two strings by copying data values. This is called in a few
* cases in order to reduce the growth of the number of tree nodes.
*
* @param left string on the left
* @param right string on the right
* @return string formed by copying data bytes
*/
private static ByteString concatenateBytes(ByteString left,
ByteString right) {
int leftSize = left.size();
int rightSize = right.size();
byte[] bytes = new byte[leftSize + rightSize];
left.copyTo(bytes, 0, 0, leftSize);
right.copyTo(bytes, 0, leftSize, rightSize);
return ByteString.wrap(bytes); // Constructor wraps bytes
}
/**
* Create a new RopeByteString for testing only while bypassing all the
* defenses of {@link #concatenate(ByteString, ByteString)}. This allows
* testing trees of specific structure. We are also able to insert empty
* leaves, though these are dis-allowed, so that we can make sure the
* implementation can withstand their presence.
*
* @param left string on the left of this node
* @param right string on the right of this node
* @return an unsafe instance for testing only
*/
static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
return new RopeByteString(left, right);
}
/**
* Gets the byte at the given index.
* Throws {@link ArrayIndexOutOfBoundsException} for backwards-compatibility
* reasons although it would more properly be {@link
* IndexOutOfBoundsException}.
*
* @param index index of byte
* @return the value
* @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
*/
@Override
public byte byteAt(int index) {
checkIndex(index, totalLength);
// Find the relevant piece by recursive descent
if (index < leftLength) {
return left.byteAt(index);
}
return right.byteAt(index - leftLength);
}
@Override
public int size() {
return totalLength;
}
// =================================================================
// Pieces
@Override
protected int getTreeDepth() {
return treeDepth;
}
/**
* Determines if the tree is balanced according to BAP95, which means the tree
* is flat-enough with respect to the bounds. Note that this definition of
* balanced is one where sub-trees of balanced trees are not necessarily
* balanced.
*
* @return true if the tree is balanced
*/
@Override
protected boolean isBalanced() {
return totalLength >= minLengthByDepth[treeDepth];
}
/**
* Takes a substring of this one. This involves recursive descent along the
* left and right edges of the substring, and referencing any wholly contained
* segments in between. Any leaf nodes entirely uninvolved in the substring
* will not be referenced by the substring.
*
* Substrings of {@code length < 2} should result in at most a single
* recursive call chain, terminating at a leaf node. Thus the result will be a
* {@link com.google.protobuf.ByteString.LeafByteString}.
*
* @param beginIndex start at this index
* @param endIndex the last character is the one before this index
* @return substring leaf node or tree
*/
@Override
public ByteString substring(int beginIndex, int endIndex) {
final int length = checkRange(beginIndex, endIndex, totalLength);
if (length == 0) {
// Empty substring
return ByteString.EMPTY;
}
if (length == totalLength) {
// The whole string
return this;
}
// Proper substring
if (endIndex <= leftLength) {
// Substring on the left
return left.substring(beginIndex, endIndex);
}
if (beginIndex >= leftLength) {
// Substring on the right
return right.substring(beginIndex - leftLength, endIndex - leftLength);
}
// Split substring
ByteString leftSub = left.substring(beginIndex);
ByteString rightSub = right.substring(0, endIndex - leftLength);
// Intentionally not rebalancing, since in many cases these two
// substrings will already be less deep than the top-level
// RopeByteString we're taking a substring of.
return new RopeByteString(leftSub, rightSub);
}
// =================================================================
// ByteString -> byte[]
@Override
protected void copyToInternal(byte[] target, int sourceOffset,
int targetOffset, int numberToCopy) {
if (sourceOffset + numberToCopy <= leftLength) {
left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
} else if (sourceOffset >= leftLength) {
right.copyToInternal(target, sourceOffset - leftLength, targetOffset,
numberToCopy);
} else {
int leftLength = this.leftLength - sourceOffset;
left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
right.copyToInternal(target, 0, targetOffset + leftLength,
numberToCopy - leftLength);
}
}
@Override
public void copyTo(ByteBuffer target) {
left.copyTo(target);
right.copyTo(target);
}
@Override
public ByteBuffer asReadOnlyByteBuffer() {
ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
return byteBuffer.asReadOnlyBuffer();
}
@Override
public List One surprising aspect of the algorithm is the result of balancing is not
* necessarily balanced, though it is nearly balanced. For details, see
* BAP95.
*/
private static class Balancer {
// Stack containing the part of the string, starting from the left, that
// we've already traversed. The final string should be the equivalent of
// concatenating the strings on the stack from bottom to top.
private final Stack If the length bin for our string, and all shorter length bins, are
* empty, we just push it on the stack. Otherwise, we need to start
* concatenating, putting the given string in the "middle" and continuing
* until we land in an empty length bin that matches the length of our
* concatenation.
*
* @param byteString string to place on the balance stack
*/
private void insert(ByteString byteString) {
int depthBin = getDepthBinForLength(byteString.size());
int binEnd = minLengthByDepth[depthBin + 1];
// BAP95: Concatenate all trees occupying bins representing the length of
// our new piece or of shorter pieces, to the extent that is possible.
// The goal is to clear the bin which our piece belongs in, but that may
// not be entirely possible if there aren't enough longer bins occupied.
if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
prefixesStack.push(byteString);
} else {
int binStart = minLengthByDepth[depthBin];
// Concatenate the subtrees of shorter length
ByteString newTree = prefixesStack.pop();
while (!prefixesStack.isEmpty()
&& prefixesStack.peek().size() < binStart) {
ByteString left = prefixesStack.pop();
newTree = new RopeByteString(left, newTree);
}
// Concatenate the given string
newTree = new RopeByteString(newTree, byteString);
// Continue concatenating until we land in an empty bin
while (!prefixesStack.isEmpty()) {
depthBin = getDepthBinForLength(newTree.size());
binEnd = minLengthByDepth[depthBin + 1];
if (prefixesStack.peek().size() < binEnd) {
ByteString left = prefixesStack.pop();
newTree = new RopeByteString(left, newTree);
} else {
break;
}
}
prefixesStack.push(newTree);
}
}
private int getDepthBinForLength(int length) {
int depth = Arrays.binarySearch(minLengthByDepth, length);
if (depth < 0) {
// It wasn't an exact match, so convert to the index of the containing
// fragment, which is one less even than the insertion point.
int insertionPoint = -(depth + 1);
depth = insertionPoint - 1;
}
return depth;
}
}
/**
* This class is a continuable tree traversal, which keeps the state
* information which would exist on the stack in a recursive traversal instead
* on a stack of "Bread Crumbs". The maximum depth of the stack in this
* iterator is the same as the depth of the tree being traversed.
*
* This iterator is used to implement
* {@link RopeByteString#equalsFragments(ByteString)}.
*/
private static class PieceIterator implements Iterator
* This method assumes that all error checking has already happened.
*
* Returns the actual number of bytes read or skipped.
*/
private int readSkipInternal(byte b[], int offset, int length) {
int bytesRemaining = length;
while (bytesRemaining > 0) {
advanceIfCurrentPieceFullyRead();
if (currentPiece == null) {
if (bytesRemaining == length) {
// We didn't manage to read anything
return -1;
}
break;
} else {
// Copy the bytes from this piece.
int currentPieceRemaining = currentPieceSize - currentPieceIndex;
int count = Math.min(currentPieceRemaining, bytesRemaining);
if (b != null) {
currentPiece.copyTo(b, currentPieceIndex, offset, count);
offset += count;
}
currentPieceIndex += count;
bytesRemaining -= count;
}
}
// Return the number of bytes read.
return length - bytesRemaining;
}
@Override
public int read() throws IOException {
advanceIfCurrentPieceFullyRead();
if (currentPiece == null) {
return -1;
} else {
return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
}
}
@Override
public int available() throws IOException {
int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
return RopeByteString.this.size() - bytesRead;
}
@Override
public boolean markSupported() {
return true;
}
@Override
public void mark(int readAheadLimit) {
// Set the mark to our position in the byte string
mark = currentPieceOffsetInRope + currentPieceIndex;
}
@Override
public synchronized void reset() {
// Just reinitialize and skip the specified number of bytes.
initialize();
readSkipInternal(null, 0, mark);
}
/** Common initialization code used by both the constructor and reset() */
private void initialize() {
pieceIterator = new PieceIterator(RopeByteString.this);
currentPiece = pieceIterator.next();
currentPieceSize = currentPiece.size();
currentPieceIndex = 0;
currentPieceOffsetInRope = 0;
}
/**
* Skips to the next piece if we have read all the data in the current
* piece. Sets currentPiece to null if we have reached the end of the
* input.
*/
private void advanceIfCurrentPieceFullyRead() {
if (currentPiece != null && currentPieceIndex == currentPieceSize) {
// Generally, we can only go through this loop at most once, since
// empty strings can't end up in a rope. But better to test.
currentPieceOffsetInRope += currentPieceSize;
currentPieceIndex = 0;
if (pieceIterator.hasNext()) {
currentPiece = pieceIterator.next();
currentPieceSize = currentPiece.size();
} else {
currentPiece = null;
currentPieceSize = 0;
}
}
}
}
}