aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2010-02-01 17:41:59 +0000
committer kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2010-02-01 17:41:59 +0000
commit daee05168eb0f2cf102b7ef80c0af87c80729612 (patch)
tree 9315d4071b5a7c9dc8501efb8ddb1f1d0bf09b3b
parent ab6950d75d6bae6a1574726ee1e7060e001ed23a (diff)
download protobuf-daee05168eb0f2cf102b7ef80c0af87c80729612.tar.gz
protobuf-daee05168eb0f2cf102b7ef80c0af87c80729612.tar.bz2
protobuf-daee05168eb0f2cf102b7ef80c0af87c80729612.zip
Optimize Java string serialization. Patch from Evan Jones.
-rw-r--r-- CHANGES.txt 5
-rw-r--r-- CONTRIBUTORS.txt 2
-rw-r--r-- java/src/main/java/com/google/protobuf/CodedOutputStream.java 17
-rw-r--r-- java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java 24
-rw-r--r-- src/google/protobuf/compiler/java/java_primitive_field.cc 58
-rw-r--r-- src/google/protobuf/compiler/java/java_primitive_field.h 2
6 files changed, 99 insertions, 9 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 11b332ed..8262587b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,8 @@
+????-??-?? version 2.3.1:
+
+ Java
+ * Improved performance of string serialization.
+
2010-01-08 version 2.3.0:
General
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 6c002af9..7fe99ecb 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -80,6 +80,8 @@ Patch contributors:
* Fixes for Solaris 10 32/64-bit confusion.
Evan Jones <evanj@mit.edu>
* Optimize Java serialization code when writing a small message to a stream.
+ * Optimize Java serialization of strings so that UTF-8 encoding happens only
+ once per string per serialization call.
* Clean up some Java warnings.
Michael Kucharski <m.kucharski@gmail.com>
* Added CodedInputStream.getTotalBytesRead().
diff --git a/java/src/main/java/com/google/protobuf/CodedOutputStream.java b/java/src/main/java/com/google/protobuf/CodedOutputStream.java
index 58dd1506..18da6d9d 100644
--- a/java/src/main/java/com/google/protobuf/CodedOutputStream.java
+++ b/java/src/main/java/com/google/protobuf/CodedOutputStream.java
@@ -193,6 +193,23 @@ public final class CodedOutputStream {
writeStringNoTag(value);
}
+ /**
+ * Write a {@code string} field, including tag, to the stream, where bytes
+ * is the encoded version of value. Used by the SPEED version of messages
+ * to avoid performing the UTF-8 conversion twice. bytes is simply a hint
+ * and may be null. If it is null, value will be converted as usual.
+ */
+ public void writeStringCached(final int fieldNumber, final String value,
+ ByteString bytes)
+ throws IOException {
+ // The cache can be null if serializing without getting the size first, or
+ // if there are multiple threads.
+ if (bytes == null) {
+ bytes = ByteString.copyFromUtf8(value);
+ }
+ writeBytes(fieldNumber, bytes);
+ }
+
/** Write a {@code group} field, including tag, to the stream. */
public void writeGroup(final int fieldNumber, final MessageLite value)
throws IOException {
diff --git a/java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java b/java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
index 48e54657..85691d60 100644
--- a/java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
+++ b/java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
@@ -36,6 +36,7 @@ import protobuf_unittest.UnittestProto.TestPackedTypes;
import junit.framework.TestCase;
import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -211,6 +212,29 @@ public class CodedOutputStreamTest extends TestCase {
0x9abcdef012345678L);
}
+ /** Test writing cached strings. */
+ public void testWriteStringCached() throws IOException {
+ final ByteArrayOutputStream output = new ByteArrayOutputStream();
+ final CodedOutputStream stream = CodedOutputStream.newInstance(output);
+
+ // Test writing a string that is not cached
+ stream.writeStringCached(5, "hello", null);
+ stream.flush();
+ CodedInputStream in = CodedInputStream.newInstance(output.toByteArray());
+ assertEquals(WireFormat.makeTag(5, WireFormat.WIRETYPE_LENGTH_DELIMITED),
+ in.readTag());
+ assertEquals("hello", in.readString());
+
+ // Write a cached string: the real string is ignored
+ output.reset();
+ stream.writeStringCached(5, "ignored", ByteString.copyFromUtf8("hello"));
+ stream.flush();
+ in = CodedInputStream.newInstance(output.toByteArray());
+ assertEquals(WireFormat.makeTag(5, WireFormat.WIRETYPE_LENGTH_DELIMITED),
+ in.readTag());
+ assertEquals("hello", in.readString());
+ }
+
/** Test encodeZigZag32() and encodeZigZag64(). */
public void testEncodeZigZag() throws Exception {
assertEquals(0, CodedOutputStream.encodeZigZag32( 0));
diff --git a/src/google/protobuf/compiler/java/java_primitive_field.cc b/src/google/protobuf/compiler/java/java_primitive_field.cc
index f6179bfa..d0fd081f 100644
--- a/src/google/protobuf/compiler/java/java_primitive_field.cc
+++ b/src/google/protobuf/compiler/java/java_primitive_field.cc
@@ -199,6 +199,14 @@ GenerateMembers(io::Printer* printer) const {
"private $type$ $name$_ = $default$;\n"
"public boolean has$capitalized_name$() { return has$capitalized_name$; }\n"
"public $type$ get$capitalized_name$() { return $name$_; }\n");
+ // Avoid double encoding for Java strings
+ // This field does not need to be volatile because ByteString is immutable.
+ // http://www.cs.umd.edu/~pugh/java/memoryModel/jsr-133-faq.html#finalRight
+ // However, it seems better to be safe than sorry.
+ if (ShouldUseStringEncodingCache()) {
+ printer->Print(variables_,
+ "private volatile com.google.protobuf.ByteString $name$EncodedCache_;\n");
+ }
}
void PrimitiveFieldGenerator::
@@ -259,25 +267,57 @@ GenerateParsingCode(io::Printer* printer) const {
void PrimitiveFieldGenerator::
GenerateSerializationCode(io::Printer* printer) const {
- printer->Print(variables_,
- "if (has$capitalized_name$()) {\n"
- " output.write$capitalized_type$($number$, get$capitalized_name$());\n"
- "}\n");
+ if (ShouldUseStringEncodingCache()) {
+ // Pass the cached serialized version, then forget it.
+ // The cached version could be null if we didn't compute the size first,
+ // or if there are two threads attempting to serialize simultaneously.
+ // CodedOutputStream.writeStringCached handles this for us.
+ printer->Print(variables_,
+ "if (has$capitalized_name$()) {\n"
+ " output.write$capitalized_type$Cached($number$,\n"
+ " get$capitalized_name$(),\n"
+ " $name$EncodedCache_);\n"
+ " $name$EncodedCache_ = null;\n"
+ "}\n");
+ } else {
+ printer->Print(variables_,
+ "if (has$capitalized_name$()) {\n"
+ " output.write$capitalized_type$($number$, get$capitalized_name$());\n"
+ "}\n");
+ }
}
void PrimitiveFieldGenerator::
GenerateSerializedSizeCode(io::Printer* printer) const {
- printer->Print(variables_,
- "if (has$capitalized_name$()) {\n"
- " size += com.google.protobuf.CodedOutputStream\n"
- " .compute$capitalized_type$Size($number$, get$capitalized_name$());\n"
- "}\n");
+ // Avoid double encoding for strings: serialize the string here
+ if (ShouldUseStringEncodingCache()) {
+ printer->Print(variables_,
+ "if (has$capitalized_name$()) {\n"
+ " com.google.protobuf.ByteString serialized = \n"
+ " com.google.protobuf.ByteString.copyFromUtf8(\n"
+ " get$capitalized_name$());\n"
+ " $name$EncodedCache_ = serialized;\n"
+ " size += com.google.protobuf.CodedOutputStream\n"
+ " .computeBytesSize($number$, serialized);\n"
+ "}\n");
+ } else {
+ printer->Print(variables_,
+ "if (has$capitalized_name$()) {\n"
+ " size += com.google.protobuf.CodedOutputStream\n"
+ " .compute$capitalized_type$Size($number$, get$capitalized_name$());\n"
+ "}\n");
+ }
}
string PrimitiveFieldGenerator::GetBoxedType() const {
return BoxedPrimitiveTypeName(GetJavaType(descriptor_));
}
+bool PrimitiveFieldGenerator::ShouldUseStringEncodingCache() const {
+ return GetType(descriptor_) == FieldDescriptor::TYPE_STRING &&
+ descriptor_->file()->options().optimize_for() == FileOptions::SPEED;
+}
+
// ===================================================================
RepeatedPrimitiveFieldGenerator::
diff --git a/src/google/protobuf/compiler/java/java_primitive_field.h b/src/google/protobuf/compiler/java/java_primitive_field.h
index 4e482a05..9d6c3d5f 100644
--- a/src/google/protobuf/compiler/java/java_primitive_field.h
+++ b/src/google/protobuf/compiler/java/java_primitive_field.h
@@ -62,6 +62,8 @@ class PrimitiveFieldGenerator : public FieldGenerator {
string GetBoxedType() const;
private:
+ bool ShouldUseStringEncodingCache() const;
+
const FieldDescriptor* descriptor_;
map<string, string> variables_;