aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Van Lenten <thomasvl@google.com>2016-08-09 12:32:56 -0400
committerGitHub <noreply@github.com>2016-08-09 12:32:56 -0400
commit30bbbe99e220cb872f5d6c60f5156233bb408d79 (patch)
tree8a2d418467e7bcf2985243f90aa1900441f70c56
parent237f321e338b50503eb38728afa6ad29bea6076a (diff)
parent1a6c1d092df6431c86b74a059214ab76942b8b33 (diff)
downloadprotobuf-30bbbe99e220cb872f5d6c60f5156233bb408d79.tar.gz
protobuf-30bbbe99e220cb872f5d6c60f5156233bb408d79.tar.bz2
protobuf-30bbbe99e220cb872f5d6c60f5156233bb408d79.zip
Merge pull request #1934 from thomasvl/objc_strings_with_null
Never use strlen on utf8 runs so null characters work.
-rw-r--r--objectivec/GPBCodedOutputStream.m37
-rw-r--r--objectivec/GPBUtilities.m10
-rw-r--r--objectivec/Tests/GPBCodedOuputStreamTests.m86
3 files changed, 100 insertions, 33 deletions
diff --git a/objectivec/GPBCodedOutputStream.m b/objectivec/GPBCodedOutputStream.m
index 63ba8068..251a159c 100644
--- a/objectivec/GPBCodedOutputStream.m
+++ b/objectivec/GPBCodedOutputStream.m
@@ -144,22 +144,6 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
GPBWriteRawByte(state, (int32_t)(value >> 56) & 0xFF);
}
-#if defined(DEBUG) && DEBUG && !defined(NS_BLOCK_ASSERTIONS)
-+ (void)load {
- // This test exists to verify that CFStrings with embedded NULLs will work
- // for us. If this Assert fails, all code below that depends on
- // CFStringGetCStringPtr will NOT work properly on strings that contain
- // embedded NULLs, and we do get that in some protobufs.
- // Note that this will not be compiled in release.
- // We didn't feel that just keeping it in a unit test was sufficient because
- // the Protobuf unit tests are only run when somebody is actually working
- // on protobufs.
- CFStringRef zeroTest = CFSTR("Test\0String");
- const char *cString = CFStringGetCStringPtr(zeroTest, kCFStringEncodingUTF8);
- NSAssert(cString == NULL, @"Serious Error");
-}
-#endif // DEBUG && !defined(NS_BLOCK_ASSERTIONS)
-
- (void)dealloc {
[self flush];
[state_.output close];
@@ -282,19 +266,15 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
}
- (void)writeStringNoTag:(const NSString *)value {
- // If you are concerned about embedded NULLs see the test in
- // +load above.
- const char *quickString =
- CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
- size_t length = (quickString != NULL)
- ? strlen(quickString)
- : [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
+ size_t length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
GPBWriteRawVarint32(&state_, (int32_t)length);
-
if (length == 0) {
return;
}
+ const char *quickString =
+ CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
+
// Fast path: Most strings are short, if the buffer already has space,
// add to it directly.
NSUInteger bufferBytesLeft = state_.size - state_.position;
@@ -1038,14 +1018,7 @@ size_t GPBComputeBoolSizeNoTag(BOOL value) {
}
size_t GPBComputeStringSizeNoTag(NSString *value) {
- // If you are concerned about embedded NULLs see the test in
- // +load above.
- const char *quickString =
- CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
- NSUInteger length =
- (quickString != NULL)
- ? strlen(quickString)
- : [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
+ NSUInteger length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
return GPBComputeRawVarint32SizeForInteger(length) + length;
}
diff --git a/objectivec/GPBUtilities.m b/objectivec/GPBUtilities.m
index ee84fb45..ad876ed4 100644
--- a/objectivec/GPBUtilities.m
+++ b/objectivec/GPBUtilities.m
@@ -1052,7 +1052,15 @@ static void AppendStringEscaped(NSString *toPrint, NSMutableString *destStr) {
case '\'': [destStr appendString:@"\\\'"]; break;
case '\\': [destStr appendString:@"\\\\"]; break;
default:
- [destStr appendFormat:@"%C", aChar];
+ // This differs slightly from the C++ code in that the C++ doesn't
+ // generate UTF8; it looks at the string in UTF8, but escapes every
+ // byte > 0x7E.
+ if (aChar < 0x20) {
+ [destStr appendFormat:@"\\%d%d%d",
+ (aChar / 64), ((aChar % 64) / 8), (aChar % 8)];
+ } else {
+ [destStr appendFormat:@"%C", aChar];
+ }
break;
}
}
diff --git a/objectivec/Tests/GPBCodedOuputStreamTests.m b/objectivec/Tests/GPBCodedOuputStreamTests.m
index 0723b645..2ad326be 100644
--- a/objectivec/Tests/GPBCodedOuputStreamTests.m
+++ b/objectivec/Tests/GPBCodedOuputStreamTests.m
@@ -193,6 +193,32 @@
}
}
+- (void)assertWriteStringNoTag:(NSData*)data
+ value:(NSString *)value
+ context:(NSString *)contextMessage {
+ NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory];
+ GPBCodedOutputStream* output =
+ [GPBCodedOutputStream streamWithOutputStream:rawOutput];
+ [output writeStringNoTag:value];
+ [output flush];
+
+ NSData* actual =
+ [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
+ XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
+
+ // Try different block sizes.
+ for (int blockSize = 1; blockSize <= 16; blockSize *= 2) {
+ rawOutput = [NSOutputStream outputStreamToMemory];
+ output = [GPBCodedOutputStream streamWithOutputStream:rawOutput
+ bufferSize:blockSize];
+ [output writeStringNoTag:value];
+ [output flush];
+
+ actual = [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
+ XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
+ }
+}
+
- (void)testWriteVarint1 {
[self assertWriteVarint:bytes(0x00) value:0];
}
@@ -337,4 +363,64 @@
XCTAssertEqualObjects(rawBytes, goldenData);
}
+- (void)testCFStringGetCStringPtrAndStringsWithNullChars {
+ // This test exists to verify that CFStrings with embedded NULLs still expose
+ // their raw buffer if they are backed by UTF8 storage. If this fails, the
+ // quick/direct access paths in GPBCodedOutputStream that depend on
+ // CFStringGetCStringPtr need to be re-evalutated (maybe just removed).
+ // And yes, we do get NULLs in strings from some servers.
+
+ char zeroTest[] = "\0Test\0String";
+ // Note: there is a \0 at the end of this since it is a c-string.
+ NSString *asNSString = [[NSString alloc] initWithBytes:zeroTest
+ length:sizeof(zeroTest)
+ encoding:NSUTF8StringEncoding];
+ const char *cString =
+ CFStringGetCStringPtr((CFStringRef)asNSString, kCFStringEncodingUTF8);
+ XCTAssertTrue(cString != NULL);
+ // Again, if the above assert fails, then it means NSString no longer exposes
+ // the raw utf8 storage of a string created from utf8 input, so the code using
+ // CFStringGetCStringPtr in GPBCodedOutputStream will still work (it will take
+ // a different code path); but the optimizations for when
+ // CFStringGetCStringPtr does work could possibly go away.
+
+ XCTAssertEqual(sizeof(zeroTest),
+ [asNSString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);
+ XCTAssertTrue(0 == memcmp(cString, zeroTest, sizeof(zeroTest)));
+ [asNSString release];
+}
+
+- (void)testWriteStringsWithZeroChar {
+ // Unicode allows `\0` as a character, and NSString is a class cluster, so
+ // there are a few different classes that could end up beind a given string.
+ // Historically, we've seen differences based on constant strings in code and
+ // strings built via the NSString apis. So this round trips them to ensure
+ // they are acting as expected.
+
+ NSArray<NSString *> *strs = @[
+ @"\0at start",
+ @"in\0middle",
+ @"at end\0",
+ ];
+ int i = 0;
+ for (NSString *str in strs) {
+ NSData *asUTF8 = [str dataUsingEncoding:NSUTF8StringEncoding];
+ NSMutableData *expected = [NSMutableData data];
+ uint8_t lengthByte = (uint8_t)asUTF8.length;
+ [expected appendBytes:&lengthByte length:1];
+ [expected appendData:asUTF8];
+
+ NSString *context = [NSString stringWithFormat:@"Loop %d - Literal", i];
+ [self assertWriteStringNoTag:expected value:str context:context];
+
+ // Force a new string to be built which gets a different class from the
+ // NSString class cluster than the literal did.
+ NSString *str2 = [NSString stringWithFormat:@"%@", str];
+ context = [NSString stringWithFormat:@"Loop %d - Built", i];
+ [self assertWriteStringNoTag:expected value:str2 context:context];
+
+ ++i;
+ }
+}
+
@end