diff options
author | Thomas Van Lenten <thomasvl@google.com> | 2016-08-08 18:02:43 -0400 |
---|---|---|
committer | Thomas Van Lenten <thomasvl@google.com> | 2016-08-09 10:37:16 -0400 |
commit | 1a6c1d092df6431c86b74a059214ab76942b8b33 (patch) | |
tree | 8a2d418467e7bcf2985243f90aa1900441f70c56 /objectivec/Tests | |
parent | 237f321e338b50503eb38728afa6ad29bea6076a (diff) | |
download | protobuf-1a6c1d092df6431c86b74a059214ab76942b8b33.tar.gz protobuf-1a6c1d092df6431c86b74a059214ab76942b8b33.tar.bz2 protobuf-1a6c1d092df6431c86b74a059214ab76942b8b33.zip |
Never use strlen on utf8 runs so null characters work.
Fixes https://github.com/google/protobuf/issues/1933
Add a new test that forces strings into two different implementations from the
NSString class cluster to help confirm we're exercising both paths by which
CodedOutputStream will extract data from an NSString.
Move the old +load test (which was flawed because the behavior really depends
on the type of string from the NSString class cluster) into a unit test that
targets the specific case we're adding a behavior confirmation on.
As a bonus, improve the TextFormat generation of string characters < 0x20.
Diffstat (limited to 'objectivec/Tests')
-rw-r--r-- | objectivec/Tests/GPBCodedOuputStreamTests.m | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/objectivec/Tests/GPBCodedOuputStreamTests.m b/objectivec/Tests/GPBCodedOuputStreamTests.m index 0723b645..2ad326be 100644 --- a/objectivec/Tests/GPBCodedOuputStreamTests.m +++ b/objectivec/Tests/GPBCodedOuputStreamTests.m @@ -193,6 +193,32 @@ } } +- (void)assertWriteStringNoTag:(NSData*)data + value:(NSString *)value + context:(NSString *)contextMessage { + NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory]; + GPBCodedOutputStream* output = + [GPBCodedOutputStream streamWithOutputStream:rawOutput]; + [output writeStringNoTag:value]; + [output flush]; + + NSData* actual = + [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey]; + XCTAssertEqualObjects(data, actual, @"%@", contextMessage); + + // Try different block sizes. + for (int blockSize = 1; blockSize <= 16; blockSize *= 2) { + rawOutput = [NSOutputStream outputStreamToMemory]; + output = [GPBCodedOutputStream streamWithOutputStream:rawOutput + bufferSize:blockSize]; + [output writeStringNoTag:value]; + [output flush]; + + actual = [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey]; + XCTAssertEqualObjects(data, actual, @"%@", contextMessage); + } +} + - (void)testWriteVarint1 { [self assertWriteVarint:bytes(0x00) value:0]; } @@ -337,4 +363,64 @@ XCTAssertEqualObjects(rawBytes, goldenData); } +- (void)testCFStringGetCStringPtrAndStringsWithNullChars { + // This test exists to verify that CFStrings with embedded NULLs still expose + // their raw buffer if they are backed by UTF8 storage. If this fails, the + // quick/direct access paths in GPBCodedOutputStream that depend on + // CFStringGetCStringPtr need to be re-evaluated (maybe just removed). + // And yes, we do get NULLs in strings from some servers. + + char zeroTest[] = "\0Test\0String"; + // Note: there is a \0 at the end of this since it is a c-string. 
+ NSString *asNSString = [[NSString alloc] initWithBytes:zeroTest + length:sizeof(zeroTest) + encoding:NSUTF8StringEncoding]; + const char *cString = + CFStringGetCStringPtr((CFStringRef)asNSString, kCFStringEncodingUTF8); + XCTAssertTrue(cString != NULL); + // Again, if the above assert fails, then it means NSString no longer exposes + // the raw utf8 storage of a string created from utf8 input, so the code using + // CFStringGetCStringPtr in GPBCodedOutputStream will still work (it will take + // a different code path); but the optimizations for when + // CFStringGetCStringPtr does work could possibly go away. + + XCTAssertEqual(sizeof(zeroTest), + [asNSString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]); + XCTAssertTrue(0 == memcmp(cString, zeroTest, sizeof(zeroTest))); + [asNSString release]; +} + +- (void)testWriteStringsWithZeroChar { + // Unicode allows `\0` as a character, and NSString is a class cluster, so + // there are a few different classes that could end up behind a given string. + // Historically, we've seen differences based on constant strings in code and + // strings built via the NSString apis. So this round trips them to ensure + // they are acting as expected. + + NSArray<NSString *> *strs = @[ + @"\0at start", + @"in\0middle", + @"at end\0", + ]; + int i = 0; + for (NSString *str in strs) { + NSData *asUTF8 = [str dataUsingEncoding:NSUTF8StringEncoding]; + NSMutableData *expected = [NSMutableData data]; + uint8_t lengthByte = (uint8_t)asUTF8.length; + [expected appendBytes:&lengthByte length:1]; + [expected appendData:asUTF8]; + + NSString *context = [NSString stringWithFormat:@"Loop %d - Literal", i]; + [self assertWriteStringNoTag:expected value:str context:context]; + + // Force a new string to be built which gets a different class from the + // NSString class cluster than the literal did. 
+ NSString *str2 = [NSString stringWithFormat:@"%@", str]; + context = [NSString stringWithFormat:@"Loop %d - Built", i]; + [self assertWriteStringNoTag:expected value:str2 context:context]; + + ++i; + } +} + @end |