diff options
-rw-r--r-- | objectivec/GPBCodedInputStream.m | 7 | ||||
-rw-r--r-- | objectivec/Tests/GPBCodedInputStreamTests.m | 49 |
2 files changed, 47 insertions, 9 deletions
diff --git a/objectivec/GPBCodedInputStream.m b/objectivec/GPBCodedInputStream.m index eaa28e50..319ec15b 100644 --- a/objectivec/GPBCodedInputStream.m +++ b/objectivec/GPBCodedInputStream.m @@ -219,15 +219,16 @@ NSString *GPBCodedInputStreamReadRetainedString( result = [[NSString alloc] initWithBytes:&state->bytes[state->bufferPos] length:size encoding:NSUTF8StringEncoding]; + state->bufferPos += size; if (!result) { - result = @""; #ifdef DEBUG // https://developers.google.com/protocol-buffers/docs/proto#scalar - NSLog(@"UTF8 failure, is some field type 'string' when it should be " + NSLog(@"UTF-8 failure, is some field type 'string' when it should be " @"'bytes'?"); #endif + [NSException raise:NSParseErrorException + format:@"Invalid UTF-8 for a 'string'"]; } - state->bufferPos += size; } return result; } diff --git a/objectivec/Tests/GPBCodedInputStreamTests.m b/objectivec/Tests/GPBCodedInputStreamTests.m index b0e39d2c..cc402156 100644 --- a/objectivec/Tests/GPBCodedInputStreamTests.m +++ b/objectivec/Tests/GPBCodedInputStreamTests.m @@ -283,16 +283,53 @@ [output writeRawData:[NSData dataWithBytes:bytes length:sizeof(bytes)]]; [output flush]; - NSData* data = + NSData *data = [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey]; GPBCodedInputStream* input = [GPBCodedInputStream streamWithData:data]; + NSError *error = nil; TestAllTypes* message = [TestAllTypes parseFromCodedInputStream:input extensionRegistry:nil - error:NULL]; - XCTAssertNotNil(message); - // Make sure we can read string properties twice without crashing. - XCTAssertEqual([message.defaultString length], (NSUInteger)0); - XCTAssertEqualObjects(@"", message.defaultString); + error:&error]; + XCTAssertNotNil(error); + XCTAssertNil(message); +} + +- (void)testBOMWithinStrings { + // We've seen servers that end up with BOMs within strings (not always at the + // start, and sometimes in multiple places), make sure they always parse + // correctly. 
(Again, this is in part in case a custom string class is ever + // used again.) + const char* strs[] = { + "\xEF\xBB\xBF String with BOM", + "String with \xEF\xBB\xBF in middle", + "String with end bom \xEF\xBB\xBF", + "\xEF\xBB\xBF\xe2\x99\xa1", // BOM White Heart + "\xEF\xBB\xBF\xEF\xBB\xBF String with Two BOM", + }; + for (size_t i = 0; i < GPBARRAYSIZE(strs); ++i) { + NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory]; + GPBCodedOutputStream* output = + [GPBCodedOutputStream streamWithOutputStream:rawOutput]; + + int32_t tag = GPBWireFormatMakeTag(TestAllTypes_FieldNumber_DefaultString, + GPBWireFormatLengthDelimited); + [output writeRawVarint32:tag]; + size_t length = strlen(strs[i]); + [output writeRawVarint32:(int32_t)length]; + [output writeRawData:[NSData dataWithBytes:strs[i] length:length]]; + [output flush]; + + NSData* data = + [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey]; + GPBCodedInputStream* input = [GPBCodedInputStream streamWithData:data]; + TestAllTypes* message = [TestAllTypes parseFromCodedInputStream:input + extensionRegistry:nil + error:NULL]; + XCTAssertNotNil(message, @"Loop %zd", i); + // Ensure the string is there. NSString can consume the BOM in some + // cases, so don't actually check the string for exact equality. + XCTAssertTrue(message.defaultString.length > 0, @"Loop %zd", i); + } } @end |