diff options
Diffstat (limited to 'objectivec/GPBCodedInputStream.m')
-rw-r--r-- | objectivec/GPBCodedInputStream.m | 329 |
1 files changed, 11 insertions, 318 deletions
diff --git a/objectivec/GPBCodedInputStream.m b/objectivec/GPBCodedInputStream.m index 9371228a..fd877838 100644 --- a/objectivec/GPBCodedInputStream.m +++ b/objectivec/GPBCodedInputStream.m @@ -216,7 +216,17 @@ NSString *GPBCodedInputStreamReadRetainedString( result = @""; } else { CheckSize(state, size); - result = GPBCreateGPBStringWithUTF8(&state->bytes[state->bufferPos], size); + result = [[NSString alloc] initWithBytes:&state->bytes[state->bufferPos] + length:size + encoding:NSUTF8StringEncoding]; + if (!result) { + result = @""; +#ifdef DEBUG + // https://developers.google.com/protocol-buffers/docs/proto#scalar + NSLog(@"UTF8 failure, is some field type 'string' when it should be " + @"'bytes'?"); +#endif + } state->bufferPos += size; } return result; @@ -478,320 +488,3 @@ void GPBCodedInputStreamCheckLastTagWas(GPBCodedInputStreamState *state, } @end - -@implementation GPBString { - @package - CFStringRef string_; - unsigned char *utf8_; - NSUInteger utf8Len_; - - // This lock is used to gate access to utf8_. Once GPBStringInitStringValue() - // has been called, string_ will be filled in, and utf8_ will be NULL. - OSSpinLock lock_; - - BOOL hasBOM_; - BOOL is7BitAscii_; -} - -// Returns true if the passed in bytes are 7 bit ascii. -// This routine needs to be fast. -static bool AreBytesIn7BitASCII(const uint8_t *bytes, NSUInteger len) { -// In the loops below, it's more efficient to collect rather than do -// conditional at every step. -#if __LP64__ - // Align bytes. This is especially important in case of 3 byte BOM. - while (len > 0 && ((size_t)bytes & 0x07)) { - if (*bytes++ & 0x80) return false; - len--; - } - while (len >= 32) { - uint64_t val = *(const uint64_t *)bytes; - uint64_t hiBits = (val & 0x8080808080808080ULL); - bytes += 8; - val = *(const uint64_t *)bytes; - hiBits |= (val & 0x8080808080808080ULL); - bytes += 8; - val = *(const uint64_t *)bytes; - hiBits |= (val & 0x8080808080808080ULL); - bytes += 8; - val = *(const uint64_t *)bytes; - if (hiBits | (val & 0x8080808080808080ULL)) return false; - bytes += 8; - len -= 32; - } - - while (len >= 16) { - uint64_t val = *(const uint64_t *)bytes; - uint64_t hiBits = (val & 0x8080808080808080ULL); - bytes += 8; - val = *(const uint64_t *)bytes; - if (hiBits | (val & 0x8080808080808080ULL)) return false; - bytes += 8; - len -= 16; - } - - while (len >= 8) { - uint64_t val = *(const uint64_t *)bytes; - if (val & 0x8080808080808080ULL) return false; - bytes += 8; - len -= 8; - } -#else // __LP64__ - // Align bytes. This is especially important in case of 3 byte BOM. - while (len > 0 && ((size_t)bytes & 0x03)) { - if (*bytes++ & 0x80) return false; - len--; - } - while (len >= 16) { - uint32_t val = *(const uint32_t *)bytes; - uint32_t hiBits = (val & 0x80808080U); - bytes += 4; - val = *(const uint32_t *)bytes; - hiBits |= (val & 0x80808080U); - bytes += 4; - val = *(const uint32_t *)bytes; - hiBits |= (val & 0x80808080U); - bytes += 4; - val = *(const uint32_t *)bytes; - if (hiBits | (val & 0x80808080U)) return false; - bytes += 4; - len -= 16; - } - - while (len >= 8) { - uint32_t val = *(const uint32_t *)bytes; - uint32_t hiBits = (val & 0x80808080U); - bytes += 4; - val = *(const uint32_t *)bytes; - if (hiBits | (val & 0x80808080U)) return false; - bytes += 4; - len -= 8; - } -#endif // __LP64__ - - while (len >= 4) { - uint32_t val = *(const uint32_t *)bytes; - if (val & 0x80808080U) return false; - bytes += 4; - len -= 4; - } - - while (len--) { - if (*bytes++ & 0x80) return false; - } - - return true; -} - -static void GPBStringInitStringValue(GPBString *string) { - OSSpinLockLock(&string->lock_); - GPBStringInitStringValueAlreadyLocked(string); - OSSpinLockUnlock(&string->lock_); -} - -static void GPBStringInitStringValueAlreadyLocked(GPBString *string) { - if (string->string_ == NULL && string->utf8_ != NULL) { - // Using kCFAllocatorMalloc for contentsDeallocator, as buffer in - // string->utf8_ is being handed off. - string->string_ = CFStringCreateWithBytesNoCopy( - NULL, string->utf8_, string->utf8Len_, kCFStringEncodingUTF8, false, - kCFAllocatorMalloc); - if (!string->string_) { -#ifdef DEBUG - // https://developers.google.com/protocol-buffers/docs/proto#scalar - NSLog(@"UTF8 failure, is some field type 'string' when it should be " - @"'bytes'?"); -#endif - string->string_ = CFSTR(""); - string->utf8Len_ = 0; - // On failure, we have to clean up the buffer. - free(string->utf8_); - } - string->utf8_ = NULL; - } -} - -GPBString *GPBCreateGPBStringWithUTF8(const void *bytes, NSUInteger length) { - GPBString *result = [[GPBString alloc] initWithBytes:bytes length:length]; - return result; -} - -- (instancetype)initWithBytes:(const void *)bytes length:(NSUInteger)length { - self = [super init]; - if (self) { - utf8_ = malloc(length); - memcpy(utf8_, bytes, length); - utf8Len_ = length; - lock_ = OS_SPINLOCK_INIT; - is7BitAscii_ = AreBytesIn7BitASCII(bytes, length); - if (length >= 3 && memcmp(utf8_, "\xef\xbb\xbf", 3) == 0) { - // We can't just remove the BOM from the string here, because in the case - // where we have > 1 BOM at the beginning of the string, we will remove one, - // and the internal NSString we create will remove the next one, and we will - // end up with a GPBString != NSString issue. - // We also just can't remove all the BOMs because then we would end up with - // potential cases where a GPBString and an NSString made with the same - // UTF8 buffer would in fact be different. - // We record the fact we have a BOM, and use it as necessary to simulate - // what NSString would return for various calls. - hasBOM_ = YES; -#if DEBUG - // Sending BOMs across the line is just wasting bits. - NSLog(@"Bad data? String should not have BOM!"); -#endif // DEBUG - } - } - return self; -} - -- (void)dealloc { - if (string_ != NULL) { - CFRelease(string_); - } - if (utf8_ != NULL) { - free(utf8_); - } - [super dealloc]; -} - -// Required NSString overrides. -- (NSUInteger)length { - if (is7BitAscii_) { - return utf8Len_; - } else { - GPBStringInitStringValue(self); - return CFStringGetLength(string_); - } -} - -- (unichar)characterAtIndex:(NSUInteger)anIndex { - OSSpinLockLock(&lock_); - if (is7BitAscii_ && utf8_) { - unichar result = utf8_[anIndex]; - OSSpinLockUnlock(&lock_); - return result; - } else { - GPBStringInitStringValueAlreadyLocked(self); - OSSpinLockUnlock(&lock_); - return CFStringGetCharacterAtIndex(string_, anIndex); - } -} - -// Override a couple of methods that typically want high performance. - -- (id)copyWithZone:(NSZone *)zone { - GPBStringInitStringValue(self); - return [(NSString *)string_ copyWithZone:zone]; -} - -- (id)mutableCopyWithZone:(NSZone *)zone { - GPBStringInitStringValue(self); - return [(NSString *)string_ mutableCopyWithZone:zone]; -} - -- (NSUInteger)hash { - // Must convert to string here to make sure that the hash is always - // consistent no matter what state the GPBString is in. - GPBStringInitStringValue(self); - return CFHash(string_); -} - -- (BOOL)isEqual:(id)object { - if (self == object) { - return YES; - } - if ([object isKindOfClass:[NSString class]]) { - GPBStringInitStringValue(self); - return CFStringCompare(string_, (CFStringRef)object, 0) == - kCFCompareEqualTo; - } - return NO; -} - -- (void)getCharacters:(unichar *)buffer range:(NSRange)aRange { - OSSpinLockLock(&lock_); - if (is7BitAscii_ && utf8_) { - unsigned char *bytes = &(utf8_[aRange.location]); - for (NSUInteger i = 0; i < aRange.length; ++i) { - buffer[i] = bytes[i]; - } - OSSpinLockUnlock(&lock_); - } else { - GPBStringInitStringValueAlreadyLocked(self); - OSSpinLockUnlock(&lock_); - CFStringGetCharacters(string_, CFRangeMake(aRange.location, aRange.length), - buffer); - } -} - -- (NSUInteger)lengthOfBytesUsingEncoding:(NSStringEncoding)encoding { - if ((encoding == NSUTF8StringEncoding) || - (encoding == NSASCIIStringEncoding && is7BitAscii_)) { - return utf8Len_ - (hasBOM_ ? 3 : 0); - } else { - GPBStringInitStringValue(self); - return [(NSString *)string_ lengthOfBytesUsingEncoding:encoding]; - } -} - -- (BOOL)getBytes:(void *)buffer - maxLength:(NSUInteger)maxLength - usedLength:(NSUInteger *)usedLength - encoding:(NSStringEncoding)encoding - options:(NSStringEncodingConversionOptions)options - range:(NSRange)range - remainingRange:(NSRangePointer)remainingRange { - // [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange] - // does not return reliable results if the maxLength argument is 0 - // (Radar 16385183). Therefore we have special cased it as a slow case so - // that it behaves however Apple intends it to behave. It should be a rare - // case. - // - // [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange] - // does not return reliable results if the range is outside of the strings - // length (Radar 16396177). Therefore we have special cased it as a slow - // case so that it behaves however Apple intends it to behave. It should - // be a rare case. - // - // We can optimize the UTF8StringEncoding and NSASCIIStringEncoding with no - // options cases. - if ((options == 0) && - (encoding == NSUTF8StringEncoding || encoding == NSASCIIStringEncoding) && - (maxLength != 0) && - (NSMaxRange(range) <= utf8Len_)) { - // Might be able to optimize it. - OSSpinLockLock(&lock_); - if (is7BitAscii_ && utf8_) { - NSUInteger length = range.length; - length = (length < maxLength) ? length : maxLength; - memcpy(buffer, utf8_ + range.location, length); - if (usedLength) { - *usedLength = length; - } - if (remainingRange) { - remainingRange->location = range.location + length; - remainingRange->length = range.length - length; - } - OSSpinLockUnlock(&lock_); - if (length > 0) { - return YES; - } else { - return NO; - } - } else { - GPBStringInitStringValueAlreadyLocked(self); - OSSpinLockUnlock(&lock_); - } - } else { - GPBStringInitStringValue(self); - } - return [(NSString *)string_ getBytes:buffer - maxLength:maxLength - usedLength:usedLength - encoding:encoding - options:options - range:range - remainingRange:remainingRange]; -} - -@end |