From ab09b2a2e203b204e11ac64750e62b3f1da7dc6e Mon Sep 17 00:00:00 2001 From: Jie Luo Date: Tue, 14 Aug 2018 11:17:16 -0700 Subject: Disable surrogate check for ucs2 (#5039) * _SURROGATE_PATTERN check for ucs4 Skip some test for ucs2 by sys.maxunicode --- python/google/protobuf/internal/decoder.py | 5 ++++- python/google/protobuf/internal/message_test.py | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/python/google/protobuf/internal/decoder.py b/python/google/protobuf/internal/decoder.py index 938f6293..d2ec4f7b 100755 --- a/python/google/protobuf/internal/decoder.py +++ b/python/google/protobuf/internal/decoder.py @@ -83,7 +83,9 @@ __author__ = 'kenton@google.com (Kenton Varda)' import struct import six +import sys +_UCS2_MAXUNICODE = 65535 if six.PY3: long = int else: @@ -550,7 +552,8 @@ def StringDecoder(field_number, is_repeated, is_packed, key, new_default, e.reason = '%s in field: %s' % (e, key.full_name) raise - if is_strict_utf8 and six.PY2: + if is_strict_utf8 and six.PY2 and sys.maxunicode > _UCS2_MAXUNICODE: + # Only do the check for python2 ucs4 when is_strict_utf8 enabled if _SURROGATE_PATTERN.search(value): reason = ('String field %s contains invalid UTF-8 data when parsing' 'a protocol buffer: surrogates not allowed. Use' diff --git a/python/google/protobuf/internal/message_test.py b/python/google/protobuf/internal/message_test.py index 1a865398..ccb9221c 100755 --- a/python/google/protobuf/internal/message_test.py +++ b/python/google/protobuf/internal/message_test.py @@ -81,6 +81,7 @@ from google.protobuf.internal import testing_refleaks from google.protobuf import message from google.protobuf.internal import _parameterized +UCS2_MAXUNICODE = 65535 if six.PY3: long = int @@ -2209,7 +2210,9 @@ class Proto3Test(BaseTestCase): msg.map_int32_int32[35] = 64 msg.map_string_foreign_message['foo'].c = 5 self.assertEqual(0, len(msg.FindInitializationErrors())) - + + @unittest.skipIf(sys.maxunicode == UCS2_MAXUNICODE, + 'Skip for ucs2') def testStrictUtf8Check(self): # Test u'\ud801' is rejected at parser in both python2 and python3. serialized = (b'r\x03\xed\xa0\x81') @@ -2259,7 +2262,8 @@ class Proto3Test(BaseTestCase): unittest_proto3_arena_pb2.TestAllTypes( optional_string=u'\ud801\ud801') - @unittest.skipIf(six.PY3, 'Surrogates are rejected at setters in Python3') + @unittest.skipIf(six.PY3 or sys.maxunicode == UCS2_MAXUNICODE, + 'Surrogates are rejected at setters in Python3') def testSurrogatesInPython2(self): # Test optional_string=u'\ud801\udc01'. # surrogate pair is acceptable in python2. -- cgit v1.2.3