about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jie Luo <anandolee@gmail.com> 2018-08-14 11:17:16 -0700
committer GitHub <noreply@github.com> 2018-08-14 11:17:16 -0700
commit ab09b2a2e203b204e11ac64750e62b3f1da7dc6e (patch)
tree 43454911c0c39a5394a90c2b521d4d311e59c636
parent fe2eef4bf414ebb352cf11bcec633f1fd46ec876 (diff)
download protobuf-ab09b2a2e203b204e11ac64750e62b3f1da7dc6e.tar.gz
protobuf-ab09b2a2e203b204e11ac64750e62b3f1da7dc6e.tar.bz2
protobuf-ab09b2a2e203b204e11ac64750e62b3f1da7dc6e.zip
Disable surrogate check for ucs2 (#5039)
* _SURROGATE_PATTERN check for ucs4
* Skip some test for ucs2 by sys.maxunicode
-rwxr-xr-x python/google/protobuf/internal/decoder.py 5
-rwxr-xr-x python/google/protobuf/internal/message_test.py 8
2 files changed, 10 insertions, 3 deletions
diff --git a/python/google/protobuf/internal/decoder.py b/python/google/protobuf/internal/decoder.py
index 938f6293..d2ec4f7b 100755
--- a/python/google/protobuf/internal/decoder.py
+++ b/python/google/protobuf/internal/decoder.py
@@ -83,7 +83,9 @@ __author__ = 'kenton@google.com (Kenton Varda)'
import struct
import six
+import sys
+_UCS2_MAXUNICODE = 65535
if six.PY3:
long = int
else:
@@ -550,7 +552,8 @@ def StringDecoder(field_number, is_repeated, is_packed, key, new_default,
e.reason = '%s in field: %s' % (e, key.full_name)
raise
- if is_strict_utf8 and six.PY2:
+ if is_strict_utf8 and six.PY2 and sys.maxunicode > _UCS2_MAXUNICODE:
+ # Only do the check for python2 ucs4 when is_strict_utf8 enabled
if _SURROGATE_PATTERN.search(value):
reason = ('String field %s contains invalid UTF-8 data when parsing'
'a protocol buffer: surrogates not allowed. Use'
diff --git a/python/google/protobuf/internal/message_test.py b/python/google/protobuf/internal/message_test.py
index 1a865398..ccb9221c 100755
--- a/python/google/protobuf/internal/message_test.py
+++ b/python/google/protobuf/internal/message_test.py
@@ -81,6 +81,7 @@ from google.protobuf.internal import testing_refleaks
from google.protobuf import message
from google.protobuf.internal import _parameterized
+UCS2_MAXUNICODE = 65535
if six.PY3:
long = int
@@ -2209,7 +2210,9 @@ class Proto3Test(BaseTestCase):
msg.map_int32_int32[35] = 64
msg.map_string_foreign_message['foo'].c = 5
self.assertEqual(0, len(msg.FindInitializationErrors()))
-
+
+ @unittest.skipIf(sys.maxunicode == UCS2_MAXUNICODE,
+ 'Skip for ucs2')
def testStrictUtf8Check(self):
# Test u'\ud801' is rejected at parser in both python2 and python3.
serialized = (b'r\x03\xed\xa0\x81')
@@ -2259,7 +2262,8 @@ class Proto3Test(BaseTestCase):
unittest_proto3_arena_pb2.TestAllTypes(
optional_string=u'\ud801\ud801')
- @unittest.skipIf(six.PY3, 'Surrogates are rejected at setters in Python3')
+ @unittest.skipIf(six.PY3 or sys.maxunicode == UCS2_MAXUNICODE,
+ 'Surrogates are rejected at setters in Python3')
def testSurrogatesInPython2(self):
# Test optional_string=u'\ud801\udc01'.
# surrogate pair is acceptable in python2.