From 7f372559cc31f15e8c30694ee7329bc0082454fe Mon Sep 17 00:00:00 2001 From: "xiaofeng@google.com" Date: Mon, 25 Feb 2013 10:39:39 +0000 Subject: Down-integrate from internal branch --- python/google/protobuf/internal/text_format_test.py | 20 ++++++++++++++++++++ python/google/protobuf/text_format.py | 9 +++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'python') diff --git a/python/google/protobuf/internal/text_format_test.py b/python/google/protobuf/internal/text_format_test.py index 23b50eb5..4b1b4f59 100755 --- a/python/google/protobuf/internal/text_format_test.py +++ b/python/google/protobuf/internal/text_format_test.py @@ -429,6 +429,26 @@ class TextFormatTest(unittest.TestCase): ('1:17 : Couldn\'t parse integer: bork'), text_format.Merge, text, message) + def testMergeStringFieldUnescape(self): + message = unittest_pb2.TestAllTypes() + text = r'''repeated_string: "\xf\x62" + repeated_string: "\\xf\\x62" + repeated_string: "\\\xf\\\x62" + repeated_string: "\\\\xf\\\\x62" + repeated_string: "\\\\\xf\\\\\x62" + repeated_string: "\x5cx20"''' + text_format.Merge(text, message) + + SLASH = '\\' + self.assertEqual('\x0fb', message.repeated_string[0]) + self.assertEqual(SLASH + 'xf' + SLASH + 'x62', message.repeated_string[1]) + self.assertEqual(SLASH + '\x0f' + SLASH + 'b', message.repeated_string[2]) + self.assertEqual(SLASH + SLASH + 'xf' + SLASH + SLASH + 'x62', + message.repeated_string[3]) + self.assertEqual(SLASH + SLASH + '\x0f' + SLASH + SLASH + 'b', + message.repeated_string[4]) + self.assertEqual(SLASH + 'x20', message.repeated_string[5]) + def assertRaisesWithMessage(self, e_class, e, func, *args, **kwargs): """Same as assertRaises, but also compares the exception message.""" if hasattr(e_class, '__name__'): diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index 0714c39d..24dd07f2 100755 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -608,12 
+608,17 @@ def _CEscape(text, as_utf8): return "".join([escape(c) for c in text]) -_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-fA-F])') +_CUNESCAPE_HEX = re.compile(r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])') def _CUnescape(text): def ReplaceHex(m): - return chr(int(m.group(0)[2:], 16)) + # Only replace the match if the number of leading backslashes is odd, i.e. + # the backslash itself is not escaped. + if len(m.group(1)) & 1: + return m.group(1) + 'x0' + m.group(2) + return m.group(0) + # This is required because the 'string_escape' encoding doesn't # allow single-digit hex escapes (like '\xf'). result = _CUNESCAPE_HEX.sub(ReplaceHex, text) -- cgit v1.2.3