diff options
author | Paul Yang <TeBoring@users.noreply.github.com> | 2016-09-15 11:09:01 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-09-15 11:09:01 -0700 |
commit | e0e54661f76183684dca66694967a60cbb10f04e (patch) | |
tree | 0a2a20f984705fdf6ef13de8829901b80f88efb5 /php/ext/google/protobuf/utf8.c | |
parent | 86fcd879b38505446799b2f2a2929415ddad620a (diff) | |
download | protobuf-e0e54661f76183684dca66694967a60cbb10f04e.tar.gz protobuf-e0e54661f76183684dca66694967a60cbb10f04e.tar.bz2 protobuf-e0e54661f76183684dca66694967a60cbb10f04e.zip |
Check in php implementation. (#2052)
This pull request includes two implementation: C extension and PHP
package. Both implementations support encode/decode of singular,
repeated and map fields.
Diffstat (limited to 'php/ext/google/protobuf/utf8.c')
-rw-r--r-- | php/ext/google/protobuf/utf8.c | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/php/ext/google/protobuf/utf8.c b/php/ext/google/protobuf/utf8.c new file mode 100644 index 00000000..a1541636 --- /dev/null +++ b/php/ext/google/protobuf/utf8.c @@ -0,0 +1,68 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <stdbool.h> +#include <stdint.h> + +#include "utf8.h" + +static const uint8_t utf8_offset[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +bool is_structurally_valid_utf8(const char* buf, int len) { + int i, j; + uint8_t offset; + + i = 0; + while (i < len) { + offset = utf8_offset[(uint8_t)buf[i]]; + if (offset == 0 || i + offset > len) { + return false; + } + for (j = i + 1; j < i + offset; j++) { + if (buf[j] & 0xc0 != 0x80) { + return false; + } + } + i += offset; + } + return i == len; +} |