diff options
-rw-r--r-- | conformance/failure_list_csharp.txt | 4 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf.Test/project.json | 2 | ||||
-rw-r--r-- | jenkins/docker/Dockerfile | 2 | ||||
-rw-r--r-- | js/binary/decoder.js | 37 | ||||
-rw-r--r-- | js/binary/decoder_test.js | 23 | ||||
-rw-r--r-- | js/binary/encoder.js | 29 | ||||
-rw-r--r-- | js/package.json | 2 | ||||
-rw-r--r-- | python/README.md | 2 | ||||
-rw-r--r-- | ruby/ext/google/protobuf_c/message.c | 44 | ||||
-rw-r--r-- | ruby/tests/basic.rb | 10 | ||||
-rwxr-xr-x | src/google/protobuf/compiler/js/js_generator.cc | 2 | ||||
-rw-r--r-- | src/google/protobuf/message_lite.cc | 1 |
12 files changed, 135 insertions, 23 deletions
diff --git a/conformance/failure_list_csharp.txt b/conformance/failure_list_csharp.txt index 84f46fab..a43519cd 100644 --- a/conformance/failure_list_csharp.txt +++ b/conformance/failure_list_csharp.txt @@ -1,2 +1,6 @@ Recommended.JsonInput.FieldNameWithDoubleUnderscores.JsonOutput +Recommended.JsonInput.FieldNameWithDoubleUnderscores.ProtobufOutput +Recommended.JsonInput.FieldNameWithDoubleUnderscores.Validator +Required.JsonInput.FieldNameInLowerCamelCase.Validator Required.JsonInput.FieldNameInSnakeCase.JsonOutput +Required.JsonInput.FieldNameInSnakeCase.ProtobufOutput diff --git a/csharp/src/Google.Protobuf.Test/project.json b/csharp/src/Google.Protobuf.Test/project.json index 87b732c9..3a73bf7a 100644 --- a/csharp/src/Google.Protobuf.Test/project.json +++ b/csharp/src/Google.Protobuf.Test/project.json @@ -21,7 +21,7 @@ "dependencies": { "Google.Protobuf": { "target": "project" }, "NUnit": "3.4.0", - "dotnet-test-nunit": "3.4.0-alpha-2", + "dotnet-test-nunit": "3.4.0-alpha-2" }, "testRunner": "nunit", diff --git a/jenkins/docker/Dockerfile b/jenkins/docker/Dockerfile index 919a2b61..3f4725b2 100644 --- a/jenkins/docker/Dockerfile +++ b/jenkins/docker/Dockerfile @@ -132,7 +132,7 @@ ENV MVN mvn --batch-mode RUN cd /tmp && \ git clone https://github.com/google/protobuf.git && \ cd protobuf && \ - git reset 734930f9197b7bc97c3c794c7a949fee2a08c280 && \ + git reset bf379715c93b581eeb078cec1f0dd8a7d79df431 && \ ./autogen.sh && \ ./configure && \ make -j4 && \ diff --git a/js/binary/decoder.js b/js/binary/decoder.js index 41094a36..040cf715 100644 --- a/js/binary/decoder.js +++ b/js/binary/decoder.js @@ -895,11 +895,9 @@ jspb.BinaryDecoder.prototype.readEnum = function() { /** * Reads and parses a UTF-8 encoded unicode string from the stream. - * The code is inspired by maps.vectortown.parse.StreamedDataViewReader, with - * the exception that the implementation here does not get confused if it - * encounters characters longer than three bytes. These characters are ignored - * though, as they are extremely rare: three UTF-8 bytes cover virtually all - * characters in common use (http://en.wikipedia.org/wiki/UTF-8). + * The code is inspired by maps.vectortown.parse.StreamedDataViewReader. + * Supports codepoints from U+0000 up to U+10FFFF. + * (http://en.wikipedia.org/wiki/UTF-8). * @param {number} length The length of the string to read. * @return {string} The decoded string. */ @@ -907,30 +905,45 @@ jspb.BinaryDecoder.prototype.readString = function(length) { var bytes = this.bytes_; var cursor = this.cursor_; var end = cursor + length; - var chars = []; + var codeUnits = []; while (cursor < end) { var c = bytes[cursor++]; if (c < 128) { // Regular 7-bit ASCII. - chars.push(c); + codeUnits.push(c); } else if (c < 192) { // UTF-8 continuation mark. We are out of sync. This // might happen if we attempted to read a character - // with more than three bytes. + // with more than four bytes. continue; } else if (c < 224) { // UTF-8 with two bytes. var c2 = bytes[cursor++]; - chars.push(((c & 31) << 6) | (c2 & 63)); + codeUnits.push(((c & 31) << 6) | (c2 & 63)); } else if (c < 240) { // UTF-8 with three bytes. var c2 = bytes[cursor++]; var c3 = bytes[cursor++]; - chars.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63)); + codeUnits.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63)); + } else if (c < 248) { // UTF-8 with 4 bytes. + var c2 = bytes[cursor++]; + var c3 = bytes[cursor++]; + var c4 = bytes[cursor++]; + // Characters written on 4 bytes have 21 bits for a codepoint. + // We can't fit that on 16bit characters, so we use surrogates. + var codepoint = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63); + // Surrogates formula from wikipedia. + // 1. Subtract 0x10000 from codepoint + codepoint -= 0x10000; + // 2. Split this into the high 10-bit value and the low 10-bit value + // 3. Add 0xD800 to the high value to form the high surrogate + // 4. Add 0xDC00 to the low value to form the low surrogate: + var low = (codepoint & 1023) + 0xDC00; + var high = ((codepoint >> 10) & 1023) + 0xD800; + codeUnits.push(high, low) } } - // String.fromCharCode.apply is faster than manually appending characters on // Chrome 25+, and generates no additional cons string garbage. - var result = String.fromCharCode.apply(null, chars); + var result = String.fromCharCode.apply(null, codeUnits); this.cursor_ = cursor; return result; }; diff --git a/js/binary/decoder_test.js b/js/binary/decoder_test.js index ac312648..9f947b99 100644 --- a/js/binary/decoder_test.js +++ b/js/binary/decoder_test.js @@ -209,7 +209,30 @@ describe('binaryDecoderTest', function() { assertEquals(hashC, decoder.readFixedHash64()); assertEquals(hashD, decoder.readFixedHash64()); }); + + /** + * Test encoding and decoding utf-8. + */ + it('testUtf8', function() { + var encoder = new jspb.BinaryEncoder(); + var ascii = "ASCII should work in 3, 2, 1..." + var utf8_two_bytes = "©"; + var utf8_three_bytes = "❄"; + var utf8_four_bytes = "😁"; + + encoder.writeString(ascii); + encoder.writeString(utf8_two_bytes); + encoder.writeString(utf8_three_bytes); + encoder.writeString(utf8_four_bytes); + + var decoder = jspb.BinaryDecoder.alloc(encoder.end()); + + assertEquals(ascii, decoder.readString(ascii.length)); + assertEquals(utf8_two_bytes, decoder.readString(utf8_two_bytes.length)); + assertEquals(utf8_three_bytes, decoder.readString(utf8_three_bytes.length)); + assertEquals(utf8_four_bytes, decoder.readString(utf8_four_bytes.length)); + }); /** * Verifies that misuse of the decoder class triggers assertions. diff --git a/js/binary/encoder.js b/js/binary/encoder.js index c9b0c2ae..d952d714 100644 --- a/js/binary/encoder.js +++ b/js/binary/encoder.js @@ -409,19 +409,36 @@ jspb.BinaryEncoder.prototype.writeFixedHash64 = function(hash) { */ jspb.BinaryEncoder.prototype.writeString = function(value) { var oldLength = this.buffer_.length; - - // UTF16 to UTF8 conversion loop swiped from goog.crypt.stringToUtf8ByteArray. + for (var i = 0; i < value.length; i++) { + var c = value.charCodeAt(i); + if (c < 128) { this.buffer_.push(c); } else if (c < 2048) { this.buffer_.push((c >> 6) | 192); this.buffer_.push((c & 63) | 128); - } else { - this.buffer_.push((c >> 12) | 224); - this.buffer_.push(((c >> 6) & 63) | 128); - this.buffer_.push((c & 63) | 128); + } else if (c < 65536) { + // Look for surrogates + if (c >= 0xD800 && c <= 0xDBFF && i + 1 < value.length) { + var second = value.charCodeAt(i + 1); + if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + c = (c - 0xD800) * 0x400 + second - 0xDC00 + 0x10000; + + this.buffer_.push((c >> 18) | 240); + this.buffer_.push(((c >> 12) & 63 ) | 128); + this.buffer_.push(((c >> 6) & 63) | 128); + this.buffer_.push((c & 63) | 128); + i++; + } + } + else { + this.buffer_.push((c >> 12) | 224); + this.buffer_.push(((c >> 6) & 63) | 128); + this.buffer_.push((c & 63) | 128); + } } } diff --git a/js/package.json b/js/package.json index ee4181ff..d3c77eb6 100644 --- a/js/package.json +++ b/js/package.json @@ -22,5 +22,5 @@ "url": "https://github.com/google/protobuf/tree/master/js" }, "author": "Google Protocol Buffers Team", - "license": "Apache-2.0" + "license" : "BSD-3-Clause" } diff --git a/python/README.md b/python/README.md index 57acfd94..8f3db785 100644 --- a/python/README.md +++ b/python/README.md @@ -92,7 +92,7 @@ Installation error: "sem_init: Resource temporarily unavailable". This appears to be a bug either in Cygwin or in Python: http://www.cygwin.com/ml/cygwin/2005-07/msg01378.html - We do not know if or when it might me fixed. We also do not know + We do not know if or when it might be fixed. We also do not know how likely it is that this bug will affect users in practice. 5) Install: diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index e16250f3..837a974b 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -178,6 +178,45 @@ VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) { } } +VALUE Message_respond_to_missing(int argc, VALUE* argv, VALUE _self) { + MessageHeader* self; + VALUE method_name, method_str; + char* name; + size_t name_len; + bool setter; + const upb_oneofdef* o; + const upb_fielddef* f; + + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + if (argc < 1) { + rb_raise(rb_eArgError, "Expected method name as first argument."); + } + method_name = argv[0]; + if (!SYMBOL_P(method_name)) { + rb_raise(rb_eArgError, "Expected symbol as method name."); + } + method_str = rb_id2str(SYM2ID(method_name)); + name = RSTRING_PTR(method_str); + name_len = RSTRING_LEN(method_str); + setter = false; + + // Setters have names that end in '='. + if (name[name_len - 1] == '=') { + setter = true; + name_len--; + } + + // See if this name corresponds to either a oneof or field in this message. + if (!upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len, &f, + &o)) { + return rb_call_super(argc, argv); + } + if (o != NULL) { + return setter ? Qfalse : Qtrue; + } + return Qtrue; +} + int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { MessageHeader* self; VALUE method_str; @@ -305,6 +344,9 @@ VALUE Message_deep_copy(VALUE _self) { VALUE Message_eq(VALUE _self, VALUE _other) { MessageHeader* self; MessageHeader* other; + if (TYPE(_self) != TYPE(_other)) { + return Qfalse; + } TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); TypedData_Get_Struct(_other, MessageHeader, &Message_type, other); @@ -459,6 +501,8 @@ VALUE build_class_from_descriptor(Descriptor* desc) { rb_define_method(klass, "method_missing", Message_method_missing, -1); + rb_define_method(klass, "respond_to_missing?", + Message_respond_to_missing, -1); rb_define_method(klass, "initialize", Message_initialize, -1); rb_define_method(klass, "dup", Message_dup, 0); // Also define #clone so that we don't inherit Object#clone. diff --git a/ruby/tests/basic.rb b/ruby/tests/basic.rb index 989a047e..967ff81f 100644 --- a/ruby/tests/basic.rb +++ b/ruby/tests/basic.rb @@ -1181,5 +1181,15 @@ module BasicTest m2 = MapMessage.decode_json(MapMessage.encode_json(m)) assert m == m2 end + + def test_comparison_with_arbitrary_object + assert_false MapMessage.new == nil + end + + def test_respond_to + msg = MapMessage.new + assert msg.respond_to?(:map_string_int32) + assert_false msg.respond_to?(:bacon) + end end end diff --git a/src/google/protobuf/compiler/js/js_generator.cc b/src/google/protobuf/compiler/js/js_generator.cc index 58597b4c..47ecc537 100755 --- a/src/google/protobuf/compiler/js/js_generator.cc +++ b/src/google/protobuf/compiler/js/js_generator.cc @@ -28,7 +28,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include "google/protobuf/compiler/js/js_generator.h" +#include <google/protobuf/compiler/js/js_generator.h> #include <assert.h> #include <algorithm> diff --git a/src/google/protobuf/message_lite.cc b/src/google/protobuf/message_lite.cc index 9d6da264..1c6786db 100644 --- a/src/google/protobuf/message_lite.cc +++ b/src/google/protobuf/message_lite.cc @@ -33,6 +33,7 @@ // Based on original Protocol Buffers design by // Sanjay Ghemawat, Jeff Dean, and others. +#include <climits> #include <google/protobuf/arena.h> #include <google/protobuf/generated_message_util.h> #include <google/protobuf/message_lite.h> |