From fd1a3ff11d5854c34ba66c63598cdc5fd234e399 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 6 Jan 2015 15:44:09 -0800 Subject: Support for maps in the MRI C Ruby extension. This adds the Map container and support for parsing and serializing maps in the protobuf wire format (as defined by the C++ implementation, with MapEntry submessages in a repeated field). JSON map serialization/parsing are not yet supported as these will require some changes to upb as well. --- ruby/ext/google/protobuf_c/defs.c | 110 +++- ruby/ext/google/protobuf_c/encode_decode.c | 386 +++++++++--- ruby/ext/google/protobuf_c/extconf.rb | 2 +- ruby/ext/google/protobuf_c/map.c | 883 ++++++++++++++++++++++++++++ ruby/ext/google/protobuf_c/message.c | 15 +- ruby/ext/google/protobuf_c/protobuf.c | 1 + ruby/ext/google/protobuf_c/protobuf.h | 69 ++- ruby/ext/google/protobuf_c/repeated_field.c | 6 +- ruby/ext/google/protobuf_c/storage.c | 173 ++++-- ruby/ext/google/protobuf_c/upb.c | 75 ++- ruby/ext/google/protobuf_c/upb.h | 58 +- 11 files changed, 1617 insertions(+), 161 deletions(-) create mode 100644 ruby/ext/google/protobuf_c/map.c (limited to 'ruby/ext') diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index bb6f10e1..5c94a74a 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -923,6 +923,7 @@ DEFINE_CLASS(MessageBuilderContext, void MessageBuilderContext_mark(void* _self) { MessageBuilderContext* self = _self; rb_gc_mark(self->descriptor); + rb_gc_mark(self->builder); } void MessageBuilderContext_free(void* _self) { @@ -935,6 +936,7 @@ VALUE MessageBuilderContext_alloc(VALUE klass) { VALUE ret = TypedData_Wrap_Struct( klass, &_MessageBuilderContext_type, self); self->descriptor = Qnil; + self->builder = Qnil; return ret; } @@ -943,24 +945,29 @@ void MessageBuilderContext_register(VALUE module) { module, "MessageBuilderContext", rb_cObject); rb_define_alloc_func(klass, MessageBuilderContext_alloc); rb_define_method(klass, "initialize", - MessageBuilderContext_initialize, 1); + MessageBuilderContext_initialize, 2); rb_define_method(klass, "optional", MessageBuilderContext_optional, -1); rb_define_method(klass, "required", MessageBuilderContext_required, -1); rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1); + rb_define_method(klass, "map", MessageBuilderContext_map, -1); cMessageBuilderContext = klass; rb_gc_register_address(&cMessageBuilderContext); } /* * call-seq: - * MessageBuilderContext.new(desc) => context + * MessageBuilderContext.new(desc, builder) => context * - * Create a new builder context around the given message descriptor. This class - * is intended to serve as a DSL context to be used with #instance_eval. + * Create a new message builder context around the given message descriptor and + * builder context. This class is intended to serve as a DSL context to be used + * with #instance_eval. */ -VALUE MessageBuilderContext_initialize(VALUE _self, VALUE msgdef) { +VALUE MessageBuilderContext_initialize(VALUE _self, + VALUE msgdef, + VALUE builder) { DEFINE_SELF(MessageBuilderContext, self, _self); self->descriptor = msgdef; + self->builder = builder; return Qnil; } @@ -1065,6 +1072,96 @@ VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) { name, type, number, type_class); } +/* + * call-seq: + * MessageBuilderContext.map(name, key_type, value_type, number, + * value_type_class = nil) + * + * Defines a new map field on this message type with the given key and value types, tag + * number, and type class (for message and enum value types). The key type must + * be :int32/:uint32/:int64/:uint64, :bool, or :string. The value type type must + * be a Ruby symbol (as accepted by FieldDescriptor#type=) and the type_class + * must be a string, if present (as accepted by FieldDescriptor#submsg_name=). + */ +VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) { + DEFINE_SELF(MessageBuilderContext, self, _self); + + if (argc < 4) { + rb_raise(rb_eArgError, "Expected at least 4 arguments."); + } + VALUE name = argv[0]; + VALUE key_type = argv[1]; + VALUE value_type = argv[2]; + VALUE number = argv[3]; + VALUE type_class = (argc > 4) ? argv[4] : Qnil; + + // Validate the key type. We can't accept enums, messages, or floats/doubles + // as map keys. (We exclude these explicitly, and the field-descriptor setter + // below then ensures that the type is one of the remaining valid options.) + if (SYM2ID(key_type) == rb_intern("float") || + SYM2ID(key_type) == rb_intern("double") || + SYM2ID(key_type) == rb_intern("enum") || + SYM2ID(key_type) == rb_intern("message")) { + rb_raise(rb_eArgError, + "Cannot add a map field with a float, double, enum, or message " + "type."); + } + + // Create a new message descriptor for the map entry message, and create a + // repeated submessage field here with that type. + VALUE mapentry_desc = rb_class_new_instance(0, NULL, cDescriptor); + VALUE mapentry_desc_name = rb_funcall(self->descriptor, rb_intern("name"), 0); + mapentry_desc_name = rb_str_cat2(mapentry_desc_name, "_MapEntry_"); + mapentry_desc_name = rb_str_cat2(mapentry_desc_name, + rb_id2name(SYM2ID(name))); + Descriptor_name_set(mapentry_desc, mapentry_desc_name); + + // The 'mapentry' attribute has no Ruby setter because we do not want the user + // attempting to DIY the setup below; we want to ensure that the fields are + // correct. So we reach into the msgdef here to set the bit manually. + Descriptor* mapentry_desc_self = ruby_to_Descriptor(mapentry_desc); + upb_msgdef_setmapentry((upb_msgdef*)mapentry_desc_self->msgdef, true); + + // optional key = 1; + VALUE key_field = rb_class_new_instance(0, NULL, cFieldDescriptor); + FieldDescriptor_name_set(key_field, rb_str_new2("key")); + FieldDescriptor_label_set(key_field, ID2SYM(rb_intern("optional"))); + FieldDescriptor_number_set(key_field, INT2NUM(1)); + FieldDescriptor_type_set(key_field, key_type); + Descriptor_add_field(mapentry_desc, key_field); + + // optional value = 2; + VALUE value_field = rb_class_new_instance(0, NULL, cFieldDescriptor); + FieldDescriptor_name_set(value_field, rb_str_new2("value")); + FieldDescriptor_label_set(value_field, ID2SYM(rb_intern("optional"))); + FieldDescriptor_number_set(value_field, INT2NUM(2)); + FieldDescriptor_type_set(value_field, value_type); + if (type_class != Qnil) { + VALUE submsg_name = rb_str_new2("."); // prepend '.' to make name absolute. + submsg_name = rb_str_append(submsg_name, type_class); + FieldDescriptor_submsg_name_set(value_field, submsg_name); + } + Descriptor_add_field(mapentry_desc, value_field); + + // Add the map-entry message type to the current builder, and use the type to + // create the map field itself. + Builder* builder_self = ruby_to_Builder(self->builder); + rb_ary_push(builder_self->pending_list, mapentry_desc); + + VALUE map_field = rb_class_new_instance(0, NULL, cFieldDescriptor); + VALUE name_str = rb_str_new2(rb_id2name(SYM2ID(name))); + FieldDescriptor_name_set(map_field, name_str); + FieldDescriptor_number_set(map_field, number); + FieldDescriptor_label_set(map_field, ID2SYM(rb_intern("repeated"))); + FieldDescriptor_type_set(map_field, ID2SYM(rb_intern("message"))); + VALUE submsg_name = rb_str_new2("."); // prepend '.' to make name absolute. + submsg_name = rb_str_append(submsg_name, mapentry_desc_name); + FieldDescriptor_submsg_name_set(map_field, submsg_name); + Descriptor_add_field(self->descriptor, map_field); + + return Qnil; +} + // ----------------------------------------------------------------------------- // EnumBuilderContext. // ----------------------------------------------------------------------------- @@ -1190,7 +1287,8 @@ void Builder_register(VALUE module) { VALUE Builder_add_message(VALUE _self, VALUE name) { DEFINE_SELF(Builder, self, _self); VALUE msgdef = rb_class_new_instance(0, NULL, cDescriptor); - VALUE ctx = rb_class_new_instance(1, &msgdef, cMessageBuilderContext); + VALUE args[2] = { msgdef, _self }; + VALUE ctx = rb_class_new_instance(2, args, cMessageBuilderContext); VALUE block = rb_block_proc(); rb_funcall(msgdef, rb_intern("name="), 1, name); rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 8aba3c9e..6263edcc 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -174,9 +174,222 @@ static void *submsg_handler(void *closure, const void *hd) { return submsg; } +// Handler data for startmap/endmap handlers. +typedef struct { + size_t ofs; + const upb_fielddef* key_field; + const upb_fielddef* value_field; + VALUE value_field_typeclass; +} map_handlerdata_t; + +// Temporary frame for map parsing: at the beginning of a map entry message, a +// submsg handler allocates a frame to hold (i) a reference to the Map object +// into which this message will be inserted and (ii) storage slots to +// temporarily hold the key and value for this map entry until the end of the +// submessage. When the submessage ends, another handler is called to insert the +// value into the map. +typedef struct { + VALUE map; + char key_storage[NATIVE_SLOT_MAX_SIZE]; + char value_storage[NATIVE_SLOT_MAX_SIZE]; +} map_parse_frame_t; + +// Handler to begin a sequence of map entries: simple no-op that exists only to +// set context for the map entry handlers. +static void *startmap_handler(void *closure, const void *hd) { + return closure; +} + +// Handler to begin a map entry: allocates a temporary frame. This is the +// 'startsubmsg' handler on the msgdef that contains the map field. +static void *startmapentry_handler(void *closure, const void *hd) { + MessageHeader* msg = closure; + const map_handlerdata_t* mapdata = hd; + VALUE map_rb = DEREF(Message_data(msg), mapdata->ofs, VALUE); + + map_parse_frame_t* frame = ALLOC(map_parse_frame_t); + frame->map = map_rb; + + native_slot_init(upb_fielddef_type(mapdata->key_field), + &frame->key_storage); + native_slot_init(upb_fielddef_type(mapdata->value_field), + &frame->value_storage); + + return frame; +} + +// Handler to end a map entry: inserts the value defined during the message into +// the map. This is the 'endmsg' handler on the map entry msgdef. +static bool endmap_handler(void *closure, const void *hd, upb_status* s) { + map_parse_frame_t* frame = closure; + const map_handlerdata_t* mapdata = hd; + + VALUE key = native_slot_get( + upb_fielddef_type(mapdata->key_field), Qnil, + &frame->key_storage); + VALUE value = native_slot_get( + upb_fielddef_type(mapdata->value_field), mapdata->value_field_typeclass, + &frame->value_storage); + + Map_index_set(frame->map, key, value); + free(frame); + + return true; +} + +// Allocates a new map_handlerdata_t given the map entry message definition. If +// the offset of the field within the parent message is also given, that is +// added to the handler data as well. Note that this is called *twice* per map +// field: once in the parent message handler setup when setting the startsubmsg +// handler and once in the map entry message handler setup when setting the +// key/value and endmsg handlers. The reason is that there is no easy way to +// pass the handlerdata down to the sub-message handler setup. +static map_handlerdata_t* new_map_handlerdata( + size_t ofs, + const upb_msgdef* mapentry_def) { + + map_handlerdata_t* hd = ALLOC(map_handlerdata_t); + hd->ofs = ofs; + hd->key_field = upb_msgdef_itof(mapentry_def, 1); + assert(hd->key_field != NULL); + hd->value_field = upb_msgdef_itof(mapentry_def, 2); + assert(hd->value_field != NULL); + hd->value_field_typeclass = field_type_class(hd->value_field); + + return hd; +} + +// Set up handlers for a repeated field. +static void add_handlers_for_repeated_field(upb_handlers *h, + const upb_fielddef *f, + size_t offset) { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset)); + upb_handlers_setstartseq(h, f, startseq_handler, &attr); + upb_handlerattr_uninit(&attr); + + switch (upb_fielddef_type(f)) { + +#define SET_HANDLER(utype, ltype) \ + case utype: \ + upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \ + break; + + SET_HANDLER(UPB_TYPE_BOOL, bool); + SET_HANDLER(UPB_TYPE_INT32, int32); + SET_HANDLER(UPB_TYPE_UINT32, uint32); + SET_HANDLER(UPB_TYPE_ENUM, int32); + SET_HANDLER(UPB_TYPE_FLOAT, float); + SET_HANDLER(UPB_TYPE_INT64, int64); + SET_HANDLER(UPB_TYPE_UINT64, uint64); + SET_HANDLER(UPB_TYPE_DOUBLE, double); + +#undef SET_HANDLER + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; + upb_handlers_setstartstr(h, f, is_bytes ? + appendbytes_handler : appendstr_handler, + NULL); + upb_handlers_setstring(h, f, stringdata_handler, NULL); + } + case UPB_TYPE_MESSAGE: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f)); + upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + } +} + +// Set up handlers for a singular field. +static void add_handlers_for_singular_field(upb_handlers *h, + const upb_fielddef *f, + size_t offset) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_BOOL: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_ENUM: + case UPB_TYPE_FLOAT: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + case UPB_TYPE_DOUBLE: + // The shim writes directly at the given offset (instead of using + // DEREF()) so we need to add the msg overhead. + upb_shim_set(h, f, offset + sizeof(MessageHeader), -1); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset)); + upb_handlers_setstartstr(h, f, + is_bytes ? bytes_handler : str_handler, + &attr); + upb_handlers_setstring(h, f, stringdata_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + case UPB_TYPE_MESSAGE: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f)); + upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + } +} + +// Adds handlers to a map field. +static void add_handlers_for_mapfield(upb_handlers* h, + const upb_fielddef* fielddef, + size_t offset) { + const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef); + map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef); + upb_handlers_addcleanup(h, hd, free); + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, hd); + upb_handlers_setstartseq(h, fielddef, startmap_handler, &attr); + upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr); + upb_handlerattr_uninit(&attr); +} + +// Adds handlers to a map-entry msgdef. +static void add_handlers_for_mapentry(const upb_msgdef* msgdef, + upb_handlers* h) { + map_handlerdata_t* hd = new_map_handlerdata(0, msgdef); + upb_handlers_addcleanup(h, hd, free); + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, hd); + upb_handlers_setendmsg(h, endmap_handler, &attr); + + add_handlers_for_singular_field( + h, hd->key_field, + // Convert the offset into map_parse_frame_t to an offset understood by the + // singular field handlers, so that we don't have to use special + // map-key/value-specific handlers. The ordinary singular field handlers expect + // a Message* and assume offset is relative to the data section at the end, so + // we compensate for that addition. + offsetof(map_parse_frame_t, key_storage) - sizeof(MessageHeader)); + add_handlers_for_singular_field( + h, hd->value_field, + offsetof(map_parse_frame_t, value_storage) - sizeof(MessageHeader)); +} + static void add_handlers_for_message(const void *closure, upb_handlers *h) { - Descriptor* desc = ruby_to_Descriptor( - get_def_obj((void*)upb_handlers_msgdef(h))); + const upb_msgdef* msgdef = upb_handlers_msgdef(h); + Descriptor* desc = ruby_to_Descriptor(get_def_obj((void*)msgdef)); + + // If this is a mapentry message type, set up a special set of handlers and + // bail out of the normal (user-defined) message type handling. + if (upb_msgdef_mapentry(msgdef)) { + add_handlers_for_mapentry(msgdef, h); + return; + } + // Ensure layout exists. We may be invoked to create handlers for a given // message if we are included as a submsg of another message type before our // class is actually built, so to work around this, we just create the layout @@ -193,80 +406,12 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) { const upb_fielddef *f = upb_msg_iter_field(&i); size_t offset = desc->layout->offsets[upb_fielddef_index(f)]; - if (upb_fielddef_isseq(f)) { - upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset)); - upb_handlers_setstartseq(h, f, startseq_handler, &attr); - upb_handlerattr_uninit(&attr); - - switch (upb_fielddef_type(f)) { - -#define SET_HANDLER(utype, ltype) \ - case utype: \ - upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \ - break; - - SET_HANDLER(UPB_TYPE_BOOL, bool); - SET_HANDLER(UPB_TYPE_INT32, int32); - SET_HANDLER(UPB_TYPE_UINT32, uint32); - SET_HANDLER(UPB_TYPE_ENUM, int32); - SET_HANDLER(UPB_TYPE_FLOAT, float); - SET_HANDLER(UPB_TYPE_INT64, int64); - SET_HANDLER(UPB_TYPE_UINT64, uint64); - SET_HANDLER(UPB_TYPE_DOUBLE, double); - -#undef SET_HANDLER - - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; - upb_handlers_setstartstr(h, f, is_bytes ? - appendbytes_handler : appendstr_handler, - NULL); - upb_handlers_setstring(h, f, stringdata_handler, NULL); - } - case UPB_TYPE_MESSAGE: { - upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f)); - upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr); - upb_handlerattr_uninit(&attr); - break; - } - } - } - - switch (upb_fielddef_type(f)) { - case UPB_TYPE_BOOL: - case UPB_TYPE_INT32: - case UPB_TYPE_UINT32: - case UPB_TYPE_ENUM: - case UPB_TYPE_FLOAT: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT64: - case UPB_TYPE_DOUBLE: - // The shim writes directly at the given offset (instead of using - // DEREF()) so we need to add the msg overhead. - upb_shim_set(h, f, offset + sizeof(MessageHeader), -1); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; - upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset)); - upb_handlers_setstartstr(h, f, - is_bytes ? bytes_handler : str_handler, - &attr); - upb_handlers_setstring(h, f, stringdata_handler, &attr); - upb_handlerattr_uninit(&attr); - break; - } - case UPB_TYPE_MESSAGE: { - upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f)); - upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr); - upb_handlerattr_uninit(&attr); - break; - } + if (is_map_field(f)) { + add_handlers_for_mapfield(h, f, offset); + } else if (upb_fielddef_isseq(f)) { + add_handlers_for_repeated_field(h, f, offset); + } else { + add_handlers_for_singular_field(h, f, offset); } } } @@ -558,6 +703,88 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink, upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ)); } +static void put_ruby_value(VALUE value, + const upb_fielddef *f, + VALUE type_class, + int depth, + upb_sink *sink) { + upb_selector_t sel = 0; + if (upb_fielddef_isprimitive(f)) { + sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); + } + + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + upb_sink_putint32(sink, sel, NUM2INT(value)); + break; + case UPB_TYPE_INT64: + upb_sink_putint64(sink, sel, NUM2LL(value)); + break; + case UPB_TYPE_UINT32: + upb_sink_putuint32(sink, sel, NUM2UINT(value)); + break; + case UPB_TYPE_UINT64: + upb_sink_putuint64(sink, sel, NUM2ULL(value)); + break; + case UPB_TYPE_FLOAT: + upb_sink_putfloat(sink, sel, NUM2DBL(value)); + break; + case UPB_TYPE_DOUBLE: + upb_sink_putdouble(sink, sel, NUM2DBL(value)); + break; + case UPB_TYPE_ENUM: { + if (TYPE(value) == T_SYMBOL) { + value = rb_funcall(type_class, rb_intern("resolve"), 1, value); + } + upb_sink_putint32(sink, sel, NUM2INT(value)); + break; + } + case UPB_TYPE_BOOL: + upb_sink_putbool(sink, sel, value == Qtrue); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + putstr(value, f, sink); + break; + case UPB_TYPE_MESSAGE: + putsubmsg(value, f, sink, depth); + } +} + +static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink, + int depth) { + if (map == Qnil) return; + Map* self = ruby_to_Map(map); + + upb_sink subsink; + + upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink); + + assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE); + const upb_fielddef* key_field = map_field_key(f); + const upb_fielddef* value_field = map_field_value(f); + + Map_iter it; + for (Map_begin(map, &it); !Map_done(&it); Map_next(&it)) { + VALUE key = Map_iter_key(&it); + VALUE value = Map_iter_value(&it); + + upb_sink entry_sink; + upb_sink_startsubmsg(&subsink, getsel(f, UPB_HANDLER_STARTSUBMSG), &entry_sink); + upb_sink_startmsg(&entry_sink); + + put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink); + put_ruby_value(value, value_field, self->value_type_class, depth + 1, + &entry_sink); + + upb_status status; + upb_sink_endmsg(&entry_sink, &status); + upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG)); + } + + upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ)); +} + static void putmsg(VALUE msg_rb, const Descriptor* desc, upb_sink *sink, int depth) { upb_sink_startmsg(sink); @@ -580,7 +807,12 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc, upb_fielddef *f = upb_msg_iter_field(&i); uint32_t offset = desc->layout->offsets[upb_fielddef_index(f)]; - if (upb_fielddef_isseq(f)) { + if (is_map_field(f)) { + VALUE map = DEREF(msg_data, offset, VALUE); + if (map != Qnil) { + putmap(map, f, sink, depth); + } + } else if (upb_fielddef_isseq(f)) { VALUE ary = DEREF(msg_data, offset, VALUE); if (ary != Qnil) { putary(ary, f, sink, depth); diff --git a/ruby/ext/google/protobuf_c/extconf.rb b/ruby/ext/google/protobuf_c/extconf.rb index 7cf7bf6a..8d60392c 100644 --- a/ruby/ext/google/protobuf_c/extconf.rb +++ b/ruby/ext/google/protobuf_c/extconf.rb @@ -5,6 +5,6 @@ require 'mkmf' $CFLAGS += " -O3 -std=c99 -Wno-unused-function -DNDEBUG " $objs = ["protobuf.o", "defs.o", "storage.o", "message.o", - "repeated_field.o", "encode_decode.o", "upb.o"] + "repeated_field.o", "map.o", "encode_decode.o", "upb.o"] create_makefile("google/protobuf_c") diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c new file mode 100644 index 00000000..9d5de9cd --- /dev/null +++ b/ruby/ext/google/protobuf_c/map.c @@ -0,0 +1,883 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2014 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "protobuf.h" + +// ----------------------------------------------------------------------------- +// Basic map operations on top of upb's strtable. +// ----------------------------------------------------------------------------- + +// Map values are stored using the native_slot abstraction (as with repeated +// field values), but keys are a bit special. Since we use a strtable, we need +// to store keys as sequences of bytes such that equality of those bytes maps +// one-to-one to equality of keys. We store strings directly (i.e., they map to +// their own bytes) and integers as sequences of either 4 or 8 bytes in +// host-byte-order as either a uint32_t or a uint64_t. + +// Forms a key to use with the underlying strtable from a Ruby key value. |buf| +// must point to TABLE_KEY_BUF_LENGTH bytes of temporary space, used to +// construct a key byte sequence if needed. |out_key| and |out_length| provide +// the resulting key data/length. +#define TABLE_KEY_BUF_LENGTH 8 // sizeof(uint64_t) +static void table_key(Map* self, VALUE key, + char* buf, + const char** out_key, + size_t* out_length) { + switch (self->key_type) { + case UPB_TYPE_BYTES: + case UPB_TYPE_STRING: + // Strings: use string content directly. + Check_Type(key, T_STRING); + native_slot_validate_string_encoding(self->key_type, key); + *out_key = RSTRING_PTR(key); + *out_length = RSTRING_LEN(key); + break; + + case UPB_TYPE_BOOL: + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: { + // Signed numeric types: use an int64 in host-native byte order. + int64_t key_val = 0; + + // Do a range/value check. + switch (self->key_type) { + case UPB_TYPE_BOOL: + if (key != Qtrue && key != Qfalse) { + rb_raise(rb_eTypeError, "Key must be true or false"); + } + key_val = (key == Qtrue) ? 1 : 0; + break; + case UPB_TYPE_INT32: + native_slot_check_int_range_precision(self->key_type, key); + key_val = NUM2INT(key); + break; + case UPB_TYPE_INT64: + native_slot_check_int_range_precision(self->key_type, key); + key_val = NUM2LL(key); + break; + default: + break; + } + + int64_t* int64_key = (int64_t*)buf; + *int64_key = key_val; + *out_key = buf; + *out_length = sizeof(int64_t); + break; + } + + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: { + // Unsigned numeric types: use a uint64 in host-native byte order. + uint64_t key_val = 0; + + // Do a range/value check. + native_slot_check_int_range_precision(self->key_type, key); + switch (self->key_type) { + case UPB_TYPE_UINT32: + key_val = NUM2UINT(key); + break; + case UPB_TYPE_UINT64: + key_val = NUM2ULL(key); + break; + default: + break; + } + + uint64_t* uint64_key = (uint64_t*)buf; + *uint64_key = key_val; + *out_key = buf; + *out_length = sizeof(uint64_t); + break; + } + + default: + // Map constructor should not allow a Map with another key type to be + // constructed. + assert(false); + break; + } +} + +static VALUE table_key_to_ruby(Map* self, const char* buf, size_t length) { + switch (self->key_type) { + case UPB_TYPE_BYTES: + case UPB_TYPE_STRING: { + VALUE ret = rb_str_new(buf, length); + rb_enc_associate(ret, + (self->key_type == UPB_TYPE_BYTES) ? + kRubyString8bitEncoding : kRubyStringUtf8Encoding); + return ret; + } + + case UPB_TYPE_BOOL: + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: { + assert(length == sizeof(int64_t)); + int64_t* int64_key = (int64_t*)buf; + + if (self->key_type == UPB_TYPE_BOOL) { + return *int64_key ? Qtrue : Qfalse; + } else { + return LL2NUM(*int64_key); + } + } + + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: { + assert(length == sizeof(uint64_t)); + uint64_t* uint64_key = (uint64_t*)buf; + return ULL2NUM(*uint64_key); + } + + default: + assert(false); + return Qnil; + } +} + +static upb_ctype_t upb_table_value_type(upb_fieldtype_t value_type) { + switch (value_type) { + case UPB_TYPE_BOOL: + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + case UPB_TYPE_ENUM: + case UPB_TYPE_FLOAT: + case UPB_TYPE_DOUBLE: + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: + return UPB_CTYPE_UINT64; + + default: + assert(false); + return 0; + } +} + +static void* value_memory(upb_value* v) { + return (void*)(&v->val.uint64); +} + +// ----------------------------------------------------------------------------- +// Map container type. +// ----------------------------------------------------------------------------- + +const rb_data_type_t Map_type = { + "Google::Protobuf::Map", + { Map_mark, Map_free, NULL }, +}; + +VALUE cMap; + +Map* ruby_to_Map(VALUE _self) { + Map* self; + TypedData_Get_Struct(_self, Map, &Map_type, self); + return self; +} + +void Map_mark(void* _self) { + Map* self = _self; + + rb_gc_mark(self->value_type_class); + + if (self->value_type == UPB_TYPE_STRING || + self->value_type == UPB_TYPE_BYTES || + self->value_type == UPB_TYPE_MESSAGE) { + upb_strtable_iter it; + for (upb_strtable_begin(&it, &self->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + upb_value v = upb_strtable_iter_value(&it); + void* mem = value_memory(&v); + native_slot_mark(self->value_type, mem); + } + } +} + +void Map_free(void* _self) { + Map* self = _self; + upb_strtable_uninit(&self->table); + xfree(self); +} + +VALUE Map_alloc(VALUE klass) { + Map* self = ALLOC(Map); + memset(self, 0, sizeof(Map)); + self->value_type_class = Qnil; + VALUE ret = TypedData_Wrap_Struct(klass, &Map_type, self); + return ret; +} + +static bool needs_typeclass(upb_fieldtype_t type) { + switch (type) { + case UPB_TYPE_MESSAGE: + case UPB_TYPE_ENUM: + return true; + default: + return false; + } +} + +/* + * call-seq: + * Map.new(key_type, value_type, value_typeclass = nil, init_hashmap = {}) + * => new map + * + * Allocates a new Map container. This constructor may be called with 2, 3, or 4 + * arguments. The first two arguments are always present and are symbols (taking + * on the same values as field-type symbols in message descriptors) that + * indicate the type of the map key and value fields. + * + * The supported key types are: :int32, :int64, :uint32, :uint64, :bool, + * :string, :bytes. + * + * The supported value types are: :int32, :int64, :uint32, :uint64, :bool, + * :string, :bytes, :enum, :message. + * + * The third argument, value_typeclass, must be present if value_type is :enum + * or :message. As in RepeatedField#new, this argument must be a message class + * (for :message) or enum module (for :enum). + * + * The last argument, if present, provides initial content for map. Note that + * this may be an ordinary Ruby hashmap or another Map instance with identical + * key and value types. Also note that this argument may be rpesent whether or + * not value_typeclass is present (and it is unambiguously separate from + * value_typeclass because value_typeclass's presence is strictly determined by + * value_type). + */ +VALUE Map_init(int argc, VALUE* argv, VALUE _self) { + Map* self = ruby_to_Map(_self); + + // We take either two args (:key_type, :value_type), three args (:key_type, + // :value_type, "ValueMessageType"), or four args (the above plus an initial + // hashmap). + if (argc < 2 || argc > 4) { + rb_raise(rb_eArgError, "Map constructor expects 2, 3 or 4 arguments."); + } + + self->key_type = ruby_to_fieldtype(argv[0]); + self->value_type = ruby_to_fieldtype(argv[1]); + + // Check that the key type is an allowed type. + switch (self->key_type) { + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + case UPB_TYPE_BOOL: + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + // These are OK. + break; + default: + rb_raise(rb_eArgError, "Invalid key type for map."); + } + + int init_value_arg = 2; + if (needs_typeclass(self->value_type) && argc > 2) { + self->value_type_class = argv[2]; + validate_type_class(self->value_type, self->value_type_class); + init_value_arg = 3; + } + + if (!upb_strtable_init(&self->table, upb_table_value_type(self->value_type))) { + rb_raise(rb_eRuntimeError, "Could not allocate table."); + } + + if (argc > init_value_arg) { + Map_merge_into_self(_self, argv[init_value_arg]); + } + + return Qnil; +} + +/* + * call-seq: + * Map.each(&block) + * + * Invokes &block on each |key, value| pair in the map, in unspecified order. + * Note that Map also includes Enumerable; map thus acts like a normal Ruby + * sequence. + */ +VALUE Map_each(VALUE _self) { + Map* self = ruby_to_Map(_self); + + upb_strtable_iter it; + for (upb_strtable_begin(&it, &self->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + + VALUE key = table_key_to_ruby( + self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it)); + + upb_value v = upb_strtable_iter_value(&it); + void* mem = value_memory(&v); + VALUE value = native_slot_get(self->value_type, + self->value_type_class, + mem); + + rb_yield_values(2, key, value); + } + + return Qnil; +} + +/* + * call-seq: + * Map.keys => [list_of_keys] + * + * Returns the list of keys contained in the map, in unspecified order. + */ +VALUE Map_keys(VALUE _self) { + Map* self = ruby_to_Map(_self); + + VALUE ret = rb_ary_new(); + upb_strtable_iter it; + for (upb_strtable_begin(&it, &self->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + + VALUE key = table_key_to_ruby( + self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it)); + + rb_ary_push(ret, key); + } + + return ret; +} + +/* + * call-seq: + * Map.values => [list_of_values] + * + * Returns the list of values contained in the map, in unspecified order. + */ +VALUE Map_values(VALUE _self) { + Map* self = ruby_to_Map(_self); + + VALUE ret = rb_ary_new(); + upb_strtable_iter it; + for (upb_strtable_begin(&it, &self->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + + upb_value v = upb_strtable_iter_value(&it); + void* mem = value_memory(&v); + VALUE value = native_slot_get(self->value_type, + self->value_type_class, + mem); + + rb_ary_push(ret, value); + } + + return ret; +} + +/* + * call-seq: + * Map.[](key) => value + * + * Accesses the element at the given key. Throws an exception if the key type is + * incorrect. Returns nil when the key is not present in the map. + */ +VALUE Map_index(VALUE _self, VALUE key) { + Map* self = ruby_to_Map(_self); + + char keybuf[TABLE_KEY_BUF_LENGTH]; + const char* keyval = NULL; + size_t length = 0; + table_key(self, key, keybuf, &keyval, &length); + + upb_value v; + if (upb_strtable_lookup2(&self->table, keyval, length, &v)) { + void* mem = value_memory(&v); + return native_slot_get(self->value_type, self->value_type_class, mem); + } else { + return Qnil; + } +} + +/* + * call-seq: + * Map.[]=(key, value) => value + * + * Inserts or overwrites the value at the given key with the given new value. + * Throws an exception if the key type is incorrect. Returns the new value that + * was just inserted. + */ +VALUE Map_index_set(VALUE _self, VALUE key, VALUE value) { + Map* self = ruby_to_Map(_self); + + char keybuf[TABLE_KEY_BUF_LENGTH]; + const char* keyval = NULL; + size_t length = 0; + table_key(self, key, keybuf, &keyval, &length); + + upb_value v; + void* mem = value_memory(&v); + native_slot_set(self->value_type, self->value_type_class, mem, value); + + // Replace any existing value by issuing a 'remove' operation first. + upb_value oldv; + upb_strtable_remove2(&self->table, keyval, length, &oldv); + if (!upb_strtable_insert2(&self->table, keyval, length, v)) { + rb_raise(rb_eRuntimeError, "Could not insert into table"); + } + + // Ruby hashmap's :[]= method also returns the inserted value. + return value; +} + +/* + * call-seq: + * Map.has_key?(key) => bool + * + * Returns true if the given key is present in the map. Throws an exception if + * the key has the wrong type. + */ +VALUE Map_has_key(VALUE _self, VALUE key) { + Map* self = ruby_to_Map(_self); + + char keybuf[TABLE_KEY_BUF_LENGTH]; + const char* keyval = NULL; + size_t length = 0; + table_key(self, key, keybuf, &keyval, &length); + + upb_value v; + if (upb_strtable_lookup2(&self->table, keyval, length, &v)) { + return Qtrue; + } else { + return Qfalse; + } +} + +/* + * call-seq: + * Map.delete(key) => old_value + * + * Deletes the value at the given key, if any, returning either the old value or + * nil if none was present. Throws an exception if the key is of the wrong type. + */ +VALUE Map_delete(VALUE _self, VALUE key) { + Map* self = ruby_to_Map(_self); + + char keybuf[TABLE_KEY_BUF_LENGTH]; + const char* keyval = NULL; + size_t length = 0; + table_key(self, key, keybuf, &keyval, &length); + + upb_value v; + if (upb_strtable_remove2(&self->table, keyval, length, &v)) { + void* mem = value_memory(&v); + return native_slot_get(self->value_type, self->value_type_class, mem); + } else { + return Qnil; + } +} + +/* + * call-seq: + * Map.clear + * + * Removes all entries from the map. + */ +VALUE Map_clear(VALUE _self) { + Map* self = ruby_to_Map(_self); + + // Uninit and reinit the table -- this is faster than iterating and doing a + // delete-lookup on each key. + upb_strtable_uninit(&self->table); + if (!upb_strtable_init(&self->table, + upb_table_value_type(self->value_type))) { + rb_raise(rb_eRuntimeError, "Unable to re-initialize table"); + } + return Qnil; +} + +/* + * call-seq: + * Map.length + * + * Returns the number of entries (key-value pairs) in the map. + */ +VALUE Map_length(VALUE _self) { + Map* self = ruby_to_Map(_self); + return INT2NUM(upb_strtable_count(&self->table)); +} + +static VALUE Map_new_this_type(VALUE _self) { + Map* self = ruby_to_Map(_self); + VALUE new_map = Qnil; + VALUE key_type = fieldtype_to_ruby(self->key_type); + VALUE value_type = fieldtype_to_ruby(self->value_type); + if (self->value_type_class != Qnil) { + new_map = rb_funcall(CLASS_OF(_self), rb_intern("new"), 3, + key_type, value_type, self->value_type_class); + } else { + new_map = rb_funcall(CLASS_OF(_self), rb_intern("new"), 2, + key_type, value_type); + } + return new_map; +} + +/* + * call-seq: + * Map.dup => new_map + * + * Duplicates this map with a shallow copy. References to all non-primitive + * element objects (e.g., submessages) are shared. + */ +VALUE Map_dup(VALUE _self) { + Map* self = ruby_to_Map(_self); + VALUE new_map = Map_new_this_type(_self); + Map* new_self = ruby_to_Map(new_map); + + upb_strtable_iter it; + for (upb_strtable_begin(&it, &self->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + + upb_value v = upb_strtable_iter_value(&it); + void* mem = value_memory(&v); + upb_value dup; + void* dup_mem = value_memory(&dup); + native_slot_dup(self->value_type, dup_mem, mem); + + if (!upb_strtable_insert2(&new_self->table, + upb_strtable_iter_key(&it), + upb_strtable_iter_keylength(&it), + dup)) { + rb_raise(rb_eRuntimeError, "Error inserting value into new table"); + } + } + + return new_map; +} + +// Used by Google::Protobuf.deep_copy but not exposed directly. +VALUE Map_deep_copy(VALUE _self) { + Map* self = ruby_to_Map(_self); + VALUE new_map = Map_new_this_type(_self); + Map* new_self = ruby_to_Map(new_map); + + upb_strtable_iter it; + for (upb_strtable_begin(&it, &self->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + + upb_value v = upb_strtable_iter_value(&it); + void* mem = value_memory(&v); + upb_value dup; + void* dup_mem = value_memory(&dup); + native_slot_deep_copy(self->value_type, dup_mem, mem); + + if (!upb_strtable_insert2(&new_self->table, + upb_strtable_iter_key(&it), + upb_strtable_iter_keylength(&it), + dup)) { + rb_raise(rb_eRuntimeError, "Error inserting value into new table"); + } + } + + return new_map; +} + +/* + * call-seq: + * Map.==(other) => boolean + * + * Compares this map to another. Maps are equal if they have identical key sets, + * and for each key, the values in both maps compare equal. Elements are + * compared as per normal Ruby semantics, by calling their :== methods (or + * performing a more efficient comparison for primitive types). + * + * Maps with dissimilar key types or value types/typeclasses are never equal, + * even if value comparison (for example, between integers and floats) would + * have otherwise indicated that every element has equal value. + */ +VALUE Map_eq(VALUE _self, VALUE _other) { + Map* self = ruby_to_Map(_self); + + // Allow comparisons to Ruby hashmaps by converting to a temporary Map + // instance. Slow, but workable. + if (TYPE(_other) == T_HASH) { + VALUE other_map = Map_new_this_type(_self); + Map_merge_into_self(other_map, _other); + _other = other_map; + } + + Map* other = ruby_to_Map(_other); + + if (self == other) { + return Qtrue; + } + if (self->key_type != other->key_type || + self->value_type != other->value_type || + self->value_type_class != other->value_type_class) { + return Qfalse; + } + if (upb_strtable_count(&self->table) != upb_strtable_count(&other->table)) { + return Qfalse; + } + + // For each member of self, check that an equal member exists at the same key + // in other. + upb_strtable_iter it; + for (upb_strtable_begin(&it, &self->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + + upb_value v = upb_strtable_iter_value(&it); + void* mem = value_memory(&v); + upb_value other_v; + void* other_mem = value_memory(&other_v); + + if (!upb_strtable_lookup2(&other->table, + upb_strtable_iter_key(&it), + upb_strtable_iter_keylength(&it), + &other_v)) { + // Not present in other map. + return Qfalse; + } + + if (!native_slot_eq(self->value_type, mem, other_mem)) { + // Present, but value not equal. + return Qfalse; + } + } + + // For each member of other, check that a member exists at the same key in + // self. We don't need to compare values here -- if the key exists in both, we + // compared values above; if not, we already know that the maps are not equal. + for (upb_strtable_begin(&it, &other->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + upb_value v; + if (!upb_strtable_lookup2(&self->table, + upb_strtable_iter_key(&it), + upb_strtable_iter_keylength(&it), + &v)) { + return Qfalse; + } + } + + return Qtrue; +} + +/* + * call-seq: + * Map.hash => hash_value + * + * Returns a hash value based on this map's contents. + */ +VALUE Map_hash(VALUE _self) { + Map* self = ruby_to_Map(_self); + + st_index_t h = rb_hash_start(0); + VALUE hash_sym = rb_intern("hash"); + + upb_strtable_iter it; + for (upb_strtable_begin(&it, &self->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + VALUE key = table_key_to_ruby( + self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it)); + + upb_value v = upb_strtable_iter_value(&it); + void* mem = value_memory(&v); + VALUE value = native_slot_get(self->value_type, + self->value_type_class, + mem); + + h = rb_hash_uint(h, NUM2LONG(rb_funcall(key, hash_sym, 0))); + h = rb_hash_uint(h, NUM2LONG(rb_funcall(value, hash_sym, 0))); + } + + return INT2FIX(h); +} + +/* + * call-seq: + * Map.inspect => string + * + * Returns a string representing this map's elements. It will be formatted as + * "{key => value, key => value, ...}", with each key and value string + * representation computed by its own #inspect method. + */ +VALUE Map_inspect(VALUE _self) { + Map* self = ruby_to_Map(_self); + + VALUE str = rb_str_new2("{"); + + bool first = true; + VALUE inspect_sym = rb_intern("inspect"); + + upb_strtable_iter it; + for (upb_strtable_begin(&it, &self->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + VALUE key = table_key_to_ruby( + self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it)); + + upb_value v = upb_strtable_iter_value(&it); + void* mem = value_memory(&v); + VALUE value = native_slot_get(self->value_type, + self->value_type_class, + mem); + + if (!first) { + str = rb_str_cat2(str, ", "); + } else { + first = false; + } + str = rb_str_append(str, rb_funcall(key, inspect_sym, 0)); + str = rb_str_cat2(str, " => "); + str = rb_str_append(str, rb_funcall(value, inspect_sym, 0)); + } + + str = rb_str_cat2(str, "}"); + return str; +} + +/* + * call-seq: + * Map.merge(other_map) => map + * + * Copies key/value pairs from other_map into a copy of this map. If a key is + * set in other_map and this map, the value from other_map overwrites the value + * in the new copy of this map. Returns the new copy of this map with merged + * contents. + */ +VALUE Map_merge(VALUE _self, VALUE hashmap) { + VALUE dupped = Map_dup(_self); + return Map_merge_into_self(dupped, hashmap); +} + +static int merge_into_self_callback(VALUE key, VALUE value, VALUE self) { + Map_index_set(self, key, value); + return ST_CONTINUE; +} + +// Used only internally -- shared by #merge and #initialize. +VALUE Map_merge_into_self(VALUE _self, VALUE hashmap) { + if (TYPE(hashmap) == T_HASH) { + rb_hash_foreach(hashmap, merge_into_self_callback, _self); + } else if (RB_TYPE_P(hashmap, T_DATA) && RTYPEDDATA_P(hashmap) && + RTYPEDDATA_TYPE(hashmap) == &Map_type) { + + Map* self = ruby_to_Map(_self); + Map* other = ruby_to_Map(hashmap); + + if (self->key_type != other->key_type || + self->value_type != other->value_type || + self->value_type_class != other->value_type_class) { + rb_raise(rb_eArgError, "Attempt to merge Map with mismatching types"); + } + + upb_strtable_iter it; + for (upb_strtable_begin(&it, &other->table); + !upb_strtable_done(&it); + upb_strtable_next(&it)) { + + // Replace any existing value by issuing a 'remove' operation first. + upb_value oldv; + upb_strtable_remove2(&self->table, + upb_strtable_iter_key(&it), + upb_strtable_iter_keylength(&it), + &oldv); + + upb_value v = upb_strtable_iter_value(&it); + upb_strtable_insert2(&self->table, + upb_strtable_iter_key(&it), + upb_strtable_iter_keylength(&it), + v); + } + } else { + rb_raise(rb_eArgError, "Unknown type merging into Map"); + } + return _self; +} + +// Internal method: map iterator initialization (used for serialization). +void Map_begin(VALUE _self, Map_iter* iter) { + Map* self = ruby_to_Map(_self); + iter->self = self; + upb_strtable_begin(&iter->it, &self->table); +} + +void Map_next(Map_iter* iter) { + upb_strtable_next(&iter->it); +} + +bool Map_done(Map_iter* iter) { + return upb_strtable_done(&iter->it); +} + +VALUE Map_iter_key(Map_iter* iter) { + return table_key_to_ruby( + iter->self, + upb_strtable_iter_key(&iter->it), + upb_strtable_iter_keylength(&iter->it)); +} + +VALUE Map_iter_value(Map_iter* iter) { + upb_value v = upb_strtable_iter_value(&iter->it); + void* mem = value_memory(&v); + return native_slot_get(iter->self->value_type, + iter->self->value_type_class, + mem); +} + +void Map_register(VALUE module) { + VALUE klass = rb_define_class_under(module, "Map", rb_cObject); + rb_define_alloc_func(klass, Map_alloc); + cMap = klass; + rb_gc_register_address(&cMap); + + rb_define_method(klass, "initialize", Map_init, -1); + rb_define_method(klass, "each", Map_each, 0); + rb_define_method(klass, "keys", Map_keys, 0); + rb_define_method(klass, "values", Map_values, 0); + rb_define_method(klass, "[]", Map_index, 1); + rb_define_method(klass, "[]=", Map_index_set, 2); + rb_define_method(klass, "has_key?", Map_has_key, 1); + rb_define_method(klass, "delete", Map_delete, 1); + rb_define_method(klass, "clear", Map_clear, 0); + rb_define_method(klass, "length", Map_length, 0); + rb_define_method(klass, "dup", Map_dup, 0); + rb_define_method(klass, "==", Map_eq, 1); + rb_define_method(klass, "hash", Map_hash, 0); + rb_define_method(klass, "inspect", Map_inspect, 0); + rb_define_method(klass, "merge", Map_merge, 1); + rb_include_module(klass, rb_mEnumerable); +} diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index 105b7807..de38dd7b 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -139,7 +139,14 @@ int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { "Unknown field name in initialization map entry."); } - if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) { + if (is_map_field(f)) { + if (TYPE(val) != T_HASH) { + rb_raise(rb_eArgError, + "Expected hashmap as initializer value for map field."); + } + VALUE map = layout_get(self->descriptor->layout, Message_data(self), f); + Map_merge_into_self(map, val); + } else if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) { if (TYPE(val) != T_ARRAY) { rb_raise(rb_eArgError, "Expected array as initializer value for repeated field."); @@ -450,13 +457,15 @@ VALUE build_module_from_enumdesc(EnumDescriptor* enumdesc) { * call-seq: * Google::Protobuf.deep_copy(obj) => copy_of_obj * - * Performs a deep copy of either a RepeatedField instance or a message object, - * recursively copying its members. + * Performs a deep copy of a RepeatedField instance, a Map instance, or a + * message object, recursively copying its members. */ VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) { VALUE klass = CLASS_OF(obj); if (klass == cRepeatedField) { return RepeatedField_deep_copy(obj); + } else if (klass == cMap) { + return Map_deep_copy(obj); } else { return Message_deep_copy(obj); } diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c index d5862284..30552705 100644 --- a/ruby/ext/google/protobuf_c/protobuf.c +++ b/ruby/ext/google/protobuf_c/protobuf.c @@ -82,6 +82,7 @@ void Init_protobuf_c() { EnumBuilderContext_register(internal); Builder_register(internal); RepeatedField_register(protobuf); + Map_register(protobuf); rb_define_singleton_method(protobuf, "encode", Google_Protobuf_encode, 1); rb_define_singleton_method(protobuf, "decode", Google_Protobuf_decode, 2); diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index c3a5d653..91a97a68 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -123,6 +123,7 @@ struct EnumDescriptor { struct MessageBuilderContext { VALUE descriptor; + VALUE builder; }; struct EnumBuilderContext { @@ -213,10 +214,13 @@ void MessageBuilderContext_free(void* _self); VALUE MessageBuilderContext_alloc(VALUE klass); void MessageBuilderContext_register(VALUE module); MessageBuilderContext* ruby_to_MessageBuilderContext(VALUE value); -VALUE MessageBuilderContext_initialize(VALUE _self, VALUE descriptor); +VALUE MessageBuilderContext_initialize(VALUE _self, + VALUE descriptor, + VALUE builder); VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self); VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self); VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self); +VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self); void EnumBuilderContext_mark(void* _self); void EnumBuilderContext_free(void* _self); @@ -239,6 +243,8 @@ VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb); // Native slot storage abstraction. // ----------------------------------------------------------------------------- +#define NATIVE_SLOT_MAX_SIZE sizeof(void*) + size_t native_slot_size(upb_fieldtype_t type); void native_slot_set(upb_fieldtype_t type, VALUE type_class, @@ -254,11 +260,18 @@ void native_slot_deep_copy(upb_fieldtype_t type, void* to, void* from); bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2); void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value); +void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE value); extern rb_encoding* kRubyStringUtf8Encoding; extern rb_encoding* kRubyStringASCIIEncoding; extern rb_encoding* kRubyString8bitEncoding; +VALUE field_type_class(const upb_fielddef* field); + +bool is_map_field(const upb_fielddef* field); +const upb_fielddef* map_field_key(const upb_fielddef* field); +const upb_fielddef* map_field_value(const upb_fielddef* field); + // ----------------------------------------------------------------------------- // Repeated field container type. // ----------------------------------------------------------------------------- @@ -282,7 +295,6 @@ extern VALUE cRepeatedField; RepeatedField* ruby_to_RepeatedField(VALUE value); -void RepeatedField_register(VALUE module); VALUE RepeatedField_each(VALUE _self); VALUE RepeatedField_index(VALUE _self, VALUE _index); void* RepeatedField_index_native(VALUE _self, int index); @@ -302,6 +314,59 @@ VALUE RepeatedField_hash(VALUE _self); VALUE RepeatedField_inspect(VALUE _self); VALUE RepeatedField_plus(VALUE _self, VALUE list); +// Defined in repeated_field.c; also used by Map. +void validate_type_class(upb_fieldtype_t type, VALUE klass); + +// ----------------------------------------------------------------------------- +// Map container type. +// ----------------------------------------------------------------------------- + +typedef struct { + upb_fieldtype_t key_type; + upb_fieldtype_t value_type; + VALUE value_type_class; + upb_strtable table; +} Map; + +void Map_mark(void* self); +void Map_free(void* self); +VALUE Map_alloc(VALUE klass); +VALUE Map_init(int argc, VALUE* argv, VALUE self); +void Map_register(VALUE module); + +extern const rb_data_type_t Map_type; +extern VALUE cMap; + +Map* ruby_to_Map(VALUE value); + +VALUE Map_each(VALUE _self); +VALUE Map_keys(VALUE _self); +VALUE Map_values(VALUE _self); +VALUE Map_index(VALUE _self, VALUE key); +VALUE Map_index_set(VALUE _self, VALUE key, VALUE value); +VALUE Map_has_key(VALUE _self, VALUE key); +VALUE Map_delete(VALUE _self, VALUE key); +VALUE Map_clear(VALUE _self); +VALUE Map_length(VALUE _self); +VALUE Map_dup(VALUE _self); +VALUE Map_deep_copy(VALUE _self); +VALUE Map_eq(VALUE _self, VALUE _other); +VALUE Map_hash(VALUE _self); +VALUE Map_inspect(VALUE _self); +VALUE Map_merge(VALUE _self, VALUE hashmap); +VALUE Map_merge_into_self(VALUE _self, VALUE hashmap); + +typedef struct { + Map* self; + upb_strtable_iter it; +} Map_iter; + +void Map_begin(VALUE _self, Map_iter* iter); +void Map_next(Map_iter* iter); +bool Map_done(Map_iter* iter); +VALUE Map_iter_key(Map_iter* iter); +VALUE Map_iter_value(Map_iter* iter); + // ----------------------------------------------------------------------------- // Message layout / storage. // ----------------------------------------------------------------------------- diff --git a/ruby/ext/google/protobuf_c/repeated_field.c b/ruby/ext/google/protobuf_c/repeated_field.c index 6bd13b07..6e3f0bc7 100644 --- a/ruby/ext/google/protobuf_c/repeated_field.c +++ b/ruby/ext/google/protobuf_c/repeated_field.c @@ -324,6 +324,10 @@ VALUE RepeatedField_deep_copy(VALUE _self) { * element types are equal, their lengths are equal, and each element is equal. * Elements are compared as per normal Ruby semantics, by calling their :== * methods (or performing a more efficient comparison for primitive types). + * + * Repeated fields with dissimilar element types are never equal, even if value + * comparison (for example, between integers and floats) would have otherwise + * indicated that every element has equal value. */ VALUE RepeatedField_eq(VALUE _self, VALUE _other) { if (_self == _other) { @@ -458,7 +462,7 @@ VALUE RepeatedField_plus(VALUE _self, VALUE list) { return dupped; } -static void validate_type_class(upb_fieldtype_t type, VALUE klass) { +void validate_type_class(upb_fieldtype_t type, VALUE klass) { if (rb_iv_get(klass, kDescriptorInstanceVar) == Qnil) { rb_raise(rb_eArgError, "Type class has no descriptor. Please pass a " diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index c4d801af..f20ddec2 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -57,7 +57,17 @@ size_t native_slot_size(upb_fieldtype_t type) { } } -static void check_int_range_precision(upb_fieldtype_t type, VALUE val) { +static bool is_ruby_num(VALUE value) { + return (TYPE(value) == T_FLOAT || + TYPE(value) == T_FIXNUM || + TYPE(value) == T_BIGNUM); +} + +void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE val) { + if (!is_ruby_num(val)) { + rb_raise(rb_eTypeError, "Expected number type for integral field."); + } + // NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper // bound; we just need to do precision checks (i.e., disallow rounding) and // check for < 0 on unsigned types. @@ -76,12 +86,6 @@ static void check_int_range_precision(upb_fieldtype_t type, VALUE val) { } } -static bool is_ruby_num(VALUE value) { - return (TYPE(value) == T_FLOAT || - TYPE(value) == T_FIXNUM || - TYPE(value) == T_BIGNUM); -} - void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) { bool bad_encoding = false; rb_encoding* string_encoding = rb_enc_from_index(ENCODING_GET(value)); @@ -156,14 +160,14 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class, int32_t int_val = 0; if (TYPE(value) == T_SYMBOL) { // Ensure that the given symbol exists in the enum module. - VALUE lookup = rb_const_get(type_class, SYM2ID(value)); + VALUE lookup = rb_funcall(type_class, rb_intern("resolve"), 1, value); if (lookup == Qnil) { rb_raise(rb_eRangeError, "Unknown symbol value for enum field."); } else { int_val = NUM2INT(lookup); } } else { - check_int_range_precision(UPB_TYPE_INT32, value); + native_slot_check_int_range_precision(UPB_TYPE_INT32, value); int_val = NUM2INT(value); } DEREF(memory, int32_t) = int_val; @@ -173,10 +177,7 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class, case UPB_TYPE_INT64: case UPB_TYPE_UINT32: case UPB_TYPE_UINT64: - if (!is_ruby_num(value)) { - rb_raise(rb_eTypeError, "Expected number type for integral field."); - } - check_int_range_precision(type, value); + native_slot_check_int_range_precision(type, value); switch (type) { case UPB_TYPE_INT32: DEREF(memory, int32_t) = NUM2INT(value); @@ -246,8 +247,9 @@ void native_slot_init(upb_fieldtype_t type, void* memory) { break; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: - // TODO(cfallin): set encoding appropriately DEREF(memory, VALUE) = rb_str_new2(""); + rb_enc_associate(DEREF(memory, VALUE), (type == UPB_TYPE_BYTES) ? + kRubyString8bitEncoding : kRubyStringUtf8Encoding); break; case UPB_TYPE_MESSAGE: DEREF(memory, VALUE) = Qnil; @@ -321,6 +323,35 @@ bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2) { } } +// ----------------------------------------------------------------------------- +// Map field utilities. +// ----------------------------------------------------------------------------- + +bool is_map_field(const upb_fielddef* field) { + if (upb_fielddef_label(field) != UPB_LABEL_REPEATED || + upb_fielddef_type(field) != UPB_TYPE_MESSAGE) { + return false; + } + const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + return upb_msgdef_mapentry(subdef); +} + +const upb_fielddef* map_field_key(const upb_fielddef* field) { + assert(is_map_field(field)); + const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_fielddef* key_field = upb_msgdef_itof(subdef, 1); + assert(key_field != NULL); + return key_field; +} + +const upb_fielddef* map_field_value(const upb_fielddef* field) { + assert(is_map_field(field)); + const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_fielddef* value_field = upb_msgdef_itof(subdef, 2); + assert(value_field != NULL); + return value_field; +} + // ----------------------------------------------------------------------------- // Memory layout management. // ----------------------------------------------------------------------------- @@ -334,9 +365,12 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) { size_t off = 0; for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); - size_t field_size = - (upb_fielddef_label(field) == UPB_LABEL_REPEATED) ? - sizeof(VALUE) : native_slot_size(upb_fielddef_type(field)); + size_t field_size = 0; + if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + field_size = sizeof(VALUE); + } else { + field_size = native_slot_size(upb_fielddef_type(field)); + } // align current offset off = (off + field_size - 1) & ~(field_size - 1); layout->offsets[upb_fielddef_index(field)] = off; @@ -357,7 +391,7 @@ void free_layout(MessageLayout* layout) { xfree(layout); } -static VALUE get_type_class(const upb_fielddef* field) { +VALUE field_type_class(const upb_fielddef* field) { VALUE type_class = Qnil; if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) { VALUE submsgdesc = @@ -380,7 +414,7 @@ VALUE layout_get(MessageLayout* layout, return *((VALUE *)memory); } else { return native_slot_get(upb_fielddef_type(field), - get_type_class(field), + field_type_class(field), memory); } } @@ -398,9 +432,8 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) { rb_raise(rb_eTypeError, "Repeated field array has wrong element type"); } - if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE || - upb_fielddef_type(field) == UPB_TYPE_ENUM) { - RepeatedField* self = ruby_to_RepeatedField(val); + if (self->field_type == UPB_TYPE_MESSAGE || + self->field_type == UPB_TYPE_ENUM) { if (self->field_type_class != get_def_obj(upb_fielddef_subdef(field))) { rb_raise(rb_eTypeError, @@ -409,17 +442,48 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) { } } +static void check_map_field_type(VALUE val, const upb_fielddef* field) { + assert(is_map_field(field)); + const upb_fielddef* key_field = map_field_key(field); + const upb_fielddef* value_field = map_field_value(field); + + if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) || + RTYPEDDATA_TYPE(val) != &Map_type) { + rb_raise(rb_eTypeError, "Expected Map instance"); + } + + Map* self = ruby_to_Map(val); + if (self->key_type != upb_fielddef_type(key_field)) { + rb_raise(rb_eTypeError, "Map key type does not match field's key type"); + } + if (self->value_type != upb_fielddef_type(value_field)) { + rb_raise(rb_eTypeError, "Map value type does not match field's value type"); + } + if (upb_fielddef_type(value_field) == UPB_TYPE_MESSAGE || + upb_fielddef_type(value_field) == UPB_TYPE_ENUM) { + if (self->value_type_class != + get_def_obj(upb_fielddef_subdef(value_field))) { + rb_raise(rb_eTypeError, + "Map value type has wrong message/enum class"); + } + } +} + + void layout_set(MessageLayout* layout, void* storage, const upb_fielddef* field, VALUE val) { void* memory = ((uint8_t *)storage) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + if (is_map_field(field)) { + check_map_field_type(val, field); + DEREF(memory, VALUE) = val; + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { check_repeated_field_type(val, field); - *((VALUE *)memory) = val; + DEREF(memory, VALUE) = val; } else { - native_slot_set(upb_fielddef_type(field), get_type_class(field), + native_slot_set(upb_fielddef_type(field), field_type_class(field), memory, val); } } @@ -434,9 +498,34 @@ void layout_init(MessageLayout* layout, void* memory = ((uint8_t *)storage) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + if (is_map_field(field)) { + VALUE map = Qnil; + + const upb_fielddef* key_field = map_field_key(field); + const upb_fielddef* value_field = map_field_value(field); + VALUE type_class = field_type_class(value_field); + + if (type_class != Qnil) { + VALUE args[3] = { + fieldtype_to_ruby(upb_fielddef_type(key_field)), + fieldtype_to_ruby(upb_fielddef_type(value_field)), + type_class, + }; + map = rb_class_new_instance(3, args, cMap); + } else { + VALUE args[2] = { + fieldtype_to_ruby(upb_fielddef_type(key_field)), + fieldtype_to_ruby(upb_fielddef_type(value_field)), + }; + map = rb_class_new_instance(2, args, cMap); + } + + DEREF(memory, VALUE) = map; + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { VALUE ary = Qnil; - VALUE type_class = get_type_class(field); + + VALUE type_class = field_type_class(field); + if (type_class != Qnil) { VALUE args[2] = { fieldtype_to_ruby(upb_fielddef_type(field)), @@ -447,7 +536,8 @@ void layout_init(MessageLayout* layout, VALUE args[1] = { fieldtype_to_ruby(upb_fielddef_type(field)) }; ary = rb_class_new_instance(1, args, cRepeatedField); } - *((VALUE *)memory) = ary; + + DEREF(memory, VALUE) = ary; } else { native_slot_init(upb_fielddef_type(field), memory); } @@ -464,7 +554,7 @@ void layout_mark(MessageLayout* layout, void* storage) { layout->offsets[upb_fielddef_index(field)]; if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - rb_gc_mark(*((VALUE *)memory)); + rb_gc_mark(DEREF(memory, VALUE)); } else { native_slot_mark(upb_fielddef_type(field), memory); } @@ -482,8 +572,10 @@ void layout_dup(MessageLayout* layout, void* to, void* from) { void* from_memory = ((uint8_t *)from) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - *((VALUE *)to_memory) = RepeatedField_dup(*((VALUE *)from_memory)); + if (is_map_field(field)) { + DEREF(to_memory, VALUE) = Map_dup(DEREF(from_memory, VALUE)); + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + DEREF(to_memory, VALUE) = RepeatedField_dup(DEREF(from_memory, VALUE)); } else { native_slot_dup(upb_fielddef_type(field), to_memory, from_memory); } @@ -501,8 +593,12 @@ void layout_deep_copy(MessageLayout* layout, void* to, void* from) { void* from_memory = ((uint8_t *)from) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - *((VALUE *)to_memory) = RepeatedField_deep_copy(*((VALUE *)from_memory)); + if (is_map_field(field)) { + DEREF(to_memory, VALUE) = + Map_deep_copy(DEREF(from_memory, VALUE)); + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + DEREF(to_memory, VALUE) = + RepeatedField_deep_copy(DEREF(from_memory, VALUE)); } else { native_slot_deep_copy(upb_fielddef_type(field), to_memory, from_memory); } @@ -520,11 +616,12 @@ VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) { void* msg2_memory = ((uint8_t *)msg2) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - if (RepeatedField_eq(*((VALUE *)msg1_memory), - *((VALUE *)msg2_memory)) == Qfalse) { - return Qfalse; - } + if (is_map_field(field)) { + return Map_eq(DEREF(msg1_memory, VALUE), + DEREF(msg2_memory, VALUE)); + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + return RepeatedField_eq(DEREF(msg1_memory, VALUE), + DEREF(msg2_memory, VALUE)); } else { if (!native_slot_eq(upb_fielddef_type(field), msg1_memory, msg2_memory)) { diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c index c9f47195..0015aad1 100644 --- a/ruby/ext/google/protobuf_c/upb.c +++ b/ruby/ext/google/protobuf_c/upb.c @@ -1269,6 +1269,7 @@ upb_msgdef *upb_msgdef_new(const void *owner) { if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2; if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2; if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1; + m->map_entry = false; return m; err1: @@ -1283,6 +1284,7 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { if (!newm) return NULL; bool ok = upb_def_setfullname(UPB_UPCAST(newm), upb_def_fullname(UPB_UPCAST(m)), NULL); + newm->map_entry = m->map_entry; UPB_ASSERT_VAR(ok, ok); upb_msg_iter i; for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { @@ -1386,6 +1388,15 @@ int upb_msgdef_numfields(const upb_msgdef *m) { return upb_strtable_count(&m->ntof); } +void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) { + assert(!upb_msgdef_isfrozen(m)); + m->map_entry = map_entry; +} + +bool upb_msgdef_mapentry(const upb_msgdef *m) { + return m->map_entry; +} + void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { upb_inttable_begin(iter, &m->itof); } @@ -3401,31 +3412,28 @@ int log2ceil(uint64_t v) { } char *upb_strdup(const char *s) { - size_t n = strlen(s) + 1; + return upb_strdup2(s, strlen(s)); +} + +char *upb_strdup2(const char *s, size_t len) { + // Always null-terminate, even if binary data; but don't rely on the input to + // have a null-terminating byte since it may be a raw binary buffer. + size_t n = len + 1; char *p = malloc(n); - if (p) memcpy(p, s, n); + if (p) memcpy(p, s, len); + p[len] = 0; return p; } // A type to represent the lookup key of either a strtable or an inttable. -// This is like upb_tabkey, but can carry a size also to allow lookups of -// non-NULL-terminated strings (we don't store string lengths in the table). typedef struct { upb_tabkey key; - uint32_t len; // For string keys only. } lookupkey_t; -static lookupkey_t strkey(const char *str) { - lookupkey_t k; - k.key.str = (char*)str; - k.len = strlen(str); - return k; -} - static lookupkey_t strkey2(const char *str, size_t len) { lookupkey_t k; - k.key.str = (char*)str; - k.len = len; + k.key.s.str = (char*)str; + k.key.s.length = len; return k; } @@ -3607,11 +3615,12 @@ static size_t begin(const upb_table *t) { // A simple "subclass" of upb_table that only adds a hash function for strings. static uint32_t strhash(upb_tabkey key) { - return MurmurHash2(key.str, strlen(key.str), 0); + return MurmurHash2(key.s.str, key.s.length, 0); } static bool streql(upb_tabkey k1, lookupkey_t k2) { - return strncmp(k1.str, k2.key.str, k2.len) == 0 && k1.str[k2.len] == '\0'; + return k1.s.length == k2.key.s.length && + memcmp(k1.s.str, k2.key.s.str, k1.s.length) == 0; } bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) { @@ -3620,7 +3629,7 @@ bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) { void upb_strtable_uninit(upb_strtable *t) { for (size_t i = 0; i < upb_table_size(&t->t); i++) - free((void*)t->t.entries[i].key.str); + free((void*)t->t.entries[i].key.s.str); uninit(&t->t); } @@ -3631,26 +3640,30 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) { upb_strtable_iter i; upb_strtable_begin(&i, t); for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_strtable_insert( - &new_table, upb_strtable_iter_key(&i), upb_strtable_iter_value(&i)); + upb_strtable_insert2( + &new_table, + upb_strtable_iter_key(&i), + upb_strtable_iter_keylength(&i), + upb_strtable_iter_value(&i)); } upb_strtable_uninit(t); *t = new_table; return true; } -bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { +bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len, + upb_value v) { if (isfull(&t->t)) { // Need to resize. New table of double the size, add old elements to it. if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) { return false; } } - if ((k = upb_strdup(k)) == NULL) return false; + if ((k = upb_strdup2(k, len)) == NULL) return false; - lookupkey_t key = strkey(k); - uint32_t hash = MurmurHash2(key.key.str, key.len, 0); - insert(&t->t, strkey(k), v, hash, &strhash, &streql); + lookupkey_t key = strkey2(k, len); + uint32_t hash = MurmurHash2(key.key.s.str, key.key.s.length, 0); + insert(&t->t, key, v, hash, &strhash, &streql); return true; } @@ -3660,11 +3673,12 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, return lookup(&t->t, strkey2(key, len), v, hash, &streql); } -bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) { +bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len, + upb_value *val) { uint32_t hash = MurmurHash2(key, strlen(key), 0); upb_tabkey tabkey; - if (rm(&t->t, strkey(key), val, &tabkey, hash, &streql)) { - free((void*)tabkey.str); + if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { + free((void*)tabkey.s.str); return true; } else { return false; @@ -3693,7 +3707,12 @@ bool upb_strtable_done(const upb_strtable_iter *i) { const char *upb_strtable_iter_key(upb_strtable_iter *i) { assert(!upb_strtable_done(i)); - return str_tabent(i)->key.str; + return str_tabent(i)->key.s.str; +} + +size_t upb_strtable_iter_keylength(upb_strtable_iter *i) { + assert(!upb_strtable_done(i)); + return str_tabent(i)->key.s.length; } upb_value upb_strtable_iter_value(const upb_strtable_iter *i) { diff --git a/ruby/ext/google/protobuf_c/upb.h b/ruby/ext/google/protobuf_c/upb.h index 150aef10..0e98bbab 100644 --- a/ruby/ext/google/protobuf_c/upb.h +++ b/ruby/ext/google/protobuf_c/upb.h @@ -600,6 +600,9 @@ typedef struct { // Like strdup(), which isn't always available since it's not ANSI C. char *upb_strdup(const char *s); +// Variant that works with a length-delimited rather than NULL-delimited string, +// as supported by strtable. +char *upb_strdup2(const char *s, size_t len); UPB_INLINE void _upb_value_setval(upb_value *v, _upb_value val, upb_ctype_t ctype) { @@ -654,12 +657,24 @@ FUNCS(fptr, fptr, upb_func*, UPB_CTYPE_FPTR); typedef union { uintptr_t num; - const char *str; // We own, nullz. + struct { + // We own this. NULL-terminated but may also contain binary data; see + // explicit length below. + // TODO: move the length to the start of the string in order to reduce + // tabkey's size (to one machine word) in a way that supports static + // initialization. + const char *str; + size_t length; + } s; } upb_tabkey; #define UPB_TABKEY_NUM(n) {n} #ifdef UPB_C99 -#define UPB_TABKEY_STR(s) {.str = s} +// Given that |s| is a string literal, sizeof(s) gives us a +// compile-time-constant strlen(). We must ensure that this works for static +// data initializers. +#define UPB_TABKEY_STR(strval) { .s = { .str = strval, \ + .length = sizeof(strval) - 1 } } #endif // TODO(haberman): C++ #define UPB_TABKEY_NONE {0} @@ -765,7 +780,14 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) { // If a table resize was required but memory allocation failed, false is // returned and the table is unchanged. bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val); -bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val); +bool upb_strtable_insert2(upb_strtable *t, const char *key, size_t len, + upb_value val); + +// For NULL-terminated strings. +UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key, + upb_value val) { + return upb_strtable_insert2(t, key, strlen(key), val); +} // Looks up key in this table, returning "true" if the key was found. // If v is non-NULL, copies the value for this key into *v. @@ -782,7 +804,14 @@ UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key, // Removes an item from the table. Returns true if the remove was successful, // and stores the removed item in *val if non-NULL. bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); -bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val); +bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len, + upb_value *val); + +// For NULL-terminated strings. +UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key, + upb_value *v) { + return upb_strtable_remove2(t, key, strlen(key), v); +} // Updates an existing entry in an inttable. If the entry does not exist, // returns false and does nothing. Unlike insert/remove, this does not @@ -876,6 +905,7 @@ void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t); void upb_strtable_next(upb_strtable_iter *i); bool upb_strtable_done(const upb_strtable_iter *i); const char *upb_strtable_iter_key(upb_strtable_iter *i); +size_t upb_strtable_iter_keylength(upb_strtable_iter *i); upb_value upb_strtable_iter_value(const upb_strtable_iter *i); void upb_strtable_iter_setdone(upb_strtable_iter *i); bool upb_strtable_iter_isequal(const upb_strtable_iter *i1, @@ -1777,6 +1807,10 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE( // just be moved into symtab.c? MessageDef* Dup(const void* owner) const; + // Is this message a map entry? + void setmapentry(bool map_entry); + bool mapentry() const; + // Iteration over fields. The order is undefined. class iterator : public std::iterator { public: @@ -1823,6 +1857,11 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def, upb_inttable itof; // int to field upb_strtable ntof; // name to field + // Is this a map-entry message? + // TODO: set this flag properly for static descriptors; regenerate + // descriptor.upb.c. + bool map_entry; + // TODO(haberman): proper extension ranges (there can be multiple). )); @@ -1830,7 +1869,7 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def, refs, ref2s) \ { \ UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count, \ - submsg_field_count, itof, ntof \ + submsg_field_count, itof, ntof, false \ } UPB_BEGIN_EXTERN_C // { @@ -1878,6 +1917,9 @@ UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, return (upb_fielddef *)upb_msgdef_ntof(m, name, len); } +void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry); +bool upb_msgdef_mapentry(const upb_msgdef *m); + // upb_msg_iter i; // for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { // upb_fielddef *f = upb_msg_iter_field(&i); @@ -2331,6 +2373,12 @@ inline const FieldDef *MessageDef::FindFieldByName(const char *name, inline MessageDef* MessageDef::Dup(const void *owner) const { return upb_msgdef_dup(this, owner); } +inline void MessageDef::setmapentry(bool map_entry) { + upb_msgdef_setmapentry(this, map_entry); +} +inline bool MessageDef::mapentry() const { + return upb_msgdef_mapentry(this); +} inline MessageDef::iterator MessageDef::begin() { return iterator(this); } inline MessageDef::iterator MessageDef::end() { return iterator::end(this); } inline MessageDef::const_iterator MessageDef::begin() const { -- cgit v1.2.3 From 80276ac0218f6d8fcdbad0fb09b233b31d2bc0fb Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 6 Jan 2015 18:01:32 -0800 Subject: Addressed code-review comments. --- ruby/ext/google/protobuf_c/defs.c | 11 ++++---- ruby/ext/google/protobuf_c/encode_decode.c | 42 ++++++++++++++---------------- ruby/ext/google/protobuf_c/map.c | 11 ++++++++ ruby/ext/google/protobuf_c/message.c | 2 +- ruby/ext/google/protobuf_c/protobuf.h | 9 +++++++ ruby/ext/google/protobuf_c/storage.c | 16 +++++++++--- 6 files changed, 59 insertions(+), 32 deletions(-) (limited to 'ruby/ext') diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index 5c94a74a..499e041b 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -1077,11 +1077,12 @@ VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) { * MessageBuilderContext.map(name, key_type, value_type, number, * value_type_class = nil) * - * Defines a new map field on this message type with the given key and value types, tag - * number, and type class (for message and enum value types). The key type must - * be :int32/:uint32/:int64/:uint64, :bool, or :string. The value type type must - * be a Ruby symbol (as accepted by FieldDescriptor#type=) and the type_class - * must be a string, if present (as accepted by FieldDescriptor#submsg_name=). + * Defines a new map field on this message type with the given key and value + * types, tag number, and type class (for message and enum value types). The key + * type must be :int32/:uint32/:int64/:uint64, :bool, or :string. The value type + * type must be a Ruby symbol (as accepted by FieldDescriptor#type=) and the + * type_class must be a string, if present (as accepted by + * FieldDescriptor#submsg_name=). */ VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) { DEFINE_SELF(MessageBuilderContext, self, _self); diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 6263edcc..f1b951fc 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -177,8 +177,8 @@ static void *submsg_handler(void *closure, const void *hd) { // Handler data for startmap/endmap handlers. typedef struct { size_t ofs; - const upb_fielddef* key_field; - const upb_fielddef* value_field; + upb_fieldtype_t key_field_type; + upb_fieldtype_t value_field_type; VALUE value_field_typeclass; } map_handlerdata_t; @@ -194,12 +194,6 @@ typedef struct { char value_storage[NATIVE_SLOT_MAX_SIZE]; } map_parse_frame_t; -// Handler to begin a sequence of map entries: simple no-op that exists only to -// set context for the map entry handlers. -static void *startmap_handler(void *closure, const void *hd) { - return closure; -} - // Handler to begin a map entry: allocates a temporary frame. This is the // 'startsubmsg' handler on the msgdef that contains the map field. static void *startmapentry_handler(void *closure, const void *hd) { @@ -210,10 +204,8 @@ static void *startmapentry_handler(void *closure, const void *hd) { map_parse_frame_t* frame = ALLOC(map_parse_frame_t); frame->map = map_rb; - native_slot_init(upb_fielddef_type(mapdata->key_field), - &frame->key_storage); - native_slot_init(upb_fielddef_type(mapdata->value_field), - &frame->value_storage); + native_slot_init(mapdata->key_field_type, &frame->key_storage); + native_slot_init(mapdata->value_field_type, &frame->value_storage); return frame; } @@ -225,10 +217,10 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) { const map_handlerdata_t* mapdata = hd; VALUE key = native_slot_get( - upb_fielddef_type(mapdata->key_field), Qnil, + mapdata->key_field_type, Qnil, &frame->key_storage); VALUE value = native_slot_get( - upb_fielddef_type(mapdata->value_field), mapdata->value_field_typeclass, + mapdata->value_field_type, mapdata->value_field_typeclass, &frame->value_storage); Map_index_set(frame->map, key, value); @@ -250,11 +242,15 @@ static map_handlerdata_t* new_map_handlerdata( map_handlerdata_t* hd = ALLOC(map_handlerdata_t); hd->ofs = ofs; - hd->key_field = upb_msgdef_itof(mapentry_def, 1); - assert(hd->key_field != NULL); - hd->value_field = upb_msgdef_itof(mapentry_def, 2); - assert(hd->value_field != NULL); - hd->value_field_typeclass = field_type_class(hd->value_field); + const upb_fielddef* key_field = upb_msgdef_itof(mapentry_def, + MAP_KEY_FIELD); + assert(key_field != NULL); + hd->key_field_type = upb_fielddef_type(key_field); + const upb_fielddef* value_field = upb_msgdef_itof(mapentry_def, + MAP_VALUE_FIELD); + assert(value_field != NULL); + hd->value_field_type = upb_fielddef_type(value_field); + hd->value_field_typeclass = field_type_class(value_field); return hd; } @@ -293,6 +289,7 @@ static void add_handlers_for_repeated_field(upb_handlers *h, appendbytes_handler : appendstr_handler, NULL); upb_handlers_setstring(h, f, stringdata_handler, NULL); + break; } case UPB_TYPE_MESSAGE: { upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; @@ -352,7 +349,6 @@ static void add_handlers_for_mapfield(upb_handlers* h, upb_handlers_addcleanup(h, hd, free); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&attr, hd); - upb_handlers_setstartseq(h, fielddef, startmap_handler, &attr); upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr); upb_handlerattr_uninit(&attr); } @@ -360,6 +356,8 @@ static void add_handlers_for_mapfield(upb_handlers* h, // Adds handlers to a map-entry msgdef. static void add_handlers_for_mapentry(const upb_msgdef* msgdef, upb_handlers* h) { + const upb_fielddef* key_field = map_entry_key(msgdef); + const upb_fielddef* value_field = map_entry_value(msgdef); map_handlerdata_t* hd = new_map_handlerdata(0, msgdef); upb_handlers_addcleanup(h, hd, free); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; @@ -367,7 +365,7 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef, upb_handlers_setendmsg(h, endmap_handler, &attr); add_handlers_for_singular_field( - h, hd->key_field, + h, key_field, // Convert the offset into map_parse_frame_t to an offset understood by the // singular field handlers, so that we don't have to use special // map-key/value-specific handlers. The ordinary singular field handlers expect @@ -375,7 +373,7 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef, // we compensate for that addition. offsetof(map_parse_frame_t, key_storage) - sizeof(MessageHeader)); add_handlers_for_singular_field( - h, hd->value_field, + h, value_field, offsetof(map_parse_frame_t, value_storage) - sizeof(MessageHeader)); } diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c index 9d5de9cd..56548799 100644 --- a/ruby/ext/google/protobuf_c/map.c +++ b/ruby/ext/google/protobuf_c/map.c @@ -32,6 +32,17 @@ // ----------------------------------------------------------------------------- // Basic map operations on top of upb's strtable. +// +// Note that we roll our own `Map` container here because, as for +// `RepeatedField`, we want a strongly-typed container. This is so that any user +// errors due to incorrect map key or value types are raised as close as +// possible to the error site, rather than at some deferred point (e.g., +// serialization). +// +// We build our `Map` on top of upb_strtable so that we're able to take +// advantage of the native_slot storage abstraction, as RepeatedField does. +// (This is not quite a perfect mapping -- see the key conversions below -- but +// gives us full support and error-checking for all value types for free.) // ----------------------------------------------------------------------------- // Map values are stored using the native_slot abstraction (as with repeated diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index de38dd7b..ee8881d4 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -142,7 +142,7 @@ int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { if (is_map_field(f)) { if (TYPE(val) != T_HASH) { rb_raise(rb_eArgError, - "Expected hashmap as initializer value for map field."); + "Expected Hash object as initializer value for map field."); } VALUE map = layout_get(self->descriptor->layout, Message_data(self), f); Map_merge_into_self(map, val); diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 91a97a68..6dac6029 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -268,10 +268,19 @@ extern rb_encoding* kRubyString8bitEncoding; VALUE field_type_class(const upb_fielddef* field); +#define MAP_KEY_FIELD 1 +#define MAP_VALUE_FIELD 2 + +// These operate on a map field (i.e., a repeated field of submessages whose +// submessage type is a map-entry msgdef). bool is_map_field(const upb_fielddef* field); const upb_fielddef* map_field_key(const upb_fielddef* field); const upb_fielddef* map_field_value(const upb_fielddef* field); +// These operate on a map-entry msgdef. +const upb_fielddef* map_entry_key(const upb_msgdef* msgdef); +const upb_fielddef* map_entry_value(const upb_msgdef* msgdef); + // ----------------------------------------------------------------------------- // Repeated field container type. // ----------------------------------------------------------------------------- diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index f20ddec2..235fbff2 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -339,15 +339,23 @@ bool is_map_field(const upb_fielddef* field) { const upb_fielddef* map_field_key(const upb_fielddef* field) { assert(is_map_field(field)); const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); - const upb_fielddef* key_field = upb_msgdef_itof(subdef, 1); - assert(key_field != NULL); - return key_field; + return map_entry_key(subdef); } const upb_fielddef* map_field_value(const upb_fielddef* field) { assert(is_map_field(field)); const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); - const upb_fielddef* value_field = upb_msgdef_itof(subdef, 2); + return map_entry_value(subdef); +} + +const upb_fielddef* map_entry_key(const upb_msgdef* msgdef) { + const upb_fielddef* key_field = upb_msgdef_itof(msgdef, MAP_KEY_FIELD); + assert(key_field != NULL); + return key_field; +} + +const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) { + const upb_fielddef* value_field = upb_msgdef_itof(msgdef, MAP_VALUE_FIELD); assert(value_field != NULL); return value_field; } -- cgit v1.2.3 From 4c92289766d76f276b322ab254ef039f670f41b1 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 9 Jan 2015 15:29:45 -0800 Subject: Addressed code-review comments. --- ruby/ext/google/protobuf_c/defs.c | 2 + ruby/ext/google/protobuf_c/encode_decode.c | 66 ++++++++--------- ruby/ext/google/protobuf_c/map.c | 111 +++++------------------------ ruby/ext/google/protobuf_c/protobuf.h | 8 ++- ruby/ext/google/protobuf_c/storage.c | 14 ++-- ruby/ext/google/protobuf_c/upb.c | 6 +- 6 files changed, 72 insertions(+), 135 deletions(-) (limited to 'ruby/ext') diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index 499e041b..a18aaac4 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -226,6 +226,7 @@ DEFINE_CLASS(Descriptor, "Google::Protobuf::Descriptor"); void Descriptor_mark(void* _self) { Descriptor* self = _self; rb_gc_mark(self->klass); + rb_gc_mark(self->typeclass_references); } void Descriptor_free(void* _self) { @@ -270,6 +271,7 @@ VALUE Descriptor_alloc(VALUE klass) { self->fill_method = NULL; self->pb_serialize_handlers = NULL; self->json_serialize_handlers = NULL; + self->typeclass_references = rb_ary_new(); return ret; } diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index f1b951fc..1cff9049 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -64,7 +64,7 @@ static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs, static void *startseq_handler(void* closure, const void* hd) { MessageHeader* msg = closure; const size_t *ofs = hd; - return (void*)DEREF(Message_data(msg), *ofs, VALUE); + return (void*)DEREF(msg, *ofs, VALUE); } // Handlers that append primitive values to a repeated field (a regular Ruby @@ -115,7 +115,7 @@ static void* str_handler(void *closure, const size_t *ofs = hd; VALUE str = rb_str_new2(""); rb_enc_associate(str, kRubyStringUtf8Encoding); - DEREF(Message_data(msg), *ofs, VALUE) = str; + DEREF(msg, *ofs, VALUE) = str; return (void*)str; } @@ -127,7 +127,7 @@ static void* bytes_handler(void *closure, const size_t *ofs = hd; VALUE str = rb_str_new2(""); rb_enc_associate(str, kRubyString8bitEncoding); - DEREF(Message_data(msg), *ofs, VALUE) = str; + DEREF(msg, *ofs, VALUE) = str; return (void*)str; } @@ -163,12 +163,12 @@ static void *submsg_handler(void *closure, const void *hd) { get_def_obj((void*)submsgdata->md); VALUE subklass = Descriptor_msgclass(subdesc); - if (DEREF(Message_data(msg), submsgdata->ofs, VALUE) == Qnil) { - DEREF(Message_data(msg), submsgdata->ofs, VALUE) = + if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) { + DEREF(msg, submsgdata->ofs, VALUE) = rb_class_new_instance(0, NULL, subklass); } - VALUE submsg_rb = DEREF(Message_data(msg), submsgdata->ofs, VALUE); + VALUE submsg_rb = DEREF(msg, submsgdata->ofs, VALUE); MessageHeader* submsg; TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg); return submsg; @@ -199,7 +199,7 @@ typedef struct { static void *startmapentry_handler(void *closure, const void *hd) { MessageHeader* msg = closure; const map_handlerdata_t* mapdata = hd; - VALUE map_rb = DEREF(Message_data(msg), mapdata->ofs, VALUE); + VALUE map_rb = DEREF(msg, mapdata->ofs, VALUE); map_parse_frame_t* frame = ALLOC(map_parse_frame_t); frame->map = map_rb; @@ -238,7 +238,8 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) { // pass the handlerdata down to the sub-message handler setup. static map_handlerdata_t* new_map_handlerdata( size_t ofs, - const upb_msgdef* mapentry_def) { + const upb_msgdef* mapentry_def, + Descriptor* desc) { map_handlerdata_t* hd = ALLOC(map_handlerdata_t); hd->ofs = ofs; @@ -252,6 +253,11 @@ static map_handlerdata_t* new_map_handlerdata( hd->value_field_type = upb_fielddef_type(value_field); hd->value_field_typeclass = field_type_class(value_field); + // Ensure that value_field_typeclass is properly GC-rooted. + if (hd->value_field_typeclass != Qnil) { + rb_ary_push(desc->typeclass_references, hd->value_field_typeclass); + } + return hd; } @@ -314,9 +320,7 @@ static void add_handlers_for_singular_field(upb_handlers *h, case UPB_TYPE_INT64: case UPB_TYPE_UINT64: case UPB_TYPE_DOUBLE: - // The shim writes directly at the given offset (instead of using - // DEREF()) so we need to add the msg overhead. - upb_shim_set(h, f, offset + sizeof(MessageHeader), -1); + upb_shim_set(h, f, offset, -1); break; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: { @@ -343,9 +347,10 @@ static void add_handlers_for_singular_field(upb_handlers *h, // Adds handlers to a map field. static void add_handlers_for_mapfield(upb_handlers* h, const upb_fielddef* fielddef, - size_t offset) { + size_t offset, + Descriptor* desc) { const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef); - map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef); + map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc); upb_handlers_addcleanup(h, hd, free); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&attr, hd); @@ -355,10 +360,11 @@ static void add_handlers_for_mapfield(upb_handlers* h, // Adds handlers to a map-entry msgdef. static void add_handlers_for_mapentry(const upb_msgdef* msgdef, - upb_handlers* h) { + upb_handlers* h, + Descriptor* desc) { const upb_fielddef* key_field = map_entry_key(msgdef); const upb_fielddef* value_field = map_entry_value(msgdef); - map_handlerdata_t* hd = new_map_handlerdata(0, msgdef); + map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc); upb_handlers_addcleanup(h, hd, free); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&attr, hd); @@ -366,15 +372,10 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef, add_handlers_for_singular_field( h, key_field, - // Convert the offset into map_parse_frame_t to an offset understood by the - // singular field handlers, so that we don't have to use special - // map-key/value-specific handlers. The ordinary singular field handlers expect - // a Message* and assume offset is relative to the data section at the end, so - // we compensate for that addition. - offsetof(map_parse_frame_t, key_storage) - sizeof(MessageHeader)); + offsetof(map_parse_frame_t, key_storage)); add_handlers_for_singular_field( h, value_field, - offsetof(map_parse_frame_t, value_storage) - sizeof(MessageHeader)); + offsetof(map_parse_frame_t, value_storage)); } static void add_handlers_for_message(const void *closure, upb_handlers *h) { @@ -384,7 +385,7 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) { // If this is a mapentry message type, set up a special set of handlers and // bail out of the normal (user-defined) message type handling. if (upb_msgdef_mapentry(msgdef)) { - add_handlers_for_mapentry(msgdef, h); + add_handlers_for_mapentry(msgdef, h, desc); return; } @@ -402,10 +403,11 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) { !upb_msg_done(&i); upb_msg_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - size_t offset = desc->layout->offsets[upb_fielddef_index(f)]; + size_t offset = desc->layout->offsets[upb_fielddef_index(f)] + + sizeof(MessageHeader); if (is_map_field(f)) { - add_handlers_for_mapfield(h, f, offset); + add_handlers_for_mapfield(h, f, offset, desc); } else if (upb_fielddef_isseq(f)) { add_handlers_for_repeated_field(h, f, offset); } else { @@ -796,38 +798,38 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc, MessageHeader* msg; TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - void* msg_data = Message_data(msg); upb_msg_iter i; for (upb_msg_begin(&i, desc->msgdef); !upb_msg_done(&i); upb_msg_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); - uint32_t offset = desc->layout->offsets[upb_fielddef_index(f)]; + uint32_t offset = + desc->layout->offsets[upb_fielddef_index(f)] + sizeof(MessageHeader); if (is_map_field(f)) { - VALUE map = DEREF(msg_data, offset, VALUE); + VALUE map = DEREF(msg, offset, VALUE); if (map != Qnil) { putmap(map, f, sink, depth); } } else if (upb_fielddef_isseq(f)) { - VALUE ary = DEREF(msg_data, offset, VALUE); + VALUE ary = DEREF(msg, offset, VALUE); if (ary != Qnil) { putary(ary, f, sink, depth); } } else if (upb_fielddef_isstring(f)) { - VALUE str = DEREF(msg_data, offset, VALUE); + VALUE str = DEREF(msg, offset, VALUE); if (RSTRING_LEN(str) > 0) { putstr(str, f, sink); } } else if (upb_fielddef_issubmsg(f)) { - putsubmsg(DEREF(msg_data, offset, VALUE), f, sink, depth); + putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth); } else { upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); #define T(upbtypeconst, upbtype, ctype, default_value) \ case upbtypeconst: { \ - ctype value = DEREF(msg_data, offset, ctype); \ + ctype value = DEREF(msg, offset, ctype); \ if (value != default_value) { \ upb_sink_put##upbtype(sink, sel, value); \ } \ diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c index 56548799..2c2ae240 100644 --- a/ruby/ext/google/protobuf_c/map.c +++ b/ruby/ext/google/protobuf_c/map.c @@ -49,8 +49,14 @@ // field values), but keys are a bit special. Since we use a strtable, we need // to store keys as sequences of bytes such that equality of those bytes maps // one-to-one to equality of keys. We store strings directly (i.e., they map to -// their own bytes) and integers as sequences of either 4 or 8 bytes in -// host-byte-order as either a uint32_t or a uint64_t. +// their own bytes) and integers as native integers (using the native_slot +// abstraction). + +// Note that there is another tradeoff here in keeping string keys as native +// strings rather than Ruby strings: traversing the Map requires conversion to +// Ruby string values on every traversal, potentially creating more garbage. We +// should consider ways to cache a Ruby version of the key if this becomes an +// issue later. // Forms a key to use with the underlying strtable from a Ruby key value. |buf| // must point to TABLE_KEY_BUF_LENGTH bytes of temporary space, used to @@ -73,61 +79,13 @@ static void table_key(Map* self, VALUE key, case UPB_TYPE_BOOL: case UPB_TYPE_INT32: - case UPB_TYPE_INT64: { - // Signed numeric types: use an int64 in host-native byte order. - int64_t key_val = 0; - - // Do a range/value check. - switch (self->key_type) { - case UPB_TYPE_BOOL: - if (key != Qtrue && key != Qfalse) { - rb_raise(rb_eTypeError, "Key must be true or false"); - } - key_val = (key == Qtrue) ? 1 : 0; - break; - case UPB_TYPE_INT32: - native_slot_check_int_range_precision(self->key_type, key); - key_val = NUM2INT(key); - break; - case UPB_TYPE_INT64: - native_slot_check_int_range_precision(self->key_type, key); - key_val = NUM2LL(key); - break; - default: - break; - } - - int64_t* int64_key = (int64_t*)buf; - *int64_key = key_val; - *out_key = buf; - *out_length = sizeof(int64_t); - break; - } - + case UPB_TYPE_INT64: case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: { - // Unsigned numeric types: use a uint64 in host-native byte order. - uint64_t key_val = 0; - - // Do a range/value check. - native_slot_check_int_range_precision(self->key_type, key); - switch (self->key_type) { - case UPB_TYPE_UINT32: - key_val = NUM2UINT(key); - break; - case UPB_TYPE_UINT64: - key_val = NUM2ULL(key); - break; - default: - break; - } - - uint64_t* uint64_key = (uint64_t*)buf; - *uint64_key = key_val; + case UPB_TYPE_UINT64: + native_slot_set(self->key_type, Qnil, buf, key); *out_key = buf; - *out_length = sizeof(uint64_t); + *out_length = native_slot_size(self->key_type); break; - } default: // Map constructor should not allow a Map with another key type to be @@ -148,50 +106,16 @@ static VALUE table_key_to_ruby(Map* self, const char* buf, size_t length) { return ret; } - case UPB_TYPE_BOOL: - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: { - assert(length == sizeof(int64_t)); - int64_t* int64_key = (int64_t*)buf; - - if (self->key_type == UPB_TYPE_BOOL) { - return *int64_key ? Qtrue : Qfalse; - } else { - return LL2NUM(*int64_key); - } - } - - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: { - assert(length == sizeof(uint64_t)); - uint64_t* uint64_key = (uint64_t*)buf; - return ULL2NUM(*uint64_key); - } - - default: - assert(false); - return Qnil; - } -} - -static upb_ctype_t upb_table_value_type(upb_fieldtype_t value_type) { - switch (value_type) { case UPB_TYPE_BOOL: case UPB_TYPE_INT32: case UPB_TYPE_INT64: case UPB_TYPE_UINT32: case UPB_TYPE_UINT64: - case UPB_TYPE_ENUM: - case UPB_TYPE_FLOAT: - case UPB_TYPE_DOUBLE: - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - case UPB_TYPE_MESSAGE: - return UPB_CTYPE_UINT64; + return native_slot_get(self->key_type, Qnil, buf); default: assert(false); - return 0; + return Qnil; } } @@ -321,7 +245,9 @@ VALUE Map_init(int argc, VALUE* argv, VALUE _self) { init_value_arg = 3; } - if (!upb_strtable_init(&self->table, upb_table_value_type(self->value_type))) { + // Table value type is always UINT64: this ensures enough space to store the + // native_slot value. + if (!upb_strtable_init(&self->table, UPB_CTYPE_UINT64)) { rb_raise(rb_eRuntimeError, "Could not allocate table."); } @@ -528,8 +454,7 @@ VALUE Map_clear(VALUE _self) { // Uninit and reinit the table -- this is faster than iterating and doing a // delete-lookup on each key. upb_strtable_uninit(&self->table); - if (!upb_strtable_init(&self->table, - upb_table_value_type(self->value_type))) { + if (!upb_strtable_init(&self->table, UPB_CTYPE_INT64)) { rb_raise(rb_eRuntimeError, "Unable to re-initialize table"); } return Qnil; diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 6dac6029..88ae62e4 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -110,6 +110,10 @@ struct Descriptor { const upb_pbdecodermethod* fill_method; const upb_handlers* pb_serialize_handlers; const upb_handlers* json_serialize_handlers; + // Handlers hold type class references for sub-message fields directly in some + // cases. We need to keep these rooted because they might otherwise be + // collected. + VALUE typeclass_references; }; struct FieldDescriptor { @@ -252,7 +256,7 @@ void native_slot_set(upb_fieldtype_t type, VALUE value); VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, - void* memory); + const void* memory); void native_slot_init(upb_fieldtype_t type, void* memory); void native_slot_mark(upb_fieldtype_t type, void* memory); void native_slot_dup(upb_fieldtype_t type, void* to, void* from); @@ -389,7 +393,7 @@ struct MessageLayout { MessageLayout* create_layout(const upb_msgdef* msgdef); void free_layout(MessageLayout* layout); VALUE layout_get(MessageLayout* layout, - void* storage, + const void* storage, const upb_fielddef* field); void layout_set(MessageLayout* layout, void* storage, diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index 235fbff2..14f49d44 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -200,7 +200,9 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class, } } -VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) { +VALUE native_slot_get(upb_fieldtype_t type, + VALUE type_class, + const void* memory) { switch (type) { case UPB_TYPE_FLOAT: return DBL2NUM(DEREF(memory, float)); @@ -211,7 +213,7 @@ VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) { case UPB_TYPE_STRING: case UPB_TYPE_BYTES: case UPB_TYPE_MESSAGE: - return *((VALUE *)memory); + return DEREF(memory, VALUE); case UPB_TYPE_ENUM: { int32_t val = DEREF(memory, int32_t); VALUE symbol = enum_lookup(type_class, INT2NUM(val)); @@ -332,19 +334,19 @@ bool is_map_field(const upb_fielddef* field) { upb_fielddef_type(field) != UPB_TYPE_MESSAGE) { return false; } - const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_msgdef* subdef = upb_fielddef_msgsubdef(field); return upb_msgdef_mapentry(subdef); } const upb_fielddef* map_field_key(const upb_fielddef* field) { assert(is_map_field(field)); - const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_msgdef* subdef = upb_fielddef_msgsubdef(field); return map_entry_key(subdef); } const upb_fielddef* map_field_value(const upb_fielddef* field) { assert(is_map_field(field)); - const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_msgdef* subdef = upb_fielddef_msgsubdef(field); return map_entry_value(subdef); } @@ -414,7 +416,7 @@ VALUE field_type_class(const upb_fielddef* field) { } VALUE layout_get(MessageLayout* layout, - void* storage, + const void* storage, const upb_fielddef* field) { void* memory = ((uint8_t *)storage) + layout->offsets[upb_fielddef_index(field)]; diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c index 0015aad1..abea7711 100644 --- a/ruby/ext/google/protobuf_c/upb.c +++ b/ruby/ext/google/protobuf_c/upb.c @@ -3420,8 +3420,10 @@ char *upb_strdup2(const char *s, size_t len) { // have a null-terminating byte since it may be a raw binary buffer. size_t n = len + 1; char *p = malloc(n); - if (p) memcpy(p, s, len); - p[len] = 0; + if (p) { + memcpy(p, s, len); + p[len] = 0; + } return p; } -- cgit v1.2.3 From 97b663a8be6764a3489d7150f77503ecaf40df5f Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 9 Jan 2015 16:15:22 -0800 Subject: Update upb amalgamation. --- ruby/ext/google/protobuf_c/upb.c | 1122 +++++++++++++++++++++++++------------- ruby/ext/google/protobuf_c/upb.h | 25 +- 2 files changed, 747 insertions(+), 400 deletions(-) (limited to 'ruby/ext') diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c index abea7711..571c809f 100644 --- a/ruby/ext/google/protobuf_c/upb.c +++ b/ruby/ext/google/protobuf_c/upb.c @@ -3416,6 +3416,8 @@ char *upb_strdup(const char *s) { } char *upb_strdup2(const char *s, size_t len) { + // Prevent overflow errors. + if (len == SIZE_MAX) return NULL; // Always null-terminate, even if binary data; but don't rely on the input to // have a null-terminating byte since it may be a raw binary buffer. size_t n = len + 1; @@ -4230,8 +4232,10 @@ static void nullz(upb_status *status) { } void upb_status_clear(upb_status *status) { - upb_status blank = UPB_STATUS_INIT; - upb_status_copy(status, &blank); + if (!status) return; + status->ok_ = true; + status->code_ = 0; + status->msg[0] = '\0'; } bool upb_ok(const upb_status *status) { return status->ok_; } @@ -5998,6 +6002,7 @@ static void putop(compiler *c, opcode op, ...) { case OP_SETDELIM: case OP_HALT: case OP_RET: + case OP_DISPATCH: put32(c, op); break; case OP_PARSE_DOUBLE: @@ -6078,7 +6083,7 @@ const char *upb_pbdecoder_getopname(unsigned int op) { OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET), OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM), OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP), - OP(SETBIGGROUPNUM), OP(HALT), + OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT), }; return op > OP_HALT ? names[0] : names[op]; #undef OP @@ -6110,6 +6115,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { upb_handlers_msgdef(method->dest_handlers_))); break; } + case OP_DISPATCH: case OP_STARTMSG: case OP_ENDMSG: case OP_PUSHLENDELIM: @@ -6455,6 +6461,7 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { putop(c, OP_SETDISPATCH, &method->dispatch); putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h); label(c, LABEL_FIELD); + uint32_t* start_pc = c->pc; upb_msg_iter i; for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); @@ -6470,8 +6477,18 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { } } + // If there were no fields, or if no handlers were defined, we need to + // generate a non-empty loop body so that we can at least dispatch for unknown + // fields and check for the end of the message. + if (c->pc == start_pc) { + // Check for end-of-message. + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + // Unconditionally dispatch. + putop(c, OP_DISPATCH, 0); + } + // For now we just loop back to the last field of the message (or if none, - // the DISPATCH opcode for the message. + // the DISPATCH opcode for the message). putop(c, OP_BRANCH, -LABEL_FIELD); // Insert both a label and a dispatch table entry for this end-of-msg. @@ -7455,6 +7472,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, if (result == DECODE_MISMATCH) goto badtag; if (result >= 0) return result; }) + VMCASE(OP_DISPATCH, { + CHECK_RETURN(dispatch(d)); + }) VMCASE(OP_HALT, { return size; }) @@ -7513,7 +7533,8 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) { // Rewind from OP_TAG* to OP_CHECKDELIM. assert(getop(*d->pc) == OP_TAG1 || getop(*d->pc) == OP_TAG2 || - getop(*d->pc) == OP_TAGN); + getop(*d->pc) == OP_TAGN || + getop(*d->pc == OP_DISPATCH)); d->pc = p; } upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL); @@ -8648,6 +8669,9 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) { #define PARSER_CHECK_RETURN(x) if (!(x)) return false +// Used to signal that a capture has been suspended. +static char suspend_capture; + static upb_selector_t getsel_for_handlertype(upb_json_parser *p, upb_handlertype_t type) { upb_selector_t sel; @@ -8661,41 +8685,6 @@ static upb_selector_t parser_getsel(upb_json_parser *p) { p, upb_handlers_getprimitivehandlertype(p->top->f)); } -static void start_member(upb_json_parser *p) { - assert(!p->top->f); - assert(!p->accumulated); - p->accumulated_len = 0; -} - -static bool end_member(upb_json_parser *p) { - // TODO(haberman): support keys that span buffers or have escape sequences. - assert(!p->top->f); - assert(p->accumulated); - const upb_fielddef *f = - upb_msgdef_ntof(p->top->m, p->accumulated, p->accumulated_len); - - if (!f) { - // TODO(haberman): Ignore unknown fields if requested/configured to do so. - upb_status_seterrf(p->status, "No such field: %.*s\n", - (int)p->accumulated_len, p->accumulated); - return false; - } - - p->top->f = f; - p->accumulated = NULL; - - return true; -} - -static void start_object(upb_json_parser *p) { - upb_sink_startmsg(&p->top->sink); -} - -static void end_object(upb_json_parser *p) { - upb_status status; - upb_sink_endmsg(&p->top->sink, &status); -} - static bool check_stack(upb_json_parser *p) { if ((p->top + 1) == p->limit) { upb_status_seterrmsg(p->status, "Nesting too deep"); @@ -8705,83 +8694,28 @@ static bool check_stack(upb_json_parser *p) { return true; } -static bool start_subobject(upb_json_parser *p) { - assert(p->top->f); - - if (!upb_fielddef_issubmsg(p->top->f)) { - upb_status_seterrf(p->status, - "Object specified for non-message/group field: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; - - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->f = NULL; - p->top = inner; +// There are GCC/Clang built-ins for overflow checking which we could start +// using if there was any performance benefit to it. +static bool checked_add(size_t a, size_t b, size_t *c) { + if (SIZE_MAX - a < b) return false; + *c = a + b; return true; } -static void end_subobject(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(&p->top->sink, sel); -} - -static bool start_array(upb_json_parser *p) { - assert(p->top->f); - - if (!upb_fielddef_isseq(p->top->f)) { - upb_status_seterrf(p->status, - "Array specified for non-repeated field: %s", - upb_fielddef_name(p->top->f)); - return false; +static size_t saturating_multiply(size_t a, size_t b) { + // size_t is unsigned, so this is defined behavior even on overflow. + size_t ret = a * b; + if (b != 0 && ret / b != a) { + ret = SIZE_MAX; } - - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); - upb_sink_startseq(&p->top->sink, sel, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - return true; + return ret; } -static void end_array(upb_json_parser *p) { - assert(p->top > p->stack); - - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); - upb_sink_endseq(&p->top->sink, sel); -} -static void clear_member(upb_json_parser *p) { p->top->f = NULL; } +/* Base64 decoding ************************************************************/ -static bool parser_putbool(upb_json_parser *p, bool val) { - if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { - upb_status_seterrf(p->status, - "Boolean value specified for non-bool field: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); - UPB_ASSERT_VAR(ok, ok); - return true; -} - -static void start_text(upb_json_parser *p, const char *ptr) { - p->text_begin = ptr; -} +// TODO(haberman): make this streaming. static const signed char b64table[] = { -1, -1, -1, -1, -1, -1, -1, -1, @@ -8901,89 +8835,326 @@ badpadding: return false; } -static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) { - assert(!p->accumulated); // TODO: handle this case. - p->accumulated = p->text_begin; - p->accumulated_len = ptr - p->text_begin; - if (p->top->f && upb_fielddef_isstring(p->top->f)) { - // This is a string field (as opposed to a member name). - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - if (upb_fielddef_type(p->top->f) == UPB_TYPE_BYTES) { - PARSER_CHECK_RETURN(base64_push(p, sel, p->accumulated, - p->accumulated_len)); - } else { - upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); - } - p->accumulated = NULL; - } else if (p->top->f && - upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM && - !is_num) { - - // Enum case: resolve enum symbolic name to integer value. - const upb_enumdef *enumdef = - (const upb_enumdef*)upb_fielddef_subdef(p->top->f); - - int32_t int_val = 0; - if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len, - &int_val)) { - upb_selector_t sel = parser_getsel(p); - upb_sink_putint32(&p->top->sink, sel, int_val); - } else { - upb_status_seterrmsg(p->status, "Enum value name unknown"); +/* Accumulate buffer **********************************************************/ + +// Functionality for accumulating a buffer. +// +// Some parts of the parser need an entire value as a contiguous string. For +// example, to look up a member name in a hash table, or to turn a string into +// a number, the relevant library routines need the input string to be in +// contiguous memory, even if the value spanned two or more buffers in the +// input. These routines handle that. +// +// In the common case we can just point to the input buffer to get this +// contiguous string and avoid any actual copy. So we optimistically begin +// this way. But there are a few cases where we must instead copy into a +// separate buffer: +// +// 1. The string was not contiguous in the input (it spanned buffers). +// +// 2. The string included escape sequences that need to be interpreted to get +// the true value in a contiguous buffer. + +static void assert_accumulate_empty(upb_json_parser *p) { + assert(p->accumulated == NULL); + assert(p->accumulated_len == 0); +} + +static void accumulate_clear(upb_json_parser *p) { + p->accumulated = NULL; + p->accumulated_len = 0; +} + +// Used internally by accumulate_append(). +static bool accumulate_realloc(upb_json_parser *p, size_t need) { + size_t new_size = UPB_MAX(p->accumulate_buf_size, 128); + while (new_size < need) { + new_size = saturating_multiply(new_size, 2); + } + + void *mem = realloc(p->accumulate_buf, new_size); + if (!mem) { + upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); + return false; + } + + p->accumulate_buf = mem; + p->accumulate_buf_size = new_size; + return true; +} + +// Logically appends the given data to the append buffer. +// If "can_alias" is true, we will try to avoid actually copying, but the buffer +// must be valid until the next accumulate_append() call (if any). +static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, + bool can_alias) { + if (!p->accumulated && can_alias) { + p->accumulated = buf; + p->accumulated_len = len; + return true; + } + + size_t need; + if (!checked_add(p->accumulated_len, len, &need)) { + upb_status_seterrmsg(p->status, "Integer overflow."); + return false; + } + + if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { + return false; + } + + if (p->accumulated != p->accumulate_buf) { + memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); + p->accumulated = p->accumulate_buf; + } + + memcpy(p->accumulate_buf + p->accumulated_len, buf, len); + p->accumulated_len += len; + return true; +} + +// Returns a pointer to the data accumulated since the last accumulate_clear() +// call, and writes the length to *len. This with point either to the input +// buffer or a temporary accumulate buffer. +static const char *accumulate_getptr(upb_json_parser *p, size_t *len) { + assert(p->accumulated); + *len = p->accumulated_len; + return p->accumulated; +} + + +/* Mult-part text data ********************************************************/ + +// When we have text data in the input, it can often come in multiple segments. +// For example, there may be some raw string data followed by an escape +// sequence. The two segments are processed with different logic. Also buffer +// seams in the input can cause multiple segments. +// +// As we see segments, there are two main cases for how we want to process them: +// +// 1. we want to push the captured input directly to string handlers. +// +// 2. we need to accumulate all the parts into a contiguous buffer for further +// processing (field name lookup, string->number conversion, etc). + +// This is the set of states for p->multipart_state. +enum { + // We are not currently processing multipart data. + MULTIPART_INACTIVE = 0, + + // We are processing multipart data by accumulating it into a contiguous + // buffer. + MULTIPART_ACCUMULATE = 1, + + // We are processing multipart data by pushing each part directly to the + // current string handlers. + MULTIPART_PUSHEAGERLY = 2 +}; + +// Start a multi-part text value where we accumulate the data for processing at +// the end. +static void multipart_startaccum(upb_json_parser *p) { + assert_accumulate_empty(p); + assert(p->multipart_state == MULTIPART_INACTIVE); + p->multipart_state = MULTIPART_ACCUMULATE; +} + +// Start a multi-part text value where we immediately push text data to a string +// value with the given selector. +static void multipart_start(upb_json_parser *p, upb_selector_t sel) { + assert_accumulate_empty(p); + assert(p->multipart_state == MULTIPART_INACTIVE); + p->multipart_state = MULTIPART_PUSHEAGERLY; + p->string_selector = sel; +} + +static bool multipart_text(upb_json_parser *p, const char *buf, size_t len, + bool can_alias) { + switch (p->multipart_state) { + case MULTIPART_INACTIVE: + upb_status_seterrmsg( + p->status, "Internal error: unexpected state MULTIPART_INACTIVE"); return false; + + case MULTIPART_ACCUMULATE: + if (!accumulate_append(p, buf, len, can_alias)) { + return false; + } + break; + + case MULTIPART_PUSHEAGERLY: { + const upb_bufhandle *handle = can_alias ? p->handle : NULL; + upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle); + break; } - p->accumulated = NULL; } return true; } -static bool start_stringval(upb_json_parser *p) { - assert(p->top->f); +// Note: this invalidates the accumulate buffer! Call only after reading its +// contents. +static void multipart_end(upb_json_parser *p) { + assert(p->multipart_state != MULTIPART_INACTIVE); + p->multipart_state = MULTIPART_INACTIVE; + accumulate_clear(p); +} - if (upb_fielddef_isstring(p->top->f)) { - if (!check_stack(p)) return false; - // Start a new parser frame: parser frames correspond one-to-one with - // handler frames, and string events occur in a sub-frame. - upb_jsonparser_frame *inner = p->top + 1; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; +/* Input capture **************************************************************/ - return true; - } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { - // Do nothing -- symbolic enum names in quotes remain in the - // current parser frame. +// Functionality for capturing a region of the input as text. Gracefully +// handles the case where a buffer seam occurs in the middle of the captured +// region. + +static void capture_begin(upb_json_parser *p, const char *ptr) { + assert(p->multipart_state != MULTIPART_INACTIVE); + assert(p->capture == NULL); + p->capture = ptr; +} + +static bool capture_end(upb_json_parser *p, const char *ptr) { + assert(p->capture); + if (multipart_text(p, p->capture, ptr - p->capture, true)) { + p->capture = NULL; return true; } else { - upb_status_seterrf(p->status, - "String specified for non-string/non-enum field: %s", - upb_fielddef_name(p->top->f)); return false; } +} + +// This is called at the end of each input buffer (ie. when we have hit a +// buffer seam). If we are in the middle of capturing the input, this +// processes the unprocessed capture region. +static void capture_suspend(upb_json_parser *p, const char **ptr) { + if (!p->capture) return; + + if (multipart_text(p, p->capture, *ptr - p->capture, false)) { + // We use this as a signal that we were in the middle of capturing, and + // that capturing should resume at the beginning of the next buffer. + // + // We can't use *ptr here, because we have no guarantee that this pointer + // will be valid when we resume (if the underlying memory is freed, then + // using the pointer at all, even to compare to NULL, is likely undefined + // behavior). + p->capture = &suspend_capture; + } else { + // Need to back up the pointer to the beginning of the capture, since + // we were not able to actually preserve it. + *ptr = p->capture; + } +} +static void capture_resume(upb_json_parser *p, const char *ptr) { + if (p->capture) { + assert(p->capture == &suspend_capture); + p->capture = ptr; + } } -static void end_stringval(upb_json_parser *p) { - if (upb_fielddef_isstring(p->top->f)) { - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(&p->top->sink, sel); - p->top--; + +/* Callbacks from the parser **************************************************/ + +// These are the functions called directly from the parser itself. +// We define these in the same order as their declarations in the parser. + +static char escape_char(char in) { + switch (in) { + case 'r': return '\r'; + case 't': return '\t'; + case 'n': return '\n'; + case 'f': return '\f'; + case 'b': return '\b'; + case '/': return '/'; + case '"': return '"'; + case '\\': return '\\'; + default: + assert(0); + return 'x'; } } +static bool escape(upb_json_parser *p, const char *ptr) { + char ch = escape_char(*ptr); + return multipart_text(p, &ch, 1, false); +} + +static void start_hex(upb_json_parser *p) { + p->digit = 0; +} + +static void hexdigit(upb_json_parser *p, const char *ptr) { + char ch = *ptr; + + p->digit <<= 4; + + if (ch >= '0' && ch <= '9') { + p->digit += (ch - '0'); + } else if (ch >= 'a' && ch <= 'f') { + p->digit += ((ch - 'a') + 10); + } else { + assert(ch >= 'A' && ch <= 'F'); + p->digit += ((ch - 'A') + 10); + } +} + +static bool end_hex(upb_json_parser *p) { + uint32_t codepoint = p->digit; + + // emit the codepoint as UTF-8. + char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. + int length = 0; + if (codepoint <= 0x7F) { + utf8[0] = codepoint; + length = 1; + } else if (codepoint <= 0x07FF) { + utf8[1] = (codepoint & 0x3F) | 0x80; + codepoint >>= 6; + utf8[0] = (codepoint & 0x1F) | 0xC0; + length = 2; + } else /* codepoint <= 0xFFFF */ { + utf8[2] = (codepoint & 0x3F) | 0x80; + codepoint >>= 6; + utf8[1] = (codepoint & 0x3F) | 0x80; + codepoint >>= 6; + utf8[0] = (codepoint & 0x0F) | 0xE0; + length = 3; + } + // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate + // we have to wait for the next escape to get the full code point). + + return multipart_text(p, utf8, length, false); +} + +static void start_text(upb_json_parser *p, const char *ptr) { + capture_begin(p, ptr); +} + +static bool end_text(upb_json_parser *p, const char *ptr) { + return capture_end(p, ptr); +} + static void start_number(upb_json_parser *p, const char *ptr) { - start_text(p, ptr); - assert(p->accumulated == NULL); + multipart_startaccum(p); + capture_begin(p, ptr); } -static void end_number(upb_json_parser *p, const char *ptr) { - end_text(p, ptr, true); - const char *myend = p->accumulated + p->accumulated_len; +static bool end_number(upb_json_parser *p, const char *ptr) { + if (!capture_end(p, ptr)) { + return false; + } + + // strtol() and friends unfortunately do not support specifying the length of + // the input string, so we need to force a copy into a NULL-terminated buffer. + if (!multipart_text(p, "\0", 1, false)) { + return false; + } + + size_t len; + const char *buf = accumulate_getptr(p, &len); + const char *myend = buf + len - 1; // One for NULL. char *end; switch (upb_fielddef_type(p->top->f)) { @@ -8991,7 +9162,7 @@ static void end_number(upb_json_parser *p, const char *ptr) { case UPB_TYPE_INT32: { long val = strtol(p->accumulated, &end, 0); if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend) - assert(false); + goto err; else upb_sink_putint32(&p->top->sink, parser_getsel(p), val); break; @@ -8999,7 +9170,7 @@ static void end_number(upb_json_parser *p, const char *ptr) { case UPB_TYPE_INT64: { long long val = strtoll(p->accumulated, &end, 0); if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend) - assert(false); + goto err; else upb_sink_putint64(&p->top->sink, parser_getsel(p), val); break; @@ -9007,7 +9178,7 @@ static void end_number(upb_json_parser *p, const char *ptr) { case UPB_TYPE_UINT32: { unsigned long val = strtoul(p->accumulated, &end, 0); if (val > UINT32_MAX || errno == ERANGE || end != myend) - assert(false); + goto err; else upb_sink_putuint32(&p->top->sink, parser_getsel(p), val); break; @@ -9015,7 +9186,7 @@ static void end_number(upb_json_parser *p, const char *ptr) { case UPB_TYPE_UINT64: { unsigned long long val = strtoull(p->accumulated, &end, 0); if (val > UINT64_MAX || errno == ERANGE || end != myend) - assert(false); + goto err; else upb_sink_putuint64(&p->top->sink, parser_getsel(p), val); break; @@ -9023,7 +9194,7 @@ static void end_number(upb_json_parser *p, const char *ptr) { case UPB_TYPE_DOUBLE: { double val = strtod(p->accumulated, &end); if (errno == ERANGE || end != myend) - assert(false); + goto err; else upb_sink_putdouble(&p->top->sink, parser_getsel(p), val); break; @@ -9031,7 +9202,7 @@ static void end_number(upb_json_parser *p, const char *ptr) { case UPB_TYPE_FLOAT: { float val = strtof(p->accumulated, &end); if (errno == ERANGE || end != myend) - assert(false); + goto err; else upb_sink_putfloat(&p->top->sink, parser_getsel(p), val); break; @@ -9040,230 +9211,380 @@ static void end_number(upb_json_parser *p, const char *ptr) { assert(false); } - p->accumulated = NULL; + multipart_end(p); + return true; + +err: + upb_status_seterrf(p->status, "error parsing number: %s", buf); + multipart_end(p); + return false; } -static char escape_char(char in) { - switch (in) { - case 'r': return '\r'; - case 't': return '\t'; - case 'n': return '\n'; - case 'f': return '\f'; - case 'b': return '\b'; - case '/': return '/'; - case '"': return '"'; - case '\\': return '\\'; +static bool parser_putbool(upb_json_parser *p, bool val) { + if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { + upb_status_seterrf(p->status, + "Boolean value specified for non-bool field: %s", + upb_fielddef_name(p->top->f)); + return false; + } + + bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); + UPB_ASSERT_VAR(ok, ok); + return true; +} + +static bool start_stringval(upb_json_parser *p) { + assert(p->top->f); + + if (upb_fielddef_isstring(p->top->f)) { + if (!check_stack(p)) return false; + + // Start a new parser frame: parser frames correspond one-to-one with + // handler frames, and string events occur in a sub-frame. + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); + inner->m = p->top->m; + inner->f = p->top->f; + p->top = inner; + + if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { + // For STRING fields we push data directly to the handlers as it is + // parsed. We don't do this yet for BYTES fields, because our base64 + // decoder is not streaming. + // + // TODO(haberman): make base64 decoding streaming also. + multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING)); + return true; + } else { + multipart_startaccum(p); + return true; + } + } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { + // No need to push a frame -- symbolic enum names in quotes remain in the + // current parser frame. + // + // Enum string values must accumulate so we can look up the value in a table + // once it is complete. + multipart_startaccum(p); + return true; + } else { + upb_status_seterrf(p->status, + "String specified for non-string/non-enum field: %s", + upb_fielddef_name(p->top->f)); + return false; + } +} + +static bool end_stringval(upb_json_parser *p) { + bool ok = true; + + switch (upb_fielddef_type(p->top->f)) { + case UPB_TYPE_BYTES: + if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), + p->accumulated, p->accumulated_len)) { + return false; + } + // Fall through. + + case UPB_TYPE_STRING: { + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&p->top->sink, sel); + p->top--; + break; + } + + case UPB_TYPE_ENUM: { + // Resolve enum symbolic name to integer value. + const upb_enumdef *enumdef = + (const upb_enumdef*)upb_fielddef_subdef(p->top->f); + + size_t len; + const char *buf = accumulate_getptr(p, &len); + + int32_t int_val = 0; + ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val); + + if (ok) { + upb_selector_t sel = parser_getsel(p); + upb_sink_putint32(&p->top->sink, sel, int_val); + } else { + upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", len, buf); + } + + break; + } + default: - assert(0); - return 'x'; + assert(false); + upb_status_seterrmsg(p->status, "Internal error in JSON decoder"); + ok = false; + break; } + + multipart_end(p); + return ok; } -static void escape(upb_json_parser *p, const char *ptr) { - char ch = escape_char(*ptr); - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(&p->top->sink, sel, &ch, 1, NULL); +static void start_member(upb_json_parser *p) { + assert(!p->top->f); + multipart_startaccum(p); } -static uint8_t hexdigit(char ch) { - if (ch >= '0' && ch <= '9') { - return ch - '0'; - } else if (ch >= 'a' && ch <= 'f') { - return ch - 'a' + 10; - } else { - assert(ch >= 'A' && ch <= 'F'); - return ch - 'A' + 10; +static bool end_member(upb_json_parser *p) { + assert(!p->top->f); + size_t len; + const char *buf = accumulate_getptr(p, &len); + + const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + + if (!f) { + // TODO(haberman): Ignore unknown fields if requested/configured to do so. + upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + return false; + } + + p->top->f = f; + multipart_end(p); + + return true; +} + +static void clear_member(upb_json_parser *p) { p->top->f = NULL; } + +static bool start_subobject(upb_json_parser *p) { + assert(p->top->f); + + if (!upb_fielddef_issubmsg(p->top->f)) { + upb_status_seterrf(p->status, + "Object specified for non-message/group field: %s", + upb_fielddef_name(p->top->f)); + return false; } + + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->f = NULL; + p->top = inner; + + return true; } -static void start_hex(upb_json_parser *p, const char *ptr) { - start_text(p, ptr); +static void end_subobject(upb_json_parser *p) { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + upb_sink_endsubmsg(&p->top->sink, sel); } -static void hex(upb_json_parser *p, const char *end) { - const char *start = p->text_begin; - UPB_ASSERT_VAR(end, end - start == 4); - uint16_t codepoint = - (hexdigit(start[0]) << 12) | - (hexdigit(start[1]) << 8) | - (hexdigit(start[2]) << 4) | - hexdigit(start[3]); - // emit the codepoint as UTF-8. - char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. - int length = 0; - if (codepoint <= 0x7F) { - utf8[0] = codepoint; - length = 1; - } else if (codepoint <= 0x07FF) { - utf8[1] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[0] = (codepoint & 0x1F) | 0xC0; - length = 2; - } else /* codepoint <= 0xFFFF */ { - utf8[2] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[1] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[0] = (codepoint & 0x0F) | 0xE0; - length = 3; +static bool start_array(upb_json_parser *p) { + assert(p->top->f); + + if (!upb_fielddef_isseq(p->top->f)) { + upb_status_seterrf(p->status, + "Array specified for non-repeated field: %s", + upb_fielddef_name(p->top->f)); + return false; } - // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate - // we have to wait for the next escape to get the full code point). - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(&p->top->sink, sel, utf8, length, NULL); + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); + upb_sink_startseq(&p->top->sink, sel, &inner->sink); + inner->m = p->top->m; + inner->f = p->top->f; + p->top = inner; + + return true; +} + +static void end_array(upb_json_parser *p) { + assert(p->top > p->stack); + + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); + upb_sink_endseq(&p->top->sink, sel); +} + +static void start_object(upb_json_parser *p) { + upb_sink_startmsg(&p->top->sink); +} + +static void end_object(upb_json_parser *p) { + upb_status status; + upb_sink_endmsg(&p->top->sink, &status); } + #define CHECK_RETURN_TOP(x) if (!(x)) goto error + +/* The actual parser **********************************************************/ + // What follows is the Ragel parser itself. The language is specified in Ragel // and the actions call our C functions above. +// +// Ragel has an extensive set of functionality, and we use only a small part of +// it. There are many action types but we only use a few: +// +// ">" -- transition into a machine +// "%" -- transition out of a machine +// "@" -- transition into a final state of a machine. +// +// "@" transitions are tricky because a machine can transition into a final +// state repeatedly. But in some cases we know this can't happen, for example +// a string which is delimited by a final '"' can only transition into its +// final state once, when the closing '"' is seen. + -#line 596 "upb/json/parser.rl" +#line 904 "upb/json/parser.rl" -#line 514 "upb/json/parser.c" +#line 816 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, - 4, 1, 5, 1, 6, 1, 7, 1, - 9, 1, 11, 1, 12, 1, 13, 1, - 14, 1, 15, 1, 16, 1, 24, 1, - 26, 2, 3, 7, 2, 5, 2, 2, - 5, 7, 2, 10, 8, 2, 12, 14, - 2, 13, 14, 2, 17, 1, 2, 18, - 26, 2, 19, 8, 2, 20, 26, 2, - 21, 26, 2, 22, 26, 2, 23, 26, - 2, 25, 26, 3, 13, 10, 8 + 5, 1, 6, 1, 7, 1, 8, 1, + 10, 1, 12, 1, 13, 1, 14, 1, + 15, 1, 16, 1, 17, 1, 21, 1, + 25, 1, 27, 2, 3, 8, 2, 4, + 5, 2, 6, 2, 2, 6, 8, 2, + 11, 9, 2, 13, 15, 2, 14, 15, + 2, 18, 1, 2, 19, 27, 2, 20, + 9, 2, 22, 27, 2, 23, 27, 2, + 24, 27, 2, 26, 27, 3, 14, 11, + 9 }; static const unsigned char _json_key_offsets[] = { - 0, 0, 4, 9, 14, 18, 22, 27, - 32, 37, 41, 45, 48, 51, 53, 57, - 61, 63, 65, 70, 72, 74, 83, 89, - 95, 101, 107, 109, 118, 118, 118, 123, - 128, 133, 133, 134, 135, 136, 137, 137, - 138, 139, 140, 140, 141, 142, 143, 143, - 148, 153, 157, 161, 166, 171, 176, 180, - 180, 183, 183, 183 + 0, 0, 4, 9, 14, 15, 19, 24, + 29, 34, 38, 42, 45, 48, 50, 54, + 58, 60, 62, 67, 69, 71, 80, 86, + 92, 98, 104, 106, 115, 116, 116, 116, + 121, 126, 131, 132, 133, 134, 135, 135, + 136, 137, 138, 138, 139, 140, 141, 141, + 146, 151, 152, 156, 161, 166, 171, 175, + 175, 178, 178, 178 }; static const char _json_trans_keys[] = { 32, 123, 9, 13, 32, 34, 125, 9, - 13, 32, 34, 125, 9, 13, 32, 58, - 9, 13, 32, 58, 9, 13, 32, 93, - 125, 9, 13, 32, 44, 125, 9, 13, - 32, 44, 125, 9, 13, 32, 34, 9, - 13, 45, 48, 49, 57, 48, 49, 57, - 46, 69, 101, 48, 57, 69, 101, 48, - 57, 43, 45, 48, 57, 48, 57, 48, - 57, 46, 69, 101, 48, 57, 34, 92, - 34, 92, 34, 47, 92, 98, 102, 110, - 114, 116, 117, 48, 57, 65, 70, 97, - 102, 48, 57, 65, 70, 97, 102, 48, - 57, 65, 70, 97, 102, 48, 57, 65, - 70, 97, 102, 34, 92, 34, 45, 91, - 102, 110, 116, 123, 48, 57, 32, 93, - 125, 9, 13, 32, 44, 93, 9, 13, - 32, 93, 125, 9, 13, 97, 108, 115, - 101, 117, 108, 108, 114, 117, 101, 32, - 34, 125, 9, 13, 32, 34, 125, 9, - 13, 32, 58, 9, 13, 32, 58, 9, - 13, 32, 93, 125, 9, 13, 32, 44, - 125, 9, 13, 32, 44, 125, 9, 13, - 32, 34, 9, 13, 32, 9, 13, 0 + 13, 32, 34, 125, 9, 13, 34, 32, + 58, 9, 13, 32, 93, 125, 9, 13, + 32, 44, 125, 9, 13, 32, 44, 125, + 9, 13, 32, 34, 9, 13, 45, 48, + 49, 57, 48, 49, 57, 46, 69, 101, + 48, 57, 69, 101, 48, 57, 43, 45, + 48, 57, 48, 57, 48, 57, 46, 69, + 101, 48, 57, 34, 92, 34, 92, 34, + 47, 92, 98, 102, 110, 114, 116, 117, + 48, 57, 65, 70, 97, 102, 48, 57, + 65, 70, 97, 102, 48, 57, 65, 70, + 97, 102, 48, 57, 65, 70, 97, 102, + 34, 92, 34, 45, 91, 102, 110, 116, + 123, 48, 57, 34, 32, 93, 125, 9, + 13, 32, 44, 93, 9, 13, 32, 93, + 125, 9, 13, 97, 108, 115, 101, 117, + 108, 108, 114, 117, 101, 32, 34, 125, + 9, 13, 32, 34, 125, 9, 13, 34, + 32, 58, 9, 13, 32, 93, 125, 9, + 13, 32, 44, 125, 9, 13, 32, 44, + 125, 9, 13, 32, 34, 9, 13, 32, + 9, 13, 0 }; static const char _json_single_lengths[] = { - 0, 2, 3, 3, 2, 2, 3, 3, + 0, 2, 3, 3, 1, 2, 3, 3, 3, 2, 2, 1, 3, 0, 2, 2, 0, 0, 3, 2, 2, 9, 0, 0, - 0, 0, 2, 7, 0, 0, 3, 3, - 3, 0, 1, 1, 1, 1, 0, 1, + 0, 0, 2, 7, 1, 0, 0, 3, + 3, 3, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 3, - 3, 2, 2, 3, 3, 3, 2, 0, + 3, 1, 2, 3, 3, 3, 2, 0, 1, 0, 0, 0 }; static const char _json_range_lengths[] = { - 0, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 3, - 3, 3, 0, 1, 0, 0, 1, 1, - 1, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 0, 1, 0, 0, 0, 1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 0, + 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 }; static const short _json_index_offsets[] = { - 0, 0, 4, 9, 14, 18, 22, 27, - 32, 37, 41, 45, 48, 52, 54, 58, - 62, 64, 66, 71, 74, 77, 87, 91, - 95, 99, 103, 106, 115, 116, 117, 122, - 127, 132, 133, 135, 137, 139, 141, 142, - 144, 146, 148, 149, 151, 153, 155, 156, - 161, 166, 170, 174, 179, 184, 189, 193, - 194, 197, 198, 199 + 0, 0, 4, 9, 14, 16, 20, 25, + 30, 35, 39, 43, 46, 50, 52, 56, + 60, 62, 64, 69, 72, 75, 85, 89, + 93, 97, 101, 104, 113, 115, 116, 117, + 122, 127, 132, 134, 136, 138, 140, 141, + 143, 145, 147, 148, 150, 152, 154, 155, + 160, 165, 167, 171, 176, 181, 186, 190, + 191, 194, 195, 196 }; static const char _json_indicies[] = { 0, 2, 0, 1, 3, 4, 5, 3, - 1, 6, 7, 8, 6, 1, 9, 10, - 9, 1, 11, 12, 11, 1, 12, 1, - 1, 12, 13, 14, 15, 16, 14, 1, - 17, 18, 8, 17, 1, 18, 7, 18, - 1, 19, 20, 21, 1, 20, 21, 1, - 23, 24, 24, 22, 25, 1, 24, 24, - 25, 22, 26, 26, 27, 1, 27, 1, - 27, 22, 23, 24, 24, 21, 22, 29, - 30, 28, 32, 33, 31, 34, 34, 34, - 34, 34, 34, 34, 34, 35, 1, 36, - 36, 36, 1, 37, 37, 37, 1, 38, - 38, 38, 1, 39, 39, 39, 1, 41, - 42, 40, 43, 44, 45, 46, 47, 48, - 49, 44, 1, 50, 51, 53, 54, 1, + 1, 6, 7, 8, 6, 1, 9, 1, + 10, 11, 10, 1, 11, 1, 1, 11, + 12, 13, 14, 15, 13, 1, 16, 17, + 8, 16, 1, 17, 7, 17, 1, 18, + 19, 20, 1, 19, 20, 1, 22, 23, + 23, 21, 24, 1, 23, 23, 24, 21, + 25, 25, 26, 1, 26, 1, 26, 21, + 22, 23, 23, 20, 21, 28, 29, 27, + 31, 32, 30, 33, 33, 33, 33, 33, + 33, 33, 33, 34, 1, 35, 35, 35, + 1, 36, 36, 36, 1, 37, 37, 37, + 1, 38, 38, 38, 1, 40, 41, 39, + 42, 43, 44, 45, 46, 47, 48, 43, + 1, 49, 1, 50, 51, 53, 54, 1, 53, 52, 55, 56, 54, 55, 1, 56, - 1, 1, 56, 52, 57, 58, 1, 59, - 1, 60, 1, 61, 1, 62, 63, 1, - 64, 1, 65, 1, 66, 67, 1, 68, - 1, 69, 1, 70, 71, 72, 73, 71, - 1, 74, 75, 76, 74, 1, 77, 78, - 77, 1, 79, 80, 79, 1, 80, 1, - 1, 80, 81, 82, 83, 84, 82, 1, - 85, 86, 76, 85, 1, 86, 75, 86, - 1, 87, 88, 88, 1, 1, 1, 1, - 0 + 1, 1, 56, 52, 57, 1, 58, 1, + 59, 1, 60, 1, 61, 62, 1, 63, + 1, 64, 1, 65, 66, 1, 67, 1, + 68, 1, 69, 70, 71, 72, 70, 1, + 73, 74, 75, 73, 1, 76, 1, 77, + 78, 77, 1, 78, 1, 1, 78, 79, + 80, 81, 82, 80, 1, 83, 84, 75, + 83, 1, 84, 74, 84, 1, 85, 86, + 86, 1, 1, 1, 1, 0 }; static const char _json_trans_targs[] = { 1, 0, 2, 3, 4, 56, 3, 4, - 56, 5, 6, 5, 6, 7, 8, 9, - 56, 8, 9, 11, 12, 18, 57, 13, - 15, 14, 16, 17, 20, 58, 21, 20, - 58, 21, 19, 22, 23, 24, 25, 26, - 20, 58, 21, 28, 29, 30, 34, 39, - 43, 47, 59, 59, 31, 30, 33, 31, - 32, 59, 35, 36, 37, 38, 59, 40, - 41, 42, 59, 44, 45, 46, 59, 48, - 49, 55, 48, 49, 55, 50, 51, 50, - 51, 52, 53, 54, 55, 53, 54, 59, - 56 + 56, 5, 5, 6, 7, 8, 9, 56, + 8, 9, 11, 12, 18, 57, 13, 15, + 14, 16, 17, 20, 58, 21, 20, 58, + 21, 19, 22, 23, 24, 25, 26, 20, + 58, 21, 28, 30, 31, 34, 39, 43, + 47, 29, 59, 59, 32, 31, 29, 32, + 33, 35, 36, 37, 38, 59, 40, 41, + 42, 59, 44, 45, 46, 59, 48, 49, + 55, 48, 49, 55, 50, 50, 51, 52, + 53, 54, 55, 53, 54, 59, 56 }; static const char _json_trans_actions[] = { - 0, 0, 0, 21, 75, 48, 0, 42, - 23, 17, 17, 0, 0, 15, 19, 19, - 45, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 3, 13, 0, 0, - 33, 5, 11, 0, 7, 0, 0, 0, - 36, 39, 9, 57, 51, 25, 0, 0, - 0, 29, 60, 54, 15, 0, 27, 0, - 0, 31, 0, 0, 0, 0, 66, 0, - 0, 0, 69, 0, 0, 0, 63, 21, - 75, 48, 0, 42, 23, 17, 17, 0, - 0, 15, 19, 19, 45, 0, 0, 72, - 0 + 0, 0, 0, 21, 77, 53, 0, 47, + 23, 17, 0, 0, 15, 19, 19, 50, + 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 3, 13, 0, 0, 35, + 5, 11, 0, 38, 7, 7, 7, 41, + 44, 9, 62, 56, 25, 0, 0, 0, + 31, 29, 33, 59, 15, 0, 27, 0, + 0, 0, 0, 0, 0, 68, 0, 0, + 0, 71, 0, 0, 0, 65, 21, 77, + 53, 0, 47, 23, 17, 0, 0, 15, + 19, 19, 50, 0, 0, 74, 0 }; static const int json_start = 1; @@ -9276,13 +9597,14 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 599 "upb/json/parser.rl" +#line 907 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { UPB_UNUSED(hd); UPB_UNUSED(handle); upb_json_parser *parser = closure; + parser->handle = handle; // Variables used by Ragel's generated code. int cs = parser->current_state; @@ -9292,8 +9614,10 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, const char *p = buf; const char *pe = buf + size; + capture_resume(parser, buf); + -#line 684 "upb/json/parser.c" +#line 987 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -9368,114 +9692,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 517 "upb/json/parser.rl" +#line 819 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 518 "upb/json/parser.rl" +#line 820 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 522 "upb/json/parser.rl" +#line 824 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 523 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_text(parser, p, false)); } +#line 825 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 529 "upb/json/parser.rl" - { start_hex(parser, p); } +#line 831 "upb/json/parser.rl" + { start_hex(parser); } break; case 5: -#line 530 "upb/json/parser.rl" - { hex(parser, p); } +#line 832 "upb/json/parser.rl" + { hexdigit(parser, p); } break; case 6: -#line 536 "upb/json/parser.rl" - { escape(parser, p); } +#line 833 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 539 "upb/json/parser.rl" - { {cs = stack[--top]; goto _again;} } +#line 839 "upb/json/parser.rl" + { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 540 "upb/json/parser.rl" - { {stack[top++] = cs; cs = 19; goto _again;} } +#line 845 "upb/json/parser.rl" + { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 542 "upb/json/parser.rl" - { p--; {stack[top++] = cs; cs = 27; goto _again;} } +#line 848 "upb/json/parser.rl" + { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 547 "upb/json/parser.rl" - { start_member(parser); } +#line 850 "upb/json/parser.rl" + { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 548 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_member(parser)); } +#line 855 "upb/json/parser.rl" + { start_member(parser); } break; case 12: -#line 551 "upb/json/parser.rl" - { clear_member(parser); } +#line 856 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_member(parser)); } break; case 13: -#line 557 "upb/json/parser.rl" - { start_object(parser); } +#line 859 "upb/json/parser.rl" + { clear_member(parser); } break; case 14: -#line 560 "upb/json/parser.rl" - { end_object(parser); } +#line 865 "upb/json/parser.rl" + { start_object(parser); } break; case 15: -#line 566 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_array(parser)); } +#line 868 "upb/json/parser.rl" + { end_object(parser); } break; case 16: -#line 570 "upb/json/parser.rl" - { end_array(parser); } +#line 874 "upb/json/parser.rl" + { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 575 "upb/json/parser.rl" - { start_number(parser, p); } +#line 878 "upb/json/parser.rl" + { end_array(parser); } break; case 18: -#line 576 "upb/json/parser.rl" - { end_number(parser, p); } +#line 883 "upb/json/parser.rl" + { start_number(parser, p); } break; case 19: -#line 578 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_stringval(parser)); } +#line 884 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 579 "upb/json/parser.rl" - { end_stringval(parser); } +#line 886 "upb/json/parser.rl" + { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 581 "upb/json/parser.rl" - { CHECK_RETURN_TOP(parser_putbool(parser, true)); } +#line 887 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 583 "upb/json/parser.rl" - { CHECK_RETURN_TOP(parser_putbool(parser, false)); } +#line 889 "upb/json/parser.rl" + { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 585 "upb/json/parser.rl" - { /* null value */ } +#line 891 "upb/json/parser.rl" + { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 587 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_subobject(parser)); } +#line 893 "upb/json/parser.rl" + { /* null value */ } break; case 25: -#line 588 "upb/json/parser.rl" - { end_subobject(parser); } +#line 895 "upb/json/parser.rl" + { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 593 "upb/json/parser.rl" +#line 896 "upb/json/parser.rl" + { end_subobject(parser); } + break; + case 27: +#line 901 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 866 "upb/json/parser.c" +#line 1173 "upb/json/parser.c" } } @@ -9488,10 +9816,12 @@ _again: _out: {} } -#line 615 "upb/json/parser.rl" +#line 926 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); + } else { + capture_suspend(parser, &p); } error: @@ -9508,8 +9838,13 @@ bool end(void *closure, const void *hd) { return true; } + +/* Public API *****************************************************************/ + void upb_json_parser_init(upb_json_parser *p, upb_status *status) { p->limit = p->stack + UPB_JSON_MAX_DEPTH; + p->accumulate_buf = NULL; + p->accumulate_buf_size = 0; upb_byteshandler_init(&p->input_handler_); upb_byteshandler_setstring(&p->input_handler_, parse, NULL); upb_byteshandler_setendstr(&p->input_handler_, end, NULL); @@ -9519,6 +9854,7 @@ void upb_json_parser_init(upb_json_parser *p, upb_status *status) { void upb_json_parser_uninit(upb_json_parser *p) { upb_byteshandler_uninit(&p->input_handler_); + free(p->accumulate_buf); } void upb_json_parser_reset(upb_json_parser *p) { @@ -9529,18 +9865,18 @@ void upb_json_parser_reset(upb_json_parser *p) { int top; // Emit Ragel initialization of the parser. -#line 920 "upb/json/parser.c" +#line 1235 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 655 "upb/json/parser.rl" +#line 974 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; - p->text_begin = NULL; - p->accumulated = NULL; - p->accumulated_len = 0; + accumulate_clear(p); + p->multipart_state = MULTIPART_INACTIVE; + p->capture = NULL; } void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) { diff --git a/ruby/ext/google/protobuf_c/upb.h b/ruby/ext/google/protobuf_c/upb.h index 0e98bbab..fbcb8e99 100644 --- a/ruby/ext/google/protobuf_c/upb.h +++ b/ruby/ext/google/protobuf_c/upb.h @@ -6662,7 +6662,9 @@ typedef enum { // | unused (24) | opc | // | upb_inttable* (32 or 64) | - OP_HALT = 36, // No arg. + OP_DISPATCH = 36, // No arg. + + OP_HALT = 37, // No arg. } opcode; #define OP_MAX OP_HALT @@ -7339,15 +7341,24 @@ UPB_DEFINE_STRUCT0(upb_json_parser, int parser_stack[UPB_JSON_MAX_DEPTH]; int parser_top; - // A pointer to the beginning of whatever text we are currently parsing. - const char *text_begin; + // The handle for the current buffer. + const upb_bufhandle *handle; - // We have to accumulate text for member names, integers, unicode escapes, and - // base64 partial results. + // Accumulate buffer. See details in parser.rl. const char *accumulated; size_t accumulated_len; - // TODO: add members and code for allocating a buffer when necessary (when the - // member spans input buffers or contains escapes). + char *accumulate_buf; + size_t accumulate_buf_size; + + // Multi-part text data. See details in parser.rl. + int multipart_state; + upb_selector_t string_selector; + + // Input capture. See details in parser.rl. + const char *capture; + + // Intermediate result of parsing a unicode escape sequence. + uint32_t digit; )); UPB_BEGIN_EXTERN_C -- cgit v1.2.3 From addd26cbb3e7af05917bfc9bc965b0ada4bc80b0 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 12 Jan 2015 16:09:35 -0800 Subject: Addressed code-review comments. --- ruby/ext/google/protobuf_c/encode_decode.c | 7 ++++++- ruby/ext/google/protobuf_c/map.c | 30 ++++++++---------------------- 2 files changed, 14 insertions(+), 23 deletions(-) (limited to 'ruby/ext') diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 1cff9049..e5e1514b 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -253,7 +253,12 @@ static map_handlerdata_t* new_map_handlerdata( hd->value_field_type = upb_fielddef_type(value_field); hd->value_field_typeclass = field_type_class(value_field); - // Ensure that value_field_typeclass is properly GC-rooted. + // Ensure that value_field_typeclass is properly GC-rooted. We must do this + // because we hold a reference to the Ruby class in the handlerdata, which is + // owned by the handlers. The handlers are owned by *this* message's Ruby + // object, but each Ruby object is rooted independently at the def -> Ruby + // object map. So we have to ensure that the Ruby objects we depend on will + // stick around as long as we're around. if (hd->value_field_typeclass != Qnil) { rb_ary_push(desc->typeclass_references, hd->value_field_typeclass); } diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c index 2c2ae240..4ee71d18 100644 --- a/ruby/ext/google/protobuf_c/map.c +++ b/ruby/ext/google/protobuf_c/map.c @@ -205,10 +205,13 @@ static bool needs_typeclass(upb_fieldtype_t type) { * * The last argument, if present, provides initial content for map. Note that * this may be an ordinary Ruby hashmap or another Map instance with identical - * key and value types. Also note that this argument may be rpesent whether or + * key and value types. Also note that this argument may be present whether or * not value_typeclass is present (and it is unambiguously separate from * value_typeclass because value_typeclass's presence is strictly determined by - * value_type). + * value_type). The contents of this initial hashmap or Map instance are + * shallow-copied into the new Map: the original map is unmodified, but + * references to underlying objects will be shared if the value type is a + * message type. */ VALUE Map_init(int argc, VALUE* argv, VALUE _self) { Map* self = ruby_to_Map(_self); @@ -385,8 +388,7 @@ VALUE Map_index_set(VALUE _self, VALUE key, VALUE value) { native_slot_set(self->value_type, self->value_type_class, mem, value); // Replace any existing value by issuing a 'remove' operation first. - upb_value oldv; - upb_strtable_remove2(&self->table, keyval, length, &oldv); + upb_strtable_remove2(&self->table, keyval, length, NULL); if (!upb_strtable_insert2(&self->table, keyval, length, v)) { rb_raise(rb_eRuntimeError, "Could not insert into table"); } @@ -410,8 +412,7 @@ VALUE Map_has_key(VALUE _self, VALUE key) { size_t length = 0; table_key(self, key, keybuf, &keyval, &length); - upb_value v; - if (upb_strtable_lookup2(&self->table, keyval, length, &v)) { + if (upb_strtable_lookup2(&self->table, keyval, length, NULL)) { return Qtrue; } else { return Qfalse; @@ -468,7 +469,7 @@ VALUE Map_clear(VALUE _self) { */ VALUE Map_length(VALUE _self) { Map* self = ruby_to_Map(_self); - return INT2NUM(upb_strtable_count(&self->table)); + return ULL2NUM(upb_strtable_count(&self->table)); } static VALUE Map_new_this_type(VALUE _self) { @@ -612,21 +613,6 @@ VALUE Map_eq(VALUE _self, VALUE _other) { } } - // For each member of other, check that a member exists at the same key in - // self. We don't need to compare values here -- if the key exists in both, we - // compared values above; if not, we already know that the maps are not equal. - for (upb_strtable_begin(&it, &other->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - upb_value v; - if (!upb_strtable_lookup2(&self->table, - upb_strtable_iter_key(&it), - upb_strtable_iter_keylength(&it), - &v)) { - return Qfalse; - } - } - return Qtrue; } -- cgit v1.2.3