From fd1a3ff11d5854c34ba66c63598cdc5fd234e399 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 6 Jan 2015 15:44:09 -0800 Subject: Support for maps in the MRI C Ruby extension. This adds the Map container and support for parsing and serializing maps in the protobuf wire format (as defined by the C++ implementation, with MapEntry submessages in a repeated field). JSON map serialization/parsing are not yet supported as these will require some changes to upb as well. --- ruby/ext/google/protobuf_c/storage.c | 173 +++++++++++++++++++++++++++-------- 1 file changed, 135 insertions(+), 38 deletions(-) (limited to 'ruby/ext/google/protobuf_c/storage.c') diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index c4d801af..f20ddec2 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -57,7 +57,17 @@ size_t native_slot_size(upb_fieldtype_t type) { } } -static void check_int_range_precision(upb_fieldtype_t type, VALUE val) { +static bool is_ruby_num(VALUE value) { + return (TYPE(value) == T_FLOAT || + TYPE(value) == T_FIXNUM || + TYPE(value) == T_BIGNUM); +} + +void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE val) { + if (!is_ruby_num(val)) { + rb_raise(rb_eTypeError, "Expected number type for integral field."); + } + // NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper // bound; we just need to do precision checks (i.e., disallow rounding) and // check for < 0 on unsigned types. @@ -76,12 +86,6 @@ static void check_int_range_precision(upb_fieldtype_t type, VALUE val) { } } -static bool is_ruby_num(VALUE value) { - return (TYPE(value) == T_FLOAT || - TYPE(value) == T_FIXNUM || - TYPE(value) == T_BIGNUM); -} - void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) { bool bad_encoding = false; rb_encoding* string_encoding = rb_enc_from_index(ENCODING_GET(value)); @@ -156,14 +160,14 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class, int32_t int_val = 0; if (TYPE(value) == T_SYMBOL) { // Ensure that the given symbol exists in the enum module. - VALUE lookup = rb_const_get(type_class, SYM2ID(value)); + VALUE lookup = rb_funcall(type_class, rb_intern("resolve"), 1, value); if (lookup == Qnil) { rb_raise(rb_eRangeError, "Unknown symbol value for enum field."); } else { int_val = NUM2INT(lookup); } } else { - check_int_range_precision(UPB_TYPE_INT32, value); + native_slot_check_int_range_precision(UPB_TYPE_INT32, value); int_val = NUM2INT(value); } DEREF(memory, int32_t) = int_val; @@ -173,10 +177,7 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class, case UPB_TYPE_INT64: case UPB_TYPE_UINT32: case UPB_TYPE_UINT64: - if (!is_ruby_num(value)) { - rb_raise(rb_eTypeError, "Expected number type for integral field."); - } - check_int_range_precision(type, value); + native_slot_check_int_range_precision(type, value); switch (type) { case UPB_TYPE_INT32: DEREF(memory, int32_t) = NUM2INT(value); @@ -246,8 +247,9 @@ void native_slot_init(upb_fieldtype_t type, void* memory) { break; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: - // TODO(cfallin): set encoding appropriately DEREF(memory, VALUE) = rb_str_new2(""); + rb_enc_associate(DEREF(memory, VALUE), (type == UPB_TYPE_BYTES) ? + kRubyString8bitEncoding : kRubyStringUtf8Encoding); break; case UPB_TYPE_MESSAGE: DEREF(memory, VALUE) = Qnil; @@ -321,6 +323,35 @@ bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2) { } } +// ----------------------------------------------------------------------------- +// Map field utilities. +// ----------------------------------------------------------------------------- + +bool is_map_field(const upb_fielddef* field) { + if (upb_fielddef_label(field) != UPB_LABEL_REPEATED || + upb_fielddef_type(field) != UPB_TYPE_MESSAGE) { + return false; + } + const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + return upb_msgdef_mapentry(subdef); +} + +const upb_fielddef* map_field_key(const upb_fielddef* field) { + assert(is_map_field(field)); + const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_fielddef* key_field = upb_msgdef_itof(subdef, 1); + assert(key_field != NULL); + return key_field; +} + +const upb_fielddef* map_field_value(const upb_fielddef* field) { + assert(is_map_field(field)); + const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_fielddef* value_field = upb_msgdef_itof(subdef, 2); + assert(value_field != NULL); + return value_field; +} + // ----------------------------------------------------------------------------- // Memory layout management. // ----------------------------------------------------------------------------- @@ -334,9 +365,12 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) { size_t off = 0; for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); - size_t field_size = - (upb_fielddef_label(field) == UPB_LABEL_REPEATED) ? - sizeof(VALUE) : native_slot_size(upb_fielddef_type(field)); + size_t field_size = 0; + if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + field_size = sizeof(VALUE); + } else { + field_size = native_slot_size(upb_fielddef_type(field)); + } // align current offset off = (off + field_size - 1) & ~(field_size - 1); layout->offsets[upb_fielddef_index(field)] = off; @@ -357,7 +391,7 @@ void free_layout(MessageLayout* layout) { xfree(layout); } -static VALUE get_type_class(const upb_fielddef* field) { +VALUE field_type_class(const upb_fielddef* field) { VALUE type_class = Qnil; if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) { VALUE submsgdesc = @@ -380,7 +414,7 @@ VALUE layout_get(MessageLayout* layout, return *((VALUE *)memory); } else { return native_slot_get(upb_fielddef_type(field), - get_type_class(field), + field_type_class(field), memory); } } @@ -398,9 +432,8 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) { rb_raise(rb_eTypeError, "Repeated field array has wrong element type"); } - if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE || - upb_fielddef_type(field) == UPB_TYPE_ENUM) { - RepeatedField* self = ruby_to_RepeatedField(val); + if (self->field_type == UPB_TYPE_MESSAGE || + self->field_type == UPB_TYPE_ENUM) { if (self->field_type_class != get_def_obj(upb_fielddef_subdef(field))) { rb_raise(rb_eTypeError, @@ -409,17 +442,48 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) { } } +static void check_map_field_type(VALUE val, const upb_fielddef* field) { + assert(is_map_field(field)); + const upb_fielddef* key_field = map_field_key(field); + const upb_fielddef* value_field = map_field_value(field); + + if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) || + RTYPEDDATA_TYPE(val) != &Map_type) { + rb_raise(rb_eTypeError, "Expected Map instance"); + } + + Map* self = ruby_to_Map(val); + if (self->key_type != upb_fielddef_type(key_field)) { + rb_raise(rb_eTypeError, "Map key type does not match field's key type"); + } + if (self->value_type != upb_fielddef_type(value_field)) { + rb_raise(rb_eTypeError, "Map value type does not match field's value type"); + } + if (upb_fielddef_type(value_field) == UPB_TYPE_MESSAGE || + upb_fielddef_type(value_field) == UPB_TYPE_ENUM) { + if (self->value_type_class != + get_def_obj(upb_fielddef_subdef(value_field))) { + rb_raise(rb_eTypeError, + "Map value type has wrong message/enum class"); + } + } +} + + void layout_set(MessageLayout* layout, void* storage, const upb_fielddef* field, VALUE val) { void* memory = ((uint8_t *)storage) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + if (is_map_field(field)) { + check_map_field_type(val, field); + DEREF(memory, VALUE) = val; + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { check_repeated_field_type(val, field); - *((VALUE *)memory) = val; + DEREF(memory, VALUE) = val; } else { - native_slot_set(upb_fielddef_type(field), get_type_class(field), + native_slot_set(upb_fielddef_type(field), field_type_class(field), memory, val); } } @@ -434,9 +498,34 @@ void layout_init(MessageLayout* layout, void* memory = ((uint8_t *)storage) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + if (is_map_field(field)) { + VALUE map = Qnil; + + const upb_fielddef* key_field = map_field_key(field); + const upb_fielddef* value_field = map_field_value(field); + VALUE type_class = field_type_class(value_field); + + if (type_class != Qnil) { + VALUE args[3] = { + fieldtype_to_ruby(upb_fielddef_type(key_field)), + fieldtype_to_ruby(upb_fielddef_type(value_field)), + type_class, + }; + map = rb_class_new_instance(3, args, cMap); + } else { + VALUE args[2] = { + fieldtype_to_ruby(upb_fielddef_type(key_field)), + fieldtype_to_ruby(upb_fielddef_type(value_field)), + }; + map = rb_class_new_instance(2, args, cMap); + } + + DEREF(memory, VALUE) = map; + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { VALUE ary = Qnil; - VALUE type_class = get_type_class(field); + + VALUE type_class = field_type_class(field); + if (type_class != Qnil) { VALUE args[2] = { fieldtype_to_ruby(upb_fielddef_type(field)), @@ -447,7 +536,8 @@ void layout_init(MessageLayout* layout, VALUE args[1] = { fieldtype_to_ruby(upb_fielddef_type(field)) }; ary = rb_class_new_instance(1, args, cRepeatedField); } - *((VALUE *)memory) = ary; + + DEREF(memory, VALUE) = ary; } else { native_slot_init(upb_fielddef_type(field), memory); } @@ -464,7 +554,7 @@ void layout_mark(MessageLayout* layout, void* storage) { layout->offsets[upb_fielddef_index(field)]; if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - rb_gc_mark(*((VALUE *)memory)); + rb_gc_mark(DEREF(memory, VALUE)); } else { native_slot_mark(upb_fielddef_type(field), memory); } @@ -482,8 +572,10 @@ void layout_dup(MessageLayout* layout, void* to, void* from) { void* from_memory = ((uint8_t *)from) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - *((VALUE *)to_memory) = RepeatedField_dup(*((VALUE *)from_memory)); + if (is_map_field(field)) { + DEREF(to_memory, VALUE) = Map_dup(DEREF(from_memory, VALUE)); + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + DEREF(to_memory, VALUE) = RepeatedField_dup(DEREF(from_memory, VALUE)); } else { native_slot_dup(upb_fielddef_type(field), to_memory, from_memory); } @@ -501,8 +593,12 @@ void layout_deep_copy(MessageLayout* layout, void* to, void* from) { void* from_memory = ((uint8_t *)from) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - *((VALUE *)to_memory) = RepeatedField_deep_copy(*((VALUE *)from_memory)); + if (is_map_field(field)) { + DEREF(to_memory, VALUE) = + Map_deep_copy(DEREF(from_memory, VALUE)); + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + DEREF(to_memory, VALUE) = + RepeatedField_deep_copy(DEREF(from_memory, VALUE)); } else { native_slot_deep_copy(upb_fielddef_type(field), to_memory, from_memory); } @@ -520,11 +616,12 @@ VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) { void* msg2_memory = ((uint8_t *)msg2) + layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - if (RepeatedField_eq(*((VALUE *)msg1_memory), - *((VALUE *)msg2_memory)) == Qfalse) { - return Qfalse; - } + if (is_map_field(field)) { + return Map_eq(DEREF(msg1_memory, VALUE), + DEREF(msg2_memory, VALUE)); + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + return RepeatedField_eq(DEREF(msg1_memory, VALUE), + DEREF(msg2_memory, VALUE)); } else { if (!native_slot_eq(upb_fielddef_type(field), msg1_memory, msg2_memory)) { -- cgit v1.2.3 From 80276ac0218f6d8fcdbad0fb09b233b31d2bc0fb Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 6 Jan 2015 18:01:32 -0800 Subject: Addressed code-review comments. --- ruby/ext/google/protobuf_c/defs.c | 11 ++++---- ruby/ext/google/protobuf_c/encode_decode.c | 42 ++++++++++++++---------------- ruby/ext/google/protobuf_c/map.c | 11 ++++++++ ruby/ext/google/protobuf_c/message.c | 2 +- ruby/ext/google/protobuf_c/protobuf.h | 9 +++++++ ruby/ext/google/protobuf_c/storage.c | 16 +++++++++--- 6 files changed, 59 insertions(+), 32 deletions(-) (limited to 'ruby/ext/google/protobuf_c/storage.c') diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index 5c94a74a..499e041b 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -1077,11 +1077,12 @@ VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) { * MessageBuilderContext.map(name, key_type, value_type, number, * value_type_class = nil) * - * Defines a new map field on this message type with the given key and value types, tag - * number, and type class (for message and enum value types). The key type must - * be :int32/:uint32/:int64/:uint64, :bool, or :string. The value type type must - * be a Ruby symbol (as accepted by FieldDescriptor#type=) and the type_class - * must be a string, if present (as accepted by FieldDescriptor#submsg_name=). + * Defines a new map field on this message type with the given key and value + * types, tag number, and type class (for message and enum value types). The key + * type must be :int32/:uint32/:int64/:uint64, :bool, or :string. The value type + * type must be a Ruby symbol (as accepted by FieldDescriptor#type=) and the + * type_class must be a string, if present (as accepted by + * FieldDescriptor#submsg_name=). */ VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) { DEFINE_SELF(MessageBuilderContext, self, _self); diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 6263edcc..f1b951fc 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -177,8 +177,8 @@ static void *submsg_handler(void *closure, const void *hd) { // Handler data for startmap/endmap handlers. typedef struct { size_t ofs; - const upb_fielddef* key_field; - const upb_fielddef* value_field; + upb_fieldtype_t key_field_type; + upb_fieldtype_t value_field_type; VALUE value_field_typeclass; } map_handlerdata_t; @@ -194,12 +194,6 @@ typedef struct { char value_storage[NATIVE_SLOT_MAX_SIZE]; } map_parse_frame_t; -// Handler to begin a sequence of map entries: simple no-op that exists only to -// set context for the map entry handlers. -static void *startmap_handler(void *closure, const void *hd) { - return closure; -} - // Handler to begin a map entry: allocates a temporary frame. This is the // 'startsubmsg' handler on the msgdef that contains the map field. static void *startmapentry_handler(void *closure, const void *hd) { @@ -210,10 +204,8 @@ static void *startmapentry_handler(void *closure, const void *hd) { map_parse_frame_t* frame = ALLOC(map_parse_frame_t); frame->map = map_rb; - native_slot_init(upb_fielddef_type(mapdata->key_field), - &frame->key_storage); - native_slot_init(upb_fielddef_type(mapdata->value_field), - &frame->value_storage); + native_slot_init(mapdata->key_field_type, &frame->key_storage); + native_slot_init(mapdata->value_field_type, &frame->value_storage); return frame; } @@ -225,10 +217,10 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) { const map_handlerdata_t* mapdata = hd; VALUE key = native_slot_get( - upb_fielddef_type(mapdata->key_field), Qnil, + mapdata->key_field_type, Qnil, &frame->key_storage); VALUE value = native_slot_get( - upb_fielddef_type(mapdata->value_field), mapdata->value_field_typeclass, + mapdata->value_field_type, mapdata->value_field_typeclass, &frame->value_storage); Map_index_set(frame->map, key, value); @@ -250,11 +242,15 @@ static map_handlerdata_t* new_map_handlerdata( map_handlerdata_t* hd = ALLOC(map_handlerdata_t); hd->ofs = ofs; - hd->key_field = upb_msgdef_itof(mapentry_def, 1); - assert(hd->key_field != NULL); - hd->value_field = upb_msgdef_itof(mapentry_def, 2); - assert(hd->value_field != NULL); - hd->value_field_typeclass = field_type_class(hd->value_field); + const upb_fielddef* key_field = upb_msgdef_itof(mapentry_def, + MAP_KEY_FIELD); + assert(key_field != NULL); + hd->key_field_type = upb_fielddef_type(key_field); + const upb_fielddef* value_field = upb_msgdef_itof(mapentry_def, + MAP_VALUE_FIELD); + assert(value_field != NULL); + hd->value_field_type = upb_fielddef_type(value_field); + hd->value_field_typeclass = field_type_class(value_field); return hd; } @@ -293,6 +289,7 @@ static void add_handlers_for_repeated_field(upb_handlers *h, appendbytes_handler : appendstr_handler, NULL); upb_handlers_setstring(h, f, stringdata_handler, NULL); + break; } case UPB_TYPE_MESSAGE: { upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; @@ -352,7 +349,6 @@ static void add_handlers_for_mapfield(upb_handlers* h, upb_handlers_addcleanup(h, hd, free); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&attr, hd); - upb_handlers_setstartseq(h, fielddef, startmap_handler, &attr); upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr); upb_handlerattr_uninit(&attr); } @@ -360,6 +356,8 @@ static void add_handlers_for_mapfield(upb_handlers* h, // Adds handlers to a map-entry msgdef. static void add_handlers_for_mapentry(const upb_msgdef* msgdef, upb_handlers* h) { + const upb_fielddef* key_field = map_entry_key(msgdef); + const upb_fielddef* value_field = map_entry_value(msgdef); map_handlerdata_t* hd = new_map_handlerdata(0, msgdef); upb_handlers_addcleanup(h, hd, free); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; @@ -367,7 +365,7 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef, upb_handlers_setendmsg(h, endmap_handler, &attr); add_handlers_for_singular_field( - h, hd->key_field, + h, key_field, // Convert the offset into map_parse_frame_t to an offset understood by the // singular field handlers, so that we don't have to use special // map-key/value-specific handlers. The ordinary singular field handlers expect @@ -375,7 +373,7 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef, // we compensate for that addition. offsetof(map_parse_frame_t, key_storage) - sizeof(MessageHeader)); add_handlers_for_singular_field( - h, hd->value_field, + h, value_field, offsetof(map_parse_frame_t, value_storage) - sizeof(MessageHeader)); } diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c index 9d5de9cd..56548799 100644 --- a/ruby/ext/google/protobuf_c/map.c +++ b/ruby/ext/google/protobuf_c/map.c @@ -32,6 +32,17 @@ // ----------------------------------------------------------------------------- // Basic map operations on top of upb's strtable. +// +// Note that we roll our own `Map` container here because, as for +// `RepeatedField`, we want a strongly-typed container. This is so that any user +// errors due to incorrect map key or value types are raised as close as +// possible to the error site, rather than at some deferred point (e.g., +// serialization). +// +// We build our `Map` on top of upb_strtable so that we're able to take +// advantage of the native_slot storage abstraction, as RepeatedField does. +// (This is not quite a perfect mapping -- see the key conversions below -- but +// gives us full support and error-checking for all value types for free.) // ----------------------------------------------------------------------------- // Map values are stored using the native_slot abstraction (as with repeated diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index de38dd7b..ee8881d4 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -142,7 +142,7 @@ int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { if (is_map_field(f)) { if (TYPE(val) != T_HASH) { rb_raise(rb_eArgError, - "Expected hashmap as initializer value for map field."); + "Expected Hash object as initializer value for map field."); } VALUE map = layout_get(self->descriptor->layout, Message_data(self), f); Map_merge_into_self(map, val); diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 91a97a68..6dac6029 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -268,10 +268,19 @@ extern rb_encoding* kRubyString8bitEncoding; VALUE field_type_class(const upb_fielddef* field); +#define MAP_KEY_FIELD 1 +#define MAP_VALUE_FIELD 2 + +// These operate on a map field (i.e., a repeated field of submessages whose +// submessage type is a map-entry msgdef). bool is_map_field(const upb_fielddef* field); const upb_fielddef* map_field_key(const upb_fielddef* field); const upb_fielddef* map_field_value(const upb_fielddef* field); +// These operate on a map-entry msgdef. +const upb_fielddef* map_entry_key(const upb_msgdef* msgdef); +const upb_fielddef* map_entry_value(const upb_msgdef* msgdef); + // ----------------------------------------------------------------------------- // Repeated field container type. // ----------------------------------------------------------------------------- diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index f20ddec2..235fbff2 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -339,15 +339,23 @@ bool is_map_field(const upb_fielddef* field) { const upb_fielddef* map_field_key(const upb_fielddef* field) { assert(is_map_field(field)); const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); - const upb_fielddef* key_field = upb_msgdef_itof(subdef, 1); - assert(key_field != NULL); - return key_field; + return map_entry_key(subdef); } const upb_fielddef* map_field_value(const upb_fielddef* field) { assert(is_map_field(field)); const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); - const upb_fielddef* value_field = upb_msgdef_itof(subdef, 2); + return map_entry_value(subdef); +} + +const upb_fielddef* map_entry_key(const upb_msgdef* msgdef) { + const upb_fielddef* key_field = upb_msgdef_itof(msgdef, MAP_KEY_FIELD); + assert(key_field != NULL); + return key_field; +} + +const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) { + const upb_fielddef* value_field = upb_msgdef_itof(msgdef, MAP_VALUE_FIELD); assert(value_field != NULL); return value_field; } -- cgit v1.2.3 From 4c92289766d76f276b322ab254ef039f670f41b1 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 9 Jan 2015 15:29:45 -0800 Subject: Addressed code-review comments. --- ruby/ext/google/protobuf_c/defs.c | 2 + ruby/ext/google/protobuf_c/encode_decode.c | 66 ++++++++--------- ruby/ext/google/protobuf_c/map.c | 111 +++++------------------------ ruby/ext/google/protobuf_c/protobuf.h | 8 ++- ruby/ext/google/protobuf_c/storage.c | 14 ++-- ruby/ext/google/protobuf_c/upb.c | 6 +- 6 files changed, 72 insertions(+), 135 deletions(-) (limited to 'ruby/ext/google/protobuf_c/storage.c') diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index 499e041b..a18aaac4 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -226,6 +226,7 @@ DEFINE_CLASS(Descriptor, "Google::Protobuf::Descriptor"); void Descriptor_mark(void* _self) { Descriptor* self = _self; rb_gc_mark(self->klass); + rb_gc_mark(self->typeclass_references); } void Descriptor_free(void* _self) { @@ -270,6 +271,7 @@ VALUE Descriptor_alloc(VALUE klass) { self->fill_method = NULL; self->pb_serialize_handlers = NULL; self->json_serialize_handlers = NULL; + self->typeclass_references = rb_ary_new(); return ret; } diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index f1b951fc..1cff9049 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -64,7 +64,7 @@ static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs, static void *startseq_handler(void* closure, const void* hd) { MessageHeader* msg = closure; const size_t *ofs = hd; - return (void*)DEREF(Message_data(msg), *ofs, VALUE); + return (void*)DEREF(msg, *ofs, VALUE); } // Handlers that append primitive values to a repeated field (a regular Ruby @@ -115,7 +115,7 @@ static void* str_handler(void *closure, const size_t *ofs = hd; VALUE str = rb_str_new2(""); rb_enc_associate(str, kRubyStringUtf8Encoding); - DEREF(Message_data(msg), *ofs, VALUE) = str; + DEREF(msg, *ofs, VALUE) = str; return (void*)str; } @@ -127,7 +127,7 @@ static void* bytes_handler(void *closure, const size_t *ofs = hd; VALUE str = rb_str_new2(""); rb_enc_associate(str, kRubyString8bitEncoding); - DEREF(Message_data(msg), *ofs, VALUE) = str; + DEREF(msg, *ofs, VALUE) = str; return (void*)str; } @@ -163,12 +163,12 @@ static void *submsg_handler(void *closure, const void *hd) { get_def_obj((void*)submsgdata->md); VALUE subklass = Descriptor_msgclass(subdesc); - if (DEREF(Message_data(msg), submsgdata->ofs, VALUE) == Qnil) { - DEREF(Message_data(msg), submsgdata->ofs, VALUE) = + if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) { + DEREF(msg, submsgdata->ofs, VALUE) = rb_class_new_instance(0, NULL, subklass); } - VALUE submsg_rb = DEREF(Message_data(msg), submsgdata->ofs, VALUE); + VALUE submsg_rb = DEREF(msg, submsgdata->ofs, VALUE); MessageHeader* submsg; TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg); return submsg; @@ -199,7 +199,7 @@ typedef struct { static void *startmapentry_handler(void *closure, const void *hd) { MessageHeader* msg = closure; const map_handlerdata_t* mapdata = hd; - VALUE map_rb = DEREF(Message_data(msg), mapdata->ofs, VALUE); + VALUE map_rb = DEREF(msg, mapdata->ofs, VALUE); map_parse_frame_t* frame = ALLOC(map_parse_frame_t); frame->map = map_rb; @@ -238,7 +238,8 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) { // pass the handlerdata down to the sub-message handler setup. static map_handlerdata_t* new_map_handlerdata( size_t ofs, - const upb_msgdef* mapentry_def) { + const upb_msgdef* mapentry_def, + Descriptor* desc) { map_handlerdata_t* hd = ALLOC(map_handlerdata_t); hd->ofs = ofs; @@ -252,6 +253,11 @@ static map_handlerdata_t* new_map_handlerdata( hd->value_field_type = upb_fielddef_type(value_field); hd->value_field_typeclass = field_type_class(value_field); + // Ensure that value_field_typeclass is properly GC-rooted. + if (hd->value_field_typeclass != Qnil) { + rb_ary_push(desc->typeclass_references, hd->value_field_typeclass); + } + return hd; } @@ -314,9 +320,7 @@ static void add_handlers_for_singular_field(upb_handlers *h, case UPB_TYPE_INT64: case UPB_TYPE_UINT64: case UPB_TYPE_DOUBLE: - // The shim writes directly at the given offset (instead of using - // DEREF()) so we need to add the msg overhead. - upb_shim_set(h, f, offset + sizeof(MessageHeader), -1); + upb_shim_set(h, f, offset, -1); break; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: { @@ -343,9 +347,10 @@ static void add_handlers_for_singular_field(upb_handlers *h, // Adds handlers to a map field. static void add_handlers_for_mapfield(upb_handlers* h, const upb_fielddef* fielddef, - size_t offset) { + size_t offset, + Descriptor* desc) { const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef); - map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef); + map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc); upb_handlers_addcleanup(h, hd, free); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&attr, hd); @@ -355,10 +360,11 @@ static void add_handlers_for_mapfield(upb_handlers* h, // Adds handlers to a map-entry msgdef. static void add_handlers_for_mapentry(const upb_msgdef* msgdef, - upb_handlers* h) { + upb_handlers* h, + Descriptor* desc) { const upb_fielddef* key_field = map_entry_key(msgdef); const upb_fielddef* value_field = map_entry_value(msgdef); - map_handlerdata_t* hd = new_map_handlerdata(0, msgdef); + map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc); upb_handlers_addcleanup(h, hd, free); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&attr, hd); @@ -366,15 +372,10 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef, add_handlers_for_singular_field( h, key_field, - // Convert the offset into map_parse_frame_t to an offset understood by the - // singular field handlers, so that we don't have to use special - // map-key/value-specific handlers. The ordinary singular field handlers expect - // a Message* and assume offset is relative to the data section at the end, so - // we compensate for that addition. - offsetof(map_parse_frame_t, key_storage) - sizeof(MessageHeader)); + offsetof(map_parse_frame_t, key_storage)); add_handlers_for_singular_field( h, value_field, - offsetof(map_parse_frame_t, value_storage) - sizeof(MessageHeader)); + offsetof(map_parse_frame_t, value_storage)); } static void add_handlers_for_message(const void *closure, upb_handlers *h) { @@ -384,7 +385,7 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) { // If this is a mapentry message type, set up a special set of handlers and // bail out of the normal (user-defined) message type handling. if (upb_msgdef_mapentry(msgdef)) { - add_handlers_for_mapentry(msgdef, h); + add_handlers_for_mapentry(msgdef, h, desc); return; } @@ -402,10 +403,11 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) { !upb_msg_done(&i); upb_msg_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - size_t offset = desc->layout->offsets[upb_fielddef_index(f)]; + size_t offset = desc->layout->offsets[upb_fielddef_index(f)] + + sizeof(MessageHeader); if (is_map_field(f)) { - add_handlers_for_mapfield(h, f, offset); + add_handlers_for_mapfield(h, f, offset, desc); } else if (upb_fielddef_isseq(f)) { add_handlers_for_repeated_field(h, f, offset); } else { @@ -796,38 +798,38 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc, MessageHeader* msg; TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - void* msg_data = Message_data(msg); upb_msg_iter i; for (upb_msg_begin(&i, desc->msgdef); !upb_msg_done(&i); upb_msg_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); - uint32_t offset = desc->layout->offsets[upb_fielddef_index(f)]; + uint32_t offset = + desc->layout->offsets[upb_fielddef_index(f)] + sizeof(MessageHeader); if (is_map_field(f)) { - VALUE map = DEREF(msg_data, offset, VALUE); + VALUE map = DEREF(msg, offset, VALUE); if (map != Qnil) { putmap(map, f, sink, depth); } } else if (upb_fielddef_isseq(f)) { - VALUE ary = DEREF(msg_data, offset, VALUE); + VALUE ary = DEREF(msg, offset, VALUE); if (ary != Qnil) { putary(ary, f, sink, depth); } } else if (upb_fielddef_isstring(f)) { - VALUE str = DEREF(msg_data, offset, VALUE); + VALUE str = DEREF(msg, offset, VALUE); if (RSTRING_LEN(str) > 0) { putstr(str, f, sink); } } else if (upb_fielddef_issubmsg(f)) { - putsubmsg(DEREF(msg_data, offset, VALUE), f, sink, depth); + putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth); } else { upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); #define T(upbtypeconst, upbtype, ctype, default_value) \ case upbtypeconst: { \ - ctype value = DEREF(msg_data, offset, ctype); \ + ctype value = DEREF(msg, offset, ctype); \ if (value != default_value) { \ upb_sink_put##upbtype(sink, sel, value); \ } \ diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c index 56548799..2c2ae240 100644 --- a/ruby/ext/google/protobuf_c/map.c +++ b/ruby/ext/google/protobuf_c/map.c @@ -49,8 +49,14 @@ // field values), but keys are a bit special. Since we use a strtable, we need // to store keys as sequences of bytes such that equality of those bytes maps // one-to-one to equality of keys. We store strings directly (i.e., they map to -// their own bytes) and integers as sequences of either 4 or 8 bytes in -// host-byte-order as either a uint32_t or a uint64_t. +// their own bytes) and integers as native integers (using the native_slot +// abstraction). + +// Note that there is another tradeoff here in keeping string keys as native +// strings rather than Ruby strings: traversing the Map requires conversion to +// Ruby string values on every traversal, potentially creating more garbage. We +// should consider ways to cache a Ruby version of the key if this becomes an +// issue later. // Forms a key to use with the underlying strtable from a Ruby key value. |buf| // must point to TABLE_KEY_BUF_LENGTH bytes of temporary space, used to @@ -73,61 +79,13 @@ static void table_key(Map* self, VALUE key, case UPB_TYPE_BOOL: case UPB_TYPE_INT32: - case UPB_TYPE_INT64: { - // Signed numeric types: use an int64 in host-native byte order. - int64_t key_val = 0; - - // Do a range/value check. - switch (self->key_type) { - case UPB_TYPE_BOOL: - if (key != Qtrue && key != Qfalse) { - rb_raise(rb_eTypeError, "Key must be true or false"); - } - key_val = (key == Qtrue) ? 1 : 0; - break; - case UPB_TYPE_INT32: - native_slot_check_int_range_precision(self->key_type, key); - key_val = NUM2INT(key); - break; - case UPB_TYPE_INT64: - native_slot_check_int_range_precision(self->key_type, key); - key_val = NUM2LL(key); - break; - default: - break; - } - - int64_t* int64_key = (int64_t*)buf; - *int64_key = key_val; - *out_key = buf; - *out_length = sizeof(int64_t); - break; - } - + case UPB_TYPE_INT64: case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: { - // Unsigned numeric types: use a uint64 in host-native byte order. - uint64_t key_val = 0; - - // Do a range/value check. - native_slot_check_int_range_precision(self->key_type, key); - switch (self->key_type) { - case UPB_TYPE_UINT32: - key_val = NUM2UINT(key); - break; - case UPB_TYPE_UINT64: - key_val = NUM2ULL(key); - break; - default: - break; - } - - uint64_t* uint64_key = (uint64_t*)buf; - *uint64_key = key_val; + case UPB_TYPE_UINT64: + native_slot_set(self->key_type, Qnil, buf, key); *out_key = buf; - *out_length = sizeof(uint64_t); + *out_length = native_slot_size(self->key_type); break; - } default: // Map constructor should not allow a Map with another key type to be @@ -148,50 +106,16 @@ static VALUE table_key_to_ruby(Map* self, const char* buf, size_t length) { return ret; } - case UPB_TYPE_BOOL: - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: { - assert(length == sizeof(int64_t)); - int64_t* int64_key = (int64_t*)buf; - - if (self->key_type == UPB_TYPE_BOOL) { - return *int64_key ? Qtrue : Qfalse; - } else { - return LL2NUM(*int64_key); - } - } - - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: { - assert(length == sizeof(uint64_t)); - uint64_t* uint64_key = (uint64_t*)buf; - return ULL2NUM(*uint64_key); - } - - default: - assert(false); - return Qnil; - } -} - -static upb_ctype_t upb_table_value_type(upb_fieldtype_t value_type) { - switch (value_type) { case UPB_TYPE_BOOL: case UPB_TYPE_INT32: case UPB_TYPE_INT64: case UPB_TYPE_UINT32: case UPB_TYPE_UINT64: - case UPB_TYPE_ENUM: - case UPB_TYPE_FLOAT: - case UPB_TYPE_DOUBLE: - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - case UPB_TYPE_MESSAGE: - return UPB_CTYPE_UINT64; + return native_slot_get(self->key_type, Qnil, buf); default: assert(false); - return 0; + return Qnil; } } @@ -321,7 +245,9 @@ VALUE Map_init(int argc, VALUE* argv, VALUE _self) { init_value_arg = 3; } - if (!upb_strtable_init(&self->table, upb_table_value_type(self->value_type))) { + // Table value type is always UINT64: this ensures enough space to store the + // native_slot value. + if (!upb_strtable_init(&self->table, UPB_CTYPE_UINT64)) { rb_raise(rb_eRuntimeError, "Could not allocate table."); } @@ -528,8 +454,7 @@ VALUE Map_clear(VALUE _self) { // Uninit and reinit the table -- this is faster than iterating and doing a // delete-lookup on each key. upb_strtable_uninit(&self->table); - if (!upb_strtable_init(&self->table, - upb_table_value_type(self->value_type))) { + if (!upb_strtable_init(&self->table, UPB_CTYPE_INT64)) { rb_raise(rb_eRuntimeError, "Unable to re-initialize table"); } return Qnil; diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 6dac6029..88ae62e4 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -110,6 +110,10 @@ struct Descriptor { const upb_pbdecodermethod* fill_method; const upb_handlers* pb_serialize_handlers; const upb_handlers* json_serialize_handlers; + // Handlers hold type class references for sub-message fields directly in some + // cases. We need to keep these rooted because they might otherwise be + // collected. + VALUE typeclass_references; }; struct FieldDescriptor { @@ -252,7 +256,7 @@ void native_slot_set(upb_fieldtype_t type, VALUE value); VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, - void* memory); + const void* memory); void native_slot_init(upb_fieldtype_t type, void* memory); void native_slot_mark(upb_fieldtype_t type, void* memory); void native_slot_dup(upb_fieldtype_t type, void* to, void* from); @@ -389,7 +393,7 @@ struct MessageLayout { MessageLayout* create_layout(const upb_msgdef* msgdef); void free_layout(MessageLayout* layout); VALUE layout_get(MessageLayout* layout, - void* storage, + const void* storage, const upb_fielddef* field); void layout_set(MessageLayout* layout, void* storage, diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index 235fbff2..14f49d44 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -200,7 +200,9 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class, } } -VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) { +VALUE native_slot_get(upb_fieldtype_t type, + VALUE type_class, + const void* memory) { switch (type) { case UPB_TYPE_FLOAT: return DBL2NUM(DEREF(memory, float)); @@ -211,7 +213,7 @@ VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) { case UPB_TYPE_STRING: case UPB_TYPE_BYTES: case UPB_TYPE_MESSAGE: - return *((VALUE *)memory); + return DEREF(memory, VALUE); case UPB_TYPE_ENUM: { int32_t val = DEREF(memory, int32_t); VALUE symbol = enum_lookup(type_class, INT2NUM(val)); @@ -332,19 +334,19 @@ bool is_map_field(const upb_fielddef* field) { upb_fielddef_type(field) != UPB_TYPE_MESSAGE) { return false; } - const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_msgdef* subdef = upb_fielddef_msgsubdef(field); return upb_msgdef_mapentry(subdef); } const upb_fielddef* map_field_key(const upb_fielddef* field) { assert(is_map_field(field)); - const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_msgdef* subdef = upb_fielddef_msgsubdef(field); return map_entry_key(subdef); } const upb_fielddef* map_field_value(const upb_fielddef* field) { assert(is_map_field(field)); - const upb_msgdef* subdef = (const upb_msgdef*)upb_fielddef_subdef(field); + const upb_msgdef* subdef = upb_fielddef_msgsubdef(field); return map_entry_value(subdef); } @@ -414,7 +416,7 @@ VALUE field_type_class(const upb_fielddef* field) { } VALUE layout_get(MessageLayout* layout, - void* storage, + const void* storage, const upb_fielddef* field) { void* memory = ((uint8_t *)storage) + layout->offsets[upb_fielddef_index(field)]; diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c index 0015aad1..abea7711 100644 --- a/ruby/ext/google/protobuf_c/upb.c +++ b/ruby/ext/google/protobuf_c/upb.c @@ -3420,8 +3420,10 @@ char *upb_strdup2(const char *s, size_t len) { // have a null-terminating byte since it may be a raw binary buffer. size_t n = len + 1; char *p = malloc(n); - if (p) memcpy(p, s, len); - p[len] = 0; + if (p) { + memcpy(p, s, len); + p[len] = 0; + } return p; } -- cgit v1.2.3