diff options
Diffstat (limited to 'ruby/ext/google/protobuf_c/encode_decode.c')
-rw-r--r-- | ruby/ext/google/protobuf_c/encode_decode.c | 299 |
1 files changed, 192 insertions, 107 deletions
diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index d86a1145..12080d03 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -44,6 +44,56 @@ VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) { return rb_str; } +// The code below also comes from upb's prototype Ruby binding, developed by +// haberman@. + +/* stringsink *****************************************************************/ + +static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { + stringsink *sink = _sink; + sink->len = 0; + return sink; +} + +static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, + size_t len, const upb_bufhandle *handle) { + stringsink *sink = _sink; + size_t new_size = sink->size; + + UPB_UNUSED(hd); + UPB_UNUSED(handle); + + while (sink->len + len > new_size) { + new_size *= 2; + } + + if (new_size != sink->size) { + sink->ptr = realloc(sink->ptr, new_size); + sink->size = new_size; + } + + memcpy(sink->ptr + sink->len, ptr, len); + sink->len += len; + + return len; +} + +void stringsink_init(stringsink *sink) { + upb_byteshandler_init(&sink->handler); + upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); + upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); + + upb_bytessink_reset(&sink->sink, &sink->handler, sink); + + sink->size = 32; + sink->ptr = malloc(sink->size); + sink->len = 0; +} + +void stringsink_uninit(stringsink *sink) { + free(sink->ptr); +} + // ----------------------------------------------------------------------------- // Parsing. // ----------------------------------------------------------------------------- @@ -280,11 +330,6 @@ rb_data_type_t MapParseFrame_type = { { MapParseFrame_mark, MapParseFrame_free, NULL }, }; -// Array of Ruby objects wrapping map_parse_frame_t. -// We don't allow multiple concurrent decodes, so we assume that this global -// variable is specific to the "current" decode. -VALUE map_parse_frames; - static map_parse_frame_t* map_push_frame(VALUE map, const map_handlerdata_t* handlerdata) { map_parse_frame_t* frame = ALLOC(map_parse_frame_t); @@ -293,16 +338,12 @@ static map_parse_frame_t* map_push_frame(VALUE map, native_slot_init(handlerdata->key_field_type, &frame->key_storage); native_slot_init(handlerdata->value_field_type, &frame->value_storage); - rb_ary_push(map_parse_frames, + Map_set_frame(map, TypedData_Wrap_Struct(rb_cObject, &MapParseFrame_type, frame)); return frame; } -static void map_pop_frame() { - rb_ary_pop(map_parse_frames); -} - // Handler to begin a map entry: allocates a temporary frame. This is the // 'startsubmsg' handler on the msgdef that contains the map field. static void *startmapentry_handler(void *closure, const void *hd) { @@ -336,7 +377,7 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) { &frame->value_storage); Map_index_set(frame->map, key, value); - map_pop_frame(); + Map_set_frame(frame->map, Qnil); return true; } @@ -622,6 +663,20 @@ static void add_handlers_for_oneof_field(upb_handlers *h, upb_handlerattr_uninit(&attr); } +static bool unknown_field_handler(void* closure, const void* hd, + const char* buf, size_t size) { + UPB_UNUSED(hd); + + MessageHeader* msg = (MessageHeader*)closure; + if (msg->unknown_fields == NULL) { + msg->unknown_fields = malloc(sizeof(stringsink)); + stringsink_init(msg->unknown_fields); + } + + stringsink_string(msg->unknown_fields, NULL, buf, size, NULL); + + return true; +} static void add_handlers_for_message(const void *closure, upb_handlers *h) { const upb_msgdef* msgdef = upb_handlers_msgdef(h); @@ -643,6 +698,9 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) { desc->layout = create_layout(desc->msgdef); } + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlers_setunknown(h, unknown_field_handler, &attr); + for (upb_msg_field_begin(&i, desc->msgdef); !upb_msg_field_done(&i); upb_msg_field_next(&i)) { @@ -775,10 +833,6 @@ VALUE Message_decode(VALUE klass, VALUE data) { msg_rb = rb_class_new_instance(0, NULL, msgklass); TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - // We generally expect this to be clear already, but clear it in case parsing - // previously got interrupted somehow. - rb_ary_clear(map_parse_frames); - { const upb_pbdecodermethod* method = msgdef_decodermethod(desc); const upb_handlers* h = upb_pbdecodermethod_desthandlers(method); @@ -823,10 +877,6 @@ VALUE Message_decode_json(VALUE klass, VALUE data) { msg_rb = rb_class_new_instance(0, NULL, msgklass); TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - // We generally expect this to be clear already, but clear it in case parsing - // previously got interrupted somehow. - rb_ary_clear(map_parse_frames); - { const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc); stackenv se; @@ -848,65 +898,6 @@ VALUE Message_decode_json(VALUE klass, VALUE data) { // ----------------------------------------------------------------------------- // Serializing. // ----------------------------------------------------------------------------- -// -// The code below also comes from upb's prototype Ruby binding, developed by -// haberman@. - -/* stringsink *****************************************************************/ - -// This should probably be factored into a common upb component. - -typedef struct { - upb_byteshandler handler; - upb_bytessink sink; - char *ptr; - size_t len, size; -} stringsink; - -static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { - stringsink *sink = _sink; - sink->len = 0; - return sink; -} - -static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, - size_t len, const upb_bufhandle *handle) { - stringsink *sink = _sink; - size_t new_size = sink->size; - - UPB_UNUSED(hd); - UPB_UNUSED(handle); - - while (sink->len + len > new_size) { - new_size *= 2; - } - - if (new_size != sink->size) { - sink->ptr = realloc(sink->ptr, new_size); - sink->size = new_size; - } - - memcpy(sink->ptr + sink->len, ptr, len); - sink->len += len; - - return len; -} - -void stringsink_init(stringsink *sink) { - upb_byteshandler_init(&sink->handler); - upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); - upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); - - upb_bytessink_reset(&sink->sink, &sink->handler, sink); - - sink->size = 32; - sink->ptr = malloc(sink->size); - sink->len = 0; -} - -void stringsink_uninit(stringsink *sink) { - free(sink->ptr); -} /* msgvisitor *****************************************************************/ @@ -914,13 +905,9 @@ void stringsink_uninit(stringsink *sink) { // semantics, which means that we have true field presence, we will want to // modify msgvisitor so that it emits all present fields rather than all // non-default-value fields. -// -// Likewise, when implementing JSON serialization, we may need to have a -// 'verbose' mode that outputs all fields and a 'concise' mode that outputs only -// those with non-default values. static void putmsg(VALUE msg, const Descriptor* desc, - upb_sink *sink, int depth); + upb_sink *sink, int depth, bool emit_defaults); static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { upb_selector_t ret; @@ -952,7 +939,7 @@ static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) { } static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink, - int depth) { + int depth, bool emit_defaults) { upb_sink subsink; VALUE descriptor; Descriptor* subdesc; @@ -963,12 +950,12 @@ static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink, subdesc = ruby_to_Descriptor(descriptor); upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink); - putmsg(submsg, subdesc, &subsink, depth + 1); + putmsg(submsg, subdesc, &subsink, depth + 1, emit_defaults); upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG)); } static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink, - int depth) { + int depth, bool emit_defaults) { upb_sink subsink; upb_fieldtype_t type = upb_fielddef_type(f); upb_selector_t sel = 0; @@ -1005,7 +992,7 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink, putstr(*((VALUE *)memory), f, &subsink); break; case UPB_TYPE_MESSAGE: - putsubmsg(*((VALUE *)memory), f, &subsink, depth); + putsubmsg(*((VALUE *)memory), f, &subsink, depth, emit_defaults); break; #undef T @@ -1019,7 +1006,8 @@ static void put_ruby_value(VALUE value, const upb_fielddef *f, VALUE type_class, int depth, - upb_sink *sink) { + upb_sink *sink, + bool emit_defaults) { upb_selector_t sel = 0; if (upb_fielddef_isprimitive(f)) { sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); @@ -1059,12 +1047,12 @@ static void put_ruby_value(VALUE value, putstr(value, f, sink); break; case UPB_TYPE_MESSAGE: - putsubmsg(value, f, sink, depth); + putsubmsg(value, f, sink, depth, emit_defaults); } } static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink, - int depth) { + int depth, bool emit_defaults) { Map* self; upb_sink subsink; const upb_fielddef* key_field; @@ -1090,9 +1078,9 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink, &entry_sink); upb_sink_startmsg(&entry_sink); - put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink); + put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink, emit_defaults); put_ruby_value(value, value_field, self->value_type_class, depth + 1, - &entry_sink); + &entry_sink, emit_defaults); upb_sink_endmsg(&entry_sink, &status); upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG)); @@ -1102,7 +1090,7 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink, } static void putmsg(VALUE msg_rb, const Descriptor* desc, - upb_sink *sink, int depth) { + upb_sink *sink, int depth, bool emit_defaults) { MessageHeader* msg; upb_msg_field_iter i; upb_status status; @@ -1144,31 +1132,31 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc, if (is_map_field(f)) { VALUE map = DEREF(msg, offset, VALUE); - if (map != Qnil) { - putmap(map, f, sink, depth); + if (map != Qnil || emit_defaults) { + putmap(map, f, sink, depth, emit_defaults); } } else if (upb_fielddef_isseq(f)) { VALUE ary = DEREF(msg, offset, VALUE); if (ary != Qnil) { - putary(ary, f, sink, depth); + putary(ary, f, sink, depth, emit_defaults); } } else if (upb_fielddef_isstring(f)) { VALUE str = DEREF(msg, offset, VALUE); - if (is_matching_oneof || RSTRING_LEN(str) > 0) { + if (is_matching_oneof || emit_defaults || RSTRING_LEN(str) > 0) { putstr(str, f, sink); } } else if (upb_fielddef_issubmsg(f)) { - putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth); + putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth, emit_defaults); } else { upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); -#define T(upbtypeconst, upbtype, ctype, default_value) \ - case upbtypeconst: { \ - ctype value = DEREF(msg, offset, ctype); \ - if (is_matching_oneof || value != default_value) { \ - upb_sink_put##upbtype(sink, sel, value); \ - } \ - } \ +#define T(upbtypeconst, upbtype, ctype, default_value) \ + case upbtypeconst: { \ + ctype value = DEREF(msg, offset, ctype); \ + if (is_matching_oneof || emit_defaults || value != default_value) { \ + upb_sink_put##upbtype(sink, sel, value); \ + } \ + } \ break; switch (upb_fielddef_type(f)) { @@ -1191,6 +1179,11 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc, } } + stringsink* unknown = msg->unknown_fields; + if (unknown != NULL) { + upb_sink_putunknown(sink, unknown->ptr, unknown->len); + } + upb_sink_endmsg(sink, &status); } @@ -1246,7 +1239,7 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb) { stackenv_init(&se, "Error occurred during encoding: %s"); encoder = upb_pb_encoder_create(&se.env, serialize_handlers, &sink.sink); - putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0); + putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0, false); ret = rb_str_new(sink.ptr, sink.len); @@ -1268,6 +1261,7 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) { Descriptor* desc = ruby_to_Descriptor(descriptor); VALUE msg_rb; VALUE preserve_proto_fieldnames = Qfalse; + VALUE emit_defaults = Qfalse; stringsink sink; if (argc < 1 || argc > 2) { @@ -1283,6 +1277,9 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) { } preserve_proto_fieldnames = rb_hash_lookup2( hash_args, ID2SYM(rb_intern("preserve_proto_fieldnames")), Qfalse); + + emit_defaults = rb_hash_lookup2( + hash_args, ID2SYM(rb_intern("emit_defaults")), Qfalse); } stringsink_init(&sink); @@ -1297,7 +1294,7 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) { stackenv_init(&se, "Error occurred during encoding: %s"); printer = upb_json_printer_create(&se.env, serialize_handlers, &sink.sink); - putmsg(msg_rb, desc, upb_json_printer_input(printer), 0); + putmsg(msg_rb, desc, upb_json_printer_input(printer), 0, RTEST(emit_defaults)); ret = rb_enc_str_new(sink.ptr, sink.len, rb_utf8_encoding()); @@ -1308,3 +1305,91 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) { } } +static void discard_unknown(VALUE msg_rb, const Descriptor* desc) { + MessageHeader* msg; + upb_msg_field_iter it; + + TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); + + stringsink* unknown = msg->unknown_fields; + if (unknown != NULL) { + stringsink_uninit(unknown); + msg->unknown_fields = NULL; + } + + for (upb_msg_field_begin(&it, desc->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + upb_fielddef *f = upb_msg_iter_field(&it); + uint32_t offset = + desc->layout->fields[upb_fielddef_index(f)].offset + + sizeof(MessageHeader); + + if (upb_fielddef_containingoneof(f)) { + uint32_t oneof_case_offset = + desc->layout->fields[upb_fielddef_index(f)].case_offset + + sizeof(MessageHeader); + // For a oneof, check that this field is actually present -- skip all the + // below if not. + if (DEREF(msg, oneof_case_offset, uint32_t) != + upb_fielddef_number(f)) { + continue; + } + // Otherwise, fall through to the appropriate singular-field handler + // below. + } + + if (!upb_fielddef_issubmsg(f)) { + continue; + } + + if (is_map_field(f)) { + if (!upb_fielddef_issubmsg(map_field_value(f))) continue; + VALUE map = DEREF(msg, offset, VALUE); + if (map == Qnil) continue; + Map_iter map_it; + for (Map_begin(map, &map_it); !Map_done(&map_it); Map_next(&map_it)) { + VALUE submsg = Map_iter_value(&map_it); + VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); + const Descriptor* subdesc = ruby_to_Descriptor(descriptor); + discard_unknown(submsg, subdesc); + } + } else if (upb_fielddef_isseq(f)) { + VALUE ary = DEREF(msg, offset, VALUE); + if (ary == Qnil) continue; + int size = NUM2INT(RepeatedField_length(ary)); + for (int i = 0; i < size; i++) { + void* memory = RepeatedField_index_native(ary, i); + VALUE submsg = *((VALUE *)memory); + VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); + const Descriptor* subdesc = ruby_to_Descriptor(descriptor); + discard_unknown(submsg, subdesc); + } + } else { + VALUE submsg = DEREF(msg, offset, VALUE); + if (submsg == Qnil) continue; + VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); + const Descriptor* subdesc = ruby_to_Descriptor(descriptor); + discard_unknown(submsg, subdesc); + } + } +} + +/* + * call-seq: + * Google::Protobuf.discard_unknown(msg) + * + * Discard unknown fields in the given message object and recursively discard + * unknown fields in submessages. + */ +VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) { + VALUE klass = CLASS_OF(msg_rb); + VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned); + Descriptor* desc = ruby_to_Descriptor(descriptor); + if (klass == cRepeatedField || klass == cMap) { + rb_raise(rb_eArgError, "Expected proto msg for discard unknown."); + } else { + discard_unknown(msg_rb, desc); + } + return Qnil; +} |