about | summary | refs | log | tree | commit | diff
path: root/ruby/ext/google/protobuf_c/encode_decode.c
diff options
context:
space:
mode:
Diffstat (limited to 'ruby/ext/google/protobuf_c/encode_decode.c')
-rw-r--r-- ruby/ext/google/protobuf_c/encode_decode.c 299
1 files changed, 192 insertions, 107 deletions
diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c
index d86a1145..12080d03 100644
--- a/ruby/ext/google/protobuf_c/encode_decode.c
+++ b/ruby/ext/google/protobuf_c/encode_decode.c
@@ -44,6 +44,56 @@ VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) {
return rb_str;
}
+// The code below also comes from upb's prototype Ruby binding, developed by
+// haberman@.
+
+/* stringsink *****************************************************************/
+
+/* Start-of-string callback installed by stringsink_init(): rewinds the sink's
+ * write position to zero so previously buffered bytes are overwritten, and
+ * returns the sink pointer it was given. hd and size_hint are not used. */
+static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
+  stringsink *self = _sink;
+
+  UPB_UNUSED(hd);
+  UPB_UNUSED(size_hint);
+
+  self->len = 0;
+  return self;
+}
+
+/* Appends len bytes from ptr to the sink's buffer, doubling the buffer's
+ * capacity until the data fits.
+ *
+ * Returns len once the bytes are stored. Returns 0, leaving the sink exactly
+ * as it was, when the required capacity does not fit in a size_t or the buffer
+ * cannot be (re)allocated.
+ *
+ * hd and handle are not used. */
+static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
+                                size_t len, const upb_bufhandle *handle) {
+  stringsink *sink = _sink;
+  const size_t max_size = (size_t)-1;
+  size_t new_size = sink->size;
+  size_t needed;
+
+  UPB_UNUSED(hd);
+  UPB_UNUSED(handle);
+
+  /* Nothing to copy; also keeps memcpy() away from a possibly NULL source. */
+  if (len == 0) {
+    return 0;
+  }
+
+  /* sink->len + len must not wrap around. */
+  if (len > max_size - sink->len) {
+    return 0;
+  }
+  needed = sink->len + len;
+
+  /* Doubling zero never makes progress, so start from one byte. */
+  if (new_size == 0) {
+    new_size = 1;
+  }
+  while (new_size < needed) {
+    if (new_size > max_size / 2) {
+      /* Doubling would overflow; ask for exactly what is needed instead. */
+      new_size = needed;
+      break;
+    }
+    new_size *= 2;
+  }
+
+  /* The NULL check covers a sink whose initial allocation failed. */
+  if (new_size != sink->size || sink->ptr == NULL) {
+    /* Keep the old pointer until realloc() succeeds, so a failure neither
+     * leaks the buffer nor leaves sink->ptr NULL. */
+    char *grown = realloc(sink->ptr, new_size);
+    if (grown == NULL) {
+      return 0;
+    }
+    sink->ptr = grown;
+    sink->size = new_size;
+  }
+
+  memcpy(sink->ptr + sink->len, ptr, len);
+  sink->len += len;
+
+  return len;
+}
+
+/* Prepares a stringsink for use: registers stringsink_start and
+ * stringsink_string as the start-of-string and string-data callbacks of the
+ * sink's byte handler, binds the bytes sink to that handler with the
+ * stringsink itself as closure, and allocates an empty 32-byte buffer.
+ * Release the buffer with stringsink_uninit(). */
+void stringsink_init(stringsink *sink) {
+  enum { kInitialCapacity = 32 };
+  upb_byteshandler *handler = &sink->handler;
+
+  upb_byteshandler_init(handler);
+  upb_byteshandler_setstartstr(handler, stringsink_start, NULL);
+  upb_byteshandler_setstring(handler, stringsink_string, NULL);
+
+  upb_bytessink_reset(&sink->sink, handler, sink);
+
+  sink->len = 0;
+  sink->size = kInitialCapacity;
+  sink->ptr = malloc(kInitialCapacity);
+}
+
+/* Releases the buffer owned by the sink. The stringsink struct itself is not
+ * freed; that remains the caller's responsibility. The buffer pointer is
+ * cleared afterwards so that a repeated uninit is a harmless free(NULL)
+ * instead of a double free. */
+void stringsink_uninit(stringsink *sink) {
+  free(sink->ptr);
+  sink->ptr = NULL;
+}
+
// -----------------------------------------------------------------------------
// Parsing.
// -----------------------------------------------------------------------------
@@ -280,11 +330,6 @@ rb_data_type_t MapParseFrame_type = {
{ MapParseFrame_mark, MapParseFrame_free, NULL },
};
-// Array of Ruby objects wrapping map_parse_frame_t.
-// We don't allow multiple concurrent decodes, so we assume that this global
-// variable is specific to the "current" decode.
-VALUE map_parse_frames;
-
static map_parse_frame_t* map_push_frame(VALUE map,
const map_handlerdata_t* handlerdata) {
map_parse_frame_t* frame = ALLOC(map_parse_frame_t);
@@ -293,16 +338,12 @@ static map_parse_frame_t* map_push_frame(VALUE map,
native_slot_init(handlerdata->key_field_type, &frame->key_storage);
native_slot_init(handlerdata->value_field_type, &frame->value_storage);
- rb_ary_push(map_parse_frames,
+ Map_set_frame(map,
TypedData_Wrap_Struct(rb_cObject, &MapParseFrame_type, frame));
return frame;
}
-static void map_pop_frame() {
- rb_ary_pop(map_parse_frames);
-}
-
// Handler to begin a map entry: allocates a temporary frame. This is the
// 'startsubmsg' handler on the msgdef that contains the map field.
static void *startmapentry_handler(void *closure, const void *hd) {
@@ -336,7 +377,7 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
&frame->value_storage);
Map_index_set(frame->map, key, value);
- map_pop_frame();
+ Map_set_frame(frame->map, Qnil);
return true;
}
@@ -622,6 +663,20 @@ static void add_handlers_for_oneof_field(upb_handlers *h,
upb_handlerattr_uninit(&attr);
}
+/* Handler registered for bytes of fields the message definition does not
+ * describe: appends them to the message's unknown_fields sink, creating the
+ * sink on first use.
+ *
+ * closure is the MessageHeader being populated, hd is unused, and buf/size are
+ * the raw bytes to retain. Returns true when the bytes were stored, false when
+ * the sink could not be allocated or did not accept all of the bytes. */
+static bool unknown_field_handler(void* closure, const void* hd,
+                                  const char* buf, size_t size) {
+  MessageHeader* msg = (MessageHeader*)closure;
+  size_t stored;
+
+  UPB_UNUSED(hd);
+
+  if (msg->unknown_fields == NULL) {
+    /* Only publish the sink once it is fully initialised, and never hand a
+     * NULL pointer to stringsink_init(). */
+    stringsink* created = malloc(sizeof(*created));
+    if (created == NULL) {
+      return false;
+    }
+    stringsink_init(created);
+    msg->unknown_fields = created;
+  }
+
+  stored = stringsink_string(msg->unknown_fields, NULL, buf, size, NULL);
+
+  return stored == size;
+}
static void add_handlers_for_message(const void *closure, upb_handlers *h) {
const upb_msgdef* msgdef = upb_handlers_msgdef(h);
@@ -643,6 +698,9 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) {
desc->layout = create_layout(desc->msgdef);
}
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlers_setunknown(h, unknown_field_handler, &attr);
+
for (upb_msg_field_begin(&i, desc->msgdef);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
@@ -775,10 +833,6 @@ VALUE Message_decode(VALUE klass, VALUE data) {
msg_rb = rb_class_new_instance(0, NULL, msgklass);
TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
- // We generally expect this to be clear already, but clear it in case parsing
- // previously got interrupted somehow.
- rb_ary_clear(map_parse_frames);
-
{
const upb_pbdecodermethod* method = msgdef_decodermethod(desc);
const upb_handlers* h = upb_pbdecodermethod_desthandlers(method);
@@ -823,10 +877,6 @@ VALUE Message_decode_json(VALUE klass, VALUE data) {
msg_rb = rb_class_new_instance(0, NULL, msgklass);
TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
- // We generally expect this to be clear already, but clear it in case parsing
- // previously got interrupted somehow.
- rb_ary_clear(map_parse_frames);
-
{
const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc);
stackenv se;
@@ -848,65 +898,6 @@ VALUE Message_decode_json(VALUE klass, VALUE data) {
// -----------------------------------------------------------------------------
// Serializing.
// -----------------------------------------------------------------------------
-//
-// The code below also comes from upb's prototype Ruby binding, developed by
-// haberman@.
-
-/* stringsink *****************************************************************/
-
-// This should probably be factored into a common upb component.
-
-typedef struct {
- upb_byteshandler handler;
- upb_bytessink sink;
- char *ptr;
- size_t len, size;
-} stringsink;
-
-static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
- stringsink *sink = _sink;
- sink->len = 0;
- return sink;
-}
-
-static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
- size_t len, const upb_bufhandle *handle) {
- stringsink *sink = _sink;
- size_t new_size = sink->size;
-
- UPB_UNUSED(hd);
- UPB_UNUSED(handle);
-
- while (sink->len + len > new_size) {
- new_size *= 2;
- }
-
- if (new_size != sink->size) {
- sink->ptr = realloc(sink->ptr, new_size);
- sink->size = new_size;
- }
-
- memcpy(sink->ptr + sink->len, ptr, len);
- sink->len += len;
-
- return len;
-}
-
-void stringsink_init(stringsink *sink) {
- upb_byteshandler_init(&sink->handler);
- upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
- upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
-
- upb_bytessink_reset(&sink->sink, &sink->handler, sink);
-
- sink->size = 32;
- sink->ptr = malloc(sink->size);
- sink->len = 0;
-}
-
-void stringsink_uninit(stringsink *sink) {
- free(sink->ptr);
-}
/* msgvisitor *****************************************************************/
@@ -914,13 +905,9 @@ void stringsink_uninit(stringsink *sink) {
// semantics, which means that we have true field presence, we will want to
// modify msgvisitor so that it emits all present fields rather than all
// non-default-value fields.
-//
-// Likewise, when implementing JSON serialization, we may need to have a
-// 'verbose' mode that outputs all fields and a 'concise' mode that outputs only
-// those with non-default values.
static void putmsg(VALUE msg, const Descriptor* desc,
- upb_sink *sink, int depth);
+ upb_sink *sink, int depth, bool emit_defaults);
static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
upb_selector_t ret;
@@ -952,7 +939,7 @@ static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) {
}
static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink,
- int depth) {
+ int depth, bool emit_defaults) {
upb_sink subsink;
VALUE descriptor;
Descriptor* subdesc;
@@ -963,12 +950,12 @@ static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink,
subdesc = ruby_to_Descriptor(descriptor);
upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink);
- putmsg(submsg, subdesc, &subsink, depth + 1);
+ putmsg(submsg, subdesc, &subsink, depth + 1, emit_defaults);
upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG));
}
static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
- int depth) {
+ int depth, bool emit_defaults) {
upb_sink subsink;
upb_fieldtype_t type = upb_fielddef_type(f);
upb_selector_t sel = 0;
@@ -1005,7 +992,7 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
putstr(*((VALUE *)memory), f, &subsink);
break;
case UPB_TYPE_MESSAGE:
- putsubmsg(*((VALUE *)memory), f, &subsink, depth);
+ putsubmsg(*((VALUE *)memory), f, &subsink, depth, emit_defaults);
break;
#undef T
@@ -1019,7 +1006,8 @@ static void put_ruby_value(VALUE value,
const upb_fielddef *f,
VALUE type_class,
int depth,
- upb_sink *sink) {
+ upb_sink *sink,
+ bool emit_defaults) {
upb_selector_t sel = 0;
if (upb_fielddef_isprimitive(f)) {
sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
@@ -1059,12 +1047,12 @@ static void put_ruby_value(VALUE value,
putstr(value, f, sink);
break;
case UPB_TYPE_MESSAGE:
- putsubmsg(value, f, sink, depth);
+ putsubmsg(value, f, sink, depth, emit_defaults);
}
}
static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
- int depth) {
+ int depth, bool emit_defaults) {
Map* self;
upb_sink subsink;
const upb_fielddef* key_field;
@@ -1090,9 +1078,9 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
&entry_sink);
upb_sink_startmsg(&entry_sink);
- put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink);
+ put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink, emit_defaults);
put_ruby_value(value, value_field, self->value_type_class, depth + 1,
- &entry_sink);
+ &entry_sink, emit_defaults);
upb_sink_endmsg(&entry_sink, &status);
upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG));
@@ -1102,7 +1090,7 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
}
static void putmsg(VALUE msg_rb, const Descriptor* desc,
- upb_sink *sink, int depth) {
+ upb_sink *sink, int depth, bool emit_defaults) {
MessageHeader* msg;
upb_msg_field_iter i;
upb_status status;
@@ -1144,31 +1132,31 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
if (is_map_field(f)) {
VALUE map = DEREF(msg, offset, VALUE);
- if (map != Qnil) {
- putmap(map, f, sink, depth);
+ if (map != Qnil || emit_defaults) {
+ putmap(map, f, sink, depth, emit_defaults);
}
} else if (upb_fielddef_isseq(f)) {
VALUE ary = DEREF(msg, offset, VALUE);
if (ary != Qnil) {
- putary(ary, f, sink, depth);
+ putary(ary, f, sink, depth, emit_defaults);
}
} else if (upb_fielddef_isstring(f)) {
VALUE str = DEREF(msg, offset, VALUE);
- if (is_matching_oneof || RSTRING_LEN(str) > 0) {
+ if (is_matching_oneof || emit_defaults || RSTRING_LEN(str) > 0) {
putstr(str, f, sink);
}
} else if (upb_fielddef_issubmsg(f)) {
- putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth);
+ putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth, emit_defaults);
} else {
upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
-#define T(upbtypeconst, upbtype, ctype, default_value) \
- case upbtypeconst: { \
- ctype value = DEREF(msg, offset, ctype); \
- if (is_matching_oneof || value != default_value) { \
- upb_sink_put##upbtype(sink, sel, value); \
- } \
- } \
+#define T(upbtypeconst, upbtype, ctype, default_value) \
+ case upbtypeconst: { \
+ ctype value = DEREF(msg, offset, ctype); \
+ if (is_matching_oneof || emit_defaults || value != default_value) { \
+ upb_sink_put##upbtype(sink, sel, value); \
+ } \
+ } \
break;
switch (upb_fielddef_type(f)) {
@@ -1191,6 +1179,11 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
}
}
+ stringsink* unknown = msg->unknown_fields;
+ if (unknown != NULL) {
+ upb_sink_putunknown(sink, unknown->ptr, unknown->len);
+ }
+
upb_sink_endmsg(sink, &status);
}
@@ -1246,7 +1239,7 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb) {
stackenv_init(&se, "Error occurred during encoding: %s");
encoder = upb_pb_encoder_create(&se.env, serialize_handlers, &sink.sink);
- putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0);
+ putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0, false);
ret = rb_str_new(sink.ptr, sink.len);
@@ -1268,6 +1261,7 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
Descriptor* desc = ruby_to_Descriptor(descriptor);
VALUE msg_rb;
VALUE preserve_proto_fieldnames = Qfalse;
+ VALUE emit_defaults = Qfalse;
stringsink sink;
if (argc < 1 || argc > 2) {
@@ -1283,6 +1277,9 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
}
preserve_proto_fieldnames = rb_hash_lookup2(
hash_args, ID2SYM(rb_intern("preserve_proto_fieldnames")), Qfalse);
+
+ emit_defaults = rb_hash_lookup2(
+ hash_args, ID2SYM(rb_intern("emit_defaults")), Qfalse);
}
stringsink_init(&sink);
@@ -1297,7 +1294,7 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
stackenv_init(&se, "Error occurred during encoding: %s");
printer = upb_json_printer_create(&se.env, serialize_handlers, &sink.sink);
- putmsg(msg_rb, desc, upb_json_printer_input(printer), 0);
+ putmsg(msg_rb, desc, upb_json_printer_input(printer), 0, RTEST(emit_defaults));
ret = rb_enc_str_new(sink.ptr, sink.len, rb_utf8_encoding());
@@ -1308,3 +1305,91 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
}
}
+/* Drops the unknown-field bytes retained on msg_rb, then does the same
+ * recursively for every message reachable through its fields: singular
+ * submessages, repeated message fields, and map fields whose values are
+ * messages. desc supplies the msgdef to iterate and the layout used to locate
+ * each field's storage inside the message. */
+static void discard_unknown(VALUE msg_rb, const Descriptor* desc) {
+  MessageHeader* msg;
+  upb_msg_field_iter it;
+
+  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
+
+  stringsink* unknown = msg->unknown_fields;
+  if (unknown != NULL) {
+    stringsink_uninit(unknown);
+    /* unknown_field_handler malloc()s the sink struct itself; releasing only
+     * its buffer would leak the struct once the pointer is cleared. */
+    free(unknown);
+    msg->unknown_fields = NULL;
+  }
+
+  for (upb_msg_field_begin(&it, desc->msgdef);
+       !upb_msg_field_done(&it);
+       upb_msg_field_next(&it)) {
+    upb_fielddef *f = upb_msg_iter_field(&it);
+    uint32_t offset =
+        desc->layout->fields[upb_fielddef_index(f)].offset +
+        sizeof(MessageHeader);
+
+    if (upb_fielddef_containingoneof(f)) {
+      uint32_t oneof_case_offset =
+          desc->layout->fields[upb_fielddef_index(f)].case_offset +
+          sizeof(MessageHeader);
+      // For a oneof, check that this field is actually present -- skip all the
+      // below if not.
+      if (DEREF(msg, oneof_case_offset, uint32_t) !=
+          upb_fielddef_number(f)) {
+        continue;
+      }
+      // Otherwise, fall through to the appropriate singular-field handler
+      // below.
+    }
+
+    // Only message-typed fields can carry nested unknown fields.
+    if (!upb_fielddef_issubmsg(f)) {
+      continue;
+    }
+
+    if (is_map_field(f)) {
+      // Maps are only interesting when their values are messages.
+      if (!upb_fielddef_issubmsg(map_field_value(f))) continue;
+      VALUE map = DEREF(msg, offset, VALUE);
+      if (map == Qnil) continue;
+      Map_iter map_it;
+      for (Map_begin(map, &map_it); !Map_done(&map_it); Map_next(&map_it)) {
+        VALUE submsg = Map_iter_value(&map_it);
+        VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
+        const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
+        discard_unknown(submsg, subdesc);
+      }
+    } else if (upb_fielddef_isseq(f)) {
+      VALUE ary = DEREF(msg, offset, VALUE);
+      if (ary == Qnil) continue;
+      int size = NUM2INT(RepeatedField_length(ary));
+      for (int i = 0; i < size; i++) {
+        void* memory = RepeatedField_index_native(ary, i);
+        VALUE submsg = *((VALUE *)memory);
+        VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
+        const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
+        discard_unknown(submsg, subdesc);
+      }
+    } else {
+      VALUE submsg = DEREF(msg, offset, VALUE);
+      if (submsg == Qnil) continue;
+      VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
+      const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
+      discard_unknown(submsg, subdesc);
+    }
+  }
+}
+
+/*
+ * call-seq:
+ *     Google::Protobuf.discard_unknown(msg)
+ *
+ * Discard unknown fields in the given message object and recursively discard
+ * unknown fields in submessages.
+ *
+ * Raises ArgumentError when given a RepeatedField or Map instead of a message.
+ * Returns nil.
+ */
+VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) {
+  VALUE klass = CLASS_OF(msg_rb);
+  VALUE descriptor;
+
+  /* Validate the argument before looking up or converting a descriptor, so
+   * that containers always produce the ArgumentError below rather than
+   * whatever the descriptor conversion does with a missing descriptor. */
+  if (klass == cRepeatedField || klass == cMap) {
+    rb_raise(rb_eArgError, "Expected proto msg for discard unknown.");
+  }
+
+  descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
+  discard_unknown(msg_rb, ruby_to_Descriptor(descriptor));
+
+  return Qnil;
+}