From d326277397e345b5bda4a8afbd0a9d54f01b9a06 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 14 May 2015 18:24:26 -0700 Subject: Update MRI C Ruby extension to use new version of upb. - Alter encode/decode paths to use the `upb_env` (environment) abstraction. - Update upb amalgamation to upstream `93791bfe`. - Fix a compilation warning (void*->char* cast). - Modify build flags so that upb doesn't produce warnings -- the Travis build logs were pretty cluttered previously. --- ruby/ext/google/protobuf_c/encode_decode.c | 99 ++- ruby/ext/google/protobuf_c/extconf.rb | 4 +- ruby/ext/google/protobuf_c/message.c | 2 +- ruby/ext/google/protobuf_c/protobuf.h | 4 + ruby/ext/google/protobuf_c/upb.c | 1070 +++++++++++++++++++++------- ruby/ext/google/protobuf_c/upb.h | 978 +++++++++++++------------ 6 files changed, 1408 insertions(+), 749 deletions(-) (limited to 'ruby') diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 5730504d..ba555048 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -622,6 +622,48 @@ static const upb_pbdecodermethod *msgdef_decodermethod(Descriptor* desc) { return desc->fill_method; } + +// Stack-allocated context during an encode/decode operation. Contains the upb +// environment and its stack-based allocator, an initial buffer for allocations +// to avoid malloc() when possible, and a template for Ruby exception messages +// if any error occurs. +#define STACK_ENV_STACKBYTES 4096 +typedef struct { + upb_env env; + upb_seededalloc alloc; + const char* ruby_error_template; + char allocbuf[STACK_ENV_STACKBYTES]; +} stackenv; + +static void stackenv_init(stackenv* se, const char* errmsg); +static void stackenv_uninit(stackenv* se); + +// Callback invoked by upb if any error occurs during parsing or serialization. 
+static bool env_error_func(void* ud, const upb_status* status) { + stackenv* se = ud; + // Free the env -- rb_raise will longjmp up the stack past the encode/decode + // function so it would not otherwise have been freed. + stackenv_uninit(se); + rb_raise(rb_eRuntimeError, se->ruby_error_template, upb_status_errmsg(status)); + // Never reached: rb_raise() always longjmp()s up the stack, past all of our + // code, back to Ruby. + return false; +} + +static void stackenv_init(stackenv* se, const char* errmsg) { + se->ruby_error_template = errmsg; + upb_env_init(&se->env); + upb_seededalloc_init(&se->alloc, &se->allocbuf, STACK_ENV_STACKBYTES); + upb_env_setallocfunc( + &se->env, upb_seededalloc_getallocfunc(&se->alloc), &se->alloc); + upb_env_seterrorfunc(&se->env, env_error_func, se); +} + +static void stackenv_uninit(stackenv* se) { + upb_env_uninit(&se->env); + upb_seededalloc_uninit(&se->alloc); +} + /* * call-seq: * MessageClass.decode(data) => message @@ -645,21 +687,17 @@ VALUE Message_decode(VALUE klass, VALUE data) { const upb_pbdecodermethod* method = msgdef_decodermethod(desc); const upb_handlers* h = upb_pbdecodermethod_desthandlers(method); - upb_pbdecoder decoder; - upb_sink sink; - upb_status status = UPB_STATUS_INIT; + stackenv se; + stackenv_init(&se, "Error occurred during parsing: %s"); - upb_pbdecoder_init(&decoder, method, &status); + upb_sink sink; upb_sink_reset(&sink, h, msg); - upb_pbdecoder_resetoutput(&decoder, &sink); + upb_pbdecoder* decoder = + upb_pbdecoder_create(&se.env, method, &sink); upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data), - upb_pbdecoder_input(&decoder)); + upb_pbdecoder_input(decoder)); - upb_pbdecoder_uninit(&decoder); - if (!upb_ok(&status)) { - rb_raise(rb_eRuntimeError, "Error occurred during parsing: %s.", - upb_status_errmsg(&status)); - } + stackenv_uninit(&se); return msg_rb; } @@ -688,21 +726,16 @@ VALUE Message_decode_json(VALUE klass, VALUE data) { MessageHeader* msg; TypedData_Get_Struct(msg_rb, 
MessageHeader, &Message_type, msg); - upb_status status = UPB_STATUS_INIT; - upb_json_parser parser; - upb_json_parser_init(&parser, &status); + stackenv se; + stackenv_init(&se, "Error occurred during parsing: %s"); upb_sink sink; upb_sink_reset(&sink, get_fill_handlers(desc), msg); - upb_json_parser_resetoutput(&parser, &sink); + upb_json_parser* parser = upb_json_parser_create(&se.env, &sink); upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data), - upb_json_parser_input(&parser)); + upb_json_parser_input(parser)); - upb_json_parser_uninit(&parser); - if (!upb_ok(&status)) { - rb_raise(rb_eRuntimeError, "Error occurred during parsing: %s.", - upb_status_errmsg(&status)); - } + stackenv_uninit(&se); return msg_rb; } @@ -956,7 +989,7 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc, // Protect against cycles (possible because users may freely reassign message // and repeated fields) by imposing a maximum recursion depth. - if (depth > UPB_SINK_MAX_NESTING) { + if (depth > ENCODE_MAX_NESTING) { rb_raise(rb_eRuntimeError, "Maximum recursion depth exceeded during encoding."); } @@ -1074,15 +1107,16 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb) { const upb_handlers* serialize_handlers = msgdef_pb_serialize_handlers(desc); - upb_pb_encoder encoder; - upb_pb_encoder_init(&encoder, serialize_handlers); - upb_pb_encoder_resetoutput(&encoder, &sink.sink); + stackenv se; + stackenv_init(&se, "Error occurred during encoding: %s"); + upb_pb_encoder* encoder = + upb_pb_encoder_create(&se.env, serialize_handlers, &sink.sink); - putmsg(msg_rb, desc, upb_pb_encoder_input(&encoder), 0); + putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0); VALUE ret = rb_str_new(sink.ptr, sink.len); - upb_pb_encoder_uninit(&encoder); + stackenv_uninit(&se); stringsink_uninit(&sink); return ret; @@ -1104,15 +1138,16 @@ VALUE Message_encode_json(VALUE klass, VALUE msg_rb) { const upb_handlers* serialize_handlers = msgdef_json_serialize_handlers(desc); - upb_json_printer printer; 
- upb_json_printer_init(&printer, serialize_handlers); - upb_json_printer_resetoutput(&printer, &sink.sink); + stackenv se; + stackenv_init(&se, "Error occurred during encoding: %s"); + upb_json_printer* printer = + upb_json_printer_create(&se.env, serialize_handlers, &sink.sink); - putmsg(msg_rb, desc, upb_json_printer_input(&printer), 0); + putmsg(msg_rb, desc, upb_json_printer_input(printer), 0); VALUE ret = rb_str_new(sink.ptr, sink.len); - upb_json_printer_uninit(&printer); + stackenv_uninit(&se); stringsink_uninit(&sink); return ret; diff --git a/ruby/ext/google/protobuf_c/extconf.rb b/ruby/ext/google/protobuf_c/extconf.rb index 8d60392c..74203b07 100644 --- a/ruby/ext/google/protobuf_c/extconf.rb +++ b/ruby/ext/google/protobuf_c/extconf.rb @@ -2,7 +2,9 @@ require 'mkmf' -$CFLAGS += " -O3 -std=c99 -Wno-unused-function -DNDEBUG " +$CFLAGS += " -O3 -std=c99 -Wno-unused-function " + + "-Wno-declaration-after-statement -Wno-unused-variable " + + "-Wno-sign-compare -DNDEBUG " $objs = ["protobuf.o", "defs.o", "storage.o", "message.o", "repeated_field.o", "map.o", "encode_decode.o", "upb.o"] diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index 7e58a617..ee5f23a5 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -86,7 +86,7 @@ static VALUE which_oneof_field(MessageHeader* self, const upb_oneofdef* o) { size_t case_ofs = self->descriptor->layout-> fields[upb_fielddef_index(first_field)].case_offset; - uint32_t oneof_case = *((uint32_t*)(Message_data(self) + case_ofs)); + uint32_t oneof_case = *((uint32_t*)((char*)Message_data(self) + case_ofs)); if (oneof_case == ONEOF_CASE_NONE) { return Qnil; diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 985b7f3d..400909fe 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -511,6 +511,10 @@ VALUE enum_resolve(VALUE self, VALUE sym); const 
upb_pbdecodermethod *new_fillmsg_decodermethod( Descriptor* descriptor, const void *owner); +// Maximum depth allowed during encoding, to avoid stack overflows due to +// cycles. +#define ENCODE_MAX_NESTING 63 + // ----------------------------------------------------------------------------- // Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor // instances. diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c index 20bd76bc..55a99cfe 100644 --- a/ruby/ext/google/protobuf_c/upb.c +++ b/ruby/ext/google/protobuf_c/upb.c @@ -1779,6 +1779,275 @@ upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) { void upb_oneof_iter_setdone(upb_oneof_iter *iter) { upb_inttable_iter_setdone(iter); } +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2014 Google Inc. See LICENSE for details. + * Author: Josh Haberman + */ + + +#include +#include +#include + +typedef struct cleanup_ent { + upb_cleanup_func *cleanup; + void *ud; + struct cleanup_ent *next; +} cleanup_ent; + +static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size); + +/* Default allocator **********************************************************/ + +// Just use realloc, keeping all allocated blocks in a linked list to destroy at +// the end. + +typedef struct mem_block { + // List is doubly-linked, because in cases where realloc() moves an existing + // block, we need to be able to remove the old pointer from the list + // efficiently. + struct mem_block *prev, *next; +#ifndef NDEBUG + size_t size; // Doesn't include mem_block structure. +#endif + char data[]; +} mem_block; + +typedef struct { + mem_block *head; +} default_alloc_ud; + +static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) { + UPB_UNUSED(oldsize); + default_alloc_ud *ud = _ud; + + mem_block *from = ptr ? 
(void*)((char*)ptr - sizeof(mem_block)) : NULL; + +#ifndef NDEBUG + if (from) { + assert(oldsize <= from->size); + } +#endif + + mem_block *block = realloc(from, size + sizeof(mem_block)); + if (!block) return NULL; + +#ifndef NDEBUG + block->size = size; +#endif + + if (from) { + if (block != from) { + // The block was moved, so pointers in next and prev blocks must be + // updated to its new location. + if (block->next) block->next->prev = block; + if (block->prev) block->prev->next = block; + } + } else { + // Insert at head of linked list. + block->prev = NULL; + block->next = ud->head; + if (block->next) block->next->prev = block; + ud->head = block; + } + + return &block->data; +} + +static void default_alloc_cleanup(void *_ud) { + default_alloc_ud *ud = _ud; + mem_block *block = ud->head; + + while (block) { + void *to_free = block; + block = block->next; + free(to_free); + } +} + + +/* Standard error functions ***************************************************/ + +static bool default_err(void *ud, const upb_status *status) { + UPB_UNUSED(ud); + fprintf(stderr, "upb error: %s\n", upb_status_errmsg(status)); + return false; +} + +static bool write_err_to(void *ud, const upb_status *status) { + upb_status *copy_to = ud; + upb_status_copy(copy_to, status); + return false; +} + + +/* upb_env ********************************************************************/ + +void upb_env_init(upb_env *e) { + e->ok_ = true; + e->bytes_allocated = 0; + e->cleanup_head = NULL; + + default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud; + ud->head = NULL; + + // Set default functions. + upb_env_setallocfunc(e, default_alloc, ud); + upb_env_seterrorfunc(e, default_err, NULL); +} + +void upb_env_uninit(upb_env *e) { + cleanup_ent *ent = e->cleanup_head; + + while (ent) { + ent->cleanup(ent->ud); + ent = ent->next; + } + + // Must do this after running cleanup functions, because this will delete + // the memory we store our cleanup entries in! 
+ if (e->alloc == default_alloc) { + default_alloc_cleanup(e->alloc_ud); + } +} + +UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc, + void *ud) { + e->alloc = alloc; + e->alloc_ud = ud; +} + +UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, + void *ud) { + e->err = func; + e->err_ud = ud; +} + +void upb_env_reporterrorsto(upb_env *e, upb_status *status) { + e->err = write_err_to; + e->err_ud = status; +} + +bool upb_env_ok(const upb_env *e) { + return e->ok_; +} + +bool upb_env_reporterror(upb_env *e, const upb_status *status) { + e->ok_ = false; + return e->err(e->err_ud, status); +} + +bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) { + cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent)); + if (!ent) return false; + + ent->cleanup = func; + ent->ud = ud; + ent->next = e->cleanup_head; + e->cleanup_head = ent; + + return true; +} + +void *upb_env_malloc(upb_env *e, size_t size) { + e->bytes_allocated += size; + if (e->alloc == seeded_alloc) { + // This is equivalent to the next branch, but allows inlining for a + // measurable perf benefit. + return seeded_alloc(e->alloc_ud, NULL, 0, size); + } else { + return e->alloc(e->alloc_ud, NULL, 0, size); + } +} + +void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) { + assert(oldsize <= size); + char *ret = e->alloc(e->alloc_ud, ptr, oldsize, size); + +#ifndef NDEBUG + // Overwrite non-preserved memory to ensure callers are passing the oldsize + // that they truly require. + memset(ret + oldsize, 0xff, size - oldsize); +#endif + + return ret; +} + +size_t upb_env_bytesallocated(const upb_env *e) { + return e->bytes_allocated; +} + + +/* upb_seededalloc ************************************************************/ + +// Be conservative and choose 16 in case anyone is using SSE. 
+static const size_t maxalign = 16; + +static size_t align_up(size_t size) { + return ((size + maxalign - 1) / maxalign) * maxalign; +} + +UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, + size_t size) { + UPB_UNUSED(ptr); + + upb_seededalloc *a = ud; + size = align_up(size); + + assert(a->mem_limit >= a->mem_ptr); + + if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) { + // Fast path: we can satisfy from the initial allocation. + void *ret = a->mem_ptr; + a->mem_ptr += size; + return ret; + } else { + // Slow path: fallback to other allocator. + a->need_cleanup = true; + // Is `ptr` part of the user-provided initial block? Don't pass it to the + // default allocator if so; otherwise, it may try to realloc() the block. + char *chptr = ptr; + if (chptr >= a->mem_base && chptr < a->mem_limit) { + return a->alloc(a->alloc_ud, NULL, 0, size); + } else { + return a->alloc(a->alloc_ud, ptr, oldsize, size); + } + } +} + +void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) { + a->mem_base = mem; + a->mem_ptr = mem; + a->mem_limit = (char*)mem + len; + a->need_cleanup = false; + a->returned_allocfunc = false; + + default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud; + ud->head = NULL; + + upb_seededalloc_setfallbackalloc(a, default_alloc, ud); +} + +void upb_seededalloc_uninit(upb_seededalloc *a) { + if (a->alloc == default_alloc && a->need_cleanup) { + default_alloc_cleanup(a->alloc_ud); + } +} + +UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a, + upb_alloc_func *alloc, + void *ud) { + assert(!a->returned_allocfunc); + a->alloc = alloc; + a->alloc_ud = ud; +} + +upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) { + a->returned_allocfunc = true; + return seeded_alloc; +} /* * upb - a minimalist implementation of protocol buffers. 
* @@ -1955,7 +2224,14 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f, if (closure_type && *context_closure_type && closure_type != *context_closure_type) { // TODO(haberman): better message for debugging. - upb_status_seterrmsg(&h->status_, "closure type does not match"); + if (f) { + upb_status_seterrf(&h->status_, + "closure type does not match for field %s", + upb_fielddef_name(f)); + } else { + upb_status_seterrmsg( + &h->status_, "closure type does not match for message-level handler"); + } return false; } @@ -2353,7 +2629,7 @@ bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, *s = f->selector_base; break; } - assert(*s < upb_fielddef_containingtype(f)->selector_count); + assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count); return true; } @@ -4295,7 +4571,7 @@ void upb_inttable_compact(upb_inttable *t) { counts[log2ceil(key)]++; } - int arr_size; + size_t arr_size = 1; int arr_count = upb_inttable_count(t); if (upb_inttable_count(t) >= max_key * MIN_DENSITY) { @@ -5522,6 +5798,54 @@ static upb_inttable reftables[212] = { #include #include +// upb_deflist is an internal-only dynamic array for storing a growing list of +// upb_defs. +typedef struct { + upb_def **defs; + size_t len; + size_t size; + bool owned; +} upb_deflist; + +// We keep a stack of all the messages scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). +typedef struct { + char *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1. + int start; +} upb_descreader_frame; + +// The maximum number of nested declarations that are allowed, ie. 
+// message Foo { +// message Bar { +// message Baz { +// } +// } +// } +// +// This is a resource limit that affects how big our runtime stack can grow. +// TODO: make this a runtime-settable property of the Reader instance. +#define UPB_MAX_MESSAGE_NESTING 64 + +struct upb_descreader { + upb_sink sink; + upb_deflist defs; + upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING]; + int stack_len; + + uint32_t number; + char *name; + bool saw_number; + bool saw_name; + + char *default_string; + + upb_fielddef *f; +}; + static char *upb_strndup(const char *buf, size_t n) { char *ret = malloc(n + 1); if (!ret) return NULL; @@ -5601,36 +5925,6 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) { /* upb_descreader ************************************************************/ -void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers, - upb_status *status) { - UPB_UNUSED(status); - upb_deflist_init(&r->defs); - upb_sink_reset(upb_descreader_input(r), handlers, r); - r->stack_len = 0; - r->name = NULL; - r->default_string = NULL; -} - -void upb_descreader_uninit(upb_descreader *r) { - free(r->name); - upb_deflist_uninit(&r->defs); - free(r->default_string); - while (r->stack_len > 0) { - upb_descreader_frame *f = &r->stack[--r->stack_len]; - free(f->name); - } -} - -upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) { - *n = r->defs.len; - upb_deflist_donaterefs(&r->defs, owner); - return r->defs.defs; -} - -upb_sink *upb_descreader_input(upb_descreader *r) { - return &r->sink; -} - static upb_msgdef *upb_descreader_top(upb_descreader *r) { assert(r->stack_len > 1); int index = r->stack[r->stack_len-1].start - 1; @@ -5803,7 +6097,7 @@ static bool parse_default(char *str, upb_fielddef *f) { break; } case UPB_TYPE_UINT32: { - long val = strtoul(str, &end, 0); + unsigned long val = strtoul(str, &end, 0); if (val > UINT32_MAX || errno == ERANGE || *end) success = false; else @@ -6070,6 +6364,45 @@ static void 
reghandlers(const void *closure, upb_handlers *h) { #undef D +void descreader_cleanup(void *_r) { + upb_descreader *r = _r; + free(r->name); + upb_deflist_uninit(&r->defs); + free(r->default_string); + while (r->stack_len > 0) { + upb_descreader_frame *f = &r->stack[--r->stack_len]; + free(f->name); + } +} + + +/* Public API ****************************************************************/ + +upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) { + upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader)); + if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) { + return NULL; + } + + upb_deflist_init(&r->defs); + upb_sink_reset(upb_descreader_input(r), h, r); + r->stack_len = 0; + r->name = NULL; + r->default_string = NULL; + + return r; +} + +upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) { + *n = r->defs.len; + upb_deflist_donaterefs(&r->defs, owner); + return r->defs.defs; +} + +upb_sink *upb_descreader_input(upb_descreader *r) { + return &r->sink; +} + const upb_handlers *upb_descreader_newhandlers(const void *owner) { const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s); const upb_handlers *h = upb_handlers_newfrozen( @@ -6141,7 +6474,6 @@ mgroup *newgroup(const void *owner) { static void freemethod(upb_refcounted *r) { upb_pbdecodermethod *method = (upb_pbdecodermethod*)r; - upb_byteshandler_uninit(&method->input_handler_); if (method->dest_handlers_) { upb_handlers_unref(method->dest_handlers_, method); @@ -7073,10 +7405,7 @@ void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) { */ #include -#include -#include #include -#include #ifdef UPB_DUMP_BYTECODE #include @@ -7122,18 +7451,17 @@ static bool consumes_input(opcode op) { static bool in_residual_buf(const upb_pbdecoder *d, const char *p); -// It's unfortunate that we have to micro-manage the compiler this way, -// especially since this tuning is necessarily specific to one hardware -// configuration. 
But emperically on a Core i7, performance increases 30-50% -// with these annotations. Every instance where these appear, gcc 4.2.1 made -// the wrong decision and degraded performance in benchmarks. -#define FORCEINLINE static inline __attribute__((always_inline)) -#define NOINLINE __attribute__((noinline)) +// It's unfortunate that we have to micro-manage the compiler with +// UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily +// specific to one hardware configuration. But empirically on a Core i7, +// performance increases 30-50% with these annotations. Every instance where +// these appear, gcc 4.2.1 made the wrong decision and degraded performance in +// benchmarks. static void seterr(upb_pbdecoder *d, const char *msg) { - // TODO(haberman): encapsulate this access to pipeline->status, but not sure - // exactly what that interface should look like. - upb_status_seterrmsg(d->status, msg); + upb_status status = UPB_STATUS_INIT; + upb_status_seterrmsg(&status, msg); + upb_env_reporterror(d->env, &status); } void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) { @@ -7176,7 +7504,7 @@ static bool in_residual_buf(const upb_pbdecoder *d, const char *p) { // and the parsing stack, so must be called whenever either is updated. static void set_delim_end(upb_pbdecoder *d) { size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs; - if (delim_ofs <= (d->end - d->buf)) { + if (delim_ofs <= (size_t)(d->end - d->buf)) { d->delim_end = d->buf + delim_ofs; d->data_end = d->delim_end; } else { @@ -7301,7 +7629,8 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) { // Copies the next "bytes" bytes into "buf" and advances the stream. // Requires that this many bytes are available in the current buffer. 
-FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { +UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf, + size_t bytes) { assert(bytes <= curbufleft(d)); memcpy(buf, d->ptr, bytes); advance(d, bytes); @@ -7310,8 +7639,8 @@ FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { // Slow path for getting the next "bytes" bytes, regardless of whether they are // available in the current buffer or not. Returns a status code as described // in decoder.int.h. -static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf, - size_t bytes) { +UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf, + size_t bytes) { const size_t avail = curbufleft(d); consumebytes(d, buf, avail); bytes -= avail; @@ -7320,7 +7649,7 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf, advancetobuf(d, d->buf_param, d->size_param); } if (curbufleft(d) >= bytes) { - consumebytes(d, buf + avail, bytes); + consumebytes(d, (char *)buf + avail, bytes); return DECODE_OK; } else if (d->data_end == d->delim_end) { seterr(d, "Submessage ended in the middle of a value or group"); @@ -7332,7 +7661,8 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf, // Gets the next "bytes" bytes, regardless of whether they are available in the // current buffer or not. Returns a status code as described in decoder.int.h. -FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) { +UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf, + size_t bytes) { if (curbufleft(d) >= bytes) { // Buffer has enough data to satisfy. 
consumebytes(d, buf, bytes); @@ -7342,19 +7672,20 @@ FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) { } } -static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf, - size_t bytes) { +UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf, + size_t bytes) { size_t ret = curbufleft(d); memcpy(buf, d->ptr, ret); if (in_residual_buf(d, d->ptr)) { size_t copy = UPB_MIN(bytes - ret, d->size_param); - memcpy(buf + ret, d->buf_param, copy); + memcpy((char *)buf + ret, d->buf_param, copy); ret += copy; } return ret; } -FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) { +UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf, + size_t bytes) { if (curbufleft(d) >= bytes) { memcpy(buf, d->ptr, bytes); return bytes; @@ -7368,8 +7699,8 @@ FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) { // Slow path for decoding a varint from the current buffer position. // Returns a status code as described in decoder.int.h. -NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, - uint64_t *u64) { +UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, + uint64_t *u64) { *u64 = 0; uint8_t byte = 0x80; int bitpos; @@ -7387,7 +7718,7 @@ NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, // Decodes a varint from the current buffer position. // Returns a status code as described in decoder.int.h. -FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { +UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) { *u64 = *d->ptr; advance(d, 1); @@ -7410,7 +7741,7 @@ FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { // Decodes a 32-bit varint from the current buffer position. // Returns a status code as described in decoder.int.h. 
-FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { +UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { uint64_t u64; int32_t ret = decode_varint(d, &u64); if (ret >= 0) return ret; @@ -7429,14 +7760,14 @@ FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { // Decodes a fixed32 from the current buffer position. // Returns a status code as described in decoder.int.h. // TODO: proper byte swapping for big-endian machines. -FORCEINLINE int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) { +UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) { return getbytes(d, u32, 4); } // Decodes a fixed64 from the current buffer position. // Returns a status code as described in decoder.int.h. // TODO: proper byte swapping for big-endian machines. -FORCEINLINE int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) { +UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) { return getbytes(d, u64, 8); } @@ -7460,7 +7791,7 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) { if (end > fr->end_ofs) { seterr(d, "Submessage end extends past enclosing submessage."); return false; - } else if ((fr + 1) == d->limit) { + } else if (fr == d->limit) { seterr(d, kPbDecoderStackOverflow); return false; } @@ -7487,8 +7818,8 @@ static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) { // Pops a frame from the decoder stack. static void decoder_pop(upb_pbdecoder *d) { d->top--; } -NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, - uint64_t expected) { +UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, + uint64_t expected) { uint64_t data = 0; size_t bytes = upb_value_size(expected); size_t read = peekbytes(d, &data, bytes); @@ -7640,10 +7971,17 @@ static int32_t dispatch(upb_pbdecoder *d) { if (ret == DECODE_ENDGROUP) { goto_endmsg(d); return DECODE_OK; - } else { - d->pc = d->last - 1; // Rewind to CHECKDELIM. 
- return ret; + } else if (ret == DECODE_OK) { + // We just consumed some input, so we might now have consumed all the data + // in the delmited region. Since every opcode that can trigger dispatch is + // directly preceded by OP_CHECKDELIM, rewind to it now to re-check the + // delimited end. + d->pc = d->last - 1; + assert(getop(*d->pc) == OP_CHECKDELIM); + return DECODE_OK; } + + return ret; } // Callers know that the stack is more than one deep because the opcodes that @@ -7866,7 +8204,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) { upb_pbdecoder *d = closure; UPB_UNUSED(size_hint); + d->top->end_ofs = UINT64_MAX; + d->bufstart_ofs = 0; d->call_len = 1; + d->callstack[0] = &halt; d->pc = pc; return d; } @@ -7875,6 +8216,8 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) { UPB_UNUSED(hd); UPB_UNUSED(size_hint); upb_pbdecoder *d = closure; + d->top->end_ofs = UINT64_MAX; + d->bufstart_ofs = 0; d->call_len = 0; return d; } @@ -7931,58 +8274,120 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) { return true; } -void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m, - upb_status *s) { - d->limit = &d->stack[UPB_DECODER_MAX_NESTING]; - upb_bytessink_reset(&d->input_, &m->input_handler_, d); - d->method_ = m; - d->callstack[0] = &halt; - d->status = s; - upb_pbdecoder_reset(d); -} - void upb_pbdecoder_reset(upb_pbdecoder *d) { d->top = d->stack; - d->top->end_ofs = UINT64_MAX; d->top->groupnum = 0; - d->bufstart_ofs = 0; d->ptr = d->residual; d->buf = d->residual; d->end = d->residual; d->residual_end = d->residual; - d->call_len = 1; } -uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) { - return offset(d); +static size_t stacksize(upb_pbdecoder *d, size_t entries) { + UPB_UNUSED(d); + return entries * sizeof(upb_pbdecoder_frame); } -// Not currently required, but to support 
outgrowing the static stack we need -// this. -void upb_pbdecoder_uninit(upb_pbdecoder *d) { +static size_t callstacksize(upb_pbdecoder *d, size_t entries) { UPB_UNUSED(d); -} -const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) { - return d->method_; +#ifdef UPB_USE_JIT_X64 + if (d->method_->is_native_) { + // Each native stack frame needs two pointers, plus we need a few frames for + // the enter/exit trampolines. + size_t ret = entries * sizeof(void*) * 2; + ret += sizeof(void*) * 10; + return ret; + } +#endif + + return entries * sizeof(uint32_t*); } -bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink* sink) { - // TODO(haberman): do we need to test whether the decoder is already on the - // stack (like calling this from within a callback)? Should we support - // rebinding the output at all? +upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m, + upb_sink *sink) { + const size_t default_max_nesting = 64; +#ifndef NDEBUG + size_t size_before = upb_env_bytesallocated(e); +#endif + + upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder)); + if (!d) return NULL; + + d->method_ = m; + d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting)); + d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting)); + if (!d->stack || !d->callstack) { + return NULL; + } + + d->env = e; + d->limit = d->stack + default_max_nesting - 1; + d->stack_size = default_max_nesting; + + upb_pbdecoder_reset(d); + upb_bytessink_reset(&d->input_, &m->input_handler_, d); + assert(sink); if (d->method_->dest_handlers_) { if (sink->handlers != d->method_->dest_handlers_) - return false; + return NULL; } upb_sink_reset(&d->top->sink, sink->handlers, sink->closure); - return true; + + // If this fails, increase the value in decoder.h. 
+ assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE); + return d; +} + +uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) { + return offset(d); +} + +const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) { + return d->method_; } upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) { return &d->input_; } + +size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) { + return d->stack_size; +} + +bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) { + assert(d->top >= d->stack); + + if (max < (size_t)(d->top - d->stack)) { + // Can't set a limit smaller than what we are currently at. + return false; + } + + if (max > d->stack_size) { + // Need to reallocate stack and callstack to accommodate. + size_t old_size = stacksize(d, d->stack_size); + size_t new_size = stacksize(d, max); + void *p = upb_env_realloc(d->env, d->stack, old_size, new_size); + if (!p) { + return false; + } + d->stack = p; + + old_size = callstacksize(d, d->stack_size); + new_size = callstacksize(d, max); + p = upb_env_realloc(d->env, d->callstack, old_size, new_size); + if (!p) { + return false; + } + d->callstack = p; + + d->stack_size = max; + } + + d->limit = d->stack + max - 1; + return true; +} /* * upb - a minimalist implementation of protocol buffers. * @@ -8045,6 +8450,68 @@ upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) { #include +// The output buffer is divided into segments; a segment is a string of data +// that is "ready to go" -- it does not need any varint lengths inserted into +// the middle. The seams between segments are where varints will be inserted +// once they are known. +// +// We also use the concept of a "run", which is a range of encoded bytes that +// occur at a single submessage level. Every segment contains one or more runs. +// +// A segment can span messages. Consider: +// +// .--Submessage lengths---------. 
+// | | | +// | V V +// V | |--------------- | |----------------- +// Submessages: | |----------------------------------------------- +// Top-level msg: ------------------------------------------------------------ +// +// Segments: ----- ------------------- ----------------- +// Runs: *---- *--------------*--- *---------------- +// (* marks the start) +// +// Note that the top-level message is not in any segment because it does not +// have any length preceding it. +// +// A segment is only interrupted when another length needs to be inserted. So +// observe how the second segment spans both the inner submessage and part of +// the next enclosing message. +typedef struct { + uint32_t msglen; // The length to varint-encode before this segment. + uint32_t seglen; // Length of the segment. +} upb_pb_encoder_segment; + +struct upb_pb_encoder { + upb_env *env; + + // Our input and output. + upb_sink input_; + upb_bytessink *output_; + + // The "subclosure" -- used as the inner closure as part of the bytessink + // protocol. + void *subc; + + // The output buffer and limit, and our current write position. "buf" + // initially points to "initbuf", but is dynamically allocated if we need to + // grow beyond the initial size. + char *buf, *ptr, *limit; + + // The beginning of the current run, or undefined if we are at the top level. + char *runbegin; + + // The list of segments we are accumulating. + upb_pb_encoder_segment *segbuf, *segptr, *seglimit; + + // The stack of enclosing submessages. Each entry in the stack points to the + // segment where this submessage's length is being accumulated. + int *stack, *top, *stacklimit; + + // Depth of startmsg/endmsg calls. + int depth; +}; + /* low-level buffering ********************************************************/ // Low-level functions for interacting with the output buffer. 
@@ -8062,25 +8529,23 @@ static upb_pb_encoder_segment *top(upb_pb_encoder *e) { // Call to ensure that at least "bytes" bytes are available for writing at // e->ptr. Returns false if the bytes could not be allocated. static bool reserve(upb_pb_encoder *e, size_t bytes) { - if ((e->limit - e->ptr) < bytes) { + if ((size_t)(e->limit - e->ptr) < bytes) { + // Grow buffer. size_t needed = bytes + (e->ptr - e->buf); size_t old_size = e->limit - e->buf; + size_t new_size = old_size; + while (new_size < needed) { new_size *= 2; } - char *realloc_from = (e->buf == e->initbuf) ? NULL : e->buf; - char *new_buf = realloc(realloc_from, new_size); + char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size); if (new_buf == NULL) { return false; } - if (realloc_from == NULL) { - memcpy(new_buf, e->initbuf, old_size); - } - e->ptr = new_buf + (e->ptr - e->buf); e->runbegin = new_buf + (e->runbegin - e->buf); e->limit = new_buf + new_size; @@ -8093,7 +8558,7 @@ static bool reserve(upb_pb_encoder *e, size_t bytes) { // Call when "bytes" bytes have been writte at e->ptr. The caller *must* have // previously called reserve() with at least this many bytes. static void encoder_advance(upb_pb_encoder *e, size_t bytes) { - assert((e->limit - e->ptr) >= bytes); + assert((size_t)(e->limit - e->ptr) >= bytes); e->ptr += bytes; } @@ -8149,21 +8614,17 @@ static bool start_delim(upb_pb_encoder *e) { } if (++e->segptr == e->seglimit) { - upb_pb_encoder_segment *realloc_from = - (e->segbuf == e->seginitbuf) ? NULL : e->segbuf; + // Grow segment buffer. 
size_t old_size = (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment); size_t new_size = old_size * 2; - upb_pb_encoder_segment *new_buf = realloc(realloc_from, new_size); + upb_pb_encoder_segment *new_buf = + upb_env_realloc(e->env, e->segbuf, old_size, new_size); if (new_buf == NULL) { return false; } - if (realloc_from == NULL) { - memcpy(new_buf, e->seginitbuf, old_size); - } - e->segptr = new_buf + (e->segptr - e->segbuf); e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment)); e->segbuf = new_buf; @@ -8434,6 +8895,12 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) { } } +void upb_pb_encoder_reset(upb_pb_encoder *e) { + e->segptr = NULL; + e->top = NULL; + e->depth = 0; +} + /* public API *****************************************************************/ @@ -8442,40 +8909,42 @@ const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m, return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL); } -#define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0])) +upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h, + upb_bytessink *output) { + const size_t initial_bufsize = 256; + const size_t initial_segbufsize = 16; + // TODO(haberman): make this configurable. 
+ const size_t stack_size = 64; +#ifndef NDEBUG + const size_t size_before = upb_env_bytesallocated(env); +#endif -void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h) { - e->output_ = NULL; - e->subc = NULL; - e->buf = e->initbuf; - e->ptr = e->buf; - e->limit = e->buf + ARRAYSIZE(e->initbuf); - e->segbuf = e->seginitbuf; - e->seglimit = e->segbuf + ARRAYSIZE(e->seginitbuf); - e->stacklimit = e->stack + ARRAYSIZE(e->stack); - upb_sink_reset(&e->input_, h, e); -} + upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder)); + if (!e) return NULL; -void upb_pb_encoder_uninit(upb_pb_encoder *e) { - if (e->buf != e->initbuf) { - free(e->buf); - } + e->buf = upb_env_malloc(env, initial_bufsize); + e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf)); + e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack)); - if (e->segbuf != e->seginitbuf) { - free(e->segbuf); + if (!e->buf || !e->segbuf || !e->stack) { + return NULL; } -} -void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output) { + e->limit = e->buf + initial_bufsize; + e->seglimit = e->segbuf + initial_segbufsize; + e->stacklimit = e->stack + stack_size; + upb_pb_encoder_reset(e); + upb_sink_reset(&e->input_, h, e); + + e->env = env; e->output_ = output; e->subc = output->closure; -} + e->ptr = e->buf; -void upb_pb_encoder_reset(upb_pb_encoder *e) { - e->segptr = NULL; - e->top = NULL; - e->depth = 0; + // If this fails, increase the value in encoder.h. 
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE); + return e; } upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; } @@ -8500,26 +8969,26 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, const upb_pbdecodermethod *decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m); - upb_pbdecoder decoder; - upb_descreader reader; + upb_env env; + upb_env_init(&env); + upb_env_reporterrorsto(&env, status); - upb_pbdecoder_init(&decoder, decoder_m, status); - upb_descreader_init(&reader, reader_h, status); - upb_pbdecoder_resetoutput(&decoder, upb_descreader_input(&reader)); + upb_descreader *reader = upb_descreader_create(&env, reader_h); + upb_pbdecoder *decoder = + upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader)); // Push input data. - bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(&decoder)); + bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder)); upb_def **ret = NULL; if (!ok) goto cleanup; - upb_def **defs = upb_descreader_getdefs(&reader, owner, n); + upb_def **defs = upb_descreader_getdefs(reader, owner, n); ret = malloc(sizeof(upb_def*) * (*n)); memcpy(ret, defs, sizeof(upb_def*) * (*n)); cleanup: - upb_pbdecoder_uninit(&decoder); - upb_descreader_uninit(&reader); + upb_env_uninit(&env); upb_handlers_unref(reader_h, &reader_h); upb_pbdecodermethod_unref(decoder_m, &decoder_m); return ret; @@ -8584,6 +9053,14 @@ bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname, #include +struct upb_textprinter { + upb_sink input_; + upb_bytessink *output_; + int indent_depth_; + bool single_line_; + void *subc; +}; + #define CHECK(x) if ((x) < 0) goto err; static const char *shortname(const char *longname) { @@ -8801,24 +9278,6 @@ err: return false; } - -/* Public API *****************************************************************/ - -void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h) { - p->single_line_ = false; - 
p->indent_depth_ = 0; - upb_sink_reset(&p->input_, h, p); -} - -void upb_textprinter_uninit(upb_textprinter *p) { - UPB_UNUSED(p); -} - -void upb_textprinter_reset(upb_textprinter *p, bool single_line) { - p->single_line_ = single_line; - p->indent_depth_ = 0; -} - static void onmreg(const void *c, upb_handlers *h) { UPB_UNUSED(c); const upb_msgdef *m = upb_handlers_msgdef(h); @@ -8878,6 +9337,26 @@ static void onmreg(const void *c, upb_handlers *h) { } } +static void textprinter_reset(upb_textprinter *p, bool single_line) { + p->single_line_ = single_line; + p->indent_depth_ = 0; +} + + +/* Public API *****************************************************************/ + +upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h, + upb_bytessink *output) { + upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter)); + if (!p) return NULL; + + p->output_ = output; + upb_sink_reset(&p->input_, h, p); + textprinter_reset(p, false); + + return p; +} + const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m, const void *owner) { return upb_handlers_newfrozen(m, owner, &onmreg, NULL); @@ -8885,11 +9364,6 @@ const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m, upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; } -bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output) { - p->output_ = output; - return true; -} - void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) { p->single_line_ = single_line; } @@ -9052,6 +9526,71 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) { #include +#define UPB_JSON_MAX_DEPTH 64 + +typedef struct { + upb_sink sink; + + // The current message in which we're parsing, and the field whose value we're + // expecting next. + const upb_msgdef *m; + const upb_fielddef *f; + + // We are in a repeated-field context, ready to emit mapentries as + // submessages. 
This flag alters the start-of-object (open-brace) behavior to + // begin a sequence of mapentry messages rather than a single submessage. + bool is_map; + + // We are in a map-entry message context. This flag is set when parsing the + // value field of a single map entry and indicates to all value-field parsers + // (subobjects, strings, numbers, and bools) that the map-entry submessage + // should end as soon as the value is parsed. + bool is_mapentry; + + // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent + // message's map field that we're currently parsing. This differs from |f| + // because |f| is the field in the *current* message (i.e., the map-entry + // message itself), not the parent's field that leads to this map. + const upb_fielddef *mapfield; +} upb_jsonparser_frame; + +struct upb_json_parser { + upb_env *env; + upb_byteshandler input_handler_; + upb_bytessink input_; + + // Stack to track the JSON scopes we are in. + upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; + upb_jsonparser_frame *top; + upb_jsonparser_frame *limit; + + upb_status *status; + + // Ragel's internal parsing stack for the parsing state machine. + int current_state; + int parser_stack[UPB_JSON_MAX_DEPTH]; + int parser_top; + + // The handle for the current buffer. + const upb_bufhandle *handle; + + // Accumulate buffer. See details in parser.rl. + const char *accumulated; + size_t accumulated_len; + char *accumulate_buf; + size_t accumulate_buf_size; + + // Multi-part text data. See details in parser.rl. + int multipart_state; + upb_selector_t string_selector; + + // Input capture. See details in parser.rl. + const char *capture; + + // Intermediate result of parsing a unicode escape sequence. + uint32_t digit; +}; + #define PARSER_CHECK_RETURN(x) if (!(x)) return false // Used to signal that a capture has been suspended. @@ -9254,12 +9793,13 @@ static void accumulate_clear(upb_json_parser *p) { // Used internally by accumulate_append(). 
static bool accumulate_realloc(upb_json_parser *p, size_t need) { - size_t new_size = UPB_MAX(p->accumulate_buf_size, 128); + size_t old_size = p->accumulate_buf_size; + size_t new_size = UPB_MAX(old_size, 128); while (new_size < need) { new_size = saturating_multiply(new_size, 2); } - void *mem = realloc(p->accumulate_buf, new_size); + void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size); if (!mem) { upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); return false; @@ -10008,11 +10548,11 @@ static void end_object(upb_json_parser *p) { // final state once, when the closing '"' is seen. -#line 1085 "upb/json/parser.rl" +#line 1151 "upb/json/parser.rl" -#line 997 "upb/json/parser.c" +#line 1063 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 5, 1, 6, 1, 7, 1, 8, 1, @@ -10154,8 +10694,6 @@ static const char _json_trans_actions[] = { }; static const int json_start = 1; -static const int json_first_final = 56; -static const int json_error = 0; static const int json_en_number_machine = 10; static const int json_en_string_machine = 19; @@ -10163,7 +10701,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 1088 "upb/json/parser.rl" +#line 1154 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -10183,7 +10721,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 1168 "upb/json/parser.c" +#line 1232 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -10258,118 +10796,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 1000 "upb/json/parser.rl" +#line 1066 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 1001 "upb/json/parser.rl" +#line 1067 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 1005 "upb/json/parser.rl" +#line 1071 
"upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 1006 "upb/json/parser.rl" +#line 1072 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 1012 "upb/json/parser.rl" +#line 1078 "upb/json/parser.rl" { start_hex(parser); } break; case 5: -#line 1013 "upb/json/parser.rl" +#line 1079 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 6: -#line 1014 "upb/json/parser.rl" +#line 1080 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 1020 "upb/json/parser.rl" +#line 1086 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 1026 "upb/json/parser.rl" +#line 1092 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 1029 "upb/json/parser.rl" +#line 1095 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 1031 "upb/json/parser.rl" +#line 1097 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 1036 "upb/json/parser.rl" +#line 1102 "upb/json/parser.rl" { start_member(parser); } break; case 12: -#line 1037 "upb/json/parser.rl" +#line 1103 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_membername(parser)); } break; case 13: -#line 1040 "upb/json/parser.rl" +#line 1106 "upb/json/parser.rl" { end_member(parser); } break; case 14: -#line 1046 "upb/json/parser.rl" +#line 1112 "upb/json/parser.rl" { start_object(parser); } break; case 15: -#line 1049 "upb/json/parser.rl" +#line 1115 "upb/json/parser.rl" { end_object(parser); } break; case 16: -#line 1055 "upb/json/parser.rl" +#line 1121 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 1059 "upb/json/parser.rl" +#line 1125 "upb/json/parser.rl" { end_array(parser); } break; case 18: -#line 1064 "upb/json/parser.rl" +#line 1130 "upb/json/parser.rl" { start_number(parser, p); } break; case 19: -#line 1065 "upb/json/parser.rl" +#line 1131 "upb/json/parser.rl" { 
CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 1067 "upb/json/parser.rl" +#line 1133 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 1068 "upb/json/parser.rl" +#line 1134 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 1070 "upb/json/parser.rl" +#line 1136 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 1072 "upb/json/parser.rl" +#line 1138 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 1074 "upb/json/parser.rl" +#line 1140 "upb/json/parser.rl" { /* null value */ } break; case 25: -#line 1076 "upb/json/parser.rl" +#line 1142 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 1077 "upb/json/parser.rl" +#line 1143 "upb/json/parser.rl" { end_subobject(parser); } break; case 27: -#line 1082 "upb/json/parser.rl" +#line 1148 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1354 "upb/json/parser.c" +#line 1418 "upb/json/parser.c" } } @@ -10382,7 +10920,7 @@ _again: _out: {} } -#line 1107 "upb/json/parser.rl" +#line 1173 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); @@ -10401,29 +10939,17 @@ error: bool end(void *closure, const void *hd) { UPB_UNUSED(closure); UPB_UNUSED(hd); - return true; -} - -/* Public API *****************************************************************/ - -void upb_json_parser_init(upb_json_parser *p, upb_status *status) { - p->limit = p->stack + UPB_JSON_MAX_DEPTH; - p->accumulate_buf = NULL; - p->accumulate_buf_size = 0; - upb_byteshandler_init(&p->input_handler_); - upb_byteshandler_setstring(&p->input_handler_, parse, NULL); - upb_byteshandler_setendstr(&p->input_handler_, end, NULL); - upb_bytessink_reset(&p->input_, &p->input_handler_, p); - p->status = status; -} - -void upb_json_parser_uninit(upb_json_parser *p) { - 
upb_byteshandler_uninit(&p->input_handler_); - free(p->accumulate_buf); + // Prevent compile warning on unused static constants. + UPB_UNUSED(json_start); + UPB_UNUSED(json_en_number_machine); + UPB_UNUSED(json_en_string_machine); + UPB_UNUSED(json_en_value_machine); + UPB_UNUSED(json_en_main); + return true; } -void upb_json_parser_reset(upb_json_parser *p) { +static void json_parser_reset(upb_json_parser *p) { p->top = p->stack; p->top->f = NULL; p->top->is_map = false; @@ -10433,25 +10959,48 @@ void upb_json_parser_reset(upb_json_parser *p) { int top; // Emit Ragel initialization of the parser. -#line 1418 "upb/json/parser.c" +#line 1470 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 1157 "upb/json/parser.rl" +#line 1211 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); p->multipart_state = MULTIPART_INACTIVE; p->capture = NULL; + p->accumulated = NULL; } -void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) { - upb_json_parser_reset(p); - upb_sink_reset(&p->top->sink, sink->handlers, sink->closure); - p->top->m = upb_handlers_msgdef(sink->handlers); - p->accumulated = NULL; + +/* Public API *****************************************************************/ + +upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { +#ifndef NDEBUG + const size_t size_before = upb_env_bytesallocated(env); +#endif + upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser)); + if (!p) return false; + + p->env = env; + p->limit = p->stack + UPB_JSON_MAX_DEPTH; + p->accumulate_buf = NULL; + p->accumulate_buf_size = 0; + upb_byteshandler_init(&p->input_handler_); + upb_byteshandler_setstring(&p->input_handler_, parse, NULL); + upb_byteshandler_setendstr(&p->input_handler_, end, NULL); + upb_bytessink_reset(&p->input_, &p->input_handler_, p); + + json_parser_reset(p); + upb_sink_reset(&p->top->sink, output->handlers, output->closure); + p->top->m = upb_handlers_msgdef(output->handlers); + + // If 
this fails, uncomment and increase the value in parser.h. + // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); + assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE); + return p; } upb_bytessink *upb_json_parser_input(upb_json_parser *p) { @@ -10473,6 +11022,27 @@ upb_bytessink *upb_json_parser_input(upb_json_parser *p) { #include #include +struct upb_json_printer { + upb_sink input_; + // BytesSink closure. + void *subc_; + upb_bytessink *output_; + + // We track the depth so that we know when to emit startstr/endstr on the + // output. + int depth_; + + // Have we emitted the first element? This state is necessary to emit commas + // without leaving a trailing comma in arrays/maps. We keep this state per + // frame depth. + // + // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. + // We count frames (contexts in which we separate elements by commas) as both + // repeated fields and messages (maps), and the worst case is a + // message->repeated field->submessage->repeated field->... nesting. + bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; +}; + // StringPiece; a pointer plus a length. 
typedef struct { const char *ptr; @@ -10620,7 +11190,7 @@ static bool putkey(void *closure, const void *handler_data) { return true; } -#define CHKFMT(val) if ((val) == -1) return false; +#define CHKFMT(val) if ((val) == (size_t)-1) return false; #define CHK(val) if (!(val)) return false; #define TYPE_HANDLERS(type, fmt_func) \ @@ -11189,25 +11759,29 @@ void printer_sethandlers(const void *closure, upb_handlers *h) { #undef TYPE } -/* Public API *****************************************************************/ - -void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h) { - p->output_ = NULL; +static void json_printer_reset(upb_json_printer *p) { p->depth_ = 0; - upb_sink_reset(&p->input_, h, p); } -void upb_json_printer_uninit(upb_json_printer *p) { - UPB_UNUSED(p); -} -void upb_json_printer_reset(upb_json_printer *p) { - p->depth_ = 0; -} +/* Public API *****************************************************************/ + +upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h, + upb_bytessink *output) { +#ifndef NDEBUG + size_t size_before = upb_env_bytesallocated(e); +#endif + + upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer)); + if (!p) return NULL; -void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output) { - upb_json_printer_reset(p); p->output_ = output; + json_printer_reset(p); + upb_sink_reset(&p->input_, h, p); + + // If this fails, increase the value in printer.h. + assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE); + return p; } upb_sink *upb_json_printer_input(upb_json_printer *p) { diff --git a/ruby/ext/google/protobuf_c/upb.h b/ruby/ext/google/protobuf_c/upb.h index 8f6d3643..97df943a 100644 --- a/ruby/ext/google/protobuf_c/upb.h +++ b/ruby/ext/google/protobuf_c/upb.h @@ -99,6 +99,15 @@ #define UPB_INLINE static inline #endif +// For use in C/C++ source files (not headers), forces inlining within the file. 
+#ifdef __GNUC__ +#define UPB_FORCEINLINE inline __attribute__((always_inline)) +#define UPB_NOINLINE __attribute__((noinline)) +#else +#define UPB_FORCEINLINE +#define UPB_NOINLINE +#endif + #if __STDC_VERSION__ >= 199901L #define UPB_C99 #endif @@ -4805,10 +4814,8 @@ UPB_DEFINE_STRUCT0(upb_byteshandler, )); void upb_byteshandler_init(upb_byteshandler *h); -void upb_byteshandler_uninit(upb_byteshandler *h); // Caller must ensure that "d" outlives the handlers. -// TODO(haberman): support handlerfree function for the data. // TODO(haberman): should this have a "freeze" operation? It's not necessary // for memory management, but could be useful to force immutability and provide // a convenient moment to verify that all registration succeeded. @@ -4983,12 +4990,17 @@ template struct disable_if_same {}; template void DeletePointer(void *p) { delete static_cast(p); } template -struct FirstUnlessVoid { +struct FirstUnlessVoidOrBool { typedef T1 value; }; template -struct FirstUnlessVoid { +struct FirstUnlessVoidOrBool { + typedef T2 value; +}; + +template +struct FirstUnlessVoidOrBool { typedef T2 value; }; @@ -5370,10 +5382,14 @@ inline MethodSig4 MatchFunc(R (C::*f)(P1, P2, P3, P4)) { // // 1. If the function returns void, make it return the expected type and with // a value that always indicates success. -// 2. If the function is expected to return void* but doesn't, wrap it so it -// does (either by returning the closure param if the wrapped function -// returns void or by casting a different pointer type to void* for -// return). +// 2. If the function returns bool, make it return the expected type with a +// value that indicates success or failure. +// +// The "expected type" for return is: +// 1. void* for start handlers. If the closure parameter has a different type +// we will cast it to void* for the return in the success case. +// 2. size_t for string buffer handlers. +// 3. bool for everything else. 
// Template parameters are FuncN type and desired return type. template @@ -5762,10 +5778,13 @@ inline Handler::Handler(F func) attr_.SetClosureType(UniquePtrForType()); // We use the closure type (from the first parameter) if the return type is - // void. This is all nonsense for non START* handlers, but it doesn't matter - // because in that case the value will be ignored. - typedef typename FirstUnlessVoid::value + // void or bool, since these are the two cases we wrap to return the closure's + // type anyway. + // + // This is all nonsense for non START* handlers, but it doesn't matter because + // in that case the value will be ignored. + typedef typename FirstUnlessVoidOrBool::value EffectiveReturn; attr_.SetReturnClosureType(UniquePtrForType()); } @@ -5960,9 +5979,7 @@ inline BytesHandler::BytesHandler() { upb_byteshandler_init(this); } -inline BytesHandler::~BytesHandler() { - upb_byteshandler_uninit(this); -} +inline BytesHandler::~BytesHandler() {} } // namespace upb @@ -5983,6 +6000,261 @@ inline BytesHandler::~BytesHandler() { #endif // UPB_HANDLERS_INL_H_ #endif // UPB_HANDLERS_H +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2014 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * A upb::Environment provides a means for injecting malloc and an + * error-reporting callback into encoders/decoders. This allows them to be + * independent of nearly all assumptions about their actual environment. + * + * It is also a container for allocating the encoders/decoders themselves that + * insulates clients from knowing their actual size. This provides ABI + * compatibility even if the size of the objects change. And this allows the + * structure definitions to be in the .c files instead of the .h files, making + * the .h files smaller and more readable. 
+ */ + + +#ifndef UPB_ENV_H_ +#define UPB_ENV_H_ + +#ifdef __cplusplus +namespace upb { +class Environment; +class SeededAllocator; +} +#endif + +UPB_DECLARE_TYPE(upb::Environment, upb_env); +UPB_DECLARE_TYPE(upb::SeededAllocator, upb_seededalloc); + +typedef void *upb_alloc_func(void *ud, void *ptr, size_t oldsize, size_t size); +typedef void upb_cleanup_func(void *ud); +typedef bool upb_error_func(void *ud, const upb_status *status); + +// An environment is *not* thread-safe. +UPB_DEFINE_CLASS0(upb::Environment, + public: + Environment(); + ~Environment(); + + // Set a custom memory allocation function for the environment. May ONLY + // be called before any calls to Malloc()/Realloc()/AddCleanup() below. + // If this is not called, the system realloc() function will be used. + // The given user pointer "ud" will be passed to the allocation function. + // + // The allocation function will not receive corresponding "free" calls. it + // must ensure that the memory is valid for the lifetime of the Environment, + // but it may be reclaimed any time thereafter. The likely usage is that + // "ud" points to a stateful allocator, and that the allocator frees all + // memory, arena-style, when it is destroyed. In this case the allocator must + // outlive the Environment. Another possibility is that the allocation + // function returns GC-able memory that is guaranteed to be GC-rooted for the + // life of the Environment. + void SetAllocationFunction(upb_alloc_func* alloc, void* ud); + + template + void SetAllocator(T* allocator) { + SetAllocationFunction(allocator->GetAllocationFunction(), allocator); + } + + // Set a custom error reporting function. + void SetErrorFunction(upb_error_func* func, void* ud); + + // Set the error reporting function to simply copy the status to the given + // status and abort. 
+ void ReportErrorsTo(Status* status); + + // Returns true if all allocations and AddCleanup() calls have succeeded, + // and no errors were reported with ReportError() (except ones that recovered + // successfully). + bool ok() const; + + ////////////////////////////////////////////////////////////////////////////// + // Functions for use by encoders/decoders. + + // Reports an error to this environment's callback, returning true if + // the caller should try to recover. + bool ReportError(const Status* status); + + // Allocate memory. Uses the environment's allocation function. + // + // There is no need to free(). All memory will be freed automatically, but is + // guaranteed to outlive the Environment. + void* Malloc(size_t size); + + // Reallocate memory. Preserves "oldsize" bytes from the existing buffer + // Requires: oldsize <= existing_size. + // + // TODO(haberman): should we also enforce that oldsize <= size? + void* Realloc(void* ptr, size_t oldsize, size_t size); + + // Add a cleanup function to run when the environment is destroyed. + // Returns false on out-of-memory. + // + // The first call to AddCleanup() after SetAllocationFunction() is guaranteed + // to return true -- this makes it possible to robustly set a cleanup handler + // for a custom allocation function. + bool AddCleanup(upb_cleanup_func* func, void* ud); + + // Total number of bytes that have been allocated. It is undefined what + // Realloc() does to this counter. + size_t BytesAllocated() const; + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(Environment); +, +UPB_DEFINE_STRUCT0(upb_env, + bool ok_; + size_t bytes_allocated; + + // Alloc function. + upb_alloc_func *alloc; + void *alloc_ud; + + // Error-reporting function. + upb_error_func *err; + void *err_ud; + + // Userdata for default alloc func. + void *default_alloc_ud; + + // Cleanup entries. 
Pointer to a cleanup_ent, defined in env.c + void *cleanup_head; + + // For future expansion, since the size of this struct is exposed to users. + void *future1; + void *future2; +)); + +UPB_BEGIN_EXTERN_C + +void upb_env_init(upb_env *e); +void upb_env_uninit(upb_env *e); +void upb_env_setallocfunc(upb_env *e, upb_alloc_func *func, void *ud); +void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud); +void upb_env_reporterrorsto(upb_env *e, upb_status *status); +bool upb_env_ok(const upb_env *e); +bool upb_env_reporterror(upb_env *e, const upb_status *status); +void *upb_env_malloc(upb_env *e, size_t size); +void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size); +bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud); +size_t upb_env_bytesallocated(const upb_env *e); + +UPB_END_EXTERN_C + +// An allocator that allocates from an initial memory region (likely the stack) +// before falling back to another allocator. +UPB_DEFINE_CLASS0(upb::SeededAllocator, + public: + SeededAllocator(void *mem, size_t len); + ~SeededAllocator(); + + // Set a custom fallback memory allocation function for the allocator, to use + // once the initial region runs out. + // + // May ONLY be called before GetAllocationFunction(). If this is not + // called, the system realloc() will be the fallback allocator. + void SetFallbackAllocator(upb_alloc_func *alloc, void *ud); + + // Gets the allocation function for this allocator. + upb_alloc_func* GetAllocationFunction(); + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(SeededAllocator); +, +UPB_DEFINE_STRUCT0(upb_seededalloc, + // Fallback alloc function. + upb_alloc_func *alloc; + upb_cleanup_func *alloc_cleanup; + void *alloc_ud; + bool need_cleanup; + bool returned_allocfunc; + + // Userdata for default alloc func. + void *default_alloc_ud; + + // Pointers for the initial memory region. 
+ char *mem_base; + char *mem_ptr; + char *mem_limit; + + // For future expansion, since the size of this struct is exposed to users. + void *future1; + void *future2; +)); + +UPB_BEGIN_EXTERN_C + +void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len); +void upb_seededalloc_uninit(upb_seededalloc *a); +void upb_seededalloc_setfallbackalloc(upb_seededalloc *a, upb_alloc_func *func, + void *ud); +upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a); + +UPB_END_EXTERN_C + +#ifdef __cplusplus + +namespace upb { + +inline Environment::Environment() { + upb_env_init(this); +} +inline Environment::~Environment() { + upb_env_uninit(this); +} +inline void Environment::SetAllocationFunction(upb_alloc_func *alloc, + void *ud) { + upb_env_setallocfunc(this, alloc, ud); +} +inline void Environment::SetErrorFunction(upb_error_func *func, void *ud) { + upb_env_seterrorfunc(this, func, ud); +} +inline void Environment::ReportErrorsTo(Status* status) { + upb_env_reporterrorsto(this, status); +} +inline bool Environment::ok() const { + return upb_env_ok(this); +} +inline bool Environment::ReportError(const Status* status) { + return upb_env_reporterror(this, status); +} +inline void *Environment::Malloc(size_t size) { + return upb_env_malloc(this, size); +} +inline void *Environment::Realloc(void *ptr, size_t oldsize, size_t size) { + return upb_env_realloc(this, ptr, oldsize, size); +} +inline bool Environment::AddCleanup(upb_cleanup_func *func, void *ud) { + return upb_env_addcleanup(this, func, ud); +} +inline size_t Environment::BytesAllocated() const { + return upb_env_bytesallocated(this); +} + +inline SeededAllocator::SeededAllocator(void *mem, size_t len) { + upb_seededalloc_init(this, mem, len); +} +inline SeededAllocator::~SeededAllocator() { + upb_seededalloc_uninit(this); +} +inline void SeededAllocator::SetFallbackAllocator(upb_alloc_func *alloc, + void *ud) { + upb_seededalloc_setfallbackalloc(this, alloc, ud); +} +inline upb_alloc_func 
*SeededAllocator::GetAllocationFunction() { + return upb_seededalloc_getallocfunc(this); +} + +} // namespace upb + +#endif // __cplusplus + +#endif // UPB_ENV_H_ /* * upb - a minimalist implementation of protocol buffers. * @@ -6018,27 +6290,6 @@ UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc); UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink); UPB_DECLARE_TYPE(upb::Sink, upb_sink); -// Internal-only struct for the sink. -struct upb_sinkframe { - UPB_PRIVATE_FOR_CPP - const upb_handlers *h; - void *closure; - - // For any frames besides the top, this is the END* callback that will run - // when the subframe is popped (for example, for a "sequence" frame the frame - // above it will be a UPB_HANDLER_ENDSEQ handler). But this is only - // necessary for assertion checking inside upb_sink and can be omitted if the - // sink has only one caller. - // - // TODO(haberman): have a mechanism for ensuring that a sink only has one - // caller. - upb_selector_t selector; -}; - -// The maximum nesting depth that upb::Sink will allow. Matches proto2's limit. -// TODO: make this a runtime-settable property of Sink. -#define UPB_SINK_MAX_NESTING 64 - // A upb::Sink is an object that binds a upb::Handlers object to some runtime // state. It represents an endpoint to which data can be sent. // @@ -6598,45 +6849,11 @@ class Reader; UPB_DECLARE_TYPE(upb::descriptor::Reader, upb_descreader); -// Internal-only structs used by Reader. - -// upb_deflist is an internal-only dynamic array for storing a growing list of -// upb_defs. -typedef struct { - UPB_PRIVATE_FOR_CPP - upb_def **defs; - size_t len; - size_t size; - bool owned; -} upb_deflist; - -// We keep a stack of all the messages scopes we are currently in, as well as -// the top-level file scope. This is necessary to correctly qualify the -// definitions that are contained inside. "name" tracks the name of the -// message or package (a bare name -- not qualified by any enclosing scopes). 
-typedef struct { - UPB_PRIVATE_FOR_CPP - char *name; - // Index of the first def that is under this scope. For msgdefs, the - // msgdef itself is at start-1. - int start; -} upb_descreader_frame; - -// The maximum number of nested declarations that are allowed, ie. -// message Foo { -// message Bar { -// message Baz { -// } -// } -// } -// -// This is a resource limit that affects how big our runtime stack can grow. -// TODO: make this a runtime-settable property of the Reader instance. -#define UPB_MAX_MESSAGE_NESTING 64 +#ifdef __cplusplus // Class that receives descriptor data according to the descriptor.proto schema // and use it to build upb::Defs corresponding to that schema. -UPB_DEFINE_CLASS0(upb::descriptor::Reader, +class upb::descriptor::Reader { public: // These handlers must have come from NewHandlers() and must outlive the // Reader. @@ -6646,11 +6863,7 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader, // to build/memory-manage the handlers at runtime at all). Unfortunately this // is a bit tricky to implement for Handlers, but necessary to simplify this // interface. - Reader(const Handlers* handlers, Status* status); - ~Reader(); - - // Resets the reader's state and discards any defs it may have built. - void Reset(); + static Reader* Create(Environment* env, const Handlers* handlers); // The reader's input; this is where descriptor.proto data should be sent. Sink* input(); @@ -6666,45 +6879,30 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader, // Builds and returns handlers for the reader, owned by "owner." static Handlers* NewHandlers(const void* owner); -, -UPB_DEFINE_STRUCT0(upb_descreader, - upb_sink sink; - upb_deflist defs; - upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING]; - int stack_len; - - uint32_t number; - char *name; - bool saw_number; - bool saw_name; - char *default_string; + private: + UPB_DISALLOW_POD_OPS(Reader, upb::descriptor::Reader); +}; - upb_fielddef *f; -)); +#endif -UPB_BEGIN_EXTERN_C // { +UPB_BEGIN_EXTERN_C // C API. 
-void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers, - upb_status *status); -void upb_descreader_uninit(upb_descreader *r); -void upb_descreader_reset(upb_descreader *r); +upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h); upb_sink *upb_descreader_input(upb_descreader *r); upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n); const upb_handlers *upb_descreader_newhandlers(const void *owner); -UPB_END_EXTERN_C // } +UPB_END_EXTERN_C #ifdef __cplusplus // C++ implementation details. ///////////////////////////////////////////////// namespace upb { namespace descriptor { -inline Reader::Reader(const Handlers *h, Status *s) { - upb_descreader_init(this, h, s); +inline Reader* Reader::Create(Environment* e, const Handlers *h) { + return upb_descreader_create(e, h); } -inline Reader::~Reader() { upb_descreader_uninit(this); } -inline void Reader::Reset() { upb_descreader_reset(this); } inline Sink* Reader::input() { return upb_descreader_input(this); } inline upb::Def** Reader::GetDefs(void* owner, int* n) { return upb_descreader_getdefs(this, owner, n); @@ -6764,44 +6962,6 @@ UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder); UPB_DECLARE_TYPE(upb::pb::DecoderMethod, upb_pbdecodermethod); UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts); -// The maximum that any submessages can be nested. Matches proto2's limit. -// This specifies the size of the decoder's statically-sized array and therefore -// setting it high will cause the upb::pb::Decoder object to be larger. -// -// If necessary we can add a runtime-settable property to Decoder that allow -// this to be larger than the compile-time setting, but this would add -// complexity, particularly since we would have to decide how/if to give users -// the ability to set a custom memory allocation function. -#define UPB_DECODER_MAX_NESTING 64 - -// Internal-only struct used by the decoder. 
-typedef struct { - UPB_PRIVATE_FOR_CPP - // Space optimization note: we store two pointers here that the JIT - // doesn't need at all; the upb_handlers* inside the sink and - // the dispatch table pointer. We can optimze so that the JIT uses - // smaller stack frames than the interpreter. The only thing we need - // to guarantee is that the fallback routines can find end_ofs. - upb_sink sink; - - // The absolute stream offset of the end-of-frame delimiter. - // Non-delimited frames (groups and non-packed repeated fields) reuse the - // delimiter of their parent, even though the frame may not end there. - // - // NOTE: the JIT stores a slightly different value here for non-top frames. - // It stores the value relative to the end of the enclosed message. But the - // top frame is still stored the same way, which is important for ensuring - // that calls from the JIT into C work correctly. - uint64_t end_ofs; - const uint32_t *base; - - // 0 indicates a length-delimited field. - // A positive number indicates a known group. - // A negative number indicates an unknown group. - int32_t groupnum; - upb_inttable *dispatch; // Not used by the JIT. -} upb_pbdecoder_frame; - // The parameters one uses to construct a DecoderMethod. // TODO(haberman): move allowjit here? Seems more convenient for users. UPB_DEFINE_CLASS0(upb::pb::DecoderMethodOptions, @@ -6879,22 +7039,31 @@ UPB_DEFINE_STRUCT(upb_pbdecodermethod, upb_refcounted, upb_inttable dispatch; )); +// Preallocation hint: decoder won't allocate more bytes than this when first +// constructed. This hint may be an overestimate for some build configurations. +// But if the decoder library is upgraded without recompiling the application, +// it may be an underestimate. +#define UPB_PB_DECODER_SIZE 4400 + +#ifdef __cplusplus + // A Decoder receives binary protobuf data on its input sink and pushes the // decoded data to its output sink. 
-UPB_DEFINE_CLASS0(upb::pb::Decoder, +class upb::pb::Decoder { public: // Constructs a decoder instance for the given method, which must outlive this // decoder. Any errors during parsing will be set on the given status, which // must also outlive this decoder. - Decoder(const DecoderMethod* method, Status* status); - ~Decoder(); + // + // The sink must match the given method. + static Decoder* Create(Environment* env, const DecoderMethod* method, + Sink* output); // Returns the DecoderMethod this decoder is parsing from. - // TODO(haberman): Do users need to be able to rebind this? const DecoderMethod* method() const; - // Resets the state of the decoder. - void Reset(); + // The sink on which this decoder receives input. + BytesSink* input(); // Returns number of bytes successfully parsed. // @@ -6905,76 +7074,25 @@ UPB_DEFINE_CLASS0(upb::pb::Decoder, // callback. uint64_t BytesParsed() const; - // Resets the output sink of the Decoder. - // The given sink must match method()->dest_handlers(). + // Gets/sets the parsing nexting limit. If the total number of nested + // submessages and repeated fields hits this limit, parsing will fail. This + // is a resource limit that controls the amount of memory used by the parsing + // stack. // - // This must be called at least once before the decoder can be used. It may - // only be called with the decoder is in a state where it was just created or - // reset with pipeline.Reset(). The given sink must be from the same pipeline - // as this decoder. - bool ResetOutput(Sink* sink); - - // The sink on which this decoder receives input. - BytesSink* input(); - - private: - UPB_DISALLOW_COPY_AND_ASSIGN(Decoder); -, -UPB_DEFINE_STRUCT0(upb_pbdecoder, UPB_QUOTE( - // Our input sink. - upb_bytessink input_; - - // The decoder method we are parsing with (owned). - const upb_pbdecodermethod *method_; - - size_t call_len; - const uint32_t *pc, *last; - - // Current input buffer and its stream offset. 
- const char *buf, *ptr, *end, *checkpoint; - - // End of the delimited region, relative to ptr, or NULL if not in this buf. - const char *delim_end; - - // End of the delimited region, relative to ptr, or end if not in this buf. - const char *data_end; - - // Overall stream offset of "buf." - uint64_t bufstart_ofs; - - // Buffer for residual bytes not parsed from the previous buffer. - // The maximum number of residual bytes we require is 12; a five-byte - // unknown tag plus an eight-byte value, less one because the value - // is only a partial value. - char residual[12]; - char *residual_end; + // Setting the limit will fail if the parser is currently suspended at a depth + // greater than this, or if memory allocation of the stack fails. + size_t max_nesting() const; + bool set_max_nesting(size_t max); - // Stores the user buffer passed to our decode function. - const char *buf_param; - size_t size_param; - const upb_bufhandle *handle; - -#ifdef UPB_USE_JIT_X64 - // Used momentarily by the generated code to store a value while a user - // function is called. - uint32_t tmp_len; + void Reset(); - const void *saved_rsp; -#endif + static const size_t kSize = UPB_PB_DECODER_SIZE; - upb_status *status; + private: + UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder); +}; - // Our internal stack. - upb_pbdecoder_frame *top, *limit; - upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING]; -#ifdef UPB_USE_JIT_X64 - // Each native stack frame needs two pointers, plus we need a few frames for - // the enter/exit trampolines. - const uint32_t *callstack[(UPB_DECODER_MAX_NESTING * 2) + 10]; -#else - const uint32_t *callstack[UPB_DECODER_MAX_NESTING]; -#endif -))); +#endif // __cplusplus // A class for caching protobuf processing code, whether bytecode for the // interpreted decoder or machine code for the JIT. 
@@ -7023,14 +7141,15 @@ UPB_DEFINE_STRUCT0(upb_pbcodecache, UPB_BEGIN_EXTERN_C // { -void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *method, - upb_status *status); -void upb_pbdecoder_uninit(upb_pbdecoder *d); -void upb_pbdecoder_reset(upb_pbdecoder *d); +upb_pbdecoder *upb_pbdecoder_create(upb_env *e, + const upb_pbdecodermethod *method, + upb_sink *output); const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d); -bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink); upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d); uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d); +size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d); +bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max); +void upb_pbdecoder_reset(upb_pbdecoder *d); void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts, const upb_handlers *h); @@ -7065,27 +7184,27 @@ namespace upb { namespace pb { -inline Decoder::Decoder(const DecoderMethod* m, Status* s) { - upb_pbdecoder_init(this, m, s); -} -inline Decoder::~Decoder() { - upb_pbdecoder_uninit(this); +// static +inline Decoder* Decoder::Create(Environment* env, const DecoderMethod* m, + Sink* sink) { + return upb_pbdecoder_create(env, m, sink); } inline const DecoderMethod* Decoder::method() const { return upb_pbdecoder_method(this); } -inline void Decoder::Reset() { - upb_pbdecoder_reset(this); +inline BytesSink* Decoder::input() { + return upb_pbdecoder_input(this); } inline uint64_t Decoder::BytesParsed() const { return upb_pbdecoder_bytesparsed(this); } -inline bool Decoder::ResetOutput(Sink* sink) { - return upb_pbdecoder_resetoutput(this, sink); +inline size_t Decoder::max_nesting() const { + return upb_pbdecoder_maxnesting(this); } -inline BytesSink* Decoder::input() { - return upb_pbdecoder_input(this); +inline bool Decoder::set_max_nesting(size_t max) { + return upb_pbdecoder_setmaxnesting(this, max); } +inline void Decoder::Reset() { upb_pbdecoder_reset(this); } 
inline DecoderMethodOptions::DecoderMethodOptions(const Handlers* h) { upb_pbdecodermethodopts_init(this, h); @@ -7242,6 +7361,95 @@ typedef struct { #endif } mgroup; +// The maximum that any submessages can be nested. Matches proto2's limit. +// This specifies the size of the decoder's statically-sized array and therefore +// setting it high will cause the upb::pb::Decoder object to be larger. +// +// If necessary we can add a runtime-settable property to Decoder that allow +// this to be larger than the compile-time setting, but this would add +// complexity, particularly since we would have to decide how/if to give users +// the ability to set a custom memory allocation function. +#define UPB_DECODER_MAX_NESTING 64 + +// Internal-only struct used by the decoder. +typedef struct { + // Space optimization note: we store two pointers here that the JIT + // doesn't need at all; the upb_handlers* inside the sink and + // the dispatch table pointer. We can optimze so that the JIT uses + // smaller stack frames than the interpreter. The only thing we need + // to guarantee is that the fallback routines can find end_ofs. + upb_sink sink; + + // The absolute stream offset of the end-of-frame delimiter. + // Non-delimited frames (groups and non-packed repeated fields) reuse the + // delimiter of their parent, even though the frame may not end there. + // + // NOTE: the JIT stores a slightly different value here for non-top frames. + // It stores the value relative to the end of the enclosed message. But the + // top frame is still stored the same way, which is important for ensuring + // that calls from the JIT into C work correctly. + uint64_t end_ofs; + const uint32_t *base; + + // 0 indicates a length-delimited field. + // A positive number indicates a known group. + // A negative number indicates an unknown group. + int32_t groupnum; + upb_inttable *dispatch; // Not used by the JIT. 
+} upb_pbdecoder_frame; + +struct upb_pbdecoder { + upb_env *env; + + // Our input sink. + upb_bytessink input_; + + // The decoder method we are parsing with (owned). + const upb_pbdecodermethod *method_; + + size_t call_len; + const uint32_t *pc, *last; + + // Current input buffer and its stream offset. + const char *buf, *ptr, *end, *checkpoint; + + // End of the delimited region, relative to ptr, or NULL if not in this buf. + const char *delim_end; + + // End of the delimited region, relative to ptr, or end if not in this buf. + const char *data_end; + + // Overall stream offset of "buf." + uint64_t bufstart_ofs; + + // Buffer for residual bytes not parsed from the previous buffer. + // The maximum number of residual bytes we require is 12; a five-byte + // unknown tag plus an eight-byte value, less one because the value + // is only a partial value. + char residual[12]; + char *residual_end; + + // Stores the user buffer passed to our decode function. + const char *buf_param; + size_t size_param; + const upb_bufhandle *handle; + + // Our internal stack. + upb_pbdecoder_frame *stack, *top, *limit; + const uint32_t **callstack; + size_t stack_size; + + upb_status *status; + +#ifdef UPB_USE_JIT_X64 + // Used momentarily by the generated code to store a value while a user + // function is called. + uint32_t tmp_len; + + const void *saved_rsp; +#endif +}; + // Decoder entry points; used as handlers. void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint); void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint); @@ -7509,101 +7717,42 @@ UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder); /* upb::pb::Encoder ***********************************************************/ -// The output buffer is divided into segments; a segment is a string of data -// that is "ready to go" -- it does not need any varint lengths inserted into -// the middle. The seams between segments are where varints will be inserted -// once they are known. 
-// -// We also use the concept of a "run", which is a range of encoded bytes that -// occur at a single submessage level. Every segment contains one or more runs. -// -// A segment can span messages. Consider: -// -// .--Submessage lengths---------. -// | | | -// | V V -// V | |--------------- | |----------------- -// Submessages: | |----------------------------------------------- -// Top-level msg: ------------------------------------------------------------ -// -// Segments: ----- ------------------- ----------------- -// Runs: *---- *--------------*--- *---------------- -// (* marks the start) -// -// Note that the top-level menssage is not in any segment because it does not -// have any length preceding it. -// -// A segment is only interrupted when another length needs to be inserted. So -// observe how the second segment spans both the inner submessage and part of -// the next enclosing message. -typedef struct { - UPB_PRIVATE_FOR_CPP - uint32_t msglen; // The length to varint-encode before this segment. - uint32_t seglen; // Length of the segment. -} upb_pb_encoder_segment; - -UPB_DEFINE_CLASS0(upb::pb::Encoder, - public: - Encoder(const upb::Handlers* handlers); - ~Encoder(); - - static reffed_ptr NewHandlers(const upb::MessageDef* msg); +// Preallocation hint: decoder won't allocate more bytes than this when first +// constructed. This hint may be an overestimate for some build configurations. +// But if the decoder library is upgraded without recompiling the application, +// it may be an underestimate. +#define UPB_PB_ENCODER_SIZE 768 - // Resets the state of the printer, so that it will expect to begin a new - // document. - void Reset(); +#ifdef __cplusplus - // Resets the output pointer which will serve as our closure. - void ResetOutput(BytesSink* output); +class upb::pb::Encoder { + public: + // Creates a new encoder in the given environment. The Handlers must have + // come from NewHandlers() below. 
+ static Encoder* Create(Environment* env, const Handlers* handlers, + BytesSink* output); // The input to the encoder. Sink* input(); - private: - UPB_DISALLOW_COPY_AND_ASSIGN(Encoder); -, -UPB_DEFINE_STRUCT0(upb_pb_encoder, UPB_QUOTE( - // Our input and output. - upb_sink input_; - upb_bytessink *output_; + // Creates a new set of handlers for this MessageDef. + static reffed_ptr NewHandlers(const MessageDef* msg); - // The "subclosure" -- used as the inner closure as part of the bytessink - // protocol. - void *subc; + static const size_t kSize = UPB_PB_ENCODER_SIZE; - // The output buffer and limit, and our current write position. "buf" - // initially points to "initbuf", but is dynamically allocated if we need to - // grow beyond the initial size. - char *buf, *ptr, *limit; - - // The beginning of the current run, or undefined if we are at the top level. - char *runbegin; - - // The list of segments we are accumulating. - upb_pb_encoder_segment *segbuf, *segptr, *seglimit; - - // The stack of enclosing submessages. Each entry in the stack points to the - // segment where this submessage's length is being accumulated. - int stack[UPB_PBENCODER_MAX_NESTING], *top, *stacklimit; - - // Depth of startmsg/endmsg calls. - int depth; + private: + UPB_DISALLOW_POD_OPS(Encoder, upb::pb::Encoder); +}; - // Initial buffers for the output buffer and segment buffer. If we outgrow - // these we will dynamically allocate bigger ones. 
- char initbuf[256]; - upb_pb_encoder_segment seginitbuf[32]; -))); +#endif UPB_BEGIN_EXTERN_C const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m, const void *owner); -void upb_pb_encoder_reset(upb_pb_encoder *e); upb_sink *upb_pb_encoder_input(upb_pb_encoder *p); -void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h); -void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output); -void upb_pb_encoder_uninit(upb_pb_encoder *e); +upb_pb_encoder* upb_pb_encoder_create(upb_env* e, const upb_handlers* h, + upb_bytessink* output); UPB_END_EXTERN_C @@ -7611,17 +7760,9 @@ UPB_END_EXTERN_C namespace upb { namespace pb { -inline Encoder::Encoder(const upb::Handlers* handlers) { - upb_pb_encoder_init(this, handlers); -} -inline Encoder::~Encoder() { - upb_pb_encoder_uninit(this); -} -inline void Encoder::Reset() { - upb_pb_encoder_reset(this); -} -inline void Encoder::ResetOutput(BytesSink* output) { - upb_pb_encoder_resetoutput(this, output); +inline Encoder* Encoder::Create(Environment* env, const Handlers* handlers, + BytesSink* output) { + return upb_pb_encoder_create(env, handlers, output); } inline Sink* Encoder::input() { return upb_pb_encoder_input(this); @@ -7739,58 +7880,51 @@ class TextPrinter; UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter); -UPB_DEFINE_CLASS0(upb::pb::TextPrinter, +#ifdef __cplusplus + +class upb::pb::TextPrinter { public: // The given handlers must have come from NewHandlers(). It must outlive the // TextPrinter. 
- explicit TextPrinter(const upb::Handlers* handlers); + static TextPrinter *Create(Environment *env, const upb::Handlers *handlers, + BytesSink *output); void SetSingleLineMode(bool single_line); - bool ResetOutput(BytesSink* output); Sink* input(); // If handler caching becomes a requirement we can add a code cache as in // decoder.h static reffed_ptr NewHandlers(const MessageDef* md); +}; - private: -, -UPB_DEFINE_STRUCT0(upb_textprinter, - upb_sink input_; - upb_bytessink *output_; - int indent_depth_; - bool single_line_; - void *subc; -)); +#endif -UPB_BEGIN_EXTERN_C // { +UPB_BEGIN_EXTERN_C // C API. -void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h); -void upb_textprinter_uninit(upb_textprinter *p); -bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output); +upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h, + upb_bytessink *output); void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line); upb_sink *upb_textprinter_input(upb_textprinter *p); const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m, const void *owner); -UPB_END_EXTERN_C // } +UPB_END_EXTERN_C #ifdef __cplusplus namespace upb { namespace pb { -inline TextPrinter::TextPrinter(const upb::Handlers* handlers) { - upb_textprinter_init(this, handlers); +inline TextPrinter *TextPrinter::Create(Environment *env, + const upb::Handlers *handlers, + BytesSink *output) { + return upb_textprinter_create(env, handlers, output); } inline void TextPrinter::SetSingleLineMode(bool single_line) { upb_textprinter_setsingleline(this, single_line); } -inline bool TextPrinter::ResetOutput(BytesSink* output) { - return upb_textprinter_resetoutput(this, output); -} inline Sink* TextPrinter::input() { return upb_textprinter_input(this); } @@ -7829,96 +7963,32 @@ class Parser; UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser); -// Internal-only struct used by the parser. 
A parser frame corresponds -// one-to-one with a handler (sink) frame. -typedef struct { - UPB_PRIVATE_FOR_CPP - upb_sink sink; - // The current message in which we're parsing, and the field whose value we're - // expecting next. - const upb_msgdef *m; - const upb_fielddef *f; - - // We are in a repeated-field context, ready to emit mapentries as - // submessages. This flag alters the start-of-object (open-brace) behavior to - // begin a sequence of mapentry messages rather than a single submessage. - bool is_map; - // We are in a map-entry message context. This flag is set when parsing the - // value field of a single map entry and indicates to all value-field parsers - // (subobjects, strings, numbers, and bools) that the map-entry submessage - // should end as soon as the value is parsed. - bool is_mapentry; - // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent - // message's map field that we're currently parsing. This differs from |f| - // because |f| is the field in the *current* message (i.e., the map-entry - // message itself), not the parent's field that leads to this map. - const upb_fielddef *mapfield; -} upb_jsonparser_frame; - - /* upb::json::Parser **********************************************************/ -#define UPB_JSON_MAX_DEPTH 64 +// Preallocation hint: parser won't allocate more bytes than this when first +// constructed. This hint may be an overestimate for some build configurations. +// But if the parser library is upgraded without recompiling the application, +// it may be an underestimate. +#define UPB_JSON_PARSER_SIZE 3568 + +#ifdef __cplusplus // Parses an incoming BytesStream, pushing the results to the destination sink. -UPB_DEFINE_CLASS0(upb::json::Parser, +class upb::json::Parser { public: - Parser(Status* status); - ~Parser(); - - // Resets the state of the printer, so that it will expect to begin a new - // document. 
- void Reset(); + static Parser* Create(Environment* env, Sink* output); - // Resets the output pointer which will serve as our closure. Implies - // Reset(). - void ResetOutput(Sink* output); - - // The input to the printer. BytesSink* input(); -, -UPB_DEFINE_STRUCT0(upb_json_parser, - upb_byteshandler input_handler_; - upb_bytessink input_; - - // Stack to track the JSON scopes we are in. - upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; - upb_jsonparser_frame *top; - upb_jsonparser_frame *limit; - - upb_status *status; - - // Ragel's internal parsing stack for the parsing state machine. - int current_state; - int parser_stack[UPB_JSON_MAX_DEPTH]; - int parser_top; - - // The handle for the current buffer. - const upb_bufhandle *handle; - - // Accumulate buffer. See details in parser.rl. - const char *accumulated; - size_t accumulated_len; - char *accumulate_buf; - size_t accumulate_buf_size; - - // Multi-part text data. See details in parser.rl. - int multipart_state; - upb_selector_t string_selector; - // Input capture. See details in parser.rl. - const char *capture; + private: + UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser); +}; - // Intermediate result of parsing a unicode escape sequence. 
- uint32_t digit; -)); +#endif UPB_BEGIN_EXTERN_C -void upb_json_parser_init(upb_json_parser *p, upb_status *status); -void upb_json_parser_uninit(upb_json_parser *p); -void upb_json_parser_reset(upb_json_parser *p); -void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *output); +upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output); upb_bytessink *upb_json_parser_input(upb_json_parser *p); UPB_END_EXTERN_C @@ -7927,11 +7997,8 @@ UPB_END_EXTERN_C namespace upb { namespace json { -inline Parser::Parser(Status* status) { upb_json_parser_init(this, status); } -inline Parser::~Parser() { upb_json_parser_uninit(this); } -inline void Parser::Reset() { upb_json_parser_reset(this); } -inline void Parser::ResetOutput(Sink* output) { - upb_json_parser_resetoutput(this, output); +inline Parser* Parser::Create(Environment* env, Sink* output) { + return upb_json_parser_create(env, output); } inline BytesSink* Parser::input() { return upb_json_parser_input(this); @@ -7970,71 +8037,48 @@ UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer); /* upb::json::Printer *********************************************************/ -// Prints an incoming stream of data to a BytesSink in JSON format. -UPB_DEFINE_CLASS0(upb::json::Printer, - public: - Printer(const upb::Handlers* handlers); - ~Printer(); +#define UPB_JSON_PRINTER_SIZE 168 - // Resets the state of the printer, so that it will expect to begin a new - // document. - void Reset(); +#ifdef __cplusplus - // Resets the output pointer which will serve as our closure. Implies - // Reset(). - void ResetOutput(BytesSink* output); +// Prints an incoming stream of data to a BytesSink in JSON format. +class upb::json::Printer { + public: + static Printer* Create(Environment* env, const upb::Handlers* handlers, + BytesSink* output); // The input to the printer. Sink* input(); // Returns handlers for printing according to the specified schema. 
static reffed_ptr NewHandlers(const upb::MessageDef* md); -, -UPB_DEFINE_STRUCT0(upb_json_printer, - upb_sink input_; - // BytesSink closure. - void *subc_; - upb_bytessink *output_; - - // We track the depth so that we know when to emit startstr/endstr on the - // output. - int depth_; - // Have we emitted the first element? This state is necessary to emit commas - // without leaving a trailing comma in arrays/maps. We keep this state per - // frame depth. - // - // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. - // We count frames (contexts in which we separate elements by commas) as both - // repeated fields and messages (maps), and the worst case is a - // message->repeated field->submessage->repeated field->... nesting. - bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; -)); -UPB_BEGIN_EXTERN_C // { + static const size_t kSize = UPB_JSON_PRINTER_SIZE; -// Native C API. + private: + UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer); +}; + +#endif -void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h); -void upb_json_printer_uninit(upb_json_printer *p); -void upb_json_printer_reset(upb_json_printer *p); -void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output); +UPB_BEGIN_EXTERN_C + +// Native C API. 
+upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h, + upb_bytessink *output); upb_sink *upb_json_printer_input(upb_json_printer *p); const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md, const void *owner); -UPB_END_EXTERN_C // } +UPB_END_EXTERN_C #ifdef __cplusplus namespace upb { namespace json { -inline Printer::Printer(const upb::Handlers* handlers) { - upb_json_printer_init(this, handlers); -} -inline Printer::~Printer() { upb_json_printer_uninit(this); } -inline void Printer::Reset() { upb_json_printer_reset(this); } -inline void Printer::ResetOutput(BytesSink* output) { - upb_json_printer_resetoutput(this, output); +inline Printer* Printer::Create(Environment* env, const upb::Handlers* handlers, + BytesSink* output) { + return upb_json_printer_create(env, handlers, output); } inline Sink* Printer::input() { return upb_json_printer_input(this); } inline reffed_ptr Printer::NewHandlers( -- cgit v1.2.3