aboutsummaryrefslogtreecommitdiff
path: root/ruby/ext/google/protobuf_c/upb.h
diff options
context:
space:
mode:
authorChris Fallin <cfallin@google.com>2015-05-14 18:24:26 -0700
committerChris Fallin <cfallin@google.com>2015-05-15 11:36:12 -0700
commitd326277397e345b5bda4a8afbd0a9d54f01b9a06 (patch)
tree06374379f0c2290e69110c787a5ad347e262eac6 /ruby/ext/google/protobuf_c/upb.h
parentb481a4f920d2f5872bc1e9bfd6b0656f6ad0b713 (diff)
downloadprotobuf-d326277397e345b5bda4a8afbd0a9d54f01b9a06.tar.gz
protobuf-d326277397e345b5bda4a8afbd0a9d54f01b9a06.tar.bz2
protobuf-d326277397e345b5bda4a8afbd0a9d54f01b9a06.zip
Update MRI C Ruby extension to use new version of upb.
- Alter encode/decode paths to use the `upb_env` (environment) abstraction. - Update upb amalgamation to upstream `93791bfe`. - Fix a compilation warning (void*->char* cast). - Modify build flags so that upb doesn't produce warnings -- the Travis build logs were pretty cluttered previously.
Diffstat (limited to 'ruby/ext/google/protobuf_c/upb.h')
-rw-r--r--ruby/ext/google/protobuf_c/upb.h978
1 files changed, 511 insertions, 467 deletions
diff --git a/ruby/ext/google/protobuf_c/upb.h b/ruby/ext/google/protobuf_c/upb.h
index 8f6d3643..97df943a 100644
--- a/ruby/ext/google/protobuf_c/upb.h
+++ b/ruby/ext/google/protobuf_c/upb.h
@@ -99,6 +99,15 @@
#define UPB_INLINE static inline
#endif
+// For use in C/C++ source files (not headers), forces inlining within the file.
+#ifdef __GNUC__
+#define UPB_FORCEINLINE inline __attribute__((always_inline))
+#define UPB_NOINLINE __attribute__((noinline))
+#else
+#define UPB_FORCEINLINE
+#define UPB_NOINLINE
+#endif
+
#if __STDC_VERSION__ >= 199901L
#define UPB_C99
#endif
@@ -4805,10 +4814,8 @@ UPB_DEFINE_STRUCT0(upb_byteshandler,
));
void upb_byteshandler_init(upb_byteshandler *h);
-void upb_byteshandler_uninit(upb_byteshandler *h);
// Caller must ensure that "d" outlives the handlers.
-// TODO(haberman): support handlerfree function for the data.
// TODO(haberman): should this have a "freeze" operation? It's not necessary
// for memory management, but could be useful to force immutability and provide
// a convenient moment to verify that all registration succeeded.
@@ -4983,12 +4990,17 @@ template <class T> struct disable_if_same<T, T> {};
template <class T> void DeletePointer(void *p) { delete static_cast<T>(p); }
template <class T1, class T2>
-struct FirstUnlessVoid {
+struct FirstUnlessVoidOrBool {
typedef T1 value;
};
template <class T2>
-struct FirstUnlessVoid<void, T2> {
+struct FirstUnlessVoidOrBool<void, T2> {
+ typedef T2 value;
+};
+
+template <class T2>
+struct FirstUnlessVoidOrBool<bool, T2> {
typedef T2 value;
};
@@ -5370,10 +5382,14 @@ inline MethodSig4<R, C, P1, P2, P3, P4> MatchFunc(R (C::*f)(P1, P2, P3, P4)) {
//
// 1. If the function returns void, make it return the expected type and with
// a value that always indicates success.
-// 2. If the function is expected to return void* but doesn't, wrap it so it
-// does (either by returning the closure param if the wrapped function
-// returns void or by casting a different pointer type to void* for
-// return).
+// 2. If the function returns bool, make it return the expected type with a
+// value that indicates success or failure.
+//
+// The "expected type" for return is:
+// 1. void* for start handlers. If the closure parameter has a different type
+// we will cast it to void* for the return in the success case.
+// 2. size_t for string buffer handlers.
+// 3. bool for everything else.
// Template parameters are FuncN type and desired return type.
template <class F, class R, class Enable = void>
@@ -5762,10 +5778,13 @@ inline Handler<T>::Handler(F func)
attr_.SetClosureType(UniquePtrForType<typename F::FuncInfo::Closure>());
// We use the closure type (from the first parameter) if the return type is
- // void. This is all nonsense for non START* handlers, but it doesn't matter
- // because in that case the value will be ignored.
- typedef typename FirstUnlessVoid<typename F::FuncInfo::Return,
- typename F::FuncInfo::Closure>::value
+ // void or bool, since these are the two cases we wrap to return the closure's
+ // type anyway.
+ //
+ // This is all nonsense for non START* handlers, but it doesn't matter because
+ // in that case the value will be ignored.
+ typedef typename FirstUnlessVoidOrBool<typename F::FuncInfo::Return,
+ typename F::FuncInfo::Closure>::value
EffectiveReturn;
attr_.SetReturnClosureType(UniquePtrForType<EffectiveReturn>());
}
@@ -5960,9 +5979,7 @@ inline BytesHandler::BytesHandler() {
upb_byteshandler_init(this);
}
-inline BytesHandler::~BytesHandler() {
- upb_byteshandler_uninit(this);
-}
+inline BytesHandler::~BytesHandler() {}
} // namespace upb
@@ -5986,6 +6003,261 @@ inline BytesHandler::~BytesHandler() {
/*
* upb - a minimalist implementation of protocol buffers.
*
+ * Copyright (c) 2014 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A upb::Environment provides a means for injecting malloc and an
+ * error-reporting callback into encoders/decoders. This allows them to be
+ * independent of nearly all assumptions about their actual environment.
+ *
+ * It is also a container for allocating the encoders/decoders themselves that
+ * insulates clients from knowing their actual size. This provides ABI
+ * compatibility even if the size of the objects change. And this allows the
+ * structure definitions to be in the .c files instead of the .h files, making
+ * the .h files smaller and more readable.
+ */
+
+
+#ifndef UPB_ENV_H_
+#define UPB_ENV_H_
+
+#ifdef __cplusplus
+namespace upb {
+class Environment;
+class SeededAllocator;
+}
+#endif
+
+UPB_DECLARE_TYPE(upb::Environment, upb_env);
+UPB_DECLARE_TYPE(upb::SeededAllocator, upb_seededalloc);
+
+typedef void *upb_alloc_func(void *ud, void *ptr, size_t oldsize, size_t size);
+typedef void upb_cleanup_func(void *ud);
+typedef bool upb_error_func(void *ud, const upb_status *status);
+
+// An environment is *not* thread-safe.
+UPB_DEFINE_CLASS0(upb::Environment,
+ public:
+ Environment();
+ ~Environment();
+
+ // Set a custom memory allocation function for the environment. May ONLY
+ // be called before any calls to Malloc()/Realloc()/AddCleanup() below.
+ // If this is not called, the system realloc() function will be used.
+ // The given user pointer "ud" will be passed to the allocation function.
+ //
+ // The allocation function will not receive corresponding "free" calls. It
+ // must ensure that the memory is valid for the lifetime of the Environment,
+ // but it may be reclaimed any time thereafter. The likely usage is that
+ // "ud" points to a stateful allocator, and that the allocator frees all
+ // memory, arena-style, when it is destroyed. In this case the allocator must
+ // outlive the Environment. Another possibility is that the allocation
+ // function returns GC-able memory that is guaranteed to be GC-rooted for the
+ // life of the Environment.
+ void SetAllocationFunction(upb_alloc_func* alloc, void* ud);
+
+ template<class T>
+ void SetAllocator(T* allocator) {
+ SetAllocationFunction(allocator->GetAllocationFunction(), allocator);
+ }
+
+ // Set a custom error reporting function.
+ void SetErrorFunction(upb_error_func* func, void* ud);
+
+ // Set the error reporting function to simply copy the status to the given
+ // status and abort.
+ void ReportErrorsTo(Status* status);
+
+ // Returns true if all allocations and AddCleanup() calls have succeeded,
+ // and no errors were reported with ReportError() (except ones that recovered
+ // successfully).
+ bool ok() const;
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Functions for use by encoders/decoders.
+
+ // Reports an error to this environment's callback, returning true if
+ // the caller should try to recover.
+ bool ReportError(const Status* status);
+
+ // Allocate memory. Uses the environment's allocation function.
+ //
+ // There is no need to free(). All memory will be freed automatically, but is
+ // guaranteed to remain valid for the lifetime of the Environment.
+ void* Malloc(size_t size);
+
+ // Reallocate memory. Preserves "oldsize" bytes from the existing buffer.
+ // Requires: oldsize <= existing_size.
+ //
+ // TODO(haberman): should we also enforce that oldsize <= size?
+ void* Realloc(void* ptr, size_t oldsize, size_t size);
+
+ // Add a cleanup function to run when the environment is destroyed.
+ // Returns false on out-of-memory.
+ //
+ // The first call to AddCleanup() after SetAllocationFunction() is guaranteed
+ // to return true -- this makes it possible to robustly set a cleanup handler
+ // for a custom allocation function.
+ bool AddCleanup(upb_cleanup_func* func, void* ud);
+
+ // Total number of bytes that have been allocated. It is undefined what
+ // Realloc() does to this counter.
+ size_t BytesAllocated() const;
+
+ private:
+ UPB_DISALLOW_COPY_AND_ASSIGN(Environment);
+,
+UPB_DEFINE_STRUCT0(upb_env,
+ bool ok_;
+ size_t bytes_allocated;
+
+ // Alloc function.
+ upb_alloc_func *alloc;
+ void *alloc_ud;
+
+ // Error-reporting function.
+ upb_error_func *err;
+ void *err_ud;
+
+ // Userdata for default alloc func.
+ void *default_alloc_ud;
+
+ // Cleanup entries. Pointer to a cleanup_ent, defined in env.c
+ void *cleanup_head;
+
+ // For future expansion, since the size of this struct is exposed to users.
+ void *future1;
+ void *future2;
+));
+
+UPB_BEGIN_EXTERN_C
+
+void upb_env_init(upb_env *e);
+void upb_env_uninit(upb_env *e);
+void upb_env_setallocfunc(upb_env *e, upb_alloc_func *func, void *ud);
+void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud);
+void upb_env_reporterrorsto(upb_env *e, upb_status *status);
+bool upb_env_ok(const upb_env *e);
+bool upb_env_reporterror(upb_env *e, const upb_status *status);
+void *upb_env_malloc(upb_env *e, size_t size);
+void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size);
+bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud);
+size_t upb_env_bytesallocated(const upb_env *e);
+
+UPB_END_EXTERN_C
+
+// An allocator that allocates from an initial memory region (likely the stack)
+// before falling back to another allocator.
+UPB_DEFINE_CLASS0(upb::SeededAllocator,
+ public:
+ SeededAllocator(void *mem, size_t len);
+ ~SeededAllocator();
+
+ // Set a custom fallback memory allocation function for the allocator, to use
+ // once the initial region runs out.
+ //
+ // May ONLY be called before GetAllocationFunction(). If this is not
+ // called, the system realloc() will be the fallback allocator.
+ void SetFallbackAllocator(upb_alloc_func *alloc, void *ud);
+
+ // Gets the allocation function for this allocator.
+ upb_alloc_func* GetAllocationFunction();
+
+ private:
+ UPB_DISALLOW_COPY_AND_ASSIGN(SeededAllocator);
+,
+UPB_DEFINE_STRUCT0(upb_seededalloc,
+ // Fallback alloc function.
+ upb_alloc_func *alloc;
+ upb_cleanup_func *alloc_cleanup;
+ void *alloc_ud;
+ bool need_cleanup;
+ bool returned_allocfunc;
+
+ // Userdata for default alloc func.
+ void *default_alloc_ud;
+
+ // Pointers for the initial memory region.
+ char *mem_base;
+ char *mem_ptr;
+ char *mem_limit;
+
+ // For future expansion, since the size of this struct is exposed to users.
+ void *future1;
+ void *future2;
+));
+
+UPB_BEGIN_EXTERN_C
+
+void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len);
+void upb_seededalloc_uninit(upb_seededalloc *a);
+void upb_seededalloc_setfallbackalloc(upb_seededalloc *a, upb_alloc_func *func,
+ void *ud);
+upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+namespace upb {
+
+inline Environment::Environment() {
+ upb_env_init(this);
+}
+inline Environment::~Environment() {
+ upb_env_uninit(this);
+}
+inline void Environment::SetAllocationFunction(upb_alloc_func *alloc,
+ void *ud) {
+ upb_env_setallocfunc(this, alloc, ud);
+}
+inline void Environment::SetErrorFunction(upb_error_func *func, void *ud) {
+ upb_env_seterrorfunc(this, func, ud);
+}
+inline void Environment::ReportErrorsTo(Status* status) {
+ upb_env_reporterrorsto(this, status);
+}
+inline bool Environment::ok() const {
+ return upb_env_ok(this);
+}
+inline bool Environment::ReportError(const Status* status) {
+ return upb_env_reporterror(this, status);
+}
+inline void *Environment::Malloc(size_t size) {
+ return upb_env_malloc(this, size);
+}
+inline void *Environment::Realloc(void *ptr, size_t oldsize, size_t size) {
+ return upb_env_realloc(this, ptr, oldsize, size);
+}
+inline bool Environment::AddCleanup(upb_cleanup_func *func, void *ud) {
+ return upb_env_addcleanup(this, func, ud);
+}
+inline size_t Environment::BytesAllocated() const {
+ return upb_env_bytesallocated(this);
+}
+
+inline SeededAllocator::SeededAllocator(void *mem, size_t len) {
+ upb_seededalloc_init(this, mem, len);
+}
+inline SeededAllocator::~SeededAllocator() {
+ upb_seededalloc_uninit(this);
+}
+inline void SeededAllocator::SetFallbackAllocator(upb_alloc_func *alloc,
+ void *ud) {
+ upb_seededalloc_setfallbackalloc(this, alloc, ud);
+}
+inline upb_alloc_func *SeededAllocator::GetAllocationFunction() {
+ return upb_seededalloc_getallocfunc(this);
+}
+
+} // namespace upb
+
+#endif // __cplusplus
+
+#endif // UPB_ENV_H_
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
* Copyright (c) 2010-2012 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
@@ -6018,27 +6290,6 @@ UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc);
UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink);
UPB_DECLARE_TYPE(upb::Sink, upb_sink);
-// Internal-only struct for the sink.
-struct upb_sinkframe {
- UPB_PRIVATE_FOR_CPP
- const upb_handlers *h;
- void *closure;
-
- // For any frames besides the top, this is the END* callback that will run
- // when the subframe is popped (for example, for a "sequence" frame the frame
- // above it will be a UPB_HANDLER_ENDSEQ handler). But this is only
- // necessary for assertion checking inside upb_sink and can be omitted if the
- // sink has only one caller.
- //
- // TODO(haberman): have a mechanism for ensuring that a sink only has one
- // caller.
- upb_selector_t selector;
-};
-
-// The maximum nesting depth that upb::Sink will allow. Matches proto2's limit.
-// TODO: make this a runtime-settable property of Sink.
-#define UPB_SINK_MAX_NESTING 64
-
// A upb::Sink is an object that binds a upb::Handlers object to some runtime
// state. It represents an endpoint to which data can be sent.
//
@@ -6598,45 +6849,11 @@ class Reader;
UPB_DECLARE_TYPE(upb::descriptor::Reader, upb_descreader);
-// Internal-only structs used by Reader.
-
-// upb_deflist is an internal-only dynamic array for storing a growing list of
-// upb_defs.
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- upb_def **defs;
- size_t len;
- size_t size;
- bool owned;
-} upb_deflist;
-
-// We keep a stack of all the messages scopes we are currently in, as well as
-// the top-level file scope. This is necessary to correctly qualify the
-// definitions that are contained inside. "name" tracks the name of the
-// message or package (a bare name -- not qualified by any enclosing scopes).
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- char *name;
- // Index of the first def that is under this scope. For msgdefs, the
- // msgdef itself is at start-1.
- int start;
-} upb_descreader_frame;
-
-// The maximum number of nested declarations that are allowed, ie.
-// message Foo {
-// message Bar {
-// message Baz {
-// }
-// }
-// }
-//
-// This is a resource limit that affects how big our runtime stack can grow.
-// TODO: make this a runtime-settable property of the Reader instance.
-#define UPB_MAX_MESSAGE_NESTING 64
+#ifdef __cplusplus
// Class that receives descriptor data according to the descriptor.proto schema
// and use it to build upb::Defs corresponding to that schema.
-UPB_DEFINE_CLASS0(upb::descriptor::Reader,
+class upb::descriptor::Reader {
public:
// These handlers must have come from NewHandlers() and must outlive the
// Reader.
@@ -6646,11 +6863,7 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader,
// to build/memory-manage the handlers at runtime at all). Unfortunately this
// is a bit tricky to implement for Handlers, but necessary to simplify this
// interface.
- Reader(const Handlers* handlers, Status* status);
- ~Reader();
-
- // Resets the reader's state and discards any defs it may have built.
- void Reset();
+ static Reader* Create(Environment* env, const Handlers* handlers);
// The reader's input; this is where descriptor.proto data should be sent.
Sink* input();
@@ -6666,45 +6879,30 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader,
// Builds and returns handlers for the reader, owned by "owner."
static Handlers* NewHandlers(const void* owner);
-,
-UPB_DEFINE_STRUCT0(upb_descreader,
- upb_sink sink;
- upb_deflist defs;
- upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
- int stack_len;
-
- uint32_t number;
- char *name;
- bool saw_number;
- bool saw_name;
- char *default_string;
+ private:
+ UPB_DISALLOW_POD_OPS(Reader, upb::descriptor::Reader);
+};
- upb_fielddef *f;
-));
+#endif
-UPB_BEGIN_EXTERN_C // {
+UPB_BEGIN_EXTERN_C
// C API.
-void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers,
- upb_status *status);
-void upb_descreader_uninit(upb_descreader *r);
-void upb_descreader_reset(upb_descreader *r);
+upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h);
upb_sink *upb_descreader_input(upb_descreader *r);
upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n);
const upb_handlers *upb_descreader_newhandlers(const void *owner);
-UPB_END_EXTERN_C // }
+UPB_END_EXTERN_C
#ifdef __cplusplus
// C++ implementation details. /////////////////////////////////////////////////
namespace upb {
namespace descriptor {
-inline Reader::Reader(const Handlers *h, Status *s) {
- upb_descreader_init(this, h, s);
+inline Reader* Reader::Create(Environment* e, const Handlers *h) {
+ return upb_descreader_create(e, h);
}
-inline Reader::~Reader() { upb_descreader_uninit(this); }
-inline void Reader::Reset() { upb_descreader_reset(this); }
inline Sink* Reader::input() { return upb_descreader_input(this); }
inline upb::Def** Reader::GetDefs(void* owner, int* n) {
return upb_descreader_getdefs(this, owner, n);
@@ -6764,44 +6962,6 @@ UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder);
UPB_DECLARE_TYPE(upb::pb::DecoderMethod, upb_pbdecodermethod);
UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts);
-// The maximum that any submessages can be nested. Matches proto2's limit.
-// This specifies the size of the decoder's statically-sized array and therefore
-// setting it high will cause the upb::pb::Decoder object to be larger.
-//
-// If necessary we can add a runtime-settable property to Decoder that allow
-// this to be larger than the compile-time setting, but this would add
-// complexity, particularly since we would have to decide how/if to give users
-// the ability to set a custom memory allocation function.
-#define UPB_DECODER_MAX_NESTING 64
-
-// Internal-only struct used by the decoder.
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- // Space optimization note: we store two pointers here that the JIT
- // doesn't need at all; the upb_handlers* inside the sink and
- // the dispatch table pointer. We can optimze so that the JIT uses
- // smaller stack frames than the interpreter. The only thing we need
- // to guarantee is that the fallback routines can find end_ofs.
- upb_sink sink;
-
- // The absolute stream offset of the end-of-frame delimiter.
- // Non-delimited frames (groups and non-packed repeated fields) reuse the
- // delimiter of their parent, even though the frame may not end there.
- //
- // NOTE: the JIT stores a slightly different value here for non-top frames.
- // It stores the value relative to the end of the enclosed message. But the
- // top frame is still stored the same way, which is important for ensuring
- // that calls from the JIT into C work correctly.
- uint64_t end_ofs;
- const uint32_t *base;
-
- // 0 indicates a length-delimited field.
- // A positive number indicates a known group.
- // A negative number indicates an unknown group.
- int32_t groupnum;
- upb_inttable *dispatch; // Not used by the JIT.
-} upb_pbdecoder_frame;
-
// The parameters one uses to construct a DecoderMethod.
// TODO(haberman): move allowjit here? Seems more convenient for users.
UPB_DEFINE_CLASS0(upb::pb::DecoderMethodOptions,
@@ -6879,22 +7039,31 @@ UPB_DEFINE_STRUCT(upb_pbdecodermethod, upb_refcounted,
upb_inttable dispatch;
));
+// Preallocation hint: decoder won't allocate more bytes than this when first
+// constructed. This hint may be an overestimate for some build configurations.
+// But if the decoder library is upgraded without recompiling the application,
+// it may be an underestimate.
+#define UPB_PB_DECODER_SIZE 4400
+
+#ifdef __cplusplus
+
// A Decoder receives binary protobuf data on its input sink and pushes the
// decoded data to its output sink.
-UPB_DEFINE_CLASS0(upb::pb::Decoder,
+class upb::pb::Decoder {
public:
// Constructs a decoder instance for the given method, which must outlive this
// decoder. Any errors during parsing will be set on the given status, which
// must also outlive this decoder.
- Decoder(const DecoderMethod* method, Status* status);
- ~Decoder();
+ //
+ // The sink must match the given method.
+ static Decoder* Create(Environment* env, const DecoderMethod* method,
+ Sink* output);
// Returns the DecoderMethod this decoder is parsing from.
- // TODO(haberman): Do users need to be able to rebind this?
const DecoderMethod* method() const;
- // Resets the state of the decoder.
- void Reset();
+ // The sink on which this decoder receives input.
+ BytesSink* input();
// Returns number of bytes successfully parsed.
//
@@ -6905,76 +7074,25 @@ UPB_DEFINE_CLASS0(upb::pb::Decoder,
// callback.
uint64_t BytesParsed() const;
- // Resets the output sink of the Decoder.
- // The given sink must match method()->dest_handlers().
+ // Gets/sets the parsing nesting limit. If the total number of nested
+ // submessages and repeated fields hits this limit, parsing will fail. This
+ // is a resource limit that controls the amount of memory used by the parsing
+ // stack.
//
- // This must be called at least once before the decoder can be used. It may
- // only be called with the decoder is in a state where it was just created or
- // reset with pipeline.Reset(). The given sink must be from the same pipeline
- // as this decoder.
- bool ResetOutput(Sink* sink);
-
- // The sink on which this decoder receives input.
- BytesSink* input();
-
- private:
- UPB_DISALLOW_COPY_AND_ASSIGN(Decoder);
-,
-UPB_DEFINE_STRUCT0(upb_pbdecoder, UPB_QUOTE(
- // Our input sink.
- upb_bytessink input_;
-
- // The decoder method we are parsing with (owned).
- const upb_pbdecodermethod *method_;
-
- size_t call_len;
- const uint32_t *pc, *last;
-
- // Current input buffer and its stream offset.
- const char *buf, *ptr, *end, *checkpoint;
-
- // End of the delimited region, relative to ptr, or NULL if not in this buf.
- const char *delim_end;
-
- // End of the delimited region, relative to ptr, or end if not in this buf.
- const char *data_end;
-
- // Overall stream offset of "buf."
- uint64_t bufstart_ofs;
-
- // Buffer for residual bytes not parsed from the previous buffer.
- // The maximum number of residual bytes we require is 12; a five-byte
- // unknown tag plus an eight-byte value, less one because the value
- // is only a partial value.
- char residual[12];
- char *residual_end;
+ // Setting the limit will fail if the parser is currently suspended at a depth
+ // greater than this, or if memory allocation of the stack fails.
+ size_t max_nesting() const;
+ bool set_max_nesting(size_t max);
- // Stores the user buffer passed to our decode function.
- const char *buf_param;
- size_t size_param;
- const upb_bufhandle *handle;
-
-#ifdef UPB_USE_JIT_X64
- // Used momentarily by the generated code to store a value while a user
- // function is called.
- uint32_t tmp_len;
+ void Reset();
- const void *saved_rsp;
-#endif
+ static const size_t kSize = UPB_PB_DECODER_SIZE;
- upb_status *status;
+ private:
+ UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder);
+};
- // Our internal stack.
- upb_pbdecoder_frame *top, *limit;
- upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING];
-#ifdef UPB_USE_JIT_X64
- // Each native stack frame needs two pointers, plus we need a few frames for
- // the enter/exit trampolines.
- const uint32_t *callstack[(UPB_DECODER_MAX_NESTING * 2) + 10];
-#else
- const uint32_t *callstack[UPB_DECODER_MAX_NESTING];
-#endif
-)));
+#endif // __cplusplus
// A class for caching protobuf processing code, whether bytecode for the
// interpreted decoder or machine code for the JIT.
@@ -7023,14 +7141,15 @@ UPB_DEFINE_STRUCT0(upb_pbcodecache,
UPB_BEGIN_EXTERN_C // {
-void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *method,
- upb_status *status);
-void upb_pbdecoder_uninit(upb_pbdecoder *d);
-void upb_pbdecoder_reset(upb_pbdecoder *d);
+upb_pbdecoder *upb_pbdecoder_create(upb_env *e,
+ const upb_pbdecodermethod *method,
+ upb_sink *output);
const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d);
-bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink);
upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d);
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d);
+size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d);
+bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max);
+void upb_pbdecoder_reset(upb_pbdecoder *d);
void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
const upb_handlers *h);
@@ -7065,27 +7184,27 @@ namespace upb {
namespace pb {
-inline Decoder::Decoder(const DecoderMethod* m, Status* s) {
- upb_pbdecoder_init(this, m, s);
-}
-inline Decoder::~Decoder() {
- upb_pbdecoder_uninit(this);
+// static
+inline Decoder* Decoder::Create(Environment* env, const DecoderMethod* m,
+ Sink* sink) {
+ return upb_pbdecoder_create(env, m, sink);
}
inline const DecoderMethod* Decoder::method() const {
return upb_pbdecoder_method(this);
}
-inline void Decoder::Reset() {
- upb_pbdecoder_reset(this);
+inline BytesSink* Decoder::input() {
+ return upb_pbdecoder_input(this);
}
inline uint64_t Decoder::BytesParsed() const {
return upb_pbdecoder_bytesparsed(this);
}
-inline bool Decoder::ResetOutput(Sink* sink) {
- return upb_pbdecoder_resetoutput(this, sink);
+inline size_t Decoder::max_nesting() const {
+ return upb_pbdecoder_maxnesting(this);
}
-inline BytesSink* Decoder::input() {
- return upb_pbdecoder_input(this);
+inline bool Decoder::set_max_nesting(size_t max) {
+ return upb_pbdecoder_setmaxnesting(this, max);
}
+inline void Decoder::Reset() { upb_pbdecoder_reset(this); }
inline DecoderMethodOptions::DecoderMethodOptions(const Handlers* h) {
upb_pbdecodermethodopts_init(this, h);
@@ -7242,6 +7361,95 @@ typedef struct {
#endif
} mgroup;
+// The maximum that any submessages can be nested. Matches proto2's limit.
+// This specifies the size of the decoder's statically-sized array and therefore
+// setting it high will cause the upb::pb::Decoder object to be larger.
+//
+// If necessary we can add a runtime-settable property to Decoder that allow
+// this to be larger than the compile-time setting, but this would add
+// complexity, particularly since we would have to decide how/if to give users
+// the ability to set a custom memory allocation function.
+#define UPB_DECODER_MAX_NESTING 64
+
+// Internal-only struct used by the decoder.
+typedef struct {
+ // Space optimization note: we store two pointers here that the JIT
+ // doesn't need at all; the upb_handlers* inside the sink and
+ // the dispatch table pointer. We can optimize so that the JIT uses
+ // smaller stack frames than the interpreter. The only thing we need
+ // to guarantee is that the fallback routines can find end_ofs.
+ upb_sink sink;
+
+ // The absolute stream offset of the end-of-frame delimiter.
+ // Non-delimited frames (groups and non-packed repeated fields) reuse the
+ // delimiter of their parent, even though the frame may not end there.
+ //
+ // NOTE: the JIT stores a slightly different value here for non-top frames.
+ // It stores the value relative to the end of the enclosed message. But the
+ // top frame is still stored the same way, which is important for ensuring
+ // that calls from the JIT into C work correctly.
+ uint64_t end_ofs;
+ const uint32_t *base;
+
+ // 0 indicates a length-delimited field.
+ // A positive number indicates a known group.
+ // A negative number indicates an unknown group.
+ int32_t groupnum;
+ upb_inttable *dispatch; // Not used by the JIT.
+} upb_pbdecoder_frame;
+
+struct upb_pbdecoder {
+ upb_env *env;
+
+ // Our input sink.
+ upb_bytessink input_;
+
+ // The decoder method we are parsing with (owned).
+ const upb_pbdecodermethod *method_;
+
+ size_t call_len;
+ const uint32_t *pc, *last;
+
+ // Current input buffer and its stream offset.
+ const char *buf, *ptr, *end, *checkpoint;
+
+ // End of the delimited region, relative to ptr, or NULL if not in this buf.
+ const char *delim_end;
+
+ // End of the delimited region, relative to ptr, or end if not in this buf.
+ const char *data_end;
+
+ // Overall stream offset of "buf."
+ uint64_t bufstart_ofs;
+
+ // Buffer for residual bytes not parsed from the previous buffer.
+ // The maximum number of residual bytes we require is 12; a five-byte
+ // unknown tag plus an eight-byte value, less one because the value
+ // is only a partial value.
+ char residual[12];
+ char *residual_end;
+
+ // Stores the user buffer passed to our decode function.
+ const char *buf_param;
+ size_t size_param;
+ const upb_bufhandle *handle;
+
+ // Our internal stack.
+ upb_pbdecoder_frame *stack, *top, *limit;
+ const uint32_t **callstack;
+ size_t stack_size;
+
+ upb_status *status;
+
+#ifdef UPB_USE_JIT_X64
+ // Used momentarily by the generated code to store a value while a user
+ // function is called.
+ uint32_t tmp_len;
+
+ const void *saved_rsp;
+#endif
+};
+
// Decoder entry points; used as handlers.
void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint);
@@ -7509,101 +7717,42 @@ UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder);
/* upb::pb::Encoder ***********************************************************/
-// The output buffer is divided into segments; a segment is a string of data
-// that is "ready to go" -- it does not need any varint lengths inserted into
-// the middle. The seams between segments are where varints will be inserted
-// once they are known.
-//
-// We also use the concept of a "run", which is a range of encoded bytes that
-// occur at a single submessage level. Every segment contains one or more runs.
-//
-// A segment can span messages. Consider:
-//
-// .--Submessage lengths---------.
-// | | |
-// | V V
-// V | |--------------- | |-----------------
-// Submessages: | |-----------------------------------------------
-// Top-level msg: ------------------------------------------------------------
-//
-// Segments: ----- ------------------- -----------------
-// Runs: *---- *--------------*--- *----------------
-// (* marks the start)
-//
-// Note that the top-level menssage is not in any segment because it does not
-// have any length preceding it.
-//
-// A segment is only interrupted when another length needs to be inserted. So
-// observe how the second segment spans both the inner submessage and part of
-// the next enclosing message.
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- uint32_t msglen; // The length to varint-encode before this segment.
- uint32_t seglen; // Length of the segment.
-} upb_pb_encoder_segment;
-
-UPB_DEFINE_CLASS0(upb::pb::Encoder,
- public:
- Encoder(const upb::Handlers* handlers);
- ~Encoder();
-
- static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* msg);
+// Preallocation hint: encoder won't allocate more bytes than this when first
+// constructed. This hint may be an overestimate for some build configurations.
+// But if the encoder library is upgraded without recompiling the application,
+// it may be an underestimate.
+#define UPB_PB_ENCODER_SIZE 768
- // Resets the state of the printer, so that it will expect to begin a new
- // document.
- void Reset();
+#ifdef __cplusplus
- // Resets the output pointer which will serve as our closure.
- void ResetOutput(BytesSink* output);
+class upb::pb::Encoder {
+ public:
+ // Creates a new encoder in the given environment. The Handlers must have
+ // come from NewHandlers() below.
+ static Encoder* Create(Environment* env, const Handlers* handlers,
+ BytesSink* output);
// The input to the encoder.
Sink* input();
- private:
- UPB_DISALLOW_COPY_AND_ASSIGN(Encoder);
-,
-UPB_DEFINE_STRUCT0(upb_pb_encoder, UPB_QUOTE(
- // Our input and output.
- upb_sink input_;
- upb_bytessink *output_;
+ // Creates a new set of handlers for this MessageDef.
+ static reffed_ptr<const Handlers> NewHandlers(const MessageDef* msg);
- // The "subclosure" -- used as the inner closure as part of the bytessink
- // protocol.
- void *subc;
+ static const size_t kSize = UPB_PB_ENCODER_SIZE;
- // The output buffer and limit, and our current write position. "buf"
- // initially points to "initbuf", but is dynamically allocated if we need to
- // grow beyond the initial size.
- char *buf, *ptr, *limit;
-
- // The beginning of the current run, or undefined if we are at the top level.
- char *runbegin;
-
- // The list of segments we are accumulating.
- upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
-
- // The stack of enclosing submessages. Each entry in the stack points to the
- // segment where this submessage's length is being accumulated.
- int stack[UPB_PBENCODER_MAX_NESTING], *top, *stacklimit;
-
- // Depth of startmsg/endmsg calls.
- int depth;
+ private:
+ UPB_DISALLOW_POD_OPS(Encoder, upb::pb::Encoder);
+};
- // Initial buffers for the output buffer and segment buffer. If we outgrow
- // these we will dynamically allocate bigger ones.
- char initbuf[256];
- upb_pb_encoder_segment seginitbuf[32];
-)));
+#endif
UPB_BEGIN_EXTERN_C
const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
const void *owner);
-void upb_pb_encoder_reset(upb_pb_encoder *e);
upb_sink *upb_pb_encoder_input(upb_pb_encoder *p);
-void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h);
-void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output);
-void upb_pb_encoder_uninit(upb_pb_encoder *e);
+upb_pb_encoder* upb_pb_encoder_create(upb_env* e, const upb_handlers* h,
+ upb_bytessink* output);
UPB_END_EXTERN_C
@@ -7611,17 +7760,9 @@ UPB_END_EXTERN_C
namespace upb {
namespace pb {
-inline Encoder::Encoder(const upb::Handlers* handlers) {
- upb_pb_encoder_init(this, handlers);
-}
-inline Encoder::~Encoder() {
- upb_pb_encoder_uninit(this);
-}
-inline void Encoder::Reset() {
- upb_pb_encoder_reset(this);
-}
-inline void Encoder::ResetOutput(BytesSink* output) {
- upb_pb_encoder_resetoutput(this, output);
+inline Encoder* Encoder::Create(Environment* env, const Handlers* handlers,
+ BytesSink* output) {
+ return upb_pb_encoder_create(env, handlers, output);
}
inline Sink* Encoder::input() {
return upb_pb_encoder_input(this);
@@ -7739,58 +7880,51 @@ class TextPrinter;
UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter);
-UPB_DEFINE_CLASS0(upb::pb::TextPrinter,
+#ifdef __cplusplus
+
+class upb::pb::TextPrinter {
public:
// The given handlers must have come from NewHandlers(). It must outlive the
// TextPrinter.
- explicit TextPrinter(const upb::Handlers* handlers);
+ static TextPrinter *Create(Environment *env, const upb::Handlers *handlers,
+ BytesSink *output);
void SetSingleLineMode(bool single_line);
- bool ResetOutput(BytesSink* output);
Sink* input();
// If handler caching becomes a requirement we can add a code cache as in
// decoder.h
static reffed_ptr<const Handlers> NewHandlers(const MessageDef* md);
+};
- private:
-,
-UPB_DEFINE_STRUCT0(upb_textprinter,
- upb_sink input_;
- upb_bytessink *output_;
- int indent_depth_;
- bool single_line_;
- void *subc;
-));
+#endif
-UPB_BEGIN_EXTERN_C // {
+UPB_BEGIN_EXTERN_C
// C API.
-void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h);
-void upb_textprinter_uninit(upb_textprinter *p);
-bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output);
+upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
+ upb_bytessink *output);
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line);
upb_sink *upb_textprinter_input(upb_textprinter *p);
const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
const void *owner);
-UPB_END_EXTERN_C // }
+UPB_END_EXTERN_C
#ifdef __cplusplus
namespace upb {
namespace pb {
-inline TextPrinter::TextPrinter(const upb::Handlers* handlers) {
- upb_textprinter_init(this, handlers);
+inline TextPrinter *TextPrinter::Create(Environment *env,
+ const upb::Handlers *handlers,
+ BytesSink *output) {
+ return upb_textprinter_create(env, handlers, output);
}
inline void TextPrinter::SetSingleLineMode(bool single_line) {
upb_textprinter_setsingleline(this, single_line);
}
-inline bool TextPrinter::ResetOutput(BytesSink* output) {
- return upb_textprinter_resetoutput(this, output);
-}
inline Sink* TextPrinter::input() {
return upb_textprinter_input(this);
}
@@ -7829,96 +7963,32 @@ class Parser;
UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser);
-// Internal-only struct used by the parser. A parser frame corresponds
-// one-to-one with a handler (sink) frame.
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- upb_sink sink;
- // The current message in which we're parsing, and the field whose value we're
- // expecting next.
- const upb_msgdef *m;
- const upb_fielddef *f;
-
- // We are in a repeated-field context, ready to emit mapentries as
- // submessages. This flag alters the start-of-object (open-brace) behavior to
- // begin a sequence of mapentry messages rather than a single submessage.
- bool is_map;
- // We are in a map-entry message context. This flag is set when parsing the
- // value field of a single map entry and indicates to all value-field parsers
- // (subobjects, strings, numbers, and bools) that the map-entry submessage
- // should end as soon as the value is parsed.
- bool is_mapentry;
- // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
- // message's map field that we're currently parsing. This differs from |f|
- // because |f| is the field in the *current* message (i.e., the map-entry
- // message itself), not the parent's field that leads to this map.
- const upb_fielddef *mapfield;
-} upb_jsonparser_frame;
-
-
/* upb::json::Parser **********************************************************/
-#define UPB_JSON_MAX_DEPTH 64
+// Preallocation hint: parser won't allocate more bytes than this when first
+// constructed. This hint may be an overestimate for some build configurations.
+// But if the parser library is upgraded without recompiling the application,
+// it may be an underestimate.
+#define UPB_JSON_PARSER_SIZE 3568
+
+#ifdef __cplusplus
// Parses an incoming BytesStream, pushing the results to the destination sink.
-UPB_DEFINE_CLASS0(upb::json::Parser,
+class upb::json::Parser {
public:
- Parser(Status* status);
- ~Parser();
-
- // Resets the state of the printer, so that it will expect to begin a new
- // document.
- void Reset();
+ static Parser* Create(Environment* env, Sink* output);
- // Resets the output pointer which will serve as our closure. Implies
- // Reset().
- void ResetOutput(Sink* output);
-
- // The input to the printer.
BytesSink* input();
-,
-UPB_DEFINE_STRUCT0(upb_json_parser,
- upb_byteshandler input_handler_;
- upb_bytessink input_;
-
- // Stack to track the JSON scopes we are in.
- upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
- upb_jsonparser_frame *top;
- upb_jsonparser_frame *limit;
-
- upb_status *status;
-
- // Ragel's internal parsing stack for the parsing state machine.
- int current_state;
- int parser_stack[UPB_JSON_MAX_DEPTH];
- int parser_top;
-
- // The handle for the current buffer.
- const upb_bufhandle *handle;
-
- // Accumulate buffer. See details in parser.rl.
- const char *accumulated;
- size_t accumulated_len;
- char *accumulate_buf;
- size_t accumulate_buf_size;
-
- // Multi-part text data. See details in parser.rl.
- int multipart_state;
- upb_selector_t string_selector;
- // Input capture. See details in parser.rl.
- const char *capture;
+ private:
+ UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser);
+};
- // Intermediate result of parsing a unicode escape sequence.
- uint32_t digit;
-));
+#endif
UPB_BEGIN_EXTERN_C
-void upb_json_parser_init(upb_json_parser *p, upb_status *status);
-void upb_json_parser_uninit(upb_json_parser *p);
-void upb_json_parser_reset(upb_json_parser *p);
-void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *output);
+upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output);
upb_bytessink *upb_json_parser_input(upb_json_parser *p);
UPB_END_EXTERN_C
@@ -7927,11 +7997,8 @@ UPB_END_EXTERN_C
namespace upb {
namespace json {
-inline Parser::Parser(Status* status) { upb_json_parser_init(this, status); }
-inline Parser::~Parser() { upb_json_parser_uninit(this); }
-inline void Parser::Reset() { upb_json_parser_reset(this); }
-inline void Parser::ResetOutput(Sink* output) {
- upb_json_parser_resetoutput(this, output);
+inline Parser* Parser::Create(Environment* env, Sink* output) {
+ return upb_json_parser_create(env, output);
}
inline BytesSink* Parser::input() {
return upb_json_parser_input(this);
@@ -7970,71 +8037,48 @@ UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer);
/* upb::json::Printer *********************************************************/
-// Prints an incoming stream of data to a BytesSink in JSON format.
-UPB_DEFINE_CLASS0(upb::json::Printer,
- public:
- Printer(const upb::Handlers* handlers);
- ~Printer();
+#define UPB_JSON_PRINTER_SIZE 168
- // Resets the state of the printer, so that it will expect to begin a new
- // document.
- void Reset();
+#ifdef __cplusplus
- // Resets the output pointer which will serve as our closure. Implies
- // Reset().
- void ResetOutput(BytesSink* output);
+// Prints an incoming stream of data to a BytesSink in JSON format.
+class upb::json::Printer {
+ public:
+ static Printer* Create(Environment* env, const upb::Handlers* handlers,
+ BytesSink* output);
// The input to the printer.
Sink* input();
// Returns handlers for printing according to the specified schema.
static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* md);
-,
-UPB_DEFINE_STRUCT0(upb_json_printer,
- upb_sink input_;
- // BytesSink closure.
- void *subc_;
- upb_bytessink *output_;
-
- // We track the depth so that we know when to emit startstr/endstr on the
- // output.
- int depth_;
- // Have we emitted the first element? This state is necessary to emit commas
- // without leaving a trailing comma in arrays/maps. We keep this state per
- // frame depth.
- //
- // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
- // We count frames (contexts in which we separate elements by commas) as both
- // repeated fields and messages (maps), and the worst case is a
- // message->repeated field->submessage->repeated field->... nesting.
- bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
-));
-UPB_BEGIN_EXTERN_C // {
+ static const size_t kSize = UPB_JSON_PRINTER_SIZE;
-// Native C API.
+ private:
+ UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer);
+};
+
+#endif
-void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h);
-void upb_json_printer_uninit(upb_json_printer *p);
-void upb_json_printer_reset(upb_json_printer *p);
-void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output);
+UPB_BEGIN_EXTERN_C
+
+// Native C API.
+upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
+ upb_bytessink *output);
upb_sink *upb_json_printer_input(upb_json_printer *p);
const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
const void *owner);
-UPB_END_EXTERN_C // }
+UPB_END_EXTERN_C
#ifdef __cplusplus
namespace upb {
namespace json {
-inline Printer::Printer(const upb::Handlers* handlers) {
- upb_json_printer_init(this, handlers);
-}
-inline Printer::~Printer() { upb_json_printer_uninit(this); }
-inline void Printer::Reset() { upb_json_printer_reset(this); }
-inline void Printer::ResetOutput(BytesSink* output) {
- upb_json_printer_resetoutput(this, output);
+inline Printer* Printer::Create(Environment* env, const upb::Handlers* handlers,
+ BytesSink* output) {
+ return upb_json_printer_create(env, handlers, output);
}
inline Sink* Printer::input() { return upb_json_printer_input(this); }
inline reffed_ptr<const Handlers> Printer::NewHandlers(