diff options
author | Feng Xiao <xfxyjwf@gmail.com> | 2018-08-08 17:00:41 -0700 |
---|---|---|
committer | Feng Xiao <xfxyjwf@gmail.com> | 2018-08-08 17:00:41 -0700 |
commit | 6bbe197e9c1b6fc38cbdc45e3bf83fa7ced792a3 (patch) | |
tree | e575738adf52d24b883cca5e8928a5ded31caba1 /src/google/protobuf/stubs | |
parent | e7746f487cb9cca685ffb1b3d7dccc5554b618a4 (diff) | |
download | protobuf-6bbe197e9c1b6fc38cbdc45e3bf83fa7ced792a3.tar.gz protobuf-6bbe197e9c1b6fc38cbdc45e3bf83fa7ced792a3.tar.bz2 protobuf-6bbe197e9c1b6fc38cbdc45e3bf83fa7ced792a3.zip |
Down-integrate from google3.
Diffstat (limited to 'src/google/protobuf/stubs')
-rw-r--r-- | src/google/protobuf/stubs/common.cc | 1 | ||||
-rw-r--r-- | src/google/protobuf/stubs/hash.h | 324 | ||||
-rw-r--r-- | src/google/protobuf/stubs/io_win32_unittest.cc | 12 | ||||
-rw-r--r-- | src/google/protobuf/stubs/port.h | 4 | ||||
-rw-r--r-- | src/google/protobuf/stubs/stringpiece.h | 3 | ||||
-rw-r--r-- | src/google/protobuf/stubs/strutil.cc | 113 | ||||
-rw-r--r-- | src/google/protobuf/stubs/strutil.h | 41 |
7 files changed, 166 insertions, 332 deletions
diff --git a/src/google/protobuf/stubs/common.cc b/src/google/protobuf/stubs/common.cc index 6544c6ed..b54fd1c8 100644 --- a/src/google/protobuf/stubs/common.cc +++ b/src/google/protobuf/stubs/common.cc @@ -30,7 +30,6 @@ // Author: kenton@google.com (Kenton Varda) -#include <google/protobuf/message_lite.h> // TODO(gerbens) ideally remove this. #include <google/protobuf/stubs/common.h> #include <google/protobuf/stubs/once.h> #include <google/protobuf/stubs/status.h> diff --git a/src/google/protobuf/stubs/hash.h b/src/google/protobuf/stubs/hash.h index fd8ba156..a093b406 100644 --- a/src/google/protobuf/stubs/hash.h +++ b/src/google/protobuf/stubs/hash.h @@ -29,8 +29,6 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Author: kenton@google.com (Kenton Varda) -// -// Deals with the fact that hash_map is not defined everywhere. #ifndef GOOGLE_PROTOBUF_STUBS_HASH_H__ #define GOOGLE_PROTOBUF_STUBS_HASH_H__ @@ -38,304 +36,19 @@ #include <string.h> #include <google/protobuf/stubs/common.h> -#define GOOGLE_PROTOBUF_HAVE_HASH_MAP 1 -#define GOOGLE_PROTOBUF_HAVE_HASH_SET 1 - -// Use C++11 unordered_{map|set} if available. -#if ((defined(_LIBCPP_STD_VER) && _LIBCPP_STD_VER >= 11) || \ - (((__cplusplus >= 201103L) || defined(__GXX_EXPERIMENTAL_CXX0X)) && \ - (__GLIBCXX__ > 20090421))) -# define GOOGLE_PROTOBUF_HAS_CXX11_HASH - -// For XCode >= 4.6: the compiler is clang with libc++. -// For earlier XCode version: the compiler is gcc-4.2.1 with libstdc++. -// libc++ provides <unordered_map> and friends even in non C++11 mode, -// and it does not provide the tr1 library. Therefore the following macro -// checks against this special case. -// Note that we should not test the __APPLE_CC__ version number or the -// __clang__ macro, since the new compiler can still use -stdlib=libstdc++, in -// which case <unordered_map> is not compilable without -std=c++11 -#elif defined(__APPLE_CC__) -# if __GNUC__ >= 4 -# define GOOGLE_PROTOBUF_HAS_TR1 -# else -// Not tested for gcc < 4... These setting can compile under 4.2.1 though. -# define GOOGLE_PROTOBUF_HASH_NAMESPACE __gnu_cxx -# include <ext/hash_map> -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include <ext/hash_set> -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# endif - -// Version checks for gcc. -#elif defined(__GNUC__) -// For GCC 4.x+, use tr1::unordered_map/set; otherwise, follow the -// instructions from: -// https://gcc.gnu.org/onlinedocs/libstdc++/manual/backwards.html -# if __GNUC__ >= 4 -# define GOOGLE_PROTOBUF_HAS_TR1 -# elif __GNUC__ >= 3 -# include <backward/hash_map> -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include <backward/hash_set> -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# if __GNUC__ == 3 && __GNUC_MINOR__ == 0 -# define GOOGLE_PROTOBUF_HASH_NAMESPACE std // GCC 3.0 -# else -# define GOOGLE_PROTOBUF_HASH_NAMESPACE __gnu_cxx // GCC 3.1 and later -# endif -# else -# define GOOGLE_PROTOBUF_HASH_NAMESPACE -# include <hash_map> -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include <hash_set> -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# endif - -// GCC <= 4.1 does not define std::tr1::hash for `long long int` or `long long unsigned int` -# if __GNUC__ == 4 && defined(__GNUC_MINOR__) && __GNUC_MINOR__ <= 1 -# undef GOOGLE_PROTOBUF_HAS_TR1 -# undef GOOGLE_PROTOBUF_HAVE_HASH_MAP -# undef GOOGLE_PROTOBUF_HAVE_HASH_SET -# endif - -// Version checks for MSC. -// Apparently Microsoft decided to move hash_map *back* to the std namespace in -// MSVC 2010: -// http://blogs.msdn.com/vcblog/archive/2009/05/25/stl-breaking-changes-in-visual-studio-2010-beta-1.aspx -// And.. they are moved back to stdext in MSVC 2013 (haven't checked 2012). That -// said, use unordered_map for MSVC 2010 and beyond is our safest bet. -#elif defined(_MSC_VER) -# if _MSC_VER >= 1600 // Since Visual Studio 2010 -# define GOOGLE_PROTOBUF_HAS_CXX11_HASH -# define GOOGLE_PROTOBUF_HASH_COMPARE std::hash_compare -# elif _MSC_VER >= 1500 // Since Visual Studio 2008 -# define GOOGLE_PROTOBUF_HASH_NAMESPACE stdext -# include <hash_map> -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include <hash_set> -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# define GOOGLE_PROTOBUF_HASH_COMPARE stdext::hash_compare -# define GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE -# elif _MSC_VER >= 1310 -# define GOOGLE_PROTOBUF_HASH_NAMESPACE stdext -# include <hash_map> -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include <hash_set> -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# define GOOGLE_PROTOBUF_HASH_COMPARE stdext::hash_compare -# else -# define GOOGLE_PROTOBUF_HASH_NAMESPACE std -# include <hash_map> -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include <hash_set> -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# define GOOGLE_PROTOBUF_HASH_COMPARE stdext::hash_compare -# endif - -// **ADD NEW COMPILERS SUPPORT HERE.** -// For other compilers, undefine the macro and fallback to use std::map, in -// google/protobuf/stubs/hash.h -#else -# undef GOOGLE_PROTOBUF_HAVE_HASH_MAP -# undef GOOGLE_PROTOBUF_HAVE_HASH_SET -#endif - -#if defined(GOOGLE_PROTOBUF_HAS_CXX11_HASH) -# define GOOGLE_PROTOBUF_HASH_NAMESPACE std -# include <unordered_map> -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS unordered_map -# include <unordered_set> -# define GOOGLE_PROTOBUF_HASH_SET_CLASS unordered_set -#elif defined(GOOGLE_PROTOBUF_HAS_TR1) -# define GOOGLE_PROTOBUF_HASH_NAMESPACE std::tr1 -# include <tr1/unordered_map> -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS unordered_map -# include <tr1/unordered_set> -# define GOOGLE_PROTOBUF_HASH_SET_CLASS unordered_set -#endif +#include <unordered_map> +#include <unordered_set> # define GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_START \ namespace google { \ namespace protobuf { # define GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_END }} -#undef GOOGLE_PROTOBUF_HAS_CXX11_HASH -#undef GOOGLE_PROTOBUF_HAS_TR1 - -#if defined(GOOGLE_PROTOBUF_HAVE_HASH_MAP) && \ - defined(GOOGLE_PROTOBUF_HAVE_HASH_SET) -#else -#define GOOGLE_PROTOBUF_MISSING_HASH -#include <map> -#include <set> -#endif - namespace google { namespace protobuf { -#ifdef GOOGLE_PROTOBUF_MISSING_HASH -#undef GOOGLE_PROTOBUF_MISSING_HASH - -// This system doesn't have hash_map or hash_set. Emulate them using map and -// set. - -// Make hash<T> be the same as less<T>. Note that everywhere where custom -// hash functions are defined in the protobuf code, they are also defined such -// that they can be used as "less" functions, which is required by MSVC anyway. template <typename Key> -struct hash { - // Dummy, just to make derivative hash functions compile. - int operator()(const Key& key) { - GOOGLE_LOG(FATAL) << "Should never be called."; - return 0; - } - - inline bool operator()(const Key& a, const Key& b) const { - return a < b; - } -}; - -// Make sure char* is compared by value. -template <> -struct hash<const char*> { - // Dummy, just to make derivative hash functions compile. - int operator()(const char* key) { - GOOGLE_LOG(FATAL) << "Should never be called."; - return 0; - } - - inline bool operator()(const char* a, const char* b) const { - return strcmp(a, b) < 0; - } -}; - -template <typename Key, typename Data, - typename HashFcn = hash<Key>, - typename EqualKey = std::equal_to<Key>, - typename Alloc = std::allocator< std::pair<const Key, Data> > > -class hash_map : public std::map<Key, Data, HashFcn, Alloc> { - typedef std::map<Key, Data, HashFcn, Alloc> BaseClass; - - public: - hash_map(int a = 0, const HashFcn& b = HashFcn(), - const EqualKey& c = EqualKey(), - const Alloc& d = Alloc()) : BaseClass(b, d) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -template <typename Key, - typename HashFcn = hash<Key>, - typename EqualKey = std::equal_to<Key> > -class hash_set : public std::set<Key, HashFcn> { - public: - hash_set(int = 0) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -#elif defined(_MSC_VER) && !defined(_STLPORT_VERSION) && \ - !(defined(_LIBCPP_STD_VER) && _LIBCPP_STD_VER >= 11) - -template <typename Key> -struct hash : public GOOGLE_PROTOBUF_HASH_COMPARE<Key> { -}; - -// MSVC's hash_compare<const char*> hashes based on the string contents but -// compares based on the string pointer. WTF? -class CstringLess { - public: - inline bool operator()(const char* a, const char* b) const { - return strcmp(a, b) < 0; - } -}; - -template <> -struct hash<const char*> - : public GOOGLE_PROTOBUF_HASH_COMPARE<const char*, CstringLess> {}; - -#ifdef GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE - -template <typename Key, typename HashFcn, typename EqualKey> -struct InternalHashCompare : public GOOGLE_PROTOBUF_HASH_COMPARE<Key> { - InternalHashCompare() {} - InternalHashCompare(HashFcn hashfcn, EqualKey equalkey) - : hashfcn_(hashfcn), equalkey_(equalkey) {} - size_t operator()(const Key& key) const { return hashfcn_(key); } - bool operator()(const Key& key1, const Key& key2) const { - return !equalkey_(key1, key2); - } - HashFcn hashfcn_; - EqualKey equalkey_; -}; - -template <typename Key, typename Data, - typename HashFcn = hash<Key>, - typename EqualKey = std::equal_to<Key>, - typename Alloc = std::allocator< std::pair<const Key, Data> > > -class hash_map - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, InternalHashCompare<Key, HashFcn, EqualKey>, Alloc> { - typedef GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, InternalHashCompare<Key, HashFcn, EqualKey>, Alloc> BaseClass; - - public: - hash_map(int a = 0, const HashFcn& b = HashFcn(), - const EqualKey& c = EqualKey(), const Alloc& d = Alloc()) - : BaseClass(InternalHashCompare<Key, HashFcn, EqualKey>(b, c), d) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -template <typename Key, typename HashFcn = hash<Key>, - typename EqualKey = std::equal_to<Key> > -class hash_set - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_SET_CLASS< - Key, InternalHashCompare<Key, HashFcn, EqualKey> > { - public: - hash_set(int = 0) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -#else // GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE - -template <typename Key, typename Data, - typename HashFcn = hash<Key>, - typename EqualKey = std::equal_to<Key>, - typename Alloc = std::allocator< std::pair<const Key, Data> > > -class hash_map - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, HashFcn, EqualKey, Alloc> { - typedef GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, HashFcn, EqualKey, Alloc> BaseClass; - - public: - hash_map(int a = 0, const HashFcn& b = HashFcn(), - const EqualKey& c = EqualKey(), - const Alloc& d = Alloc()) : BaseClass(a, b, c, d) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -template <typename Key, typename HashFcn = hash<Key>, - typename EqualKey = std::equal_to<Key> > -class hash_set - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_SET_CLASS< - Key, HashFcn, EqualKey> { - public: - hash_set(int = 0) {} - - HashFcn hash_function() const { return HashFcn(); } -}; -#endif // GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE - -#else // defined(_MSC_VER) && !defined(_STLPORT_VERSION) - -template <typename Key> -struct hash : public GOOGLE_PROTOBUF_HASH_NAMESPACE::hash<Key> { -}; +struct hash : public std::hash<Key> {}; template <typename Key> struct hash<const Key*> { @@ -364,37 +77,6 @@ struct hash<bool> { } }; -template <typename Key, typename Data, - typename HashFcn = hash<Key>, - typename EqualKey = std::equal_to<Key>, - typename Alloc = std::allocator< std::pair<const Key, Data> > > -class hash_map - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, HashFcn, EqualKey, Alloc> { - typedef GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, HashFcn, EqualKey, Alloc> BaseClass; - - public: - hash_map(int a = 0, const HashFcn& b = HashFcn(), - const EqualKey& c = EqualKey(), - const Alloc& d = Alloc()) : BaseClass(a, b, c, d) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -template <typename Key, typename HashFcn = hash<Key>, - typename EqualKey = std::equal_to<Key> > -class hash_set - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_SET_CLASS< - Key, HashFcn, EqualKey> { - public: - hash_set(int = 0) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -#endif // !GOOGLE_PROTOBUF_MISSING_HASH - template <> struct hash<string> { inline size_t operator()(const string& key) const { diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc index c933757c..9085ed91 100644 --- a/src/google/protobuf/stubs/io_win32_unittest.cc +++ b/src/google/protobuf/stubs/io_win32_unittest.cc @@ -63,17 +63,17 @@ namespace { const char kUtf8Text[] = { 'h', 'i', ' ', // utf-8: 11010000 10011111, utf-16: 100 0001 1111 = 0x041F - 0xd0, 0x9f, + static_cast<char>(0xd0), static_cast<char>(0x9f), // utf-8: 11010001 10000000, utf-16: 100 0100 0000 = 0x0440 - 0xd1, 0x80, + static_cast<char>(0xd1), static_cast<char>(0x80), // utf-8: 11010000 10111000, utf-16: 100 0011 1000 = 0x0438 - 0xd0, 0xb8, + static_cast<char>(0xd0), static_cast<char>(0xb8), // utf-8: 11010000 10110010, utf-16: 100 0011 0010 = 0x0432 - 0xd0, 0xb2, + static_cast<char>(0xd0), static_cast<char>(0xb2), // utf-8: 11010000 10110101, utf-16: 100 0011 0101 = 0x0435 - 0xd0, 0xb5, + static_cast<char>(0xd0), static_cast<char>(0xb5), // utf-8: 11010001 10000010, utf-16: 100 0100 0010 = 0x0442 - 0xd1, 0x82, 0 + static_cast<char>(0xd1), static_cast<char>(0x82), 0 }; const wchar_t kUtf16Text[] = { diff --git a/src/google/protobuf/stubs/port.h b/src/google/protobuf/stubs/port.h index 3aa6403b..4d3d0008 100644 --- a/src/google/protobuf/stubs/port.h +++ b/src/google/protobuf/stubs/port.h @@ -99,6 +99,7 @@ #endif #define PROTOBUF_RUNTIME_DEPRECATED(message) +#define GOOGLE_PROTOBUF_RUNTIME_DEPRECATED(message) // =================================================================== // from google3/base/port.h @@ -175,6 +176,9 @@ static const int64 kint64min = -kint64max - 1; static const uint32 kuint32max = 0xFFFFFFFFu; static const uint64 kuint64max = GOOGLE_ULONGLONG(0xFFFFFFFFFFFFFFFF); +#define GOOGLE_PROTOBUF_NAMESPACE "google::protobuf" +#define GOOGLE_PROTOBUF_NAMESPACE_ID google::protobuf + // ------------------------------------------------------------------- // Annotations: Some parts of the code have been annotated in ways that might // be useful to some compilers or tools, but are not supported universally. diff --git a/src/google/protobuf/stubs/stringpiece.h b/src/google/protobuf/stubs/stringpiece.h index 563ff75d..c7bdd810 100644 --- a/src/google/protobuf/stubs/stringpiece.h +++ b/src/google/protobuf/stubs/stringpiece.h @@ -463,6 +463,9 @@ struct StringPiecePod { std::string ToString() const { return std::string(data_, static_cast<size_t>(size_)); } + + operator string() const { return ToString(); } + private: const char* data_; stringpiece_ssize_type size_; diff --git a/src/google/protobuf/stubs/strutil.cc b/src/google/protobuf/stubs/strutil.cc index 552d416f..65f22a96 100644 --- a/src/google/protobuf/stubs/strutil.cc +++ b/src/google/protobuf/stubs/strutil.cc @@ -31,14 +31,14 @@ // from google3/strings/strutil.cc #include <google/protobuf/stubs/strutil.h> -#include <google/protobuf/stubs/mathlimits.h> #include <errno.h> #include <float.h> // FLT_DIG and DBL_DIG -#include <limits> #include <limits.h> #include <stdio.h> +#include <cmath> #include <iterator> +#include <limits> #include <google/protobuf/stubs/stl_util.h> @@ -1268,7 +1268,7 @@ char* DoubleToBuffer(double value, char* buffer) { } else if (value == -std::numeric_limits<double>::infinity()) { strcpy(buffer, "-inf"); return buffer; - } else if (MathLimits<double>::IsNaN(value)) { + } else if (std::isnan(value)) { strcpy(buffer, "nan"); return buffer; } @@ -1386,7 +1386,7 @@ char* FloatToBuffer(float value, char* buffer) { } else if (value == -std::numeric_limits<double>::infinity()) { strcpy(buffer, "-inf"); return buffer; - } else if (MathLimits<float>::IsNaN(value)) { + } else if (std::isnan(value)) { strcpy(buffer, "nan"); return buffer; } @@ -2300,5 +2300,110 @@ int UTF8FirstLetterNumBytes(const char* src, int len) { return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)]; } +// ---------------------------------------------------------------------- +// CleanStringLineEndings() +// Clean up a multi-line string to conform to Unix line endings. +// Reads from src and appends to dst, so usually dst should be empty. +// +// If there is no line ending at the end of a non-empty string, it can +// be added automatically. +// +// Four different types of input are correctly handled: +// +// - Unix/Linux files: line ending is LF: pass through unchanged +// +// - DOS/Windows files: line ending is CRLF: convert to LF +// +// - Legacy Mac files: line ending is CR: convert to LF +// +// - Garbled files: random line endings: convert gracefully +// lonely CR, lonely LF, CRLF: convert to LF +// +// @param src The multi-line string to convert +// @param dst The converted string is appended to this string +// @param auto_end_last_line Automatically terminate the last line +// +// Limitations: +// +// This does not do the right thing for CRCRLF files created by +// broken programs that do another Unix->DOS conversion on files +// that are already in CRLF format. For this, a two-pass approach +// brute-force would be needed that +// +// (1) determines the presence of LF (first one is ok) +// (2) if yes, removes any CR, else convert every CR to LF + +void CleanStringLineEndings(const string &src, string *dst, + bool auto_end_last_line) { + if (dst->empty()) { + dst->append(src); + CleanStringLineEndings(dst, auto_end_last_line); + } else { + string tmp = src; + CleanStringLineEndings(&tmp, auto_end_last_line); + dst->append(tmp); + } +} + +void CleanStringLineEndings(string *str, bool auto_end_last_line) { + ptrdiff_t output_pos = 0; + bool r_seen = false; + ptrdiff_t len = str->size(); + + char *p = &(*str)[0]; + + for (ptrdiff_t input_pos = 0; input_pos < len;) { + if (!r_seen && input_pos + 8 < len) { + uint64_t v = GOOGLE_UNALIGNED_LOAD64(p + input_pos); + // Loop over groups of 8 bytes at a time until we come across + // a word that has a byte whose value is less than or equal to + // '\r' (i.e. could contain a \n (0x0a) or a \r (0x0d) ). + // + // We use a has_less macro that quickly tests a whole 64-bit + // word to see if any of the bytes has a value < N. + // + // For more details, see: + // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord +#define has_less(x, n) (((x) - ~0ULL / 255 * (n)) & ~(x) & ~0ULL / 255 * 128) + if (!has_less(v, '\r' + 1)) { +#undef has_less + // No byte in this word has a value that could be a \r or a \n + if (output_pos != input_pos) { + GOOGLE_UNALIGNED_STORE64(p + output_pos, v); + } + input_pos += 8; + output_pos += 8; + continue; + } + } + string::const_reference in = p[input_pos]; + if (in == '\r') { + if (r_seen) p[output_pos++] = '\n'; + r_seen = true; + } else if (in == '\n') { + if (input_pos != output_pos) + p[output_pos++] = '\n'; + else + output_pos++; + r_seen = false; + } else { + if (r_seen) p[output_pos++] = '\n'; + r_seen = false; + if (input_pos != output_pos) + p[output_pos++] = in; + else + output_pos++; + } + input_pos++; + } + if (r_seen || + (auto_end_last_line && output_pos > 0 && p[output_pos - 1] != '\n')) { + str->resize(output_pos + 1); + str->operator[](output_pos) = '\n'; + } else if (output_pos < len) { + str->resize(output_pos); + } +} + } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/stubs/strutil.h b/src/google/protobuf/stubs/strutil.h index a839b8b3..d16ffc34 100644 --- a/src/google/protobuf/stubs/strutil.h +++ b/src/google/protobuf/stubs/strutil.h @@ -872,6 +872,47 @@ LIBPROTOBUF_EXPORT int EncodeAsUTF8Char(uint32 code_point, char* output); // ---------------------------------------------------------------------- LIBPROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len); +// From google3/third_party/absl/strings/escaping.h + +// ---------------------------------------------------------------------- +// CleanStringLineEndings() +// Clean up a multi-line string to conform to Unix line endings. +// Reads from src and appends to dst, so usually dst should be empty. +// +// If there is no line ending at the end of a non-empty string, it can +// be added automatically. +// +// Four different types of input are correctly handled: +// +// - Unix/Linux files: line ending is LF: pass through unchanged +// +// - DOS/Windows files: line ending is CRLF: convert to LF +// +// - Legacy Mac files: line ending is CR: convert to LF +// +// - Garbled files: random line endings: convert gracefully +// lonely CR, lonely LF, CRLF: convert to LF +// +// @param src The multi-line string to convert +// @param dst The converted string is appended to this string +// @param auto_end_last_line Automatically terminate the last line +// +// Limitations: +// +// This does not do the right thing for CRCRLF files created by +// broken programs that do another Unix->DOS conversion on files +// that are already in CRLF format. For this, a two-pass approach +// brute-force would be needed that +// +// (1) determines the presence of LF (first one is ok) +// (2) if yes, removes any CR, else convert every CR to LF +LIBPROTOBUF_EXPORT void CleanStringLineEndings(const string& src, string* dst, + bool auto_end_last_line); + +// Same as above, but transforms the argument in place. +LIBPROTOBUF_EXPORT void CleanStringLineEndings(string* str, + bool auto_end_last_line); + } // namespace protobuf } // namespace google |