aboutsummaryrefslogtreecommitdiff
path: root/src/google/protobuf/stubs
diff options
context:
space:
mode:
authorFeng Xiao <xfxyjwf@gmail.com>2018-08-08 17:00:41 -0700
committerFeng Xiao <xfxyjwf@gmail.com>2018-08-08 17:00:41 -0700
commit6bbe197e9c1b6fc38cbdc45e3bf83fa7ced792a3 (patch)
treee575738adf52d24b883cca5e8928a5ded31caba1 /src/google/protobuf/stubs
parente7746f487cb9cca685ffb1b3d7dccc5554b618a4 (diff)
downloadprotobuf-6bbe197e9c1b6fc38cbdc45e3bf83fa7ced792a3.tar.gz
protobuf-6bbe197e9c1b6fc38cbdc45e3bf83fa7ced792a3.tar.bz2
protobuf-6bbe197e9c1b6fc38cbdc45e3bf83fa7ced792a3.zip
Down-integrate from google3.
Diffstat (limited to 'src/google/protobuf/stubs')
-rw-r--r--src/google/protobuf/stubs/common.cc1
-rw-r--r--src/google/protobuf/stubs/hash.h324
-rw-r--r--src/google/protobuf/stubs/io_win32_unittest.cc12
-rw-r--r--src/google/protobuf/stubs/port.h4
-rw-r--r--src/google/protobuf/stubs/stringpiece.h3
-rw-r--r--src/google/protobuf/stubs/strutil.cc113
-rw-r--r--src/google/protobuf/stubs/strutil.h41
7 files changed, 166 insertions, 332 deletions
diff --git a/src/google/protobuf/stubs/common.cc b/src/google/protobuf/stubs/common.cc
index 6544c6ed..b54fd1c8 100644
--- a/src/google/protobuf/stubs/common.cc
+++ b/src/google/protobuf/stubs/common.cc
@@ -30,7 +30,6 @@
// Author: kenton@google.com (Kenton Varda)
-#include <google/protobuf/message_lite.h> // TODO(gerbens) ideally remove this.
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/once.h>
#include <google/protobuf/stubs/status.h>
diff --git a/src/google/protobuf/stubs/hash.h b/src/google/protobuf/stubs/hash.h
index fd8ba156..a093b406 100644
--- a/src/google/protobuf/stubs/hash.h
+++ b/src/google/protobuf/stubs/hash.h
@@ -29,8 +29,6 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Author: kenton@google.com (Kenton Varda)
-//
-// Deals with the fact that hash_map is not defined everywhere.
#ifndef GOOGLE_PROTOBUF_STUBS_HASH_H__
#define GOOGLE_PROTOBUF_STUBS_HASH_H__
@@ -38,304 +36,19 @@
#include <string.h>
#include <google/protobuf/stubs/common.h>
-#define GOOGLE_PROTOBUF_HAVE_HASH_MAP 1
-#define GOOGLE_PROTOBUF_HAVE_HASH_SET 1
-
-// Use C++11 unordered_{map|set} if available.
-#if ((defined(_LIBCPP_STD_VER) && _LIBCPP_STD_VER >= 11) || \
- (((__cplusplus >= 201103L) || defined(__GXX_EXPERIMENTAL_CXX0X)) && \
- (__GLIBCXX__ > 20090421)))
-# define GOOGLE_PROTOBUF_HAS_CXX11_HASH
-
-// For XCode >= 4.6: the compiler is clang with libc++.
-// For earlier XCode version: the compiler is gcc-4.2.1 with libstdc++.
-// libc++ provides <unordered_map> and friends even in non C++11 mode,
-// and it does not provide the tr1 library. Therefore the following macro
-// checks against this special case.
-// Note that we should not test the __APPLE_CC__ version number or the
-// __clang__ macro, since the new compiler can still use -stdlib=libstdc++, in
-// which case <unordered_map> is not compilable without -std=c++11
-#elif defined(__APPLE_CC__)
-# if __GNUC__ >= 4
-# define GOOGLE_PROTOBUF_HAS_TR1
-# else
-// Not tested for gcc < 4... These setting can compile under 4.2.1 though.
-# define GOOGLE_PROTOBUF_HASH_NAMESPACE __gnu_cxx
-# include <ext/hash_map>
-# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map
-# include <ext/hash_set>
-# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set
-# endif
-
-// Version checks for gcc.
-#elif defined(__GNUC__)
-// For GCC 4.x+, use tr1::unordered_map/set; otherwise, follow the
-// instructions from:
-// https://gcc.gnu.org/onlinedocs/libstdc++/manual/backwards.html
-# if __GNUC__ >= 4
-# define GOOGLE_PROTOBUF_HAS_TR1
-# elif __GNUC__ >= 3
-# include <backward/hash_map>
-# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map
-# include <backward/hash_set>
-# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set
-# if __GNUC__ == 3 && __GNUC_MINOR__ == 0
-# define GOOGLE_PROTOBUF_HASH_NAMESPACE std // GCC 3.0
-# else
-# define GOOGLE_PROTOBUF_HASH_NAMESPACE __gnu_cxx // GCC 3.1 and later
-# endif
-# else
-# define GOOGLE_PROTOBUF_HASH_NAMESPACE
-# include <hash_map>
-# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map
-# include <hash_set>
-# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set
-# endif
-
-// GCC <= 4.1 does not define std::tr1::hash for `long long int` or `long long unsigned int`
-# if __GNUC__ == 4 && defined(__GNUC_MINOR__) && __GNUC_MINOR__ <= 1
-# undef GOOGLE_PROTOBUF_HAS_TR1
-# undef GOOGLE_PROTOBUF_HAVE_HASH_MAP
-# undef GOOGLE_PROTOBUF_HAVE_HASH_SET
-# endif
-
-// Version checks for MSC.
-// Apparently Microsoft decided to move hash_map *back* to the std namespace in
-// MSVC 2010:
-// http://blogs.msdn.com/vcblog/archive/2009/05/25/stl-breaking-changes-in-visual-studio-2010-beta-1.aspx
-// And.. they are moved back to stdext in MSVC 2013 (haven't checked 2012). That
-// said, use unordered_map for MSVC 2010 and beyond is our safest bet.
-#elif defined(_MSC_VER)
-# if _MSC_VER >= 1600 // Since Visual Studio 2010
-# define GOOGLE_PROTOBUF_HAS_CXX11_HASH
-# define GOOGLE_PROTOBUF_HASH_COMPARE std::hash_compare
-# elif _MSC_VER >= 1500 // Since Visual Studio 2008
-# define GOOGLE_PROTOBUF_HASH_NAMESPACE stdext
-# include <hash_map>
-# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map
-# include <hash_set>
-# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set
-# define GOOGLE_PROTOBUF_HASH_COMPARE stdext::hash_compare
-# define GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE
-# elif _MSC_VER >= 1310
-# define GOOGLE_PROTOBUF_HASH_NAMESPACE stdext
-# include <hash_map>
-# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map
-# include <hash_set>
-# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set
-# define GOOGLE_PROTOBUF_HASH_COMPARE stdext::hash_compare
-# else
-# define GOOGLE_PROTOBUF_HASH_NAMESPACE std
-# include <hash_map>
-# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map
-# include <hash_set>
-# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set
-# define GOOGLE_PROTOBUF_HASH_COMPARE stdext::hash_compare
-# endif
-
-// **ADD NEW COMPILERS SUPPORT HERE.**
-// For other compilers, undefine the macro and fallback to use std::map, in
-// google/protobuf/stubs/hash.h
-#else
-# undef GOOGLE_PROTOBUF_HAVE_HASH_MAP
-# undef GOOGLE_PROTOBUF_HAVE_HASH_SET
-#endif
-
-#if defined(GOOGLE_PROTOBUF_HAS_CXX11_HASH)
-# define GOOGLE_PROTOBUF_HASH_NAMESPACE std
-# include <unordered_map>
-# define GOOGLE_PROTOBUF_HASH_MAP_CLASS unordered_map
-# include <unordered_set>
-# define GOOGLE_PROTOBUF_HASH_SET_CLASS unordered_set
-#elif defined(GOOGLE_PROTOBUF_HAS_TR1)
-# define GOOGLE_PROTOBUF_HASH_NAMESPACE std::tr1
-# include <tr1/unordered_map>
-# define GOOGLE_PROTOBUF_HASH_MAP_CLASS unordered_map
-# include <tr1/unordered_set>
-# define GOOGLE_PROTOBUF_HASH_SET_CLASS unordered_set
-#endif
+#include <unordered_map>
+#include <unordered_set>
# define GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_START \
namespace google { \
namespace protobuf {
# define GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_END }}
-#undef GOOGLE_PROTOBUF_HAS_CXX11_HASH
-#undef GOOGLE_PROTOBUF_HAS_TR1
-
-#if defined(GOOGLE_PROTOBUF_HAVE_HASH_MAP) && \
- defined(GOOGLE_PROTOBUF_HAVE_HASH_SET)
-#else
-#define GOOGLE_PROTOBUF_MISSING_HASH
-#include <map>
-#include <set>
-#endif
-
namespace google {
namespace protobuf {
-#ifdef GOOGLE_PROTOBUF_MISSING_HASH
-#undef GOOGLE_PROTOBUF_MISSING_HASH
-
-// This system doesn't have hash_map or hash_set. Emulate them using map and
-// set.
-
-// Make hash<T> be the same as less<T>. Note that everywhere where custom
-// hash functions are defined in the protobuf code, they are also defined such
-// that they can be used as "less" functions, which is required by MSVC anyway.
template <typename Key>
-struct hash {
- // Dummy, just to make derivative hash functions compile.
- int operator()(const Key& key) {
- GOOGLE_LOG(FATAL) << "Should never be called.";
- return 0;
- }
-
- inline bool operator()(const Key& a, const Key& b) const {
- return a < b;
- }
-};
-
-// Make sure char* is compared by value.
-template <>
-struct hash<const char*> {
- // Dummy, just to make derivative hash functions compile.
- int operator()(const char* key) {
- GOOGLE_LOG(FATAL) << "Should never be called.";
- return 0;
- }
-
- inline bool operator()(const char* a, const char* b) const {
- return strcmp(a, b) < 0;
- }
-};
-
-template <typename Key, typename Data,
- typename HashFcn = hash<Key>,
- typename EqualKey = std::equal_to<Key>,
- typename Alloc = std::allocator< std::pair<const Key, Data> > >
-class hash_map : public std::map<Key, Data, HashFcn, Alloc> {
- typedef std::map<Key, Data, HashFcn, Alloc> BaseClass;
-
- public:
- hash_map(int a = 0, const HashFcn& b = HashFcn(),
- const EqualKey& c = EqualKey(),
- const Alloc& d = Alloc()) : BaseClass(b, d) {}
-
- HashFcn hash_function() const { return HashFcn(); }
-};
-
-template <typename Key,
- typename HashFcn = hash<Key>,
- typename EqualKey = std::equal_to<Key> >
-class hash_set : public std::set<Key, HashFcn> {
- public:
- hash_set(int = 0) {}
-
- HashFcn hash_function() const { return HashFcn(); }
-};
-
-#elif defined(_MSC_VER) && !defined(_STLPORT_VERSION) && \
- !(defined(_LIBCPP_STD_VER) && _LIBCPP_STD_VER >= 11)
-
-template <typename Key>
-struct hash : public GOOGLE_PROTOBUF_HASH_COMPARE<Key> {
-};
-
-// MSVC's hash_compare<const char*> hashes based on the string contents but
-// compares based on the string pointer. WTF?
-class CstringLess {
- public:
- inline bool operator()(const char* a, const char* b) const {
- return strcmp(a, b) < 0;
- }
-};
-
-template <>
-struct hash<const char*>
- : public GOOGLE_PROTOBUF_HASH_COMPARE<const char*, CstringLess> {};
-
-#ifdef GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE
-
-template <typename Key, typename HashFcn, typename EqualKey>
-struct InternalHashCompare : public GOOGLE_PROTOBUF_HASH_COMPARE<Key> {
- InternalHashCompare() {}
- InternalHashCompare(HashFcn hashfcn, EqualKey equalkey)
- : hashfcn_(hashfcn), equalkey_(equalkey) {}
- size_t operator()(const Key& key) const { return hashfcn_(key); }
- bool operator()(const Key& key1, const Key& key2) const {
- return !equalkey_(key1, key2);
- }
- HashFcn hashfcn_;
- EqualKey equalkey_;
-};
-
-template <typename Key, typename Data,
- typename HashFcn = hash<Key>,
- typename EqualKey = std::equal_to<Key>,
- typename Alloc = std::allocator< std::pair<const Key, Data> > >
-class hash_map
- : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS<
- Key, Data, InternalHashCompare<Key, HashFcn, EqualKey>, Alloc> {
- typedef GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS<
- Key, Data, InternalHashCompare<Key, HashFcn, EqualKey>, Alloc> BaseClass;
-
- public:
- hash_map(int a = 0, const HashFcn& b = HashFcn(),
- const EqualKey& c = EqualKey(), const Alloc& d = Alloc())
- : BaseClass(InternalHashCompare<Key, HashFcn, EqualKey>(b, c), d) {}
-
- HashFcn hash_function() const { return HashFcn(); }
-};
-
-template <typename Key, typename HashFcn = hash<Key>,
- typename EqualKey = std::equal_to<Key> >
-class hash_set
- : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_SET_CLASS<
- Key, InternalHashCompare<Key, HashFcn, EqualKey> > {
- public:
- hash_set(int = 0) {}
-
- HashFcn hash_function() const { return HashFcn(); }
-};
-
-#else // GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE
-
-template <typename Key, typename Data,
- typename HashFcn = hash<Key>,
- typename EqualKey = std::equal_to<Key>,
- typename Alloc = std::allocator< std::pair<const Key, Data> > >
-class hash_map
- : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS<
- Key, Data, HashFcn, EqualKey, Alloc> {
- typedef GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS<
- Key, Data, HashFcn, EqualKey, Alloc> BaseClass;
-
- public:
- hash_map(int a = 0, const HashFcn& b = HashFcn(),
- const EqualKey& c = EqualKey(),
- const Alloc& d = Alloc()) : BaseClass(a, b, c, d) {}
-
- HashFcn hash_function() const { return HashFcn(); }
-};
-
-template <typename Key, typename HashFcn = hash<Key>,
- typename EqualKey = std::equal_to<Key> >
-class hash_set
- : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_SET_CLASS<
- Key, HashFcn, EqualKey> {
- public:
- hash_set(int = 0) {}
-
- HashFcn hash_function() const { return HashFcn(); }
-};
-#endif // GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE
-
-#else // defined(_MSC_VER) && !defined(_STLPORT_VERSION)
-
-template <typename Key>
-struct hash : public GOOGLE_PROTOBUF_HASH_NAMESPACE::hash<Key> {
-};
+struct hash : public std::hash<Key> {};
template <typename Key>
struct hash<const Key*> {
@@ -364,37 +77,6 @@ struct hash<bool> {
}
};
-template <typename Key, typename Data,
- typename HashFcn = hash<Key>,
- typename EqualKey = std::equal_to<Key>,
- typename Alloc = std::allocator< std::pair<const Key, Data> > >
-class hash_map
- : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS<
- Key, Data, HashFcn, EqualKey, Alloc> {
- typedef GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS<
- Key, Data, HashFcn, EqualKey, Alloc> BaseClass;
-
- public:
- hash_map(int a = 0, const HashFcn& b = HashFcn(),
- const EqualKey& c = EqualKey(),
- const Alloc& d = Alloc()) : BaseClass(a, b, c, d) {}
-
- HashFcn hash_function() const { return HashFcn(); }
-};
-
-template <typename Key, typename HashFcn = hash<Key>,
- typename EqualKey = std::equal_to<Key> >
-class hash_set
- : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_SET_CLASS<
- Key, HashFcn, EqualKey> {
- public:
- hash_set(int = 0) {}
-
- HashFcn hash_function() const { return HashFcn(); }
-};
-
-#endif // !GOOGLE_PROTOBUF_MISSING_HASH
-
template <>
struct hash<string> {
inline size_t operator()(const string& key) const {
diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc
index c933757c..9085ed91 100644
--- a/src/google/protobuf/stubs/io_win32_unittest.cc
+++ b/src/google/protobuf/stubs/io_win32_unittest.cc
@@ -63,17 +63,17 @@ namespace {
const char kUtf8Text[] = {
'h', 'i', ' ',
// utf-8: 11010000 10011111, utf-16: 100 0001 1111 = 0x041F
- 0xd0, 0x9f,
+ static_cast<char>(0xd0), static_cast<char>(0x9f),
// utf-8: 11010001 10000000, utf-16: 100 0100 0000 = 0x0440
- 0xd1, 0x80,
+ static_cast<char>(0xd1), static_cast<char>(0x80),
// utf-8: 11010000 10111000, utf-16: 100 0011 1000 = 0x0438
- 0xd0, 0xb8,
+ static_cast<char>(0xd0), static_cast<char>(0xb8),
// utf-8: 11010000 10110010, utf-16: 100 0011 0010 = 0x0432
- 0xd0, 0xb2,
+ static_cast<char>(0xd0), static_cast<char>(0xb2),
// utf-8: 11010000 10110101, utf-16: 100 0011 0101 = 0x0435
- 0xd0, 0xb5,
+ static_cast<char>(0xd0), static_cast<char>(0xb5),
// utf-8: 11010001 10000010, utf-16: 100 0100 0010 = 0x0442
- 0xd1, 0x82, 0
+ static_cast<char>(0xd1), static_cast<char>(0x82), 0
};
const wchar_t kUtf16Text[] = {
diff --git a/src/google/protobuf/stubs/port.h b/src/google/protobuf/stubs/port.h
index 3aa6403b..4d3d0008 100644
--- a/src/google/protobuf/stubs/port.h
+++ b/src/google/protobuf/stubs/port.h
@@ -99,6 +99,7 @@
#endif
#define PROTOBUF_RUNTIME_DEPRECATED(message)
+#define GOOGLE_PROTOBUF_RUNTIME_DEPRECATED(message)
// ===================================================================
// from google3/base/port.h
@@ -175,6 +176,9 @@ static const int64 kint64min = -kint64max - 1;
static const uint32 kuint32max = 0xFFFFFFFFu;
static const uint64 kuint64max = GOOGLE_ULONGLONG(0xFFFFFFFFFFFFFFFF);
+#define GOOGLE_PROTOBUF_NAMESPACE "google::protobuf"
+#define GOOGLE_PROTOBUF_NAMESPACE_ID google::protobuf
+
// -------------------------------------------------------------------
// Annotations: Some parts of the code have been annotated in ways that might
// be useful to some compilers or tools, but are not supported universally.
diff --git a/src/google/protobuf/stubs/stringpiece.h b/src/google/protobuf/stubs/stringpiece.h
index 563ff75d..c7bdd810 100644
--- a/src/google/protobuf/stubs/stringpiece.h
+++ b/src/google/protobuf/stubs/stringpiece.h
@@ -463,6 +463,9 @@ struct StringPiecePod {
std::string ToString() const {
return std::string(data_, static_cast<size_t>(size_));
}
+
+ operator string() const { return ToString(); }
+
private:
const char* data_;
stringpiece_ssize_type size_;
diff --git a/src/google/protobuf/stubs/strutil.cc b/src/google/protobuf/stubs/strutil.cc
index 552d416f..65f22a96 100644
--- a/src/google/protobuf/stubs/strutil.cc
+++ b/src/google/protobuf/stubs/strutil.cc
@@ -31,14 +31,14 @@
// from google3/strings/strutil.cc
#include <google/protobuf/stubs/strutil.h>
-#include <google/protobuf/stubs/mathlimits.h>
#include <errno.h>
#include <float.h> // FLT_DIG and DBL_DIG
-#include <limits>
#include <limits.h>
#include <stdio.h>
+#include <cmath>
#include <iterator>
+#include <limits>
#include <google/protobuf/stubs/stl_util.h>
@@ -1268,7 +1268,7 @@ char* DoubleToBuffer(double value, char* buffer) {
} else if (value == -std::numeric_limits<double>::infinity()) {
strcpy(buffer, "-inf");
return buffer;
- } else if (MathLimits<double>::IsNaN(value)) {
+ } else if (std::isnan(value)) {
strcpy(buffer, "nan");
return buffer;
}
@@ -1386,7 +1386,7 @@ char* FloatToBuffer(float value, char* buffer) {
} else if (value == -std::numeric_limits<double>::infinity()) {
strcpy(buffer, "-inf");
return buffer;
- } else if (MathLimits<float>::IsNaN(value)) {
+ } else if (std::isnan(value)) {
strcpy(buffer, "nan");
return buffer;
}
@@ -2300,5 +2300,110 @@ int UTF8FirstLetterNumBytes(const char* src, int len) {
return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];
}
+// ----------------------------------------------------------------------
+// CleanStringLineEndings()
+// Clean up a multi-line string to conform to Unix line endings.
+// Reads from src and appends to dst, so usually dst should be empty.
+//
+// If there is no line ending at the end of a non-empty string, it can
+// be added automatically.
+//
+// Four different types of input are correctly handled:
+//
+// - Unix/Linux files: line ending is LF: pass through unchanged
+//
+// - DOS/Windows files: line ending is CRLF: convert to LF
+//
+// - Legacy Mac files: line ending is CR: convert to LF
+//
+// - Garbled files: random line endings: convert gracefully
+// lonely CR, lonely LF, CRLF: convert to LF
+//
+// @param src The multi-line string to convert
+// @param dst The converted string is appended to this string
+// @param auto_end_last_line Automatically terminate the last line
+//
+// Limitations:
+//
+// This does not do the right thing for CRCRLF files created by
+// broken programs that do another Unix->DOS conversion on files
+// that are already in CRLF format. For this, a two-pass approach
+// brute-force would be needed that
+//
+// (1) determines the presence of LF (first one is ok)
+// (2) if yes, removes any CR, else convert every CR to LF
+
+void CleanStringLineEndings(const string &src, string *dst,
+ bool auto_end_last_line) {
+ if (dst->empty()) {
+ dst->append(src);
+ CleanStringLineEndings(dst, auto_end_last_line);
+ } else {
+ string tmp = src;
+ CleanStringLineEndings(&tmp, auto_end_last_line);
+ dst->append(tmp);
+ }
+}
+
+void CleanStringLineEndings(string *str, bool auto_end_last_line) {
+ ptrdiff_t output_pos = 0;
+ bool r_seen = false;
+ ptrdiff_t len = str->size();
+
+ char *p = &(*str)[0];
+
+ for (ptrdiff_t input_pos = 0; input_pos < len;) {
+ if (!r_seen && input_pos + 8 < len) {
+ uint64_t v = GOOGLE_UNALIGNED_LOAD64(p + input_pos);
+ // Loop over groups of 8 bytes at a time until we come across
+ // a word that has a byte whose value is less than or equal to
+ // '\r' (i.e. could contain a \n (0x0a) or a \r (0x0d) ).
+ //
+ // We use a has_less macro that quickly tests a whole 64-bit
+ // word to see if any of the bytes has a value < N.
+ //
+ // For more details, see:
+ // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
+#define has_less(x, n) (((x) - ~0ULL / 255 * (n)) & ~(x) & ~0ULL / 255 * 128)
+ if (!has_less(v, '\r' + 1)) {
+#undef has_less
+ // No byte in this word has a value that could be a \r or a \n
+ if (output_pos != input_pos) {
+ GOOGLE_UNALIGNED_STORE64(p + output_pos, v);
+ }
+ input_pos += 8;
+ output_pos += 8;
+ continue;
+ }
+ }
+ string::const_reference in = p[input_pos];
+ if (in == '\r') {
+ if (r_seen) p[output_pos++] = '\n';
+ r_seen = true;
+ } else if (in == '\n') {
+ if (input_pos != output_pos)
+ p[output_pos++] = '\n';
+ else
+ output_pos++;
+ r_seen = false;
+ } else {
+ if (r_seen) p[output_pos++] = '\n';
+ r_seen = false;
+ if (input_pos != output_pos)
+ p[output_pos++] = in;
+ else
+ output_pos++;
+ }
+ input_pos++;
+ }
+ if (r_seen ||
+ (auto_end_last_line && output_pos > 0 && p[output_pos - 1] != '\n')) {
+ str->resize(output_pos + 1);
+ str->operator[](output_pos) = '\n';
+ } else if (output_pos < len) {
+ str->resize(output_pos);
+ }
+}
+
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/stubs/strutil.h b/src/google/protobuf/stubs/strutil.h
index a839b8b3..d16ffc34 100644
--- a/src/google/protobuf/stubs/strutil.h
+++ b/src/google/protobuf/stubs/strutil.h
@@ -872,6 +872,47 @@ LIBPROTOBUF_EXPORT int EncodeAsUTF8Char(uint32 code_point, char* output);
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len);
+// From google3/third_party/absl/strings/escaping.h
+
+// ----------------------------------------------------------------------
+// CleanStringLineEndings()
+// Clean up a multi-line string to conform to Unix line endings.
+// Reads from src and appends to dst, so usually dst should be empty.
+//
+// If there is no line ending at the end of a non-empty string, it can
+// be added automatically.
+//
+// Four different types of input are correctly handled:
+//
+// - Unix/Linux files: line ending is LF: pass through unchanged
+//
+// - DOS/Windows files: line ending is CRLF: convert to LF
+//
+// - Legacy Mac files: line ending is CR: convert to LF
+//
+// - Garbled files: random line endings: convert gracefully
+// lonely CR, lonely LF, CRLF: convert to LF
+//
+// @param src The multi-line string to convert
+// @param dst The converted string is appended to this string
+// @param auto_end_last_line Automatically terminate the last line
+//
+// Limitations:
+//
+// This does not do the right thing for CRCRLF files created by
+// broken programs that do another Unix->DOS conversion on files
+// that are already in CRLF format. For this, a two-pass approach
+// brute-force would be needed that
+//
+// (1) determines the presence of LF (first one is ok)
+// (2) if yes, removes any CR, else convert every CR to LF
+LIBPROTOBUF_EXPORT void CleanStringLineEndings(const string& src, string* dst,
+ bool auto_end_last_line);
+
+// Same as above, but transforms the argument in place.
+LIBPROTOBUF_EXPORT void CleanStringLineEndings(string* str,
+ bool auto_end_last_line);
+
} // namespace protobuf
} // namespace google