diff options
Diffstat (limited to 'src/google/protobuf/stubs/strutil.cc')
-rw-r--r-- | src/google/protobuf/stubs/strutil.cc | 145 |
1 files changed, 125 insertions, 20 deletions
diff --git a/src/google/protobuf/stubs/strutil.cc b/src/google/protobuf/stubs/strutil.cc index 552d416f..5bf9f2e6 100644 --- a/src/google/protobuf/stubs/strutil.cc +++ b/src/google/protobuf/stubs/strutil.cc @@ -31,14 +31,14 @@ // from google3/strings/strutil.cc #include <google/protobuf/stubs/strutil.h> -#include <google/protobuf/stubs/mathlimits.h> #include <errno.h> #include <float.h> // FLT_DIG and DBL_DIG -#include <limits> #include <limits.h> #include <stdio.h> +#include <cmath> #include <iterator> +#include <limits> #include <google/protobuf/stubs/stl_util.h> @@ -87,7 +87,7 @@ void StripString(string* s, const char* remove, char replacewith) { const char * str_start = s->c_str(); const char * str = str_start; for (str = strpbrk(str, remove); - str != NULL; + str != nullptr; str = strpbrk(str + 1, remove)) { (*s)[str - str_start] = replacewith; } @@ -102,7 +102,7 @@ void ReplaceCharacters(string *s, const char *remove, char replacewith) { const char *str_start = s->c_str(); const char *str = str_start; for (str = strpbrk(str, remove); - str != NULL; + str != nullptr; str = strpbrk(str + 1, remove)) { (*s)[str - str_start] = replacewith; } @@ -280,7 +280,7 @@ static void JoinStringsIterator(const ITERATOR& start, const ITERATOR& end, const char* delim, string* result) { - GOOGLE_CHECK(result != NULL); + GOOGLE_CHECK(result != nullptr); result->clear(); int delim_length = strlen(delim); @@ -318,7 +318,7 @@ void JoinStrings(const std::vector<string>& components, // result is truncated to 8 bits. // // The second call stores its errors in a supplied string vector. -// If the string vector pointer is NULL, it reports the errors with LOG(). +// If the string vector pointer is nullptr, it reports the errors with LOG(). // ---------------------------------------------------------------------- #define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7')) @@ -328,12 +328,12 @@ void JoinStrings(const std::vector<string>& components, #define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false) int UnescapeCEscapeSequences(const char* source, char* dest) { - return UnescapeCEscapeSequences(source, dest, NULL); + return UnescapeCEscapeSequences(source, dest, nullptr); } int UnescapeCEscapeSequences(const char* source, char* dest, std::vector<string> *errors) { - GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented."; + GOOGLE_DCHECK(errors == nullptr) << "Error reporting not implemented."; char* d = dest; const char* p = source; @@ -458,13 +458,13 @@ int UnescapeCEscapeSequences(const char* source, char* dest, // to be the same. // // The second call stores its errors in a supplied string vector. -// If the string vector pointer is NULL, it reports the errors with LOG(). +// If the string vector pointer is nullptr, it reports the errors with LOG(). // // In the first and second calls, the length of dest is returned. In the // the third call, the new string is returned. // ---------------------------------------------------------------------- int UnescapeCEscapeString(const string& src, string* dest) { - return UnescapeCEscapeString(src, dest, NULL); + return UnescapeCEscapeString(src, dest, nullptr); } int UnescapeCEscapeString(const string& src, string* dest, @@ -478,7 +478,7 @@ int UnescapeCEscapeString(const string& src, string* dest, string UnescapeCEscapeString(const string& src) { std::unique_ptr<char[]> unescaped(new char[src.size() + 1]); - int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL); + int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), nullptr); return string(unescaped.get(), len); } @@ -982,7 +982,7 @@ static const char two_ASCII_digits[100][2] = { char* FastUInt32ToBufferLeft(uint32 u, char* buffer) { uint32 digits; - const char *ASCII_digits = NULL; + const char *ASCII_digits = nullptr; // The idea of this implementation is to trim the number of divides to as few // as possible by using multiplication and subtraction rather than mod (%), // and by outputting two digits at a time rather than one. @@ -1073,7 +1073,7 @@ char* FastInt32ToBufferLeft(int32 i, char* buffer) { char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { int digits; - const char *ASCII_digits = NULL; + const char *ASCII_digits = nullptr; uint32 u = static_cast<uint32>(u64); if (u == u64) return FastUInt32ToBufferLeft(u, buffer); @@ -1231,7 +1231,7 @@ static inline bool IsValidFloatChar(char c) { void DelocalizeRadix(char* buffer) { // Fast check: if the buffer has a normal decimal point, assume no // translation is needed. - if (strchr(buffer, '.') != NULL) return; + if (strchr(buffer, '.') != nullptr) return; // Find the first unknown character. while (IsValidFloatChar(*buffer)) ++buffer; @@ -1268,7 +1268,7 @@ char* DoubleToBuffer(double value, char* buffer) { } else if (value == -std::numeric_limits<double>::infinity()) { strcpy(buffer, "-inf"); return buffer; - } else if (MathLimits<double>::IsNaN(value)) { + } else if (std::isnan(value)) { strcpy(buffer, "nan"); return buffer; } @@ -1286,7 +1286,7 @@ char* DoubleToBuffer(double value, char* buffer) { // of a double. This long double may have extra bits that make it compare // unequal to "value" even though it would be exactly equal if it were // truncated to a double. - volatile double parsed_value = strtod(buffer, NULL); + volatile double parsed_value = strtod(buffer, nullptr); if (parsed_value != value) { int snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value); @@ -1318,7 +1318,7 @@ inline bool CaseEqual(StringPiece s1, StringPiece s2) { } bool safe_strtob(StringPiece str, bool* value) { - GOOGLE_CHECK(value != NULL) << "NULL output boolean given."; + GOOGLE_CHECK(value != nullptr) << "nullptr output boolean given."; if (CaseEqual(str, "true") || CaseEqual(str, "t") || CaseEqual(str, "yes") || CaseEqual(str, "y") || CaseEqual(str, "1")) { @@ -1386,7 +1386,7 @@ char* FloatToBuffer(float value, char* buffer) { } else if (value == -std::numeric_limits<double>::infinity()) { strcpy(buffer, "-inf"); return buffer; - } else if (MathLimits<float>::IsNaN(value)) { + } else if (std::isnan(value)) { strcpy(buffer, "nan"); return buffer; } @@ -1619,7 +1619,7 @@ void StrAppend(string *result, int GlobalReplaceSubstring(const string& substring, const string& replacement, string* s) { - GOOGLE_CHECK(s != NULL); + GOOGLE_CHECK(s != nullptr); if (s->empty() || substring.empty()) return 0; string tmp; @@ -1969,7 +1969,7 @@ int Base64UnescapeInternal(const char *src_param, int szsrc, // for (i = 0; i < 255; i += 8) { // for (j = i; j < i + 8; j++) { // pos = strchr(Base64, j); -// if ((pos == NULL) || (j == 0)) +// if ((pos == nullptr) || (j == 0)) // idx = -1; // else // idx = pos - Base64; @@ -2300,5 +2300,110 @@ int UTF8FirstLetterNumBytes(const char* src, int len) { return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)]; } +// ---------------------------------------------------------------------- +// CleanStringLineEndings() +// Clean up a multi-line string to conform to Unix line endings. +// Reads from src and appends to dst, so usually dst should be empty. +// +// If there is no line ending at the end of a non-empty string, it can +// be added automatically. +// +// Four different types of input are correctly handled: +// +// - Unix/Linux files: line ending is LF: pass through unchanged +// +// - DOS/Windows files: line ending is CRLF: convert to LF +// +// - Legacy Mac files: line ending is CR: convert to LF +// +// - Garbled files: random line endings: convert gracefully +// lonely CR, lonely LF, CRLF: convert to LF +// +// @param src The multi-line string to convert +// @param dst The converted string is appended to this string +// @param auto_end_last_line Automatically terminate the last line +// +// Limitations: +// +// This does not do the right thing for CRCRLF files created by +// broken programs that do another Unix->DOS conversion on files +// that are already in CRLF format. For this, a two-pass approach +// brute-force would be needed that +// +// (1) determines the presence of LF (first one is ok) +// (2) if yes, removes any CR, else convert every CR to LF + +void CleanStringLineEndings(const string &src, string *dst, + bool auto_end_last_line) { + if (dst->empty()) { + dst->append(src); + CleanStringLineEndings(dst, auto_end_last_line); + } else { + string tmp = src; + CleanStringLineEndings(&tmp, auto_end_last_line); + dst->append(tmp); + } +} + +void CleanStringLineEndings(string *str, bool auto_end_last_line) { + ptrdiff_t output_pos = 0; + bool r_seen = false; + ptrdiff_t len = str->size(); + + char *p = &(*str)[0]; + + for (ptrdiff_t input_pos = 0; input_pos < len;) { + if (!r_seen && input_pos + 8 < len) { + uint64_t v = GOOGLE_UNALIGNED_LOAD64(p + input_pos); + // Loop over groups of 8 bytes at a time until we come across + // a word that has a byte whose value is less than or equal to + // '\r' (i.e. could contain a \n (0x0a) or a \r (0x0d) ). + // + // We use a has_less macro that quickly tests a whole 64-bit + // word to see if any of the bytes has a value < N. + // + // For more details, see: + // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord +#define has_less(x, n) (((x) - ~0ULL / 255 * (n)) & ~(x) & ~0ULL / 255 * 128) + if (!has_less(v, '\r' + 1)) { +#undef has_less + // No byte in this word has a value that could be a \r or a \n + if (output_pos != input_pos) { + GOOGLE_UNALIGNED_STORE64(p + output_pos, v); + } + input_pos += 8; + output_pos += 8; + continue; + } + } + string::const_reference in = p[input_pos]; + if (in == '\r') { + if (r_seen) p[output_pos++] = '\n'; + r_seen = true; + } else if (in == '\n') { + if (input_pos != output_pos) + p[output_pos++] = '\n'; + else + output_pos++; + r_seen = false; + } else { + if (r_seen) p[output_pos++] = '\n'; + r_seen = false; + if (input_pos != output_pos) + p[output_pos++] = in; + else + output_pos++; + } + input_pos++; + } + if (r_seen || + (auto_end_last_line && output_pos > 0 && p[output_pos - 1] != '\n')) { + str->resize(output_pos + 1); + str->operator[](output_pos) = '\n'; + } else if (output_pos < len) { + str->resize(output_pos); + } +} + } // namespace protobuf } // namespace google |