aboutsummaryrefslogtreecommitdiff
path: root/src/google/protobuf/stubs/strutil.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/stubs/strutil.cc')
-rw-r--r--src/google/protobuf/stubs/strutil.cc145
1 files changed, 125 insertions, 20 deletions
diff --git a/src/google/protobuf/stubs/strutil.cc b/src/google/protobuf/stubs/strutil.cc
index 552d416f..5bf9f2e6 100644
--- a/src/google/protobuf/stubs/strutil.cc
+++ b/src/google/protobuf/stubs/strutil.cc
@@ -31,14 +31,14 @@
// from google3/strings/strutil.cc
#include <google/protobuf/stubs/strutil.h>
-#include <google/protobuf/stubs/mathlimits.h>
#include <errno.h>
#include <float.h> // FLT_DIG and DBL_DIG
-#include <limits>
#include <limits.h>
#include <stdio.h>
+#include <cmath>
#include <iterator>
+#include <limits>
#include <google/protobuf/stubs/stl_util.h>
@@ -87,7 +87,7 @@ void StripString(string* s, const char* remove, char replacewith) {
const char * str_start = s->c_str();
const char * str = str_start;
for (str = strpbrk(str, remove);
- str != NULL;
+ str != nullptr;
str = strpbrk(str + 1, remove)) {
(*s)[str - str_start] = replacewith;
}
@@ -102,7 +102,7 @@ void ReplaceCharacters(string *s, const char *remove, char replacewith) {
const char *str_start = s->c_str();
const char *str = str_start;
for (str = strpbrk(str, remove);
- str != NULL;
+ str != nullptr;
str = strpbrk(str + 1, remove)) {
(*s)[str - str_start] = replacewith;
}
@@ -280,7 +280,7 @@ static void JoinStringsIterator(const ITERATOR& start,
const ITERATOR& end,
const char* delim,
string* result) {
- GOOGLE_CHECK(result != NULL);
+ GOOGLE_CHECK(result != nullptr);
result->clear();
int delim_length = strlen(delim);
@@ -318,7 +318,7 @@ void JoinStrings(const std::vector<string>& components,
// result is truncated to 8 bits.
//
// The second call stores its errors in a supplied string vector.
-// If the string vector pointer is NULL, it reports the errors with LOG().
+// If the string vector pointer is nullptr, it reports the errors with LOG().
// ----------------------------------------------------------------------
#define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7'))
@@ -328,12 +328,12 @@ void JoinStrings(const std::vector<string>& components,
#define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)
int UnescapeCEscapeSequences(const char* source, char* dest) {
- return UnescapeCEscapeSequences(source, dest, NULL);
+ return UnescapeCEscapeSequences(source, dest, nullptr);
}
int UnescapeCEscapeSequences(const char* source, char* dest,
std::vector<string> *errors) {
- GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented.";
+ GOOGLE_DCHECK(errors == nullptr) << "Error reporting not implemented.";
char* d = dest;
const char* p = source;
@@ -458,13 +458,13 @@ int UnescapeCEscapeSequences(const char* source, char* dest,
// to be the same.
//
// The second call stores its errors in a supplied string vector.
-// If the string vector pointer is NULL, it reports the errors with LOG().
+// If the string vector pointer is nullptr, it reports the errors with LOG().
//
// In the first and second calls, the length of dest is returned. In
// the third call, the new string is returned.
// ----------------------------------------------------------------------
int UnescapeCEscapeString(const string& src, string* dest) {
- return UnescapeCEscapeString(src, dest, NULL);
+ return UnescapeCEscapeString(src, dest, nullptr);
}
int UnescapeCEscapeString(const string& src, string* dest,
@@ -478,7 +478,7 @@ int UnescapeCEscapeString(const string& src, string* dest,
string UnescapeCEscapeString(const string& src) {
std::unique_ptr<char[]> unescaped(new char[src.size() + 1]);
- int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL);
+ int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), nullptr);
return string(unescaped.get(), len);
}
@@ -982,7 +982,7 @@ static const char two_ASCII_digits[100][2] = {
char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
uint32 digits;
- const char *ASCII_digits = NULL;
+ const char *ASCII_digits = nullptr;
// The idea of this implementation is to trim the number of divides to as few
// as possible by using multiplication and subtraction rather than mod (%),
// and by outputting two digits at a time rather than one.
@@ -1073,7 +1073,7 @@ char* FastInt32ToBufferLeft(int32 i, char* buffer) {
char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
int digits;
- const char *ASCII_digits = NULL;
+ const char *ASCII_digits = nullptr;
uint32 u = static_cast<uint32>(u64);
if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
@@ -1231,7 +1231,7 @@ static inline bool IsValidFloatChar(char c) {
void DelocalizeRadix(char* buffer) {
// Fast check: if the buffer has a normal decimal point, assume no
// translation is needed.
- if (strchr(buffer, '.') != NULL) return;
+ if (strchr(buffer, '.') != nullptr) return;
// Find the first unknown character.
while (IsValidFloatChar(*buffer)) ++buffer;
@@ -1268,7 +1268,7 @@ char* DoubleToBuffer(double value, char* buffer) {
} else if (value == -std::numeric_limits<double>::infinity()) {
strcpy(buffer, "-inf");
return buffer;
- } else if (MathLimits<double>::IsNaN(value)) {
+ } else if (std::isnan(value)) {
strcpy(buffer, "nan");
return buffer;
}
@@ -1286,7 +1286,7 @@ char* DoubleToBuffer(double value, char* buffer) {
// of a double. This long double may have extra bits that make it compare
// unequal to "value" even though it would be exactly equal if it were
// truncated to a double.
- volatile double parsed_value = strtod(buffer, NULL);
+ volatile double parsed_value = strtod(buffer, nullptr);
if (parsed_value != value) {
int snprintf_result =
snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
@@ -1318,7 +1318,7 @@ inline bool CaseEqual(StringPiece s1, StringPiece s2) {
}
bool safe_strtob(StringPiece str, bool* value) {
- GOOGLE_CHECK(value != NULL) << "NULL output boolean given.";
+ GOOGLE_CHECK(value != nullptr) << "nullptr output boolean given.";
if (CaseEqual(str, "true") || CaseEqual(str, "t") ||
CaseEqual(str, "yes") || CaseEqual(str, "y") ||
CaseEqual(str, "1")) {
@@ -1386,7 +1386,7 @@ char* FloatToBuffer(float value, char* buffer) {
} else if (value == -std::numeric_limits<double>::infinity()) {
strcpy(buffer, "-inf");
return buffer;
- } else if (MathLimits<float>::IsNaN(value)) {
+ } else if (std::isnan(value)) {
strcpy(buffer, "nan");
return buffer;
}
@@ -1619,7 +1619,7 @@ void StrAppend(string *result,
int GlobalReplaceSubstring(const string& substring,
const string& replacement,
string* s) {
- GOOGLE_CHECK(s != NULL);
+ GOOGLE_CHECK(s != nullptr);
if (s->empty() || substring.empty())
return 0;
string tmp;
@@ -1969,7 +1969,7 @@ int Base64UnescapeInternal(const char *src_param, int szsrc,
// for (i = 0; i < 255; i += 8) {
// for (j = i; j < i + 8; j++) {
// pos = strchr(Base64, j);
-// if ((pos == NULL) || (j == 0))
+// if ((pos == nullptr) || (j == 0))
// idx = -1;
// else
// idx = pos - Base64;
@@ -2300,5 +2300,110 @@ int UTF8FirstLetterNumBytes(const char* src, int len) {
return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];
}
+// ----------------------------------------------------------------------
+// CleanStringLineEndings()
+// Clean up a multi-line string to conform to Unix line endings.
+// Reads from src and appends to dst, so usually dst should be empty.
+//
+// If there is no line ending at the end of a non-empty string, it can
+// be added automatically.
+//
+// Four different types of input are correctly handled:
+//
+// - Unix/Linux files: line ending is LF: pass through unchanged
+//
+// - DOS/Windows files: line ending is CRLF: convert to LF
+//
+// - Legacy Mac files: line ending is CR: convert to LF
+//
+// - Garbled files: random line endings: convert gracefully
+// lonely CR, lonely LF, CRLF: convert to LF
+//
+// @param src The multi-line string to convert
+// @param dst The converted string is appended to this string
+// @param auto_end_last_line Automatically terminate the last line
+//
+// Limitations:
+//
+// This does not do the right thing for CRCRLF files created by
+// broken programs that do another Unix->DOS conversion on files
+// that are already in CRLF format. For this, a brute-force
+// two-pass approach would be needed that
+//
+// (1) determines the presence of LF (first one is ok)
+// (2) if yes, removes any CR, else converts every CR to LF
+
+void CleanStringLineEndings(const string &src, string *dst,
+ bool auto_end_last_line) {
+ if (dst->empty()) {  // dst has no prior content: copy src in, then clean dst in place
+ dst->append(src);
+ CleanStringLineEndings(dst, auto_end_last_line);
+ } else {  // dst already holds text: clean a copy of src so that text is left untouched
+ string tmp = src;
+ CleanStringLineEndings(&tmp, auto_end_last_line);
+ dst->append(tmp);
+ }
+}
+
+void CleanStringLineEndings(string *str, bool auto_end_last_line) {
+ ptrdiff_t output_pos = 0;  // next index in the buffer to receive a cleaned byte
+ bool r_seen = false;  // true while a '\r' has been read but its '\n' is not yet written
+ ptrdiff_t len = str->size();
+
+ char *p = &(*str)[0];  // the string is rewritten in place through this pointer
+
+ for (ptrdiff_t input_pos = 0; input_pos < len;) {
+ if (!r_seen && input_pos + 8 < len) {  // fast path: no pending '\r' and more than 8 bytes remain
+ uint64_t v = GOOGLE_UNALIGNED_LOAD64(p + input_pos);
+ // Loop over groups of 8 bytes at a time until we come across
+ // a word that has a byte whose value is less than or equal to
+ // '\r' (i.e. could contain a \n (0x0a) or a \r (0x0d) ).
+ //
+ // We use a has_less macro that quickly tests a whole 64-bit
+ // word to see if any of the bytes has a value < N.
+ //
+ // For more details, see:
+ // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
+#define has_less(x, n) (((x) - ~0ULL / 255 * (n)) & ~(x) & ~0ULL / 255 * 128)
+ if (!has_less(v, '\r' + 1)) {
+#undef has_less
+ // No byte in this word has a value that could be a \r or a \n
+ if (output_pos != input_pos) {  // store only when earlier removals have shifted the output
+ GOOGLE_UNALIGNED_STORE64(p + output_pos, v);
+ }
+ input_pos += 8;
+ output_pos += 8;
+ continue;
+ }
+ }
+ string::const_reference in = p[input_pos];  // slow path: handle one byte at a time
+ if (in == '\r') {
+ if (r_seen) p[output_pos++] = '\n';  // the previous '\r' was a lone CR: emit its '\n' now
+ r_seen = true;
+ } else if (in == '\n') {  // this '\n' also serves as the output for any pending '\r' (CRLF -> LF)
+ if (input_pos != output_pos)
+ p[output_pos++] = '\n';
+ else
+ output_pos++;  // byte is already in place, so no write is needed
+ r_seen = false;
+ } else {
+ if (r_seen) p[output_pos++] = '\n';  // lone CR followed by ordinary text: emit '\n' first
+ r_seen = false;
+ if (input_pos != output_pos)
+ p[output_pos++] = in;
+ else
+ output_pos++;  // byte is already in place, so no write is needed
+ }
+ input_pos++;
+ }
+ if (r_seen ||  // input ended on a '\r' that still needs its '\n'
+ (auto_end_last_line && output_pos > 0 && p[output_pos - 1] != '\n')) {  // or a non-empty result lacks a requested final '\n'
+ str->resize(output_pos + 1);
+ str->operator[](output_pos) = '\n';  // indexed via str, not p; NOTE(review): presumably because resize() may invalidate p
+ } else if (output_pos < len) {  // bytes were removed: cut off the stale tail
+ str->resize(output_pos);
+ }
+}
+
} // namespace protobuf
} // namespace google