aboutsummaryrefslogtreecommitdiff
path: root/src/google/protobuf/stubs/strutil.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/stubs/strutil.cc')
-rw-r--r--src/google/protobuf/stubs/strutil.cc113
1 files changed, 109 insertions, 4 deletions
diff --git a/src/google/protobuf/stubs/strutil.cc b/src/google/protobuf/stubs/strutil.cc
index 552d416f..65f22a96 100644
--- a/src/google/protobuf/stubs/strutil.cc
+++ b/src/google/protobuf/stubs/strutil.cc
@@ -31,14 +31,14 @@
// from google3/strings/strutil.cc
#include <google/protobuf/stubs/strutil.h>
-#include <google/protobuf/stubs/mathlimits.h>
#include <errno.h>
#include <float.h> // FLT_DIG and DBL_DIG
-#include <limits>
#include <limits.h>
#include <stdio.h>
+#include <cmath>
#include <iterator>
+#include <limits>
#include <google/protobuf/stubs/stl_util.h>
@@ -1268,7 +1268,7 @@ char* DoubleToBuffer(double value, char* buffer) {
} else if (value == -std::numeric_limits<double>::infinity()) {
strcpy(buffer, "-inf");
return buffer;
- } else if (MathLimits<double>::IsNaN(value)) {
+ } else if (std::isnan(value)) {
strcpy(buffer, "nan");
return buffer;
}
@@ -1386,7 +1386,7 @@ char* FloatToBuffer(float value, char* buffer) {
} else if (value == -std::numeric_limits<double>::infinity()) {
strcpy(buffer, "-inf");
return buffer;
- } else if (MathLimits<float>::IsNaN(value)) {
+ } else if (std::isnan(value)) {
strcpy(buffer, "nan");
return buffer;
}
@@ -2300,5 +2300,110 @@ int UTF8FirstLetterNumBytes(const char* src, int len) {
return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];
}
+// ----------------------------------------------------------------------
+// CleanStringLineEndings()
+// Clean up a multi-line string to conform to Unix line endings.
+// Reads from src and appends to dst, so usually dst should be empty.
+//
+// If there is no line ending at the end of a non-empty string, it can
+// be added automatically.
+//
+// Four different types of input are correctly handled:
+//
+// - Unix/Linux files: line ending is LF: pass through unchanged
+//
+// - DOS/Windows files: line ending is CRLF: convert to LF
+//
+// - Legacy Mac files: line ending is CR: convert to LF
+//
+// - Garbled files: random line endings: convert gracefully
+// lonely CR, lonely LF, CRLF: convert to LF
+//
+// @param src The multi-line string to convert
+// @param dst The converted string is appended to this string
+// @param auto_end_last_line Automatically terminate the last line
+//
+// Limitations:
+//
+// This does not do the right thing for CRCRLF files created by
+// broken programs that do another Unix->DOS conversion on files
+// that are already in CRLF format. For this, a two-pass approach
+// brute-force would be needed that
+//
+// (1) determines the presence of LF (first one is ok)
+// (2) if yes, removes any CR, else convert every CR to LF
+
+void CleanStringLineEndings(const string &src, string *dst,
+ bool auto_end_last_line) {
+ if (dst->empty()) {
+ dst->append(src);
+ CleanStringLineEndings(dst, auto_end_last_line);
+ } else {
+ string tmp = src;
+ CleanStringLineEndings(&tmp, auto_end_last_line);
+ dst->append(tmp);
+ }
+}
+
+void CleanStringLineEndings(string *str, bool auto_end_last_line) {
+ ptrdiff_t output_pos = 0;
+ bool r_seen = false;
+ ptrdiff_t len = str->size();
+
+ char *p = &(*str)[0];
+
+ for (ptrdiff_t input_pos = 0; input_pos < len;) {
+ if (!r_seen && input_pos + 8 < len) {
+ uint64_t v = GOOGLE_UNALIGNED_LOAD64(p + input_pos);
+ // Loop over groups of 8 bytes at a time until we come across
+ // a word that has a byte whose value is less than or equal to
+ // '\r' (i.e. could contain a \n (0x0a) or a \r (0x0d) ).
+ //
+ // We use a has_less macro that quickly tests a whole 64-bit
+ // word to see if any of the bytes has a value < N.
+ //
+ // For more details, see:
+ // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
+#define has_less(x, n) (((x) - ~0ULL / 255 * (n)) & ~(x) & ~0ULL / 255 * 128)
+ if (!has_less(v, '\r' + 1)) {
+#undef has_less
+ // No byte in this word has a value that could be a \r or a \n
+ if (output_pos != input_pos) {
+ GOOGLE_UNALIGNED_STORE64(p + output_pos, v);
+ }
+ input_pos += 8;
+ output_pos += 8;
+ continue;
+ }
+ }
+ string::const_reference in = p[input_pos];
+ if (in == '\r') {
+ if (r_seen) p[output_pos++] = '\n';
+ r_seen = true;
+ } else if (in == '\n') {
+ if (input_pos != output_pos)
+ p[output_pos++] = '\n';
+ else
+ output_pos++;
+ r_seen = false;
+ } else {
+ if (r_seen) p[output_pos++] = '\n';
+ r_seen = false;
+ if (input_pos != output_pos)
+ p[output_pos++] = in;
+ else
+ output_pos++;
+ }
+ input_pos++;
+ }
+ if (r_seen ||
+ (auto_end_last_line && output_pos > 0 && p[output_pos - 1] != '\n')) {
+ str->resize(output_pos + 1);
+ str->operator[](output_pos) = '\n';
+ } else if (output_pos < len) {
+ str->resize(output_pos);
+ }
+}
+
} // namespace protobuf
} // namespace google