aboutsummaryrefslogtreecommitdiff
path: root/java/src/main/java/com/google/protobuf/TextFormat.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/main/java/com/google/protobuf/TextFormat.java')
-rw-r--r--java/src/main/java/com/google/protobuf/TextFormat.java121
1 files changed, 102 insertions, 19 deletions
diff --git a/java/src/main/java/com/google/protobuf/TextFormat.java b/java/src/main/java/com/google/protobuf/TextFormat.java
index d5fbdabf..ed462899 100644
--- a/java/src/main/java/com/google/protobuf/TextFormat.java
+++ b/java/src/main/java/com/google/protobuf/TextFormat.java
@@ -55,15 +55,18 @@ import java.util.regex.Pattern;
public final class TextFormat {
private TextFormat() {}
- private static final Printer DEFAULT_PRINTER = new Printer(false);
- private static final Printer SINGLE_LINE_PRINTER = new Printer(true);
+ private static final Printer DEFAULT_PRINTER = new Printer();
+ private static final Printer SINGLE_LINE_PRINTER =
+ (new Printer()).setSingleLineMode(true);
+ private static final Printer UNICODE_PRINTER =
+ (new Printer()).setEscapeNonAscii(false);
/**
* Outputs a textual representation of the Protocol Message supplied into
* the parameter output. (This representation is the new version of the
* classic "ProtocolPrinter" output from the original Protocol Buffer system)
*/
- public static void print(final Message message, final Appendable output)
+ public static void print(final MessageOrBuilder message, final Appendable output)
throws IOException {
DEFAULT_PRINTER.print(message, new TextGenerator(output));
}
@@ -79,7 +82,7 @@ public final class TextFormat {
* Generates a human readable form of this message, useful for debugging and
* other purposes, with no newline characters.
*/
- public static String shortDebugString(final Message message) {
+ public static String shortDebugString(final MessageOrBuilder message) {
try {
final StringBuilder sb = new StringBuilder();
SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
@@ -109,7 +112,7 @@ public final class TextFormat {
* Like {@code print()}, but writes directly to a {@code String} and
* returns it.
*/
- public static String printToString(final Message message) {
+ public static String printToString(final MessageOrBuilder message) {
try {
final StringBuilder text = new StringBuilder();
print(message, text);
@@ -133,6 +136,34 @@ public final class TextFormat {
}
}
+ /**
+ * Same as {@code printToString()}, except that non-ASCII characters
+ * in string type fields are not escaped as backslash+octal sequences.
+ */
+ public static String printToUnicodeString(final MessageOrBuilder message) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ UNICODE_PRINTER.print(message, new TextGenerator(text));
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Same as {@code printToString()}, except that non-ASCII characters
+ * in string type fields are not escaped as backslash+octal sequences.
+ */
+ public static String printToUnicodeString(final UnknownFieldSet fields) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text));
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
public static void printField(final FieldDescriptor field,
final Object value,
final Appendable output)
@@ -216,13 +247,26 @@ public final class TextFormat {
/** Helper class for converting protobufs to text. */
private static final class Printer {
/** Whether to omit newlines from the output. */
- final boolean singleLineMode;
+ boolean singleLineMode = false;
+
+ /** Whether to escape non-ASCII characters with backslash and octal. */
+ boolean escapeNonAscii = true;
+
+ private Printer() {}
- private Printer(final boolean singleLineMode) {
+ /** Setter of singleLineMode */
+ private Printer setSingleLineMode(boolean singleLineMode) {
this.singleLineMode = singleLineMode;
+ return this;
+ }
+
+ /** Setter of escapeNonAscii */
+ private Printer setEscapeNonAscii(boolean escapeNonAscii) {
+ this.escapeNonAscii = escapeNonAscii;
+ return this;
}
- private void print(final Message message, final TextGenerator generator)
+ private void print(final MessageOrBuilder message, final TextGenerator generator)
throws IOException {
for (Map.Entry<FieldDescriptor, Object> field
: message.getAllFields().entrySet()) {
@@ -339,7 +383,9 @@ public final class TextFormat {
case STRING:
generator.print("\"");
- generator.print(escapeText((String) value));
+ generator.print(escapeNonAscii ?
+ escapeText((String) value) :
+ (String) value);
generator.print("\"");
break;
@@ -541,7 +587,7 @@ public final class TextFormat {
private int previousLine = 0;
private int previousColumn = 0;
- // We use possesive quantifiers (*+ and ++) because otherwise the Java
+ // We use possessive quantifiers (*+ and ++) because otherwise the Java
// regex matcher has stack overflows on large inputs.
private static final Pattern WHITESPACE =
Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
@@ -864,7 +910,7 @@ public final class TextFormat {
public ParseException parseException(final String description) {
// Note: People generally prefer one-based line and column numbers.
return new ParseException(
- (line + 1) + ":" + (column + 1) + ": " + description);
+ line + 1, column + 1, description);
}
/**
@@ -875,7 +921,7 @@ public final class TextFormat {
final String description) {
// Note: People generally prefer one-based line and column numbers.
return new ParseException(
- (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
+ previousLine + 1, previousColumn + 1, description);
}
/**
@@ -900,8 +946,45 @@ public final class TextFormat {
public static class ParseException extends IOException {
private static final long serialVersionUID = 3196188060225107702L;
+ private final int line;
+ private final int column;
+
+ /** Create a new instance, with -1 as the line and column numbers. */
public ParseException(final String message) {
- super(message);
+ this(-1, -1, message);
+ }
+
+ /**
+ * Create a new instance whose message is prefixed with "line:column: ".
+ *
+ * @param line the line number where the parse error occurred,
+ * using 1-offset.
+ * @param column the column number where the parse error occurred,
+ * using 1-offset.
+ */
+ public ParseException(final int line, final int column,
+ final String message) {
+ super(Integer.toString(line) + ":" + column + ": " + message);
+ this.line = line;
+ this.column = column;
+ }
+
+ /**
+ * Return the line where the parse exception occurred, or -1 when
+ * none is provided. The value is specified as 1-offset, so the first
+ * line is line 1.
+ */
+ public int getLine() {
+ return line;
+ }
+
+ /**
+ * Return the column where the parse exception occurred, or -1 when
+ * none is provided. The value is specified as 1-offset, so the first
+ * column is column 1.
+ */
+ public int getColumn() {
+ return column;
}
}
@@ -1073,7 +1156,7 @@ public final class TextFormat {
mergeField(tokenizer, extensionRegistry, subBuilder);
}
- value = subBuilder.build();
+ value = subBuilder.buildPartial();
} else {
tokenizer.consume(":");
@@ -1212,7 +1295,7 @@ public final class TextFormat {
*/
static ByteString unescapeBytes(final CharSequence charString)
throws InvalidEscapeSequenceException {
- // First convert the Java characater sequence to UTF-8 bytes.
+ // First convert the Java character sequence to UTF-8 bytes.
ByteString input = ByteString.copyFromUtf8(charString.toString());
// Then unescape certain byte sequences introduced by ASCII '\\'. The valid
// escapes can all be expressed with ASCII characters, so it is safe to
@@ -1349,7 +1432,7 @@ public final class TextFormat {
/**
* Parse a 32-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively.
+ * and "0" to signify hexadecimal and octal numbers, respectively.
*/
static int parseInt32(final String text) throws NumberFormatException {
return (int) parseInteger(text, true, false);
@@ -1358,7 +1441,7 @@ public final class TextFormat {
/**
* Parse a 32-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively. The
+ * and "0" to signify hexadecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code int} when returned since Java has
* no unsigned integer type.
*/
@@ -1369,7 +1452,7 @@ public final class TextFormat {
/**
* Parse a 64-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively.
+ * and "0" to signify hexadecimal and octal numbers, respectively.
*/
static long parseInt64(final String text) throws NumberFormatException {
return parseInteger(text, true, true);
@@ -1378,7 +1461,7 @@ public final class TextFormat {
/**
* Parse a 64-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively. The
+ * and "0" to signify hexadecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code long} when returned since Java has
* no unsigned long type.
*/