author     Cazen <Cazen@korea.com>             2016-01-03 17:01:19 -0800
committer  Reynold Xin <rxin@databricks.com>   2016-01-03 17:01:19 -0800
commit     b8410ff9ce8cef7159a7364272e4c4234c5b474f (patch)
tree       2da8cdd8496644f9d71937cddca54844c6db1ee2 /sql/core
parent     7b92922f7f7ba4ff398dcbd734e8305ba03da87b (diff)
[SPARK-12537][SQL] Add option to accept quoting of all characters via the backslash quoting mechanism
This adds an option so the JSON parser can be configured to accept, or reject, backslash quoting of all characters.

Author: Cazen <Cazen@korea.com>
Author: Cazen Lee <cazen.lee@samsung.com>
Author: Cazen Lee <Cazen@korea.com>
Author: cazen.lee <cazen.lee@samsung.com>

Closes #10497 from Cazen/master.
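For context, a minimal usage sketch of the new reader option (not part of this patch); it assumes an existing SQLContext named sqlContext, mirroring the test suite below:

    // Each record contains a non-standard escape: \$ is not a legal JSON escape sequence.
    val jsonRdd = sqlContext.sparkContext.parallelize(Seq("""{"name": "Cazen Lee", "price": "\$10"}"""))

    // Default behaviour (option false): the record fails to parse and lands in _corrupt_record.
    val strict = sqlContext.read.json(jsonRdd)

    // With the new option enabled, \$ is accepted and decoded as a literal $.
    val lenient = sqlContext.read
      .option("allowBackslashEscapingAnyCharacter", "true")
      .json(jsonRdd)
    lenient.show()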
Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala                                    |  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala             |  9
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala | 19
3 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 0acea95344..6debb302d9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -258,6 +258,8 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
* </li>
* <li>`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
* (e.g. 00012)</li>
+ * <li>`allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
+ * character using backslash quoting mechanism</li>
*
* @since 1.6.0
*/
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala
index c132ead20e..f805c00925 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala
@@ -31,7 +31,8 @@ case class JSONOptions(
allowUnquotedFieldNames: Boolean = false,
allowSingleQuotes: Boolean = true,
allowNumericLeadingZeros: Boolean = false,
- allowNonNumericNumbers: Boolean = false) {
+ allowNonNumericNumbers: Boolean = false,
+ allowBackslashEscapingAnyCharacter: Boolean = false) {
/** Sets config options on a Jackson [[JsonFactory]]. */
def setJacksonOptions(factory: JsonFactory): Unit = {
@@ -40,6 +41,8 @@ case class JSONOptions(
factory.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, allowSingleQuotes)
factory.configure(JsonParser.Feature.ALLOW_NUMERIC_LEADING_ZEROS, allowNumericLeadingZeros)
factory.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, allowNonNumericNumbers)
+ factory.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER,
+ allowBackslashEscapingAnyCharacter)
}
}
@@ -59,6 +62,8 @@ object JSONOptions {
allowNumericLeadingZeros =
parameters.get("allowNumericLeadingZeros").map(_.toBoolean).getOrElse(false),
allowNonNumericNumbers =
- parameters.get("allowNonNumericNumbers").map(_.toBoolean).getOrElse(true)
+ parameters.get("allowNonNumericNumbers").map(_.toBoolean).getOrElse(true),
+ allowBackslashEscapingAnyCharacter =
+ parameters.get("allowBackslashEscapingAnyCharacter").map(_.toBoolean).getOrElse(false)
)
}
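For reference, a small standalone sketch (not from this patch) of the Jackson feature that allowBackslashEscapingAnyCharacter toggles; it assumes only that jackson-core is on the classpath, as it already is for Spark SQL:

    import com.fasterxml.jackson.core.{JsonFactory, JsonParser}

    object BackslashEscapeSketch {
      // Parses {"price": "\$10"} and returns the value of its single string field.
      def readPrice(lenient: Boolean): String = {
        val factory = new JsonFactory()
        // The same configure call this patch adds in JSONOptions.setJacksonOptions.
        factory.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, lenient)
        val parser = factory.createParser("""{"price": "\$10"}""")
        try {
          parser.nextToken() // START_OBJECT
          parser.nextToken() // FIELD_NAME "price"
          parser.nextToken() // VALUE_STRING; throws JsonParseException when lenient = false
          parser.getText     // "$10" when lenient = true
        } finally {
          parser.close()
        }
      }
    }

By default Jackson only recognizes the escape sequences defined by the JSON specification, so \$ raises a parse error; with the feature enabled, \x is decoded as x for any character, which is why the $ survives in the tests below.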
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
index 4cc0a3a958..1742df31bb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
@@ -111,4 +111,23 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
assert(df.schema.head.name == "age")
assert(df.first().getDouble(0).isNaN)
}
+
+ test("allowBackslashEscapingAnyCharacter off") {
+ val str = """{"name": "Cazen Lee", "price": "\$10"}"""
+ val rdd = sqlContext.sparkContext.parallelize(Seq(str))
+ val df = sqlContext.read.option("allowBackslashEscapingAnyCharacter", "false").json(rdd)
+
+ assert(df.schema.head.name == "_corrupt_record")
+ }
+
+ test("allowBackslashEscapingAnyCharacter on") {
+ val str = """{"name": "Cazen Lee", "price": "\$10"}"""
+ val rdd = sqlContext.sparkContext.parallelize(Seq(str))
+ val df = sqlContext.read.option("allowBackslashEscapingAnyCharacter", "true").json(rdd)
+
+ assert(df.schema.head.name == "name")
+ assert(df.schema.last.name == "price")
+ assert(df.first().getString(0) == "Cazen Lee")
+ assert(df.first().getString(1) == "$10")
+ }
}