author    Shixiong Zhu <shixiong@databricks.com>  2016-05-31 14:50:07 -0700
committer Shixiong Zhu <shixiong@databricks.com>  2016-05-31 14:50:07 -0700
commit    9a74de18a13d84805e1a448f858bb05ce30de87e (patch)
tree      db75558e00e3e926bbb57070299187519d30adb7 /sql
parent    c6de5832bfad423e7d6f7e0a92a48170265f25cd (diff)
Revert "[SPARK-11753][SQL][TEST-HADOOP2.2] Make allowNonNumericNumbers option work
## What changes were proposed in this pull request? This reverts commit c24b6b679c3efa053f7de19be73eb36dc70d9930. Sent a PR to run Jenkins tests due to the revert conflicts of `dev/deps/spark-deps-hadoop*`. ## How was this patch tested? Jenkins unit tests, integration tests, manual tests) Author: Shixiong Zhu <shixiong@databricks.com> Closes #13417 from zsxwing/revert-SPARK-11753.
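The reverted commit (c24b6b67) had made the `allowNonNumericNumbers` JSON option recognize unquoted tokens such as NaN and Infinity case-insensitively. As orientation before the diff, here is a minimal sketch of how the option is set through the public reader API, modeled on the test code removed below; the local session setup is assumed for illustration and is not part of the patch:

import org.apache.spark.sql.SparkSession

// Assumed local session for illustration; not part of the patch.
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("nonNumericNumbers")
  .getOrCreate()

// With the option enabled, an unquoted NaN literal would parse as a double
// instead of sending the record to _corrupt_record. The ignored tests in
// this patch suggest the end-to-end behavior was not actually working at
// the time, so no output is asserted here.
val rdd = spark.sparkContext.parallelize(Seq("""{"age": NaN}"""))
val df = spark.read.option("allowNonNumericNumbers", "true").json(rdd)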
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala                                    |  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala           | 28
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala | 59
3 files changed, 27 insertions(+), 62 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 2057878028..88fa5cd21d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -293,8 +293,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
* </li>
* <li>`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
* (e.g. 00012)</li>
- * <li>`allowNonNumericNumbers` (default `true`): allows using non-numeric numbers such as "NaN",
- * "Infinity", "-Infinity", "INF", "-INF", which are convertd to floating point numbers.</li>
* <li>`allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
* character using backslash quoting mechanism</li>
* <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
index cafca32318..aeee2600a1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
@@ -129,15 +129,13 @@ object JacksonParser extends Logging {
case (VALUE_STRING, FloatType) =>
// Special case handling for NaN and Infinity.
val value = parser.getText
- if (value.equals("NaN") ||
- value.equals("Infinity") ||
- value.equals("+Infinity") ||
- value.equals("-Infinity")) {
+ val lowerCaseValue = value.toLowerCase()
+ if (lowerCaseValue.equals("nan") ||
+ lowerCaseValue.equals("infinity") ||
+ lowerCaseValue.equals("-infinity") ||
+ lowerCaseValue.equals("inf") ||
+ lowerCaseValue.equals("-inf")) {
value.toFloat
- } else if (value.equals("+INF") || value.equals("INF")) {
- Float.PositiveInfinity
- } else if (value.equals("-INF")) {
- Float.NegativeInfinity
} else {
throw new SparkSQLJsonProcessingException(s"Cannot parse $value as FloatType.")
}
@@ -148,15 +146,13 @@ object JacksonParser extends Logging {
case (VALUE_STRING, DoubleType) =>
// Special case handling for NaN and Infinity.
val value = parser.getText
- if (value.equals("NaN") ||
- value.equals("Infinity") ||
- value.equals("+Infinity") ||
- value.equals("-Infinity")) {
+ val lowerCaseValue = value.toLowerCase()
+ if (lowerCaseValue.equals("nan") ||
+ lowerCaseValue.equals("infinity") ||
+ lowerCaseValue.equals("-infinity") ||
+ lowerCaseValue.equals("inf") ||
+ lowerCaseValue.equals("-inf")) {
value.toDouble
- } else if (value.equals("+INF") || value.equals("INF")) {
- Double.PositiveInfinity
- } else if (value.equals("-INF")) {
- Double.NegativeInfinity
} else {
throw new SparkSQLJsonProcessingException(s"Cannot parse $value as DoubleType.")
}
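Both hunks delete the same case-insensitive branch, once for `FloatType` and once for `DoubleType`. Factored out of the parser, the removed logic amounts to the helper below; `parseSpecialDouble` is a hypothetical name for illustration, not an identifier from the patch:

// Sketch of the deleted case-insensitive branch, double variant only.
// parseSpecialDouble is a hypothetical helper, not part of JacksonParser.
def parseSpecialDouble(value: String): Double = value.toLowerCase() match {
  case "nan"                => Double.NaN
  case "infinity" | "inf"   => Double.PositiveInfinity
  case "-infinity" | "-inf" => Double.NegativeInfinity
  case other =>
    throw new IllegalArgumentException(s"Cannot parse $other as DoubleType.")
}

Returning the constants directly also sidesteps a subtlety in the deleted code, which called `value.toDouble` even for inputs like "INF" that `java.lang.Double.parseDouble` does not accept.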
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
index 2aab955c1e..c31dffedbd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.datasources.json
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
/**
* Test cases for various [[JSONOptions]].
@@ -94,51 +93,23 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
assert(df.first().getLong(0) == 18)
}
- test("allowNonNumericNumbers off") {
- // non-quoted non-numeric numbers don't work if allowNonNumericNumbers is off.
- var testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
- """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": INF}""",
- """{"age": +INF}""", """{"age": -INF}""")
- testCases.foreach { str =>
- val rdd = spark.sparkContext.parallelize(Seq(str))
- val df = spark.read.option("allowNonNumericNumbers", "false").json(rdd)
-
- assert(df.schema.head.name == "_corrupt_record")
- }
-
- // quoted non-numeric numbers should still work even allowNonNumericNumbers is off.
- testCases = Seq("""{"age": "NaN"}""", """{"age": "Infinity"}""", """{"age": "+Infinity"}""",
- """{"age": "-Infinity"}""", """{"age": "INF"}""", """{"age": "+INF"}""",
- """{"age": "-INF"}""")
- val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
- _.isNegInfinity, _.isPosInfinity, _.isPosInfinity, _.isNegInfinity)
- val schema = StructType(StructField("age", DoubleType, true) :: Nil)
-
- testCases.zipWithIndex.foreach { case (str, idx) =>
- val rdd = spark.sparkContext.parallelize(Seq(str))
- val df = spark.read.option("allowNonNumericNumbers", "false").schema(schema).json(rdd)
-
- assert(df.schema.head.name == "age")
- assert(tests(idx)(df.first().getDouble(0)))
- }
+ // The following two tests are not really working - need to look into Jackson's
+ // JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS.
+ ignore("allowNonNumericNumbers off") {
+ val str = """{"age": NaN}"""
+ val rdd = spark.sparkContext.parallelize(Seq(str))
+ val df = spark.read.json(rdd)
+
+ assert(df.schema.head.name == "_corrupt_record")
}
- test("allowNonNumericNumbers on") {
- val testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
- """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": +INF}""",
- """{"age": -INF}""", """{"age": "NaN"}""", """{"age": "Infinity"}""",
- """{"age": "-Infinity"}""")
- val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
- _.isNegInfinity, _.isPosInfinity, _.isNegInfinity, _.isNaN, _.isPosInfinity,
- _.isNegInfinity, _.isPosInfinity, _.isNegInfinity)
- val schema = StructType(StructField("age", DoubleType, true) :: Nil)
- testCases.zipWithIndex.foreach { case (str, idx) =>
- val rdd = spark.sparkContext.parallelize(Seq(str))
- val df = spark.read.option("allowNonNumericNumbers", "true").schema(schema).json(rdd)
-
- assert(df.schema.head.name == "age")
- assert(tests(idx)(df.first().getDouble(0)))
- }
+ ignore("allowNonNumericNumbers on") {
+ val str = """{"age": NaN}"""
+ val rdd = spark.sparkContext.parallelize(Seq(str))
+ val df = spark.read.option("allowNonNumericNumbers", "true").json(rdd)
+
+ assert(df.schema.head.name == "age")
+ assert(df.first().getDouble(0).isNaN)
}
test("allowBackslashEscapingAnyCharacter off") {