about | summary | refs | log | tree | commit | diff
path: root/sql
diff options
context:
space:
mode:
author    Reynold Xin <rxin@databricks.com>  2015-06-08 13:15:44 -0700
committer Reynold Xin <rxin@databricks.com>  2015-06-08 13:15:44 -0700
commit   51853891686f353dc9decc31066b0de01ed8b49e (patch)
tree     94e140b33abd749ac84ac9575a41ce3de1882e9e /sql
parent   fe7669d3072b72954ad0c3f2f8846a0fde839ead (diff)
download spark-51853891686f353dc9decc31066b0de01ed8b49e.tar.gz
spark-51853891686f353dc9decc31066b0de01ed8b49e.tar.bz2
spark-51853891686f353dc9decc31066b0de01ed8b49e.zip
[SPARK-8148] Do not use FloatType in partition column inference.
Use DoubleType instead to be more stable and robust.

Author: Reynold Xin <rxin@databricks.com>

Closes #6692 from rxin/SPARK-8148 and squashes the following commits:

6742ecc [Reynold Xin] [SPARK-8148] Do not use FloatType in partition column inference.
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala  16
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala  12
2 files changed, 15 insertions, 13 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala
index 9f6ec2ed8f..7a2b5b949d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.sources
-import java.lang.{Double => JDouble, Float => JFloat, Long => JLong}
+import java.lang.{Double => JDouble, Float => JFloat, Integer => JInteger, Long => JLong}
import java.math.{BigDecimal => JBigDecimal}
import scala.collection.mutable.ArrayBuffer
@@ -178,7 +178,7 @@ private[sql] object PartitioningUtils {
* {{{
* NullType ->
* IntegerType -> LongType ->
- * FloatType -> DoubleType -> DecimalType.Unlimited ->
+ * DoubleType -> DecimalType.Unlimited ->
* StringType
* }}}
*/
@@ -208,8 +208,8 @@ private[sql] object PartitioningUtils {
}
/**
- * Converts a string to a `Literal` with automatic type inference. Currently only supports
- * [[IntegerType]], [[LongType]], [[FloatType]], [[DoubleType]], [[DecimalType.Unlimited]], and
+ * Converts a string to a [[Literal]] with automatic type inference. Currently only supports
+ * [[IntegerType]], [[LongType]], [[DoubleType]], [[DecimalType.Unlimited]], and
* [[StringType]].
*/
private[sql] def inferPartitionColumnValue(
@@ -221,13 +221,15 @@ private[sql] object PartitioningUtils {
Try(Literal.create(Integer.parseInt(raw), IntegerType))
.orElse(Try(Literal.create(JLong.parseLong(raw), LongType)))
// Then falls back to fractional types
- .orElse(Try(Literal.create(JFloat.parseFloat(raw), FloatType)))
.orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType)))
.orElse(Try(Literal.create(new JBigDecimal(raw), DecimalType.Unlimited)))
// Then falls back to string
.getOrElse {
- if (raw == defaultPartitionName) Literal.create(null, NullType)
- else Literal.create(unescapePathName(raw), StringType)
+ if (raw == defaultPartitionName) {
+ Literal.create(null, NullType)
+ } else {
+ Literal.create(unescapePathName(raw), StringType)
+ }
}
} else {
if (raw == defaultPartitionName) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala
index c2f1cc8ffd..3240079483 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala
@@ -53,7 +53,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
check("10", Literal.create(10, IntegerType))
check("1000000000000000", Literal.create(1000000000000000L, LongType))
- check("1.5", Literal.create(1.5f, FloatType))
+ check("1.5", Literal.create(1.5, DoubleType))
check("hello", Literal.create("hello", StringType))
check(defaultPartitionName, Literal.create(null, NullType))
}
@@ -83,13 +83,13 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
ArrayBuffer(
Literal.create(10, IntegerType),
Literal.create("hello", StringType),
- Literal.create(1.5f, FloatType)))
+ Literal.create(1.5, DoubleType)))
})
check("file://path/a=10/b_hello/c=1.5", Some {
PartitionValues(
ArrayBuffer("c"),
- ArrayBuffer(Literal.create(1.5f, FloatType)))
+ ArrayBuffer(Literal.create(1.5, DoubleType)))
})
check("file:///", None)
@@ -121,7 +121,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
"hdfs://host:9000/path/a=10.5/b=hello"),
PartitionSpec(
StructType(Seq(
- StructField("a", FloatType),
+ StructField("a", DoubleType),
StructField("b", StringType))),
Seq(
Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"),
@@ -140,7 +140,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
"hdfs://host:9000/path/a=10.5/b=world/_temporary/path"),
PartitionSpec(
StructType(Seq(
- StructField("a", FloatType),
+ StructField("a", DoubleType),
StructField("b", StringType))),
Seq(
Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"),
@@ -162,7 +162,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
s"hdfs://host:9000/path/a=10.5/b=$defaultPartitionName"),
PartitionSpec(
StructType(Seq(
- StructField("a", FloatType),
+ StructField("a", DoubleType),
StructField("b", StringType))),
Seq(
Partition(Row(10, null), s"hdfs://host:9000/path/a=10/b=$defaultPartitionName"),