diff options
author | Travis Crawford <travis@medium.com> | 2016-03-30 16:59:52 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-03-30 16:59:52 -0700 |
commit | da54abfd8730ef752eca921089bcf568773bd24a (patch) | |
tree | caffe6260f202b2ab1834630546891c5401c8b96 /sql/core/src/test/scala | |
parent | 258a2434193aae62999102a8df73ca70bf0cb9f1 (diff) | |
download | spark-da54abfd8730ef752eca921089bcf568773bd24a.tar.gz spark-da54abfd8730ef752eca921089bcf568773bd24a.tar.bz2 spark-da54abfd8730ef752eca921089bcf568773bd24a.zip |
[SPARK-14081][SQL] - Preserve DataFrame column types when filling nulls.
## What changes were proposed in this pull request?
This change resolves an issue where `DataFrameNaFunctions.fill` converts a `FloatType` column to `DoubleType`. It also clarifies the contract: replacement values are cast to the column's data type, which may alter the replacement value when the column type has lower precision than the value supplied.
## How was this patch tested?
This patch has associated unit tests.
Author: Travis Crawford <travis@medium.com>
Closes #11967 from traviscrawford/SPARK-14081-dataframena.
Diffstat (limited to 'sql/core/src/test/scala')
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala | 50 |
1 file changed, 30 insertions, 20 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala index e34875471f..18e04c24a4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala @@ -141,26 +141,36 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSQLContext { } test("fill with map") { - val df = Seq[(String, String, java.lang.Long, java.lang.Double, java.lang.Boolean)]( - (null, null, null, null, null)).toDF("a", "b", "c", "d", "e") - checkAnswer( - df.na.fill(Map( - "a" -> "test", - "c" -> 1, - "d" -> 2.2, - "e" -> false - )), - Row("test", null, 1, 2.2, false)) - - // Test Java version - checkAnswer( - df.na.fill(Map( - "a" -> "test", - "c" -> 1, - "d" -> 2.2, - "e" -> false - ).asJava), - Row("test", null, 1, 2.2, false)) + val df = Seq[(String, String, java.lang.Integer, java.lang.Long, + java.lang.Float, java.lang.Double, java.lang.Boolean)]( + (null, null, null, null, null, null, null)) + .toDF("stringFieldA", "stringFieldB", "integerField", "longField", + "floatField", "doubleField", "booleanField") + + val fillMap = Map( + "stringFieldA" -> "test", + "integerField" -> 1, + "longField" -> 2L, + "floatField" -> 3.3f, + "doubleField" -> 4.4d, + "booleanField" -> false) + + val expectedRow = Row("test", null, 1, 2L, 3.3f, 4.4d, false) + + checkAnswer(df.na.fill(fillMap), expectedRow) + checkAnswer(df.na.fill(fillMap.asJava), expectedRow) // Test Java version + + // Ensure replacement values are cast to the column data type. + checkAnswer(df.na.fill(Map( + "integerField" -> 1d, + "longField" -> 2d, + "floatField" -> 3d, + "doubleField" -> 4d)), + Row(null, null, 1, 2L, 3f, 4d, null)) + + // Ensure column types do not change. Columns that have null values replaced + // will no longer be flagged as nullable, so do not compare schemas directly. 
+ assert(df.na.fill(fillMap).schema.fields.map(_.dataType) === df.schema.fields.map(_.dataType)) } test("replace") { |