author     Liang-Chi Hsieh <viirya@appier.com>    2015-07-15 09:48:33 -0700
committer  Davies Liu <davies.liu@gmail.com>      2015-07-15 09:48:33 -0700
commit     6f6902597d5d687049c103bc0cf6da30919b92d8 (patch)
tree       36bce0480d5f1e8043c4879d2a329f8530e8cc98 /R/pkg
parent     20bb10f8644a92a57496b5df639008832b30e34d (diff)
download   spark-6f6902597d5d687049c103bc0cf6da30919b92d8.tar.gz
           spark-6f6902597d5d687049c103bc0cf6da30919b92d8.tar.bz2
           spark-6f6902597d5d687049c103bc0cf6da30919b92d8.zip
[SPARK-8840] [SPARKR] Add float coercion on SparkR
JIRA: https://issues.apache.org/jira/browse/SPARK-8840

Currently the type coercion rules don't include float type. This PR simply adds it.

Author: Liang-Chi Hsieh <viirya@appier.com>

Closes #7280 from viirya/add_r_float_coercion and squashes the following commits:

c86dc0e [Liang-Chi Hsieh] For comments.
dbf0c1b [Liang-Chi Hsieh] Implicitly convert Double to Float based on provided schema.
733015a [Liang-Chi Hsieh] Add test case for DataFrame with float type.
30c2a40 [Liang-Chi Hsieh] Update test case.
52b5294 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_r_float_coercion
6f9159d [Liang-Chi Hsieh] Add another test case.
8db3244 [Liang-Chi Hsieh] schema also needs to support float. add test case.
0dcc992 [Liang-Chi Hsieh] Add float coercion on SparkR.
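As a quick sketch of what the change enables (a hedged example, not part of the patch itself: the sqlContext, data, and column names below are assumptions, and it presumes an initialized SparkR session):

    # "float" is now accepted as a column type when declaring a schema
    schema <- structType(structField("name", "string"),
                         structField("height", "float"))
    localDF <- data.frame(name = c("Alice", "Bob"), height = c(164.1, 176.5))
    df <- createDataFrame(sqlContext, localDF, schema)
    dtypes(df)   # list(c("name", "string"), c("height", "float"))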
Diffstat (limited to 'R/pkg')
-rw-r--r--  R/pkg/R/deserialize.R              1
-rw-r--r--  R/pkg/R/schema.R                   1
-rw-r--r--  R/pkg/inst/tests/test_sparkSQL.R  26
3 files changed, 28 insertions(+), 0 deletions(-)
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index d961bbc383..7d1f6b0819 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -23,6 +23,7 @@
# Int -> integer
# String -> character
# Boolean -> logical
+# Float -> double
# Double -> double
# Long -> double
# Array[Byte] -> raw
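On the R side this mapping means values that arrive from the JVM as Float are widened to R doubles, exactly like Double and Long. A minimal sketch, assuming the float-typed DataFrame df from the example above:

    # Collecting a float column yields ordinary R doubles;
    # R has no single-precision numeric type of its own.
    bob <- collect(where(df, df$name == "Bob"))
    typeof(bob$height)   # "double"
    bob$height           # 176.5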
diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R
index 15e2bdbd55..06df430687 100644
--- a/R/pkg/R/schema.R
+++ b/R/pkg/R/schema.R
@@ -123,6 +123,7 @@ structField.character <- function(x, type, nullable = TRUE) {
}
options <- c("byte",
"integer",
+ "float",
"double",
"numeric",
"character",
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index b0ea388543..76f74f8083 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -108,6 +108,32 @@ test_that("create DataFrame from RDD", {
expect_equal(count(df), 10)
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
+
+ df <- jsonFile(sqlContext, jsonPathNa)
+ hiveCtx <- tryCatch({
+ newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
+ }, error = function(err) {
+ skip("Hive is not build with SparkSQL, skipped")
+ })
+ sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
+ insertInto(df, "people")
+ expect_equal(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"), c(16))
+ expect_equal(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"), c(176.5))
+
+ schema <- structType(structField("name", "string"), structField("age", "integer"),
+ structField("height", "float"))
+ df2 <- createDataFrame(sqlContext, df.toRDD, schema)
+ expect_equal(columns(df2), c("name", "age", "height"))
+ expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
+ expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
+
+ localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18), height=c(164.10, 181.4, 173.7))
+ df <- createDataFrame(sqlContext, localDF, schema)
+ expect_is(df, "DataFrame")
+ expect_equal(count(df), 3)
+ expect_equal(columns(df), c("name", "age", "height"))
+ expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
+ expect_equal(collect(where(df, df$name == "John")), c("John", 19, 164.10))
})
test_that("convert NAs to null type in DataFrames", {