author     Liang-Chi Hsieh <viirya@appier.com>    2015-07-15 09:48:33 -0700
committer  Davies Liu <davies.liu@gmail.com>      2015-07-15 09:48:33 -0700
commit     6f6902597d5d687049c103bc0cf6da30919b92d8 (patch)
tree       36bce0480d5f1e8043c4879d2a329f8530e8cc98 /R/pkg
parent     20bb10f8644a92a57496b5df639008832b30e34d (diff)
download   spark-6f6902597d5d687049c103bc0cf6da30919b92d8.tar.gz
           spark-6f6902597d5d687049c103bc0cf6da30919b92d8.tar.bz2
           spark-6f6902597d5d687049c103bc0cf6da30919b92d8.zip
[SPARK-8840] [SPARKR] Add float coercion on SparkR
JIRA: https://issues.apache.org/jira/browse/SPARK-8840

Currently the type coercion rules don't include float type. This PR simply adds it.

Author: Liang-Chi Hsieh <viirya@appier.com>

Closes #7280 from viirya/add_r_float_coercion and squashes the following commits:

c86dc0e [Liang-Chi Hsieh] For comments.
dbf0c1b [Liang-Chi Hsieh] Implicitly convert Double to Float based on provided schema.
733015a [Liang-Chi Hsieh] Add test case for DataFrame with float type.
30c2a40 [Liang-Chi Hsieh] Update test case.
52b5294 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_r_float_coercion
6f9159d [Liang-Chi Hsieh] Add another test case.
8db3244 [Liang-Chi Hsieh] schema also needs to support float. add test case.
0dcc992 [Liang-Chi Hsieh] Add float coercion on SparkR.
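As a quick sketch of what the change enables (a hedged example, not part of the patch itself: the sqlContext, data, and column names below are assumptions, and it presumes an initialized SparkR session):

    # "float" is now accepted as a column type when declaring a schema
    schema <- structType(structField("name", "string"),
                         structField("height", "float"))
    localDF <- data.frame(name = c("Alice", "Bob"), height = c(164.1, 176.5))
    df <- createDataFrame(sqlContext, localDF, schema)
    dtypes(df)   # list(c("name", "string"), c("height", "float"))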
Diffstat (limited to 'R/pkg')
-rw-r--r--  R/pkg/R/deserialize.R              1
-rw-r--r--  R/pkg/R/schema.R                   1
-rw-r--r--  R/pkg/inst/tests/test_sparkSQL.R  26
3 files changed, 28 insertions(+), 0 deletions(-)
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index d961bbc383..7d1f6b0819 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -23,6 +23,7 @@
# Int -> integer
# String -> character
# Boolean -> logical
+# Float -> double
# Double -> double
# Long -> double
# Array[Byte] -> raw
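On the R side this mapping means values that arrive from the JVM as Float are widened to R doubles, exactly like Double and Long. A minimal sketch, assuming the float-typed DataFrame df from the example above:

    # Collecting a float column yields ordinary R doubles;
    # R has no single-precision numeric type of its own.
    bob <- collect(where(df, df$name == "Bob"))
    typeof(bob$height)   # "double"
    bob$height           # 176.5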
diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R
index 15e2bdbd55..06df430687 100644
--- a/R/pkg/R/schema.R
+++ b/R/pkg/R/schema.R
@@ -123,6 +123,7 @@ structField.character <- function(x, type, nullable = TRUE) {
}
options <- c("byte",
"integer",
+ "float",
"double",
"numeric",
"character",
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index b0ea388543..76f74f8083 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -108,6 +108,32 @@ test_that("create DataFrame from RDD", {
expect_equal(count(df), 10)
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
+
+ df <- jsonFile(sqlContext, jsonPathNa)
+ hiveCtx <- tryCatch({
+ newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
+ }, error = function(err) {
+ skip("Hive is not build with SparkSQL, skipped")
+ })
+ sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
+ insertInto(df, "people")
+ expect_equal(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"), c(16))
+ expect_equal(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"), c(176.5))
+
+ schema <- structType(structField("name", "string"), structField("age", "integer"),
+ structField("height", "float"))
+ df2 <- createDataFrame(sqlContext, df.toRDD, schema)
+ expect_equal(columns(df2), c("name", "age", "height"))
+ expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
+ expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
+
+ localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18), height=c(164.10, 181.4, 173.7))
+ df <- createDataFrame(sqlContext, localDF, schema)
+ expect_is(df, "DataFrame")
+ expect_equal(count(df), 3)
+ expect_equal(columns(df), c("name", "age", "height"))
+ expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
+ expect_equal(collect(where(df, df$name == "John")), c("John", 19, 164.10))
})
test_that("convert NAs to null type in DataFrames", {