path: root/sql
author    Sun Rui <rui.sun@intel.com>  2015-09-10 12:21:13 -0700
committer Shivaram Venkataraman <shivaram@cs.berkeley.edu>  2015-09-10 12:21:13 -0700
commit    45e3be5c138d983f40f619735d60bf7eb78c9bf0 (patch)
tree      30b7b90f53eadee901a56e0e2e84222e21cf6c44 /sql
parent    d88abb7e212fb55f9b0398a0f76a753c86b85cf1 (diff)
download  spark-45e3be5c138d983f40f619735d60bf7eb78c9bf0.tar.gz
          spark-45e3be5c138d983f40f619735d60bf7eb78c9bf0.tar.bz2
          spark-45e3be5c138d983f40f619735d60bf7eb78c9bf0.zip
[SPARK-10049] [SPARKR] Support collecting data of ArrayType in DataFrame.
This PR:

1. Enhances reflection in RBackend so that a Java array is automatically matched to a Scala Seq when resolving methods. Utility functions such as seq() and listToSeq() on the R side can be removed, as they would conflict with the SerDe logic that transfers a Scala Seq to the R side.
2. Enhances the SerDe to support transferring a Scala Seq to the R side. After collection, data of ArrayType in a DataFrame is observed to be of Scala Seq type.
3. Supports ArrayType in createDataFrame().

Author: Sun Rui <rui.sun@intel.com>

Closes #8458 from sun-rui/SPARK-10049.
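The new getSQLDataType case in the diff below leans on a compact trick: an implicit class on StringContext makes r"..." usable in pattern position, with each ${name} hole binding to the corresponding regex capture group. Here is a minimal, self-contained sketch of that behavior; the demo object and its main method are illustrative scaffolding, not part of the patch:

import scala.util.matching.Regex

object RegexInterpolationDemo {
  // Mirrors the RegexContext helper added by the patch: build a Regex from
  // the literal parts, assigning a dummy group name per interpolated hole.
  private implicit class RegexContext(sc: StringContext) {
    def r: Regex = new Regex(sc.parts.mkString, sc.parts.tail.map(_ => "x"): _*)
  }

  def main(args: Array[String]): Unit = {
    // In pattern position, ${elemType} binds to what the (.*) group matched.
    "array<string>" match {
      case r"\Aarray<(.*)${elemType}>\Z" => println(s"element type: $elemType")
      case other => println(s"not an array type: $other")
    }
  }
}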
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala | 14
1 file changed, 10 insertions, 4 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
index 7f3defec3d..d4b834adb6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
@@ -26,6 +26,8 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, NamedExpression}
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.{Column, DataFrame, GroupedData, Row, SQLContext, SaveMode}
 
+import scala.util.matching.Regex
+
 private[r] object SQLUtils {
   def createSQLContext(jsc: JavaSparkContext): SQLContext = {
     new SQLContext(jsc)
@@ -35,14 +37,15 @@ private[r] object SQLUtils {
     new JavaSparkContext(sqlCtx.sparkContext)
   }
 
-  def toSeq[T](arr: Array[T]): Seq[T] = {
-    arr.toSeq
-  }
-
   def createStructType(fields : Seq[StructField]): StructType = {
     StructType(fields)
   }
 
+  // Support using regex in string interpolation
+  private[this] implicit class RegexContext(sc: StringContext) {
+    def r: Regex = new Regex(sc.parts.mkString, sc.parts.tail.map(_ => "x"): _*)
+  }
+
   def getSQLDataType(dataType: String): DataType = {
     dataType match {
       case "byte" => org.apache.spark.sql.types.ByteType
@@ -58,6 +61,9 @@ private[r] object SQLUtils {
       case "boolean" => org.apache.spark.sql.types.BooleanType
       case "timestamp" => org.apache.spark.sql.types.TimestampType
       case "date" => org.apache.spark.sql.types.DateType
+      case r"\Aarray<(.*)${elemType}>\Z" => {
+        org.apache.spark.sql.types.ArrayType(getSQLDataType(elemType))
+      }
       case _ => throw new IllegalArgumentException(s"Invaid type $dataType")
     }
   }
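Because the new case calls getSQLDataType recursively on the captured element type, nested arrays resolve naturally: "array<array<integer>>" peels to "array<integer>", then to "integer" (primitive names such as "integer" are elided from this hunk but assumed handled by the full match). A standalone check of the pattern itself, written with a plain Regex rather than the interpolator:

import scala.util.matching.Regex

object ArrayPatternCheck {
  // The same pattern the new case uses, as an ordinary Regex.
  val ArrayOf: Regex = """\Aarray<(.*)>\Z""".r

  def main(args: Array[String]): Unit = {
    // The greedy (.*) pairs the leading "array<" with the final ">", so
    // exactly one layer is peeled per call.
    "array<array<integer>>" match {
      case ArrayOf(elem) => println(elem)  // prints: array<integer>
      case other => println(s"no match: $other")
    }
  }
}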