diff options
author | Sun Rui <rui.sun@intel.com> | 2015-09-10 12:21:13 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-09-10 12:21:13 -0700 |
commit | 45e3be5c138d983f40f619735d60bf7eb78c9bf0 (patch) | |
tree | 30b7b90f53eadee901a56e0e2e84222e21cf6c44 /sql | |
parent | d88abb7e212fb55f9b0398a0f76a753c86b85cf1 (diff) | |
download | spark-45e3be5c138d983f40f619735d60bf7eb78c9bf0.tar.gz spark-45e3be5c138d983f40f619735d60bf7eb78c9bf0.tar.bz2 spark-45e3be5c138d983f40f619735d60bf7eb78c9bf0.zip |
[SPARK-10049] [SPARKR] Support collecting data of ArrayType in DataFrame.
this PR :
1. Enhance reflection in RBackend. Automatically match a Java array to a Scala Seq when finding methods. Util functions like seq() and listToSeq() on the R side can be removed, as they would conflict with the SerDe logic that transfers a Scala Seq to the R side.
2. Enhance the SerDe to support transferring a Scala Seq to the R side. Data of ArrayType in a DataFrame
after collection is observed to be of Scala Seq type.
3. Support ArrayType in createDataFrame().
Author: Sun Rui <rui.sun@intel.com>
Closes #8458 from sun-rui/SPARK-10049.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 7f3defec3d..d4b834adb6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -26,6 +26,8 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, NamedExpres import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, DataFrame, GroupedData, Row, SQLContext, SaveMode} +import scala.util.matching.Regex + private[r] object SQLUtils { def createSQLContext(jsc: JavaSparkContext): SQLContext = { new SQLContext(jsc) @@ -35,14 +37,15 @@ private[r] object SQLUtils { new JavaSparkContext(sqlCtx.sparkContext) } - def toSeq[T](arr: Array[T]): Seq[T] = { - arr.toSeq - } - def createStructType(fields : Seq[StructField]): StructType = { StructType(fields) } + // Support using regex in string interpolation + private[this] implicit class RegexContext(sc: StringContext) { + def r: Regex = new Regex(sc.parts.mkString, sc.parts.tail.map(_ => "x"): _*) + } + def getSQLDataType(dataType: String): DataType = { dataType match { case "byte" => org.apache.spark.sql.types.ByteType @@ -58,6 +61,9 @@ private[r] object SQLUtils { case "boolean" => org.apache.spark.sql.types.BooleanType case "timestamp" => org.apache.spark.sql.types.TimestampType case "date" => org.apache.spark.sql.types.DateType + case r"\Aarray<(.*)${elemType}>\Z" => { + org.apache.spark.sql.types.ArrayType(getSQLDataType(elemType)) + } case _ => throw new IllegalArgumentException(s"Invaid type $dataType") } } |