diff options
author | Sun Rui <rui.sun@intel.com> | 2015-09-10 12:21:13 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-09-10 12:21:13 -0700 |
commit | 45e3be5c138d983f40f619735d60bf7eb78c9bf0 (patch) | |
tree | 30b7b90f53eadee901a56e0e2e84222e21cf6c44 /sql | |
parent | d88abb7e212fb55f9b0398a0f76a753c86b85cf1 (diff) | |
download | spark-45e3be5c138d983f40f619735d60bf7eb78c9bf0.tar.gz spark-45e3be5c138d983f40f619735d60bf7eb78c9bf0.tar.bz2 spark-45e3be5c138d983f40f619735d60bf7eb78c9bf0.zip |
[SPARK-10049] [SPARKR] Support collecting data of ArrayType in DataFrame.
this PR :
1. Enhance reflection in RBackend. Automatically match a Java array to a Scala Seq when finding methods. Util functions like seq() and listToSeq() on the R side can be removed, as they would conflict with the SerDe logic that transfers a Scala Seq to the R side.
2. Enhance the SerDe to support transferring a Scala Seq to the R side. Data of ArrayType in a DataFrame
after collection is observed to be of Scala Seq type.
3. Support ArrayType in createDataFrame().
Author: Sun Rui <rui.sun@intel.com>
Closes #8458 from sun-rui/SPARK-10049.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 7f3defec3d..d4b834adb6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -26,6 +26,8 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, NamedExpres import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, DataFrame, GroupedData, Row, SQLContext, SaveMode} +import scala.util.matching.Regex + private[r] object SQLUtils { def createSQLContext(jsc: JavaSparkContext): SQLContext = { new SQLContext(jsc) @@ -35,14 +37,15 @@ private[r] object SQLUtils { new JavaSparkContext(sqlCtx.sparkContext) } - def toSeq[T](arr: Array[T]): Seq[T] = { - arr.toSeq - } - def createStructType(fields : Seq[StructField]): StructType = { StructType(fields) } + // Support using regex in string interpolation + private[this] implicit class RegexContext(sc: StringContext) { + def r: Regex = new Regex(sc.parts.mkString, sc.parts.tail.map(_ => "x"): _*) + } + def getSQLDataType(dataType: String): DataType = { dataType match { case "byte" => org.apache.spark.sql.types.ByteType @@ -58,6 +61,9 @@ private[r] object SQLUtils { case "boolean" => org.apache.spark.sql.types.BooleanType case "timestamp" => org.apache.spark.sql.types.TimestampType case "date" => org.apache.spark.sql.types.DateType + case r"\Aarray<(.*)${elemType}>\Z" => { + org.apache.spark.sql.types.ArrayType(getSQLDataType(elemType)) + } case _ => throw new IllegalArgumentException(s"Invaid type $dataType") } } |