aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorHolden Karau <holden@pigscanfly.ca>2015-09-21 13:33:10 -0700
committerXiangrui Meng <meng@databricks.com>2015-09-21 13:33:10 -0700
commit362539f8d97f6bb67f0d0983f7dea36b77cc9d18 (patch)
treef7476e9c78540ec7f66d854ab3c8b8ee998009dd /sql
parent97a99dde6e8d69a4c4c135dc1d9b1520b2548b5b (diff)
downloadspark-362539f8d97f6bb67f0d0983f7dea36b77cc9d18.tar.gz
spark-362539f8d97f6bb67f0d0983f7dea36b77cc9d18.tar.bz2
spark-362539f8d97f6bb67f0d0983f7dea36b77cc9d18.zip
[SPARK-10630] [SQL] Add a createDataFrame API that takes in a java list
It would be nice to support creating a DataFrame directly from a Java List of Row. Author: Holden Karau <holden@pigscanfly.ca> Closes #8779 from holdenk/SPARK-10630-create-DataFrame-from-Java-List.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala14
-rw-r--r--sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java10
2 files changed, 24 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index f099940800..1bd4e26fb3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -477,6 +477,20 @@ class SQLContext(@transient val sparkContext: SparkContext)
}
/**
+ * :: DeveloperApi ::
+ * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema.
+ * It is important to make sure that the structure of every [[Row]] of the provided List matches
+ * the provided schema. Otherwise, there will be a runtime exception.
+ *
+ * @group dataframes
+ * @since 1.6.0
+ */
+ @DeveloperApi
+ def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = {
+ DataFrame(self, LocalRelation.fromExternalRows(schema.toAttributes, rows.asScala))
+ }
+
+ /**
* Applies a schema to an RDD of Java Beans.
*
* WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
index 5f9abd4999..250ac2e109 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
@@ -37,6 +37,7 @@ import org.apache.spark.sql.*;
import static org.apache.spark.sql.functions.*;
import org.apache.spark.sql.test.TestSQLContext;
import org.apache.spark.sql.types.*;
+import static org.apache.spark.sql.types.DataTypes.*;
public class JavaDataFrameSuite {
private transient JavaSparkContext jsc;
@@ -181,6 +182,15 @@ public class JavaDataFrameSuite {
}
}
+ @Test
+ public void testCreateDataFromFromList() {
+ StructType schema = createStructType(Arrays.asList(createStructField("i", IntegerType, true)));
+ List<Row> rows = Arrays.asList(RowFactory.create(0));
+ DataFrame df = context.createDataFrame(rows, schema);
+ Row[] result = df.collect();
+ Assert.assertEquals(1, result.length);
+ }
+
private static final Comparator<Row> crosstabRowComparator = new Comparator<Row>() {
@Override
public int compare(Row row1, Row row2) {