author    Sameer Agarwal <sameer@databricks.com>  2016-05-27 11:11:31 -0700
committer Andrew Or <andrew@databricks.com>       2016-05-27 11:11:31 -0700
commit    635fb30f83a66cc56f5fecfed5bff77873bf49a6 (patch)
tree      4c017dde0baf2a7f8befb5bf64359a826632ce72 /sql
parent    4538443e276597530a27c6922e48503677b13956 (diff)
[SPARK-15599][SQL][DOCS] API docs for `createDataset` functions in SparkSession
## What changes were proposed in this pull request?

Adds API docs and usage examples for the 3 `createDataset` calls in `SparkSession`.

## How was this patch tested?

N/A

Author: Sameer Agarwal <sameer@databricks.com>

Closes #13345 from sameeragarwal/dataset-doc.
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala | 63
1 file changed, 63 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 5dabe0e83c..aa60048405 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -376,6 +376,40 @@ class SparkSession private(
Dataset.ofRows(self, LogicalRelation(baseRelation))
}
+ /* ------------------------------- *
+  |  Methods for creating DataSets  |
+  * ------------------------------- */
+
+ /**
+ * :: Experimental ::
+ * Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an
+ * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+ * that is generally created automatically through implicits from a `SparkSession`, or can be
+ * created explicitly by calling static methods on [[Encoders]].
+ *
+ * == Example ==
+ *
+ * {{{
+ *
+ * import spark.implicits._
+ * case class Person(name: String, age: Long)
+ * val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))
+ * val ds = spark.createDataset(data)
+ *
+ * ds.show()
+ * // +-------+---+
+ * // | name|age|
+ * // +-------+---+
+ * // |Michael| 29|
+ * // | Andy| 30|
+ * // | Justin| 19|
+ * // +-------+---+
+ * }}}
+ *
+ * @since 2.0.0
+ * @group dataset
+ */
+ @Experimental
def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
val enc = encoderFor[T]
val attributes = enc.schema.toAttributes
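
The Scaladoc above notes that the encoder can also be created explicitly by calling static methods on `Encoders`, rather than through `spark.implicits._`. A minimal sketch of that explicit path (assumed usage, not part of the patch), reusing the `Person` case class from the example and an active `SparkSession` named `spark`:

{{{
import org.apache.spark.sql.Encoders

case class Person(name: String, age: Long)

val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))

// Pass the encoder explicitly instead of relying on implicit resolution;
// Encoders.product derives an encoder for any case class (Product type).
val ds = spark.createDataset(data)(Encoders.product[Person])
ds.show()
}}}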
@@ -384,6 +418,17 @@ class SparkSession private(
Dataset[T](self, plan)
}
+ /**
+ * :: Experimental ::
+ * Creates a [[Dataset]] from an RDD of a given type. This method requires an
+ * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+ * that is generally created automatically through implicits from a `SparkSession`, or can be
+ * created explicitly by calling static methods on [[Encoders]].
+ *
+ * @since 2.0.0
+ * @group dataset
+ */
+ @Experimental
def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
val enc = encoderFor[T]
val attributes = enc.schema.toAttributes
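
Unlike the Seq variant, this overload's Scaladoc carries no usage example. A hedged sketch of typical usage (not from the patch), assuming the same `spark` session and `Person` case class as above:

{{{
import spark.implicits._

// Distribute the local data first, then wrap the RDD in a Dataset; the
// encoder is resolved implicitly, exactly as in the Seq-based example.
val rdd = spark.sparkContext.parallelize(
  Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19)))
val ds = spark.createDataset(rdd)
ds.show()
}}}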
@@ -392,6 +437,24 @@ class SparkSession private(
Dataset[T](self, plan)
}
+ /**
+ * :: Experimental ::
+ * Creates a [[Dataset]] from a [[java.util.List]] of a given type. This method requires an
+ * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+ * that is generally created automatically through implicits from a `SparkSession`, or can be
+ * created explicitly by calling static methods on [[Encoders]].
+ *
+ * == Java Example ==
+ *
+ * {{{
+ * List<String> data = Arrays.asList("hello", "world");
+ * Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
+ * }}}
+ *
+ * @since 2.0.0
+ * @group dataset
+ */
+ @Experimental
def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
createDataset(data.asScala)
}
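
This overload simply converts the Java list with `asScala` and delegates to the Seq variant, so all three entry points share the same encoder-driven planning. It can also be called from Scala with an explicit encoder, as in this sketch (assumed usage, not from the patch):

{{{
import java.util.Arrays
import org.apache.spark.sql.Encoders

// The explicit second argument list supplies the Encoder[String] that the
// `T : Encoder` context bound would otherwise resolve implicitly.
val ds = spark.createDataset(Arrays.asList("hello", "world"))(Encoders.STRING)
ds.show()
}}}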