author    Sameer Agarwal <sameer@databricks.com>  2016-05-27 11:11:31 -0700
committer Andrew Or <andrew@databricks.com>       2016-05-27 11:11:31 -0700
commit    635fb30f83a66cc56f5fecfed5bff77873bf49a6 (patch)
tree      4c017dde0baf2a7f8befb5bf64359a826632ce72 /sql
parent    4538443e276597530a27c6922e48503677b13956 (diff)
[SPARK-15599][SQL][DOCS] API docs for `createDataset` functions in SparkSession
## What changes were proposed in this pull request?

Adds API docs and usage examples for the 3 `createDataset` calls in `SparkSession`.

## How was this patch tested?

N/A

Author: Sameer Agarwal <sameer@databricks.com>

Closes #13345 from sameeragarwal/dataset-doc.
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala | 63
1 file changed, 63 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 5dabe0e83c..aa60048405 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -376,6 +376,40 @@ class SparkSession private(
Dataset.ofRows(self, LogicalRelation(baseRelation))
}
+ /* ------------------------------- *
+  |  Methods for creating DataSets  |
+  * ------------------------------- */
+
+ /**
+ * :: Experimental ::
+ * Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an
+ * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+ * that is generally created automatically through implicits from a `SparkSession`, or can be
+ * created explicitly by calling static methods on [[Encoders]].
+ *
+ * == Example ==
+ *
+ * {{{
+ *
+ * import spark.implicits._
+ * case class Person(name: String, age: Long)
+ * val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))
+ * val ds = spark.createDataset(data)
+ *
+ * ds.show()
+ * // +-------+---+
+ * // | name|age|
+ * // +-------+---+
+ * // |Michael| 29|
+ * // | Andy| 30|
+ * // | Justin| 19|
+ * // +-------+---+
+ * }}}
+ *
+ * @since 2.0.0
+ * @group dataset
+ */
+ @Experimental
def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
val enc = encoderFor[T]
val attributes = enc.schema.toAttributes
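
The Scaladoc above notes that the encoder can also be created explicitly by calling static methods on `Encoders`, rather than through `spark.implicits._`. A minimal sketch of that explicit path (assumed usage, not part of the patch), reusing the `Person` case class from the example and an active `SparkSession` named `spark`:

{{{
import org.apache.spark.sql.Encoders

case class Person(name: String, age: Long)

val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))

// Pass the encoder explicitly instead of relying on implicit resolution;
// Encoders.product derives an encoder for any case class (Product type).
val ds = spark.createDataset(data)(Encoders.product[Person])
ds.show()
}}}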
@@ -384,6 +418,17 @@ class SparkSession private(
Dataset[T](self, plan)
}
+ /**
+ * :: Experimental ::
+ * Creates a [[Dataset]] from an RDD of a given type. This method requires an
+ * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+ * that is generally created automatically through implicits from a `SparkSession`, or can be
+ * created explicitly by calling static methods on [[Encoders]].
+ *
+ * @since 2.0.0
+ * @group dataset
+ */
+ @Experimental
def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
val enc = encoderFor[T]
val attributes = enc.schema.toAttributes
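
Unlike the Seq variant, this overload's Scaladoc carries no usage example. A hedged sketch of typical usage (not from the patch), assuming the same `spark` session and `Person` case class as above:

{{{
import spark.implicits._

// Distribute the local data first, then wrap the RDD in a Dataset; the
// encoder is resolved implicitly, exactly as in the Seq-based example.
val rdd = spark.sparkContext.parallelize(
  Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19)))
val ds = spark.createDataset(rdd)
ds.show()
}}}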
@@ -392,6 +437,24 @@ class SparkSession private(
Dataset[T](self, plan)
}
+ /**
+ * :: Experimental ::
+ * Creates a [[Dataset]] from a [[java.util.List]] of a given type. This method requires an
+ * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+ * that is generally created automatically through implicits from a `SparkSession`, or can be
+ * created explicitly by calling static methods on [[Encoders]].
+ *
+ * == Java Example ==
+ *
+ * {{{
+ * List<String> data = Arrays.asList("hello", "world");
+ * Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
+ * }}}
+ *
+ * @since 2.0.0
+ * @group dataset
+ */
+ @Experimental
def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
createDataset(data.asScala)
}
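
This overload simply converts the Java list with `asScala` and delegates to the Seq variant, so all three entry points share the same encoder-driven planning. It can also be called from Scala with an explicit encoder, as in this sketch (assumed usage, not from the patch):

{{{
import java.util.Arrays
import org.apache.spark.sql.Encoders

// The explicit second argument list supplies the Encoder[String] that the
// `T : Encoder` context bound would otherwise resolve implicitly.
val ds = spark.createDataset(Arrays.asList("hello", "world"))(Encoders.STRING)
ds.show()
}}}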