From 16df133d7f5f3115cd5baa696fa73a4694f9cba9 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 9 Jun 2016 18:58:24 -0700
Subject: [SPARK-15850][SQL] Remove function grouping in SparkSession

## What changes were proposed in this pull request?
SparkSession does not have that many functions due to better namespacing, and
as a result we probably don't need the function grouping. This patch removes
the grouping and also adds missing scaladocs for createDataset functions in
SQLContext.

Closes #13577.

## How was this patch tested?
N/A - this is a documentation change.

Author: Reynold Xin

Closes #13582 from rxin/SPARK-15850.
---
 .../scala/org/apache/spark/sql/SQLContext.scala   | 62 +++++++++++++++++++++-
 .../scala/org/apache/spark/sql/SparkSession.scala | 28 ----------
 .../scala/org/apache/spark/sql/functions.scala    |  2 +-
 3 files changed, 61 insertions(+), 31 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 0fb2400d1b..23f2b6e894 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -51,7 +51,7 @@ import org.apache.spark.sql.util.ExecutionListenerManager
  * @groupname specificdata Specific Data Sources
  * @groupname config Configuration
  * @groupname dataframes Custom DataFrame Creation
- * @groupname dataset Custom DataFrame Creation
+ * @groupname dataset Custom Dataset Creation
  * @groupname Ungrouped Support functions for language integrated queries
  * @since 1.0.0
  */
@@ -346,15 +346,73 @@ class SQLContext private[sql](val sparkSession: SparkSession)
     sparkSession.createDataFrame(rowRDD, schema, needsConversion)
   }
 
-
+  /**
+   * :: Experimental ::
+   * Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an
+   * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+   * that is generally created automatically through implicits from a `SparkSession`, or can be
+   * created explicitly by calling static methods on [[Encoders]].
+   *
+   * == Example ==
+   *
+   * {{{
+   *
+   *   import spark.implicits._
+   *   case class Person(name: String, age: Long)
+   *   val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))
+   *   val ds = spark.createDataset(data)
+   *
+   *   ds.show()
+   *   // +-------+---+
+   *   // |   name|age|
+   *   // +-------+---+
+   *   // |Michael| 29|
+   *   // |   Andy| 30|
+   *   // | Justin| 19|
+   *   // +-------+---+
+   * }}}
+   *
+   * @since 2.0.0
+   * @group dataset
+   */
+  @Experimental
   def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
     sparkSession.createDataset(data)
   }
 
+  /**
+   * :: Experimental ::
+   * Creates a [[Dataset]] from an RDD of a given type. This method requires an
+   * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+   * that is generally created automatically through implicits from a `SparkSession`, or can be
+   * created explicitly by calling static methods on [[Encoders]].
+   *
+   * @since 2.0.0
+   * @group dataset
+   */
+  @Experimental
   def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
     sparkSession.createDataset(data)
   }
 
+  /**
+   * :: Experimental ::
+   * Creates a [[Dataset]] from a [[java.util.List]] of a given type. This method requires an
+   * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
+   * that is generally created automatically through implicits from a `SparkSession`, or can be
+   * created explicitly by calling static methods on [[Encoders]].
+   *
+   * == Java Example ==
+   *
+   * {{{
+   *   List<String> data = Arrays.asList("hello", "world");
+   *   Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
+   * }}}
+   *
+   * @since 2.0.0
+   * @group dataset
+   */
+  @Experimental
   def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
     sparkSession.createDataset(data)
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index f5b16d07ad..01c2e3ad29 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -119,7 +119,6 @@ class SparkSession private(
    * configurations that are relevant to Spark SQL. When getting the value of a config,
    * this defaults to the value set in the underlying [[SparkContext]], if any.
    *
-   * @group config
    * @since 2.0.0
    */
   @transient lazy val conf: RuntimeConfig = new RuntimeConfig(sessionState.conf)
@@ -129,7 +128,6 @@
    * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
    * that listen for execution metrics.
    *
-   * @group basic
    * @since 2.0.0
    */
   @Experimental
@@ -140,7 +138,6 @@
    * A collection of methods that are considered experimental, but can be used to hook into
    * the query planner for advanced functionality.
    *
-   * @group basic
    * @since 2.0.0
    */
   @Experimental
@@ -175,7 +172,6 @@
    *       DataTypes.StringType);
    * }}}
    *
-   * @group basic
    * @since 2.0.0
    */
   def udf: UDFRegistration = sessionState.udf
@@ -185,7 +181,6 @@
    * Returns a [[ContinuousQueryManager]] that allows managing all the
    * [[ContinuousQuery ContinuousQueries]] active on `this`.
    *
-   * @group basic
    * @since 2.0.0
    */
   @Experimental
@@ -200,7 +195,6 @@
    * and child sessions are set up with the same shared state. If the underlying catalog
    * implementation is Hive, this will initialize the metastore, which may take some time.
    *
-   * @group basic
    * @since 2.0.0
    */
   def newSession(): SparkSession = {
@@ -215,7 +209,6 @@
   /**
    * Returns a [[DataFrame]] with no rows or columns.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @transient
@@ -239,7 +232,6 @@
    * :: Experimental ::
    * Creates a [[DataFrame]] from an RDD of Product (e.g. case classes, tuples).
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @Experimental
@@ -255,7 +247,6 @@
    * :: Experimental ::
    * Creates a [[DataFrame]] from a local Seq of Product.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @Experimental
@@ -295,7 +286,6 @@
    *   sparkSession.sql("select name from people").collect.foreach(println)
    * }}}
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @DeveloperApi
@@ -309,7 +299,6 @@
    * It is important to make sure that the structure of every [[Row]] of the provided RDD matches
    * the provided schema. Otherwise, there will be runtime exception.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @DeveloperApi
@@ -323,7 +312,6 @@
    * It is important to make sure that the structure of every [[Row]] of the provided List matches
    * the provided schema. Otherwise, there will be runtime exception.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   @DeveloperApi
@@ -337,7 +325,6 @@
    * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
    * SELECT * queries will return the columns in an undefined order.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
@@ -357,7 +344,6 @@
    * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
    * SELECT * queries will return the columns in an undefined order.
    *
-   * @group dataframes
    * @since 2.0.0
    */
   def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
@@ -369,7 +355,6 @@
    *
    * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
    * SELECT * queries will return the columns in an undefined order.
-   * @group dataframes
    * @since 1.6.0
    */
   def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame = {
@@ -382,7 +367,6 @@
   /**
    * Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]].
    *
-   * @group dataframes
    * @since 2.0.0
    */
   def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = {
@@ -420,7 +404,6 @@
    * }}}
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
@@ -439,7 +422,6 @@
    * created explicitly by calling static methods on [[Encoders]].
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
@@ -465,7 +447,6 @@
    * }}}
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
@@ -478,7 +459,6 @@
    * in a range from 0 to `end` (exclusive) with step value 1.
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def range(end: Long): Dataset[java.lang.Long] = range(0, end)
@@ -489,7+469,6 @@
    * in a range from `start` to `end` (exclusive) with step value 1.
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def range(start: Long, end: Long): Dataset[java.lang.Long] = {
@@ -502,7 +481,6 @@
    * in a range from `start` to `end` (exclusive) with a step value.
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = {
@@ -516,7 +494,6 @@
    * specified.
    *
    * @since 2.0.0
-   * @group dataset
    */
   @Experimental
   def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[java.lang.Long] = {
@@ -565,7 +542,6 @@
    * Interface through which the user may create, drop, alter or query underlying
    * databases, tables, functions etc.
    *
-   * @group ddl_ops
    * @since 2.0.0
    */
   @transient lazy val catalog: Catalog = new CatalogImpl(self)
@@ -573,7 +549,6 @@
   /**
    * Returns the specified table as a [[DataFrame]].
    *
-   * @group ddl_ops
    * @since 2.0.0
    */
   def table(tableName: String): DataFrame = {
@@ -592,7 +567,6 @@
    * Executes a SQL query using Spark, returning the result as a [[DataFrame]].
    * The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect'.
    *
-   * @group basic
    * @since 2.0.0
    */
   def sql(sqlText: String): DataFrame = {
@@ -606,7 +580,6 @@
    *   sparkSession.read.schema(schema).json("/path/to/file.json")
    * }}}
    *
-   * @group genericdata
    * @since 2.0.0
    */
   def read: DataFrameReader = new DataFrameReader(self)
@@ -624,7 +597,6 @@
    *   import sparkSession.implicits._
    * }}}
    *
-   * @group basic
    * @since 2.0.0
    */
   @Experimental
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 4dbd1665e4..02608b0dce 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2437,7 +2437,7 @@ object functions {
    */
   def minute(e: Column): Column = withExpr { Minute(e.expr) }
 
-  /*
+  /**
    * Returns number of months between dates `date1` and `date2`.
    * @group datetime_funcs
    * @since 1.5.0
-- 
cgit v1.2.3
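
For reference, a minimal, self-contained sketch of the `createDataset` usage that the new SQLContext scaladocs describe. The object name, app name, and `local[*]` master are illustrative and not part of the patch; the three `createDataset` overloads and `Encoders.STRING()` come from the documented API itself.

```scala
import org.apache.spark.sql.{Dataset, Encoders, SparkSession}

object CreateDatasetExample {
  // A case class defined here lets Spark derive an Encoder for it via implicits.
  case class Person(name: String, age: Long)

  def main(args: Array[String]): Unit = {
    // App name and local master are illustrative only.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("CreateDatasetExample")
      .getOrCreate()
    import spark.implicits._ // brings the implicit Encoder[Person] into scope

    // Seq overload, matching the example in the new scaladoc.
    val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))
    val ds: Dataset[Person] = spark.createDataset(data)
    ds.show()

    // RDD overload: same encoder machinery, distributed input.
    val fromRdd: Dataset[Person] = spark.createDataset(spark.sparkContext.parallelize(data))
    fromRdd.show()

    // java.util.List overload with an explicit encoder, mirroring the Java example.
    val strings: Dataset[String] =
      spark.createDataset(java.util.Arrays.asList("hello", "world"))(Encoders.STRING())
    strings.show()

    spark.stop()
  }
}
```

Because `T` carries a context bound (`T : Encoder`), the encoder can be supplied either implicitly via `import spark.implicits._` or explicitly in a second parameter list, as the `Encoders.STRING()` call above shows.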