author     Jacek Laskowski <jacek@japila.pl>   2017-03-30 16:07:27 +0100
committer  Sean Owen <sowen@cloudera.com>      2017-03-30 16:07:27 +0100
commit     0197262a358fd174a188f8246ae777e53157610e (patch)
tree       0d0b52965bc6ea18785e97ada5eaca4f29e90b68 /sql/core
parent     b454d4402e5ee7d1a7385d1fe3737581f84d2c72 (diff)
[DOCS] Docs-only improvements
…adoc

## What changes were proposed in this pull request?

Use recommended values for row boundaries in Window's scaladoc, i.e. `Window.unboundedPreceding`, `Window.unboundedFollowing`, and `Window.currentRow` (that were introduced in 2.1.0).

## How was this patch tested?

Local build

Author: Jacek Laskowski <jacek@japila.pl>

Closes #17417 from jaceklaskowski/window-expression-scaladoc.
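For reference, the named constants are thin aliases for the sentinel values the old examples used: `Window.unboundedPreceding` is `Long.MinValue`, `Window.unboundedFollowing` is `Long.MaxValue`, and `Window.currentRow` is `0`. A minimal sketch of the documented usage (the application name, column name `running_sum`, and sample data are illustrative):

{{{
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

val spark = SparkSession.builder.master("local").appName("window-doc-example").getOrCreate()
import spark.implicits._

val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
  .toDF("id", "category")

// Frame from the start of the partition up to the current row, spelled with
// the named constants instead of Long.MinValue and 0.
val w = Window.partitionBy("category").orderBy("id")
  .rowsBetween(Window.unboundedPreceding, Window.currentRow)

df.withColumn("running_sum", sum($"id").over(w)).show()
}}}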
Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Column.scala                        | 18
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala                 |  3
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala                  |  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala   |  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala    |  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala            | 23
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala        | 20
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/functions.scala                     |  2
8 files changed, 36 insertions, 36 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index ae0703513c..43de2de7e7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -84,8 +84,8 @@ class TypedColumn[-T, U](
}
/**
- * Gives the TypedColumn a name (alias).
- * If the current TypedColumn has metadata associated with it, this metadata will be propagated
+ * Gives the [[TypedColumn]] a name (alias).
+ * If the current `TypedColumn` has metadata associated with it, this metadata will be propagated
* to the new column.
*
* @group expr_ops
@@ -99,16 +99,14 @@ class TypedColumn[-T, U](
/**
* A column that will be computed based on the data in a `DataFrame`.
*
- * A new column is constructed based on the input columns present in a dataframe:
+ * A new column can be constructed based on the input columns present in a DataFrame:
*
* {{{
- * df("columnName") // On a specific DataFrame.
+ * df("columnName") // On a specific `df` DataFrame.
* col("columnName") // A generic column no yet associated with a DataFrame.
* col("columnName.field") // Extracting a struct field
* col("`a.column.with.dots`") // Escape `.` in column names.
* $"columnName" // Scala short hand for a named column.
- * expr("a + 1") // A column that is constructed from a parsed SQL Expression.
- * lit("abc") // A column that produces a literal (constant) value.
* }}}
*
* [[Column]] objects can be composed to form complex expressions:
@@ -118,7 +116,7 @@ class TypedColumn[-T, U](
* $"a" === $"b"
* }}}
*
- * @note The internal Catalyst expression can be accessed via "expr", but this method is for
+ * @note The internal Catalyst expression can be accessed via [[expr]], but this method is for
* debugging purposes only and can change in any future Spark releases.
*
* @groupname java_expr_ops Java-specific expression operators
@@ -1100,7 +1098,7 @@ class Column(val expr: Expression) extends Logging {
def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, NullsLast, Set.empty) }
/**
- * Prints the expression to the console for debugging purpose.
+ * Prints the expression to the console for debugging purposes.
*
* @group df_ops
* @since 1.3.0
@@ -1154,8 +1152,8 @@ class Column(val expr: Expression) extends Logging {
* {{{
* val w = Window.partitionBy("name").orderBy("id")
* df.select(
- * sum("price").over(w.rangeBetween(Long.MinValue, 2)),
- * avg("price").over(w.rowsBetween(0, 4))
+ * sum("price").over(w.rangeBetween(Window.unboundedPreceding, 2)),
+ * avg("price").over(w.rowsBetween(Window.currentRow, 4))
* )
* }}}
*
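Outside scaladoc the updated `over` example runs as written once a window spec is in scope; a small self-contained variant, with the input rows invented for illustration:

{{{
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{avg, sum}
import spark.implicits._   // spark is an existing SparkSession

// Hypothetical input matching the column names used in the scaladoc.
val df = Seq(("alice", 1, 10.0), ("alice", 2, 20.0), ("bob", 1, 5.0))
  .toDF("name", "id", "price")

val w = Window.partitionBy("name").orderBy("id")
df.select(
  sum("price").over(w.rangeBetween(Window.unboundedPreceding, 2)),
  avg("price").over(w.rowsBetween(Window.currentRow, 4))
).show()
}}}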
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala b/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
index 18bccee98f..582d4a3670 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
@@ -24,7 +24,8 @@ import org.apache.spark.annotation.InterfaceStability
*
* To use this, import implicit conversions in SQL:
* {{{
- * import sqlContext.implicits._
+ * val spark: SparkSession = ...
+ * import spark.implicits._
* }}}
*
* @since 1.6.0
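The import now goes through a concrete `SparkSession` value rather than a `sqlContext`; a minimal sketch of how the `DatasetHolder` conversions are picked up (session settings are illustrative):

{{{
import org.apache.spark.sql.SparkSession

val spark: SparkSession = SparkSession.builder.master("local").getOrCreate()
import spark.implicits._

// The implicit conversions wrap local Seqs (and RDDs) in a DatasetHolder,
// which exposes toDS() and toDF(colNames: String*).
val ds = Seq(1, 2, 3).toDS()
val df = Seq(("a", 1), ("b", 2)).toDF("key", "value")
}}}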
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index a97297892b..b60499253c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -60,7 +60,7 @@ import org.apache.spark.util.Utils
* The builder can also be used to create a new session:
*
* {{{
- * SparkSession.builder()
+ * SparkSession.builder
* .master("local")
* .appName("Word Count")
* .config("spark.some.config.option", "some-value")
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
index e5a6a5f60b..470c736da9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.types.StringType
/**
* A command for users to list the databases/schemas.
- * If a databasePattern is supplied then the databases that only matches the
+ * If a databasePattern is supplied then the databases that only match the
* pattern would be listed.
* The syntax of using this command in SQL is:
* {{{
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
index 75ffe90f2b..311942f6db 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructType
* monotonically increasing notion of progress that can be represented as an [[Offset]]. Spark
* will regularly query each [[Source]] to see if any more data is available.
*/
-trait Source {
+trait Source {
/** Returns the schema of the data from this source */
def schema: StructType
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
index f3cf3052ea..00053485e6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
@@ -113,7 +113,7 @@ object Window {
* Creates a [[WindowSpec]] with the frame boundaries defined,
* from `start` (inclusive) to `end` (inclusive).
*
- * Both `start` and `end` are relative positions from the current row. For example, "0" means
+ * Both `start` and `end` are positions relative to the current row. For example, "0" means
* "current row", while "-1" means the row before the current row, and "5" means the fifth row
* after the current row.
*
@@ -131,9 +131,9 @@ object Window {
* import org.apache.spark.sql.expressions.Window
* val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
* .toDF("id", "category")
- * df.withColumn("sum",
- * sum('id) over Window.partitionBy('category).orderBy('id).rowsBetween(0,1))
- * .show()
+ * val byCategoryOrderedById =
+ * Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
+ * df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
*
* +---+--------+---+
* | id|category|sum|
@@ -150,7 +150,7 @@ object Window {
* @param start boundary start, inclusive. The frame is unbounded if this is
* the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value (`Window.unboundedFollowing`).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 2.1.0
*/
// Note: when updating the doc for this method, also update WindowSpec.rowsBetween.
@@ -162,7 +162,7 @@ object Window {
* Creates a [[WindowSpec]] with the frame boundaries defined,
* from `start` (inclusive) to `end` (inclusive).
*
- * Both `start` and `end` are relative from the current row. For example, "0" means "current row",
+ * Both `start` and `end` are relative to the current row. For example, "0" means "current row",
* while "-1" means one off before the current row, and "5" means the five off after the
* current row.
*
@@ -183,9 +183,9 @@ object Window {
* import org.apache.spark.sql.expressions.Window
* val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
* .toDF("id", "category")
- * df.withColumn("sum",
- * sum('id) over Window.partitionBy('category).orderBy('id).rangeBetween(0,1))
- * .show()
+ * val byCategoryOrderedById =
+ * Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
+ * df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
*
* +---+--------+---+
* | id|category|sum|
@@ -202,7 +202,7 @@ object Window {
* @param start boundary start, inclusive. The frame is unbounded if this is
* the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value (`Window.unboundedFollowing`).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 2.1.0
*/
// Note: when updating the doc for this method, also update WindowSpec.rangeBetween.
@@ -221,7 +221,8 @@ object Window {
*
* {{{
* // PARTITION BY country ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
- * Window.partitionBy("country").orderBy("date").rowsBetween(Long.MinValue, 0)
+ * Window.partitionBy("country").orderBy("date")
+ * .rowsBetween(Window.unboundedPreceding, Window.currentRow)
*
* // PARTITION BY country ORDER BY date ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
* Window.partitionBy("country").orderBy("date").rowsBetween(-3, 3)
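Putting the updated `rowsBetween` scaladoc example together as one runnable sketch (the expected sums follow directly from the frame covering the current row and the one after it within each category):

{{{
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum
import spark.implicits._   // spark is an existing SparkSession

val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
  .toDF("id", "category")

// ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING, per category, ordered by id.
val byCategoryOrderedById =
  Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)

// Sums per partition: category "a" -> 2, 3, 2; category "b" -> 3, 5, 3.
df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
}}}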
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
index de7d7a1772..6279d48c94 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
@@ -86,7 +86,7 @@ class WindowSpec private[sql](
* after the current row.
*
* We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
- * and `[Window.currentRow` to specify special boundary values, rather than using integral
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A row based boundary is based on the position of the row within the partition.
@@ -99,9 +99,9 @@ class WindowSpec private[sql](
* import org.apache.spark.sql.expressions.Window
* val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
* .toDF("id", "category")
- * df.withColumn("sum",
- * sum('id) over Window.partitionBy('category).orderBy('id).rowsBetween(0,1))
- * .show()
+ * val byCategoryOrderedById =
+ * Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
+ * df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
*
* +---+--------+---+
* | id|category|sum|
@@ -118,7 +118,7 @@ class WindowSpec private[sql](
* @param start boundary start, inclusive. The frame is unbounded if this is
* the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value (`Window.unboundedFollowing`).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 1.4.0
*/
// Note: when updating the doc for this method, also update Window.rowsBetween.
@@ -134,7 +134,7 @@ class WindowSpec private[sql](
* current row.
*
* We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
- * and `[Window.currentRow` to specify special boundary values, rather than using integral
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A range based boundary is based on the actual value of the ORDER BY
@@ -150,9 +150,9 @@ class WindowSpec private[sql](
* import org.apache.spark.sql.expressions.Window
* val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
* .toDF("id", "category")
- * df.withColumn("sum",
- * sum('id) over Window.partitionBy('category).orderBy('id).rangeBetween(0,1))
- * .show()
+ * val byCategoryOrderedById =
+ * Window.partitionBy('category).orderBy('id).rangeBetween(Window.currentRow, 1)
+ * df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
*
* +---+--------+---+
* | id|category|sum|
@@ -169,7 +169,7 @@ class WindowSpec private[sql](
* @param start boundary start, inclusive. The frame is unbounded if this is
* the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value (`Window.unboundedFollowing`).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 1.4.0
*/
// Note: when updating the doc for this method, also update Window.rangeBetween.
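For contrast with the row-based frame above, the range-based frame is driven by the ORDER BY values themselves, so tied ids share a frame; a sketch over the same data:

{{{
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum
import spark.implicits._   // spark is an existing SparkSession

val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
  .toDF("id", "category")

// RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING: the frame holds every row whose
// id falls within [current id, current id + 1].
val byCategoryOrderedById =
  Window.partitionBy('category).orderBy('id).rangeBetween(Window.currentRow, 1)

// Sums per partition: category "a" -> 4, 4, 2 (both id=1 rows see the frame
// {1, 1, 2}); category "b" -> 3, 5, 3.
df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
}}}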
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 0f9203065e..f07e043683 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2968,7 +2968,7 @@ object functions {
*
* @param e a string column containing JSON data.
* @param schema the schema to use when parsing the json string
- * @param options options to control how the json is parsed. accepts the same options and the
+ * @param options options to control how the json is parsed. Accepts the same options as the
* json data source.
*
* @group collection_funcs
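The corrected sentence refers to the `from_json` overload that takes an options map; a minimal sketch, where the option key and the unquoted-field JSON input are chosen purely for illustration:

{{{
import org.apache.spark.sql.functions.from_json
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import spark.implicits._   // spark is an existing SparkSession

val schema = StructType(Seq(
  StructField("a", IntegerType),
  StructField("b", StringType)))

// The options map accepts the same keys as the JSON data source,
// e.g. allowUnquotedFieldNames for input whose keys are not quoted.
val df = Seq("""{a: 1, b: "x"}""").toDF("json")
df.select(from_json($"json", schema, Map("allowUnquotedFieldNames" -> "true")) as "parsed").show()
}}}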