author    Sandeep Singh <sandeep@techaddict.me>  2016-06-08 23:41:29 -0700
committer Reynold Xin <rxin@databricks.com>      2016-06-08 23:41:29 -0700
commit    d5807def10c21e145163dc1e34d38258dda73ebf (patch)
tree      167e3d8f3d4e6b473350d20f16c47cecabcc2f8b /sql
parent    afbe35cf5b272991b4986e551b42d9201c3862c3 (diff)
[MINOR][DOC] In Dataset docs, remove self link to Dataset and add link to Column
## What changes were proposed in this pull request?
Documentation fix: in the Dataset docs, remove Scaladoc self links to Dataset and add links to Column.

## How was this patch tested?

Author: Sandeep Singh <sandeep@techaddict.me>

Closes #13567 from techaddict/minor-4.
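For context, the convention this commit applies: inside Dataset.scala, "[[Dataset]]" is a Scaladoc self link (the class linking to itself renders as a dead or redundant link), so it is replaced with plain text, while references to other classes such as [[Column]] keep the [[...]] syntax so Scaladoc cross-links them. A minimal sketch of the convention, using a hypothetical helper that is not part of this patch:

    // Hypothetical, simplified illustration of the Scaladoc link convention
    // applied by this commit; sortedBy is made up for this example.
    import org.apache.spark.sql.{Column, Dataset}

    object DocLinkExample {
      /**
       * Returns a new Dataset sorted by the given [[Column]].
       *
       * "Dataset" above is deliberately plain text (no self link from within
       * Dataset's own docs), while [[Column]] remains a link to another class.
       */
      def sortedBy[T](ds: Dataset[T], col: Column): Dataset[T] = ds.sort(col)
    }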
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 200
1 file changed, 100 insertions(+), 100 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 6cbc27d91c..162524a9ef 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -67,7 +67,7 @@ private[sql] object Dataset {
}
/**
- * A [[Dataset]] is a strongly typed collection of domain-specific objects that can be transformed
+ * A Dataset is a strongly typed collection of domain-specific objects that can be transformed
* in parallel using functional or relational operations. Each Dataset also has an untyped view
* called a [[DataFrame]], which is a Dataset of [[Row]].
*
@@ -105,7 +105,7 @@ private[sql] object Dataset {
* }}}
*
* Dataset operations can also be untyped, through various domain-specific-language (DSL)
- * functions defined in: [[Dataset]] (this class), [[Column]], and [[functions]]. These operations
+ * functions defined in: Dataset (this class), [[Column]], and [[functions]]. These operations
* are very similar to the operations available in the data frame abstraction in R or Python.
*
* To select a column from the Dataset, use `apply` method in Scala and `col` in Java.
@@ -194,13 +194,13 @@ class Dataset[T] private[sql](
/**
* Currently [[ExpressionEncoder]] is the only implementation of [[Encoder]], here we turn the
* passed in encoder to [[ExpressionEncoder]] explicitly, and mark it implicit so that we can use
- * it when constructing new [[Dataset]] objects that have the same object type (that will be
+ * it when constructing new Dataset objects that have the same object type (that will be
* possibly resolved to a different schema).
*/
private[sql] implicit val exprEnc: ExpressionEncoder[T] = encoderFor(encoder)
/**
- * Encoder is used mostly as a container of serde expressions in [[Dataset]]. We build logical
+ * Encoder is used mostly as a container of serde expressions in Dataset. We build logical
* plans by these serde expressions and execute it within the query framework. However, for
* performance reasons we may want to use encoder as a function to deserialize internal rows to
* custom objects, e.g. collect. Here we resolve and bind the encoder so that we can call its
@@ -340,7 +340,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
- * Returns a new [[Dataset]] where each record has been mapped on to the specified type. The
+ * Returns a new Dataset where each record has been mapped on to the specified type. The
* method used to map columns depend on the type of `U`:
* - When `U` is a class, fields for the class will be mapped to columns of the same name
* (case sensitivity is determined by `spark.sql.caseSensitive`).
@@ -349,7 +349,7 @@ class Dataset[T] private[sql](
* - When `U` is a primitive type (i.e. String, Int, etc), then the first column of the
* [[DataFrame]] will be used.
*
- * If the schema of the [[Dataset]] does not match the desired `U` type, you can use `select`
+ * If the schema of the Dataset does not match the desired `U` type, you can use `select`
* along with `alias` or `as` to rearrange or rename as required.
*
* @group basic
@@ -385,7 +385,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns the schema of this [[Dataset]].
+ * Returns the schema of this Dataset.
*
* @group basic
* @since 1.6.0
@@ -453,8 +453,8 @@ class Dataset[T] private[sql](
def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation]
/**
- * Returns true if this [[Dataset]] contains one or more sources that continuously
- * return data as it arrives. A [[Dataset]] that reads data from a streaming source
+ * Returns true if this Dataset contains one or more sources that continuously
+ * return data as it arrives. A Dataset that reads data from a streaming source
* must be executed as a [[ContinuousQuery]] using the `startStream()` method in
* [[DataFrameWriter]]. Methods that return a single answer, e.g. `count()` or
* `collect()`, will throw an [[AnalysisException]] when there is a streaming
@@ -467,7 +467,7 @@ class Dataset[T] private[sql](
def isStreaming: Boolean = logicalPlan.isStreaming
/**
- * Displays the [[Dataset]] in a tabular form. Strings more than 20 characters will be truncated,
+ * Displays the Dataset in a tabular form. Strings more than 20 characters will be truncated,
* and all cells will be aligned right. For example:
* {{{
* year month AVG('Adj Close) MAX('Adj Close)
@@ -486,7 +486,7 @@ class Dataset[T] private[sql](
def show(numRows: Int): Unit = show(numRows, truncate = true)
/**
- * Displays the top 20 rows of [[Dataset]] in a tabular form. Strings more than 20 characters
+ * Displays the top 20 rows of Dataset in a tabular form. Strings more than 20 characters
* will be truncated, and all cells will be aligned right.
*
* @group action
@@ -495,7 +495,7 @@ class Dataset[T] private[sql](
def show(): Unit = show(20)
/**
- * Displays the top 20 rows of [[Dataset]] in a tabular form.
+ * Displays the top 20 rows of Dataset in a tabular form.
*
* @param truncate Whether truncate long strings. If true, strings more than 20 characters will
* be truncated and all cells will be aligned right
@@ -506,7 +506,7 @@ class Dataset[T] private[sql](
def show(truncate: Boolean): Unit = show(20, truncate)
/**
- * Displays the [[Dataset]] in a tabular form. For example:
+ * Displays the Dataset in a tabular form. For example:
* {{{
* year month AVG('Adj Close) MAX('Adj Close)
* 1980 12 0.503218 0.595103
@@ -727,7 +727,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
- * Joins this [[Dataset]] returning a [[Tuple2]] for each pair where `condition` evaluates to
+ * Joins this Dataset returning a [[Tuple2]] for each pair where `condition` evaluates to
* true.
*
* This is similar to the relation `join` function with one important difference in the
@@ -807,7 +807,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
- * Using inner equi-join to join this [[Dataset]] returning a [[Tuple2]] for each pair
+ * Using inner equi-join to join this Dataset returning a [[Tuple2]] for each pair
* where `condition` evaluates to true.
*
* @param other Right side of the join.
@@ -822,7 +822,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] with each partition sorted by the given expressions.
+ * Returns a new Dataset with each partition sorted by the given expressions.
*
* This is the same operation as "SORT BY" in SQL (Hive QL).
*
@@ -835,7 +835,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] with each partition sorted by the given expressions.
+ * Returns a new Dataset with each partition sorted by the given expressions.
*
* This is the same operation as "SORT BY" in SQL (Hive QL).
*
@@ -848,7 +848,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] sorted by the specified column, all in ascending order.
+ * Returns a new Dataset sorted by the specified column, all in ascending order.
* {{{
* // The following 3 are equivalent
* ds.sort("sortcol")
@@ -865,7 +865,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] sorted by the given expressions. For example:
+ * Returns a new Dataset sorted by the given expressions. For example:
* {{{
* ds.sort($"col1", $"col2".desc)
* }}}
@@ -879,7 +879,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] sorted by the given expressions.
+ * Returns a new Dataset sorted by the given expressions.
* This is an alias of the `sort` function.
*
* @group typedrel
@@ -889,7 +889,7 @@ class Dataset[T] private[sql](
def orderBy(sortCol: String, sortCols: String*): Dataset[T] = sort(sortCol, sortCols : _*)
/**
- * Returns a new [[Dataset]] sorted by the given expressions.
+ * Returns a new Dataset sorted by the given expressions.
* This is an alias of the `sort` function.
*
* @group typedrel
@@ -923,7 +923,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] with an alias set.
+ * Returns a new Dataset with an alias set.
*
* @group typedrel
* @since 1.6.0
@@ -933,7 +933,7 @@ class Dataset[T] private[sql](
}
/**
- * (Scala-specific) Returns a new [[Dataset]] with an alias set.
+ * (Scala-specific) Returns a new Dataset with an alias set.
*
* @group typedrel
* @since 2.0.0
@@ -941,7 +941,7 @@ class Dataset[T] private[sql](
def as(alias: Symbol): Dataset[T] = as(alias.name)
/**
- * Returns a new [[Dataset]] with an alias set. Same as `as`.
+ * Returns a new Dataset with an alias set. Same as `as`.
*
* @group typedrel
* @since 2.0.0
@@ -949,7 +949,7 @@ class Dataset[T] private[sql](
def alias(alias: String): Dataset[T] = as(alias)
/**
- * (Scala-specific) Returns a new [[Dataset]] with an alias set. Same as `as`.
+ * (Scala-specific) Returns a new Dataset with an alias set. Same as `as`.
*
* @group typedrel
* @since 2.0.0
@@ -1008,7 +1008,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
- * Returns a new [[Dataset]] by computing the given [[Column]] expression for each element.
+ * Returns a new Dataset by computing the given [[Column]] expression for each element.
*
* {{{
* val ds = Seq(1, 2, 3).toDS()
@@ -1045,7 +1045,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
- * Returns a new [[Dataset]] by computing the given [[Column]] expressions for each element.
+ * Returns a new Dataset by computing the given [[Column]] expressions for each element.
*
* @group typedrel
* @since 1.6.0
@@ -1056,7 +1056,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
- * Returns a new [[Dataset]] by computing the given [[Column]] expressions for each element.
+ * Returns a new Dataset by computing the given [[Column]] expressions for each element.
*
* @group typedrel
* @since 1.6.0
@@ -1070,7 +1070,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
- * Returns a new [[Dataset]] by computing the given [[Column]] expressions for each element.
+ * Returns a new Dataset by computing the given [[Column]] expressions for each element.
*
* @group typedrel
* @since 1.6.0
@@ -1085,7 +1085,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
- * Returns a new [[Dataset]] by computing the given [[Column]] expressions for each element.
+ * Returns a new Dataset by computing the given [[Column]] expressions for each element.
*
* @group typedrel
* @since 1.6.0
@@ -1154,7 +1154,7 @@ class Dataset[T] private[sql](
}
/**
- * Groups the [[Dataset]] using the specified columns, so we can run aggregation on them. See
+ * Groups the Dataset using the specified columns, so we can run aggregation on them. See
* [[RelationalGroupedDataset]] for all the available aggregate functions.
*
* {{{
@@ -1177,7 +1177,7 @@ class Dataset[T] private[sql](
}
/**
- * Create a multi-dimensional rollup for the current [[Dataset]] using the specified columns,
+ * Create a multi-dimensional rollup for the current Dataset using the specified columns,
* so we can run aggregation on them.
* See [[RelationalGroupedDataset]] for all the available aggregate functions.
*
@@ -1201,7 +1201,7 @@ class Dataset[T] private[sql](
}
/**
- * Create a multi-dimensional cube for the current [[Dataset]] using the specified columns,
+ * Create a multi-dimensional cube for the current Dataset using the specified columns,
* so we can run aggregation on them.
* See [[RelationalGroupedDataset]] for all the available aggregate functions.
*
@@ -1225,7 +1225,7 @@ class Dataset[T] private[sql](
}
/**
- * Groups the [[Dataset]] using the specified columns, so that we can run aggregation on them.
+ * Groups the Dataset using the specified columns, so that we can run aggregation on them.
* See [[RelationalGroupedDataset]] for all the available aggregate functions.
*
* This is a variant of groupBy that can only group by existing columns using column names
@@ -1254,7 +1254,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
* (Scala-specific)
- * Reduces the elements of this [[Dataset]] using the specified binary function. The given `func`
+ * Reduces the elements of this Dataset using the specified binary function. The given `func`
* must be commutative and associative or the result may be non-deterministic.
*
* @group action
@@ -1310,7 +1310,7 @@ class Dataset[T] private[sql](
groupByKey(func.call(_))(encoder)
/**
- * Create a multi-dimensional rollup for the current [[Dataset]] using the specified columns,
+ * Create a multi-dimensional rollup for the current Dataset using the specified columns,
* so we can run aggregation on them.
* See [[RelationalGroupedDataset]] for all the available aggregate functions.
*
@@ -1339,7 +1339,7 @@ class Dataset[T] private[sql](
}
/**
- * Create a multi-dimensional cube for the current [[Dataset]] using the specified columns,
+ * Create a multi-dimensional cube for the current Dataset using the specified columns,
* so we can run aggregation on them.
* See [[RelationalGroupedDataset]] for all the available aggregate functions.
*
@@ -1367,7 +1367,7 @@ class Dataset[T] private[sql](
}
/**
- * (Scala-specific) Aggregates on the entire [[Dataset]] without groups.
+ * (Scala-specific) Aggregates on the entire Dataset without groups.
* {{{
* // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
* ds.agg("age" -> "max", "salary" -> "avg")
@@ -1382,7 +1382,7 @@ class Dataset[T] private[sql](
}
/**
- * (Scala-specific) Aggregates on the entire [[Dataset]] without groups.
+ * (Scala-specific) Aggregates on the entire Dataset without groups.
* {{{
* // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
* ds.agg(Map("age" -> "max", "salary" -> "avg"))
@@ -1395,7 +1395,7 @@ class Dataset[T] private[sql](
def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs)
/**
- * (Java-specific) Aggregates on the entire [[Dataset]] without groups.
+ * (Java-specific) Aggregates on the entire Dataset without groups.
* {{{
* // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
* ds.agg(Map("age" -> "max", "salary" -> "avg"))
@@ -1408,7 +1408,7 @@ class Dataset[T] private[sql](
def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs)
/**
- * Aggregates on the entire [[Dataset]] without groups.
+ * Aggregates on the entire Dataset without groups.
* {{{
* // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
* ds.agg(max($"age"), avg($"salary"))
@@ -1422,9 +1422,9 @@ class Dataset[T] private[sql](
def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs : _*)
/**
- * Returns a new [[Dataset]] by taking the first `n` rows. The difference between this function
+ * Returns a new Dataset by taking the first `n` rows. The difference between this function
* and `head` is that `head` is an action and returns an array (by triggering query execution)
- * while `limit` returns a new [[Dataset]].
+ * while `limit` returns a new Dataset.
*
* @group typedrel
* @since 2.0.0
@@ -1434,7 +1434,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] containing union of rows in this Dataset and another Dataset.
+ * Returns a new Dataset containing union of rows in this Dataset and another Dataset.
* This is equivalent to `UNION ALL` in SQL.
*
* To do a SQL-style set union (that does deduplication of elements), use this function followed
@@ -1447,7 +1447,7 @@ class Dataset[T] private[sql](
def unionAll(other: Dataset[T]): Dataset[T] = union(other)
/**
- * Returns a new [[Dataset]] containing union of rows in this Dataset and another Dataset.
+ * Returns a new Dataset containing union of rows in this Dataset and another Dataset.
* This is equivalent to `UNION ALL` in SQL.
*
* To do a SQL-style set union (that does deduplication of elements), use this function followed
@@ -1463,7 +1463,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] containing rows only in both this Dataset and another Dataset.
+ * Returns a new Dataset containing rows only in both this Dataset and another Dataset.
* This is equivalent to `INTERSECT` in SQL.
*
* Note that, equality checking is performed directly on the encoded representation of the data
@@ -1477,7 +1477,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] containing rows in this Dataset but not in another Dataset.
+ * Returns a new Dataset containing rows in this Dataset but not in another Dataset.
* This is equivalent to `EXCEPT` in SQL.
*
* Note that, equality checking is performed directly on the encoded representation of the data
@@ -1491,7 +1491,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] by sampling a fraction of rows.
+ * Returns a new Dataset by sampling a fraction of rows.
*
* @param withReplacement Sample with replacement or not.
* @param fraction Fraction of rows to generate.
@@ -1505,7 +1505,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] by sampling a fraction of rows, using a random seed.
+ * Returns a new Dataset by sampling a fraction of rows, using a random seed.
*
* @param withReplacement Sample with replacement or not.
* @param fraction Fraction of rows to generate.
@@ -1518,7 +1518,7 @@ class Dataset[T] private[sql](
}
/**
- * Randomly splits this [[Dataset]] with the provided weights.
+ * Randomly splits this Dataset with the provided weights.
*
* @param weights weights for splits, will be normalized if they don't sum to 1.
* @param seed Seed for sampling.
@@ -1545,7 +1545,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a Java list that contains randomly split [[Dataset]] with the provided weights.
+ * Returns a Java list that contains randomly split Dataset with the provided weights.
*
* @param weights weights for splits, will be normalized if they don't sum to 1.
* @param seed Seed for sampling.
@@ -1559,7 +1559,7 @@ class Dataset[T] private[sql](
}
/**
- * Randomly splits this [[Dataset]] with the provided weights.
+ * Randomly splits this Dataset with the provided weights.
*
* @param weights weights for splits, will be normalized if they don't sum to 1.
* @group typedrel
@@ -1570,7 +1570,7 @@ class Dataset[T] private[sql](
}
/**
- * Randomly splits this [[Dataset]] with the provided weights. Provided for the Python Api.
+ * Randomly splits this Dataset with the provided weights. Provided for the Python Api.
*
* @param weights weights for splits, will be normalized if they don't sum to 1.
* @param seed Seed for sampling.
@@ -1580,7 +1580,7 @@ class Dataset[T] private[sql](
}
/**
- * (Scala-specific) Returns a new [[Dataset]] where each row has been expanded to zero or more
+ * (Scala-specific) Returns a new Dataset where each row has been expanded to zero or more
* rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. The columns of
* the input row are implicitly joined with each row that is output by the function.
*
@@ -1623,7 +1623,7 @@ class Dataset[T] private[sql](
}
/**
- * (Scala-specific) Returns a new [[Dataset]] where a single column has been expanded to zero
+ * (Scala-specific) Returns a new Dataset where a single column has been expanded to zero
* or more rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. All
* columns of the input row are implicitly joined with each value that is output by the function.
*
@@ -1664,7 +1664,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] by adding a column or replacing the existing column that has
+ * Returns a new Dataset by adding a column or replacing the existing column that has
* the same name.
*
* @group untypedrel
@@ -1689,7 +1689,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] by adding a column with metadata.
+ * Returns a new Dataset by adding a column with metadata.
*/
private[spark] def withColumn(colName: String, col: Column, metadata: Metadata): DataFrame = {
val resolver = sparkSession.sessionState.analyzer.resolver
@@ -1710,7 +1710,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] with a column renamed.
+ * Returns a new Dataset with a column renamed.
* This is a no-op if schema doesn't contain existingName.
*
* @group untypedrel
@@ -1735,7 +1735,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] with a column dropped. This is a no-op if schema doesn't contain
+ * Returns a new Dataset with a column dropped. This is a no-op if schema doesn't contain
* column name.
*
* This method can only be used to drop top level columns. the colName string is treated
@@ -1749,7 +1749,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] with columns dropped.
+ * Returns a new Dataset with columns dropped.
* This is a no-op if schema doesn't contain column name(s).
*
* This method can only be used to drop top level columns. the colName string is treated literally
@@ -1773,8 +1773,8 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] with a column dropped.
- * This version of drop accepts a Column rather than a name.
+ * Returns a new Dataset with a column dropped.
+ * This version of drop accepts a [[Column]] rather than a name.
* This is a no-op if the Dataset doesn't have a column
* with an equivalent expression.
*
@@ -1796,7 +1796,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] that contains only the unique rows from this [[Dataset]].
+ * Returns a new Dataset that contains only the unique rows from this Dataset.
* This is an alias for `distinct`.
*
* @group typedrel
@@ -1805,7 +1805,7 @@ class Dataset[T] private[sql](
def dropDuplicates(): Dataset[T] = dropDuplicates(this.columns)
/**
- * (Scala-specific) Returns a new [[Dataset]] with duplicate rows removed, considering only
+ * (Scala-specific) Returns a new Dataset with duplicate rows removed, considering only
* the subset of columns.
*
* @group typedrel
@@ -1825,7 +1825,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] with duplicate rows removed, considering only
+ * Returns a new Dataset with duplicate rows removed, considering only
* the subset of columns.
*
* @group typedrel
@@ -1838,7 +1838,7 @@ class Dataset[T] private[sql](
* If no columns are given, this function computes statistics for all numerical columns.
*
* This function is meant for exploratory data analysis, as we make no guarantee about the
- * backward compatibility of the schema of the resulting [[Dataset]]. If you want to
+ * backward compatibility of the schema of the resulting Dataset. If you want to
* programmatically compute summary statistics, use the `agg` function instead.
*
* {{{
@@ -1937,7 +1937,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
* (Scala-specific)
- * Returns a new [[Dataset]] that only contains elements where `func` returns `true`.
+ * Returns a new Dataset that only contains elements where `func` returns `true`.
*
* @group typedrel
* @since 1.6.0
@@ -1954,7 +1954,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
* (Java-specific)
- * Returns a new [[Dataset]] that only contains elements where `func` returns `true`.
+ * Returns a new Dataset that only contains elements where `func` returns `true`.
*
* @group typedrel
* @since 1.6.0
@@ -1971,7 +1971,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
* (Scala-specific)
- * Returns a new [[Dataset]] that contains the result of applying `func` to each element.
+ * Returns a new Dataset that contains the result of applying `func` to each element.
*
* @group typedrel
* @since 1.6.0
@@ -1984,7 +1984,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
* (Java-specific)
- * Returns a new [[Dataset]] that contains the result of applying `func` to each element.
+ * Returns a new Dataset that contains the result of applying `func` to each element.
*
* @group typedrel
* @since 1.6.0
@@ -1998,7 +1998,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
* (Scala-specific)
- * Returns a new [[Dataset]] that contains the result of applying `func` to each partition.
+ * Returns a new Dataset that contains the result of applying `func` to each partition.
*
* @group typedrel
* @since 1.6.0
@@ -2014,7 +2014,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
* (Java-specific)
- * Returns a new [[Dataset]] that contains the result of applying `f` to each partition.
+ * Returns a new Dataset that contains the result of applying `f` to each partition.
*
* @group typedrel
* @since 1.6.0
@@ -2043,7 +2043,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
* (Scala-specific)
- * Returns a new [[Dataset]] by first applying a function to all elements of this [[Dataset]],
+ * Returns a new Dataset by first applying a function to all elements of this Dataset,
* and then flattening the results.
*
* @group typedrel
@@ -2056,7 +2056,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
* (Java-specific)
- * Returns a new [[Dataset]] by first applying a function to all elements of this [[Dataset]],
+ * Returns a new Dataset by first applying a function to all elements of this Dataset,
* and then flattening the results.
*
* @group typedrel
@@ -2080,7 +2080,7 @@ class Dataset[T] private[sql](
/**
* (Java-specific)
- * Runs `func` on each element of this [[Dataset]].
+ * Runs `func` on each element of this Dataset.
*
* @group action
* @since 1.6.0
@@ -2088,7 +2088,7 @@ class Dataset[T] private[sql](
def foreach(func: ForeachFunction[T]): Unit = foreach(func.call(_))
/**
- * Applies a function `f` to each partition of this [[Dataset]].
+ * Applies a function `f` to each partition of this Dataset.
*
* @group action
* @since 1.6.0
@@ -2099,7 +2099,7 @@ class Dataset[T] private[sql](
/**
* (Java-specific)
- * Runs `func` on each partition of this [[Dataset]].
+ * Runs `func` on each partition of this Dataset.
*
* @group action
* @since 1.6.0
@@ -2108,7 +2108,7 @@ class Dataset[T] private[sql](
foreachPartition(it => func.call(it.asJava))
/**
- * Returns the first `n` rows in the [[Dataset]].
+ * Returns the first `n` rows in the Dataset.
*
* Running take requires moving data into the application's driver process, and doing so with
* a very large `n` can crash the driver process with OutOfMemoryError.
@@ -2119,7 +2119,7 @@ class Dataset[T] private[sql](
def take(n: Int): Array[T] = head(n)
/**
- * Returns the first `n` rows in the [[Dataset]] as a list.
+ * Returns the first `n` rows in the Dataset as a list.
*
* Running take requires moving data into the application's driver process, and doing so with
* a very large `n` can crash the driver process with OutOfMemoryError.
@@ -2130,7 +2130,7 @@ class Dataset[T] private[sql](
def takeAsList(n: Int): java.util.List[T] = java.util.Arrays.asList(take(n) : _*)
/**
- * Returns an array that contains all of [[Row]]s in this [[Dataset]].
+ * Returns an array that contains all of [[Row]]s in this Dataset.
*
* Running collect requires moving all the data into the application's driver process, and
* doing so on a very large dataset can crash the driver process with OutOfMemoryError.
@@ -2143,7 +2143,7 @@ class Dataset[T] private[sql](
def collect(): Array[T] = collect(needCallback = true)
/**
- * Returns a Java list that contains all of [[Row]]s in this [[Dataset]].
+ * Returns a Java list that contains all of [[Row]]s in this Dataset.
*
* Running collect requires moving all the data into the application's driver process, and
* doing so on a very large dataset can crash the driver process with OutOfMemoryError.
@@ -2171,9 +2171,9 @@ class Dataset[T] private[sql](
}
/**
- * Return an iterator that contains all of [[Row]]s in this [[Dataset]].
+ * Return an iterator that contains all of [[Row]]s in this Dataset.
*
- * The iterator will consume as much memory as the largest partition in this [[Dataset]].
+ * The iterator will consume as much memory as the largest partition in this Dataset.
*
* Note: this results in multiple Spark jobs, and if the input Dataset is the result
* of a wide transformation (e.g. join with different partitioners), to avoid
@@ -2189,7 +2189,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns the number of rows in the [[Dataset]].
+ * Returns the number of rows in the Dataset.
* @group action
* @since 1.6.0
*/
@@ -2198,7 +2198,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] that has exactly `numPartitions` partitions.
+ * Returns a new Dataset that has exactly `numPartitions` partitions.
*
* @group typedrel
* @since 1.6.0
@@ -2208,7 +2208,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] partitioned by the given partitioning expressions into
+ * Returns a new Dataset partitioned by the given partitioning expressions into
* `numPartitions`. The resulting Dataset is hash partitioned.
*
* This is the same operation as "DISTRIBUTE BY" in SQL (Hive QL).
@@ -2222,7 +2222,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] partitioned by the given partitioning expressions, using
+ * Returns a new Dataset partitioned by the given partitioning expressions, using
* `spark.sql.shuffle.partitions` as number of partitions.
* The resulting Dataset is hash partitioned.
*
@@ -2237,7 +2237,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] that has exactly `numPartitions` partitions.
+ * Returns a new Dataset that has exactly `numPartitions` partitions.
* Similar to coalesce defined on an [[RDD]], this operation results in a narrow dependency, e.g.
* if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of
* the 100 new partitions will claim 10 of the current partitions.
@@ -2250,7 +2250,7 @@ class Dataset[T] private[sql](
}
/**
- * Returns a new [[Dataset]] that contains only the unique rows from this [[Dataset]].
+ * Returns a new Dataset that contains only the unique rows from this Dataset.
* This is an alias for `dropDuplicates`.
*
* Note that, equality checking is performed directly on the encoded representation of the data
@@ -2262,7 +2262,7 @@ class Dataset[T] private[sql](
def distinct(): Dataset[T] = dropDuplicates()
/**
- * Persist this [[Dataset]] with the default storage level (`MEMORY_AND_DISK`).
+ * Persist this Dataset with the default storage level (`MEMORY_AND_DISK`).
*
* @group basic
* @since 1.6.0
@@ -2273,7 +2273,7 @@ class Dataset[T] private[sql](
}
/**
- * Persist this [[Dataset]] with the default storage level (`MEMORY_AND_DISK`).
+ * Persist this Dataset with the default storage level (`MEMORY_AND_DISK`).
*
* @group basic
* @since 1.6.0
@@ -2281,7 +2281,7 @@ class Dataset[T] private[sql](
def cache(): this.type = persist()
/**
- * Persist this [[Dataset]] with the given storage level.
+ * Persist this Dataset with the given storage level.
* @param newLevel One of: `MEMORY_ONLY`, `MEMORY_AND_DISK`, `MEMORY_ONLY_SER`,
* `MEMORY_AND_DISK_SER`, `DISK_ONLY`, `MEMORY_ONLY_2`,
* `MEMORY_AND_DISK_2`, etc.
@@ -2295,7 +2295,7 @@ class Dataset[T] private[sql](
}
/**
- * Mark the [[Dataset]] as non-persistent, and remove all blocks for it from memory and disk.
+ * Mark the Dataset as non-persistent, and remove all blocks for it from memory and disk.
*
* @param blocking Whether to block until all blocks are deleted.
*
@@ -2308,7 +2308,7 @@ class Dataset[T] private[sql](
}
/**
- * Mark the [[Dataset]] as non-persistent, and remove all blocks for it from memory and disk.
+ * Mark the Dataset as non-persistent, and remove all blocks for it from memory and disk.
*
* @group basic
* @since 1.6.0
@@ -2316,7 +2316,7 @@ class Dataset[T] private[sql](
def unpersist(): this.type = unpersist(blocking = false)
/**
- * Represents the content of the [[Dataset]] as an [[RDD]] of [[T]].
+ * Represents the content of the Dataset as an [[RDD]] of [[T]].
*
* @group basic
* @since 1.6.0
@@ -2330,21 +2330,21 @@ class Dataset[T] private[sql](
}
/**
- * Returns the content of the [[Dataset]] as a [[JavaRDD]] of [[Row]]s.
+ * Returns the content of the Dataset as a [[JavaRDD]] of [[Row]]s.
* @group basic
* @since 1.6.0
*/
def toJavaRDD: JavaRDD[T] = rdd.toJavaRDD()
/**
- * Returns the content of the [[Dataset]] as a [[JavaRDD]] of [[Row]]s.
+ * Returns the content of the Dataset as a [[JavaRDD]] of [[Row]]s.
* @group basic
* @since 1.6.0
*/
def javaRDD: JavaRDD[T] = toJavaRDD
/**
- * Registers this [[Dataset]] as a temporary table using the given name. The lifetime of this
+ * Registers this Dataset as a temporary table using the given name. The lifetime of this
* temporary table is tied to the [[SparkSession]] that was used to create this Dataset.
*
* @group basic
@@ -2394,7 +2394,7 @@ class Dataset[T] private[sql](
/**
* :: Experimental ::
- * Interface for saving the content of the [[Dataset]] out into external storage or streams.
+ * Interface for saving the content of the Dataset out into external storage or streams.
*
* @group basic
* @since 1.6.0
@@ -2403,7 +2403,7 @@ class Dataset[T] private[sql](
def write: DataFrameWriter = new DataFrameWriter(toDF())
/**
- * Returns the content of the [[Dataset]] as a Dataset of JSON strings.
+ * Returns the content of the Dataset as a Dataset of JSON strings.
* @since 2.0.0
*/
def toJSON: Dataset[String] = {