path: root/sql
Diffstat (limited to 'sql')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala  4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala  4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala  17
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala  16
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala  4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala  62
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala  3
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala  4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala  8
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala  16
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala  16
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala  24
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala  6
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala  20
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala  8
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala  10
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala  8
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala  2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala  4
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala  4
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala  2
38 files changed, 143 insertions, 133 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index 65f9142964..a821d2ca34 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -343,7 +343,7 @@ trait Row extends Serializable {
}
/**
- * Returns a Map(name -> value) for the requested fieldNames
+ * Returns a Map consisting of names and values for the requested fieldNames
* For primitive types if value is null it returns 'zero value' specific for primitive
* ie. 0 for Int - use isNullAt to ensure that value is not null
*
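For context, a minimal sketch of how the getValuesMap call documented above is typically used; the session setup, data, and column names below are illustrative, not part of the patch:
{{{
import org.apache.spark.sql.{Row, SparkSession}

val spark = SparkSession.builder().appName("row-values-map").master("local[*]").getOrCreate()
import spark.implicits._

// A tiny DataFrame with known column names.
val df = Seq(("Alice", 29), ("Bob", 31)).toDF("name", "age")
val first: Row = df.head()

// Map of name -> value for the requested field names; primitive columns that are null
// come back as their 'zero value', so use isNullAt to distinguish a real null.
val values: Map[String, Any] = first.getValuesMap[Any](Seq("name", "age"))
println(values)  // Map(name -> Alice, age -> 29)
}}}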
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
index 302054708c..1a93f45903 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
@@ -37,8 +37,8 @@ import org.apache.spark.sql.types._
* - Xiangrui Meng. "Simpler Online Updates for Arbitrary-Order Central Moments."
* 2015. http://arxiv.org/abs/1510.04923
*
- * @see [[https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
- * Algorithms for calculating variance (Wikipedia)]]
+ * @see <a href="https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance">
+ * Algorithms for calculating variance (Wikipedia)</a>
*
* @param child to compute central moments of.
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala
index a4a358a242..02c8318b4d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.util.TypeUtils
/**
* The data type representing `Array[Byte]` values.
- * Please use the singleton [[DataTypes.BinaryType]].
+ * Please use the singleton `DataTypes.BinaryType`.
*/
@InterfaceStability.Stable
class BinaryType private() extends AtomicType {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala
index 059f89f9cd..cee78f4b4a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Boolean` values. Please use the singleton [[DataTypes.BooleanType]].
+ * The data type representing `Boolean` values. Please use the singleton `DataTypes.BooleanType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala
index bc6251f024..b1dd5eda36 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala
@@ -24,7 +24,7 @@ import org.apache.spark.annotation.InterfaceStability
import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Byte` values. Please use the singleton [[DataTypes.ByteType]].
+ * The data type representing `Byte` values. Please use the singleton `DataTypes.ByteType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
index 21f3497ba0..2342036a57 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
@@ -23,7 +23,7 @@ import org.apache.spark.annotation.InterfaceStability
* The data type representing calendar time intervals. The calendar time interval is stored
* internally in two components: number of months the number of microseconds.
*
- * Please use the singleton [[DataTypes.CalendarIntervalType]].
+ * Please use the singleton `DataTypes.CalendarIntervalType`.
*
* @note Calendar intervals are not comparable.
*
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala
index 8d0ecc051f..0c0574b845 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
* A date type, supporting "0001-01-01" through "9999-12-31".
*
- * Please use the singleton [[DataTypes.DateType]].
+ * Please use the singleton `DataTypes.DateType`.
*
* Internally, this is represented as the number of days from 1970-01-01.
*
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
index d7ca0cbeed..cecad3b7b4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
@@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression
*
* The default precision and scale is (10, 0).
*
- * Please use [[DataTypes.createDecimalType()]] to create a specific instance.
+ * Please use `DataTypes.createDecimalType()` to create a specific instance.
*
* @since 1.3.0
*/
@@ -92,7 +92,7 @@ case class DecimalType(precision: Int, scale: Int) extends FractionalType {
}
/**
- * The default size of a value of the DecimalType is 8 bytes (precision <= 18) or 16 bytes.
+ * The default size of a value of the DecimalType is 8 bytes (precision &lt;= 18) or 16 bytes.
*/
override def defaultSize: Int = if (precision <= Decimal.MAX_LONG_DIGITS) 8 else 16
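Since several hunks above steer readers toward the `DataTypes` singletons and factory methods instead of Scaladoc links, here is a small schema-building sketch; the field names are made up:
{{{
import org.apache.spark.sql.types.{DataTypes, StructType}

// Build a schema from the DataTypes singletons and the createDecimalType factory.
val schema: StructType = new StructType()
  .add("id", DataTypes.LongType)
  .add("price", DataTypes.createDecimalType(18, 2))  // precision <= 18 keeps the 8-byte representation
  .add("label", DataTypes.StringType)

println(schema.simpleString)
}}}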
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala
index c21ac0e43e..400f7aed6a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
import org.apache.spark.util.Utils
/**
- * The data type representing `Double` values. Please use the singleton [[DataTypes.DoubleType]].
+ * The data type representing `Double` values. Please use the singleton `DataTypes.DoubleType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala
index c5bf8883ba..b9812b236d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
import org.apache.spark.util.Utils
/**
- * The data type representing `Float` values. Please use the singleton [[DataTypes.FloatType]].
+ * The data type representing `Float` values. Please use the singleton `DataTypes.FloatType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala
index 724e59c0bc..dca612ecbf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Int` values. Please use the singleton [[DataTypes.IntegerType]].
+ * The data type representing `Int` values. Please use the singleton `DataTypes.IntegerType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala
index 42285a9d0a..396c335570 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala
@@ -24,7 +24,7 @@ import org.apache.spark.annotation.InterfaceStability
import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Long` values. Please use the singleton [[DataTypes.LongType]].
+ * The data type representing `Long` values. Please use the singleton `DataTypes.LongType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
index 3a32aa43d1..fbf3a61786 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
@@ -25,7 +25,7 @@ import org.apache.spark.annotation.InterfaceStability
/**
* The data type for Maps. Keys in a map are not allowed to have `null` values.
*
- * Please use [[DataTypes.createMapType()]] to create a specific instance.
+ * Please use `DataTypes.createMapType()` to create a specific instance.
*
* @param keyType The data type of map keys.
* @param valueType The data type of map values.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
index bdf9a819d0..494225b47a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
@@ -21,7 +21,7 @@ import org.apache.spark.annotation.InterfaceStability
/**
- * The data type representing `NULL` values. Please use the singleton [[DataTypes.NullType]].
+ * The data type representing `NULL` values. Please use the singleton `DataTypes.NullType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala
index 3fee299d57..1410d5ba0e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala
@@ -24,7 +24,7 @@ import org.apache.spark.annotation.InterfaceStability
import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Short` values. Please use the singleton [[DataTypes.ShortType]].
+ * The data type representing `Short` values. Please use the singleton `DataTypes.ShortType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala
index 5d5a6f52a3..d1c0da3479 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
import org.apache.spark.unsafe.types.UTF8String
/**
- * The data type representing `String` values. Please use the singleton [[DataTypes.StringType]].
+ * The data type representing `String` values. Please use the singleton `DataTypes.StringType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala
index 4540d8358a..2875995420 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
* The data type representing `java.sql.Timestamp` values.
- * Please use the singleton [[DataTypes.TimestampType]].
+ * Please use the singleton `DataTypes.TimestampType`.
*
* @since 1.3.0
*/
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index a77937efd7..5be9a99369 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -239,8 +239,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
}
/**
- * Loads a JSON file ([[http://jsonlines.org/ JSON Lines text format or newline-delimited JSON]])
- * and returns the result as a [[DataFrame]].
+ * Loads a JSON file (<a href="http://jsonlines.org/">JSON Lines text format or
+ * newline-delimited JSON</a>) and returns the result as a [[DataFrame]].
* See the documentation on the overloaded `json()` method with varargs for more details.
*
* @since 1.4.0
@@ -251,8 +251,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
}
/**
- * Loads a JSON file ([[http://jsonlines.org/ JSON Lines text format or newline-delimited JSON]])
- * and returns the result as a [[DataFrame]].
+ * Loads a JSON file (<a href="http://jsonlines.org/">JSON Lines text format or
+ * newline-delimited JSON</a>) and returns the result as a [[DataFrame]].
*
* This function goes through the input once to determine the input schema. If you know the
* schema in advance, use the version that specifies the schema to avoid the extra scan.
@@ -297,8 +297,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
def json(paths: String*): DataFrame = format("json").load(paths : _*)
/**
- * Loads a `JavaRDD[String]` storing JSON objects ([[http://jsonlines.org/ JSON Lines text format
- * or newline-delimited JSON]]) and returns the result as a [[DataFrame]].
+ * Loads a `JavaRDD[String]` storing JSON objects (<a href="http://jsonlines.org/">JSON
+ * Lines text format or newline-delimited JSON</a>) and returns the result as
+ * a [[DataFrame]].
*
* Unless the schema is specified using [[schema]] function, this function goes through the
* input once to determine the input schema.
@@ -309,8 +310,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
def json(jsonRDD: JavaRDD[String]): DataFrame = json(jsonRDD.rdd)
/**
- * Loads an `RDD[String]` storing JSON objects ([[http://jsonlines.org/ JSON Lines text format or
- * newline-delimited JSON]]) and returns the result as a [[DataFrame]].
+ * Loads an `RDD[String]` storing JSON objects (<a href="http://jsonlines.org/">JSON Lines
+ * text format or newline-delimited JSON</a>) and returns the result as a [[DataFrame]].
*
* Unless the schema is specified using [[schema]] function, this function goes through the
* input once to determine the input schema.
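A short usage sketch of the JSON Lines reader whose docs change above; the path and schema are hypothetical:
{{{
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{StringType, StructField, StructType}

val spark = SparkSession.builder().appName("json-lines").master("local[*]").getOrCreate()

// Supplying the schema up front avoids the extra pass over the input used for inference.
val schema = StructType(Seq(
  StructField("name", StringType),
  StructField("city", StringType)))

val people = spark.read.schema(schema).json("/tmp/people.jsonl")  // hypothetical path
people.show()
}}}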
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index 6335fc4579..a9a861c463 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -48,8 +48,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
*
* This method implements a variation of the Greenwald-Khanna algorithm (with some speed
* optimizations).
- * The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670 Space-efficient
- * Online Computation of Quantile Summaries]] by Greenwald and Khanna.
+ * The algorithm was first presented in <a href="http://dx.doi.org/10.1145/375663.375670">
+ * Space-efficient Online Computation of Quantile Summaries</a> by Greenwald and Khanna.
*
* @param col the name of the numerical column
* @param probabilities a list of quantile probabilities
@@ -184,7 +184,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
/**
* Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp,
+ * Schenker, and Papadimitriou.
* The `support` should be greater than 1e-4.
*
* This function is meant for exploratory data analysis, as we make no guarantee about the
@@ -230,7 +231,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
/**
* Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp,
+ * Schenker, and Papadimitriou.
* Uses a `default` support of 1%.
*
* This function is meant for exploratory data analysis, as we make no guarantee about the
@@ -248,7 +250,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
/**
* (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp, Schenker,
+ * and Papadimitriou.
*
* This function is meant for exploratory data analysis, as we make no guarantee about the
* backward compatibility of the schema of the resulting [[DataFrame]].
@@ -291,7 +294,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
/**
* (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp, Schenker,
+ * and Papadimitriou.
* Uses a `default` support of 1%.
*
* This function is meant for exploratory data analysis, as we make no guarantee about the
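For reference, a minimal sketch of the two APIs whose docs change above, approxQuantile (Greenwald-Khanna) and freqItems (Karp, Schenker, and Papadimitriou); the column names and parameter values are illustrative:
{{{
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("stat-functions").master("local[*]").getOrCreate()

val df = spark.range(0, 1000).toDF("id").selectExpr("id", "id % 7 as bucket")

// Approximate 50th and 90th percentiles of "id" with a 1% relative error.
val quantiles: Array[Double] = df.stat.approxQuantile("id", Array(0.5, 0.9), 0.01)

// Frequent items in "bucket" with 10% minimum support (may contain false positives).
val frequent = df.stat.freqItems(Array("bucket"), 0.1)
frequent.show()
}}}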
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 15281f24fa..2d863422fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -442,8 +442,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
}
/**
- * Saves the content of the [[DataFrame]] in JSON format ([[http://jsonlines.org/ JSON Lines text
- * format or newline-delimited JSON]]) at the specified path.
+ * Saves the content of the [[DataFrame]] in JSON format (<a href="http://jsonlines.org/">
+ * JSON Lines text format or newline-delimited JSON</a>) at the specified path.
* This is equivalent to:
* {{{
* format("json").save(path)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 2fae93651b..858fa4c760 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -172,7 +172,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
def experimental: ExperimentalMethods = sparkSession.experimental
/**
- * Returns a [[DataFrame]] with no rows or columns.
+ * Returns a `DataFrame` with no rows or columns.
*
* @group basic
* @since 1.3.0
@@ -254,7 +254,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
* (Scala-specific) Implicit methods available in Scala for converting
- * common Scala objects into [[DataFrame]]s.
+ * common Scala objects into `DataFrame`s.
*
* {{{
* val sqlContext = new SQLContext(sc)
@@ -298,7 +298,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]].
+ * Convert a [[BaseRelation]] created for external data sources into a `DataFrame`.
*
* @group dataframes
* @since 1.3.0
@@ -309,7 +309,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: DeveloperApi ::
- * Creates a [[DataFrame]] from an [[RDD]] containing [[Row]]s using the given schema.
+ * Creates a `DataFrame` from an [[RDD]] containing [[Row]]s using the given schema.
* It is important to make sure that the structure of every [[Row]] of the provided RDD matches
* the provided schema. Otherwise, there will be runtime exception.
* Example:
@@ -438,7 +438,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: DeveloperApi ::
- * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s using the given schema.
+ * Creates a `DataFrame` from a [[JavaRDD]] containing [[Row]]s using the given schema.
* It is important to make sure that the structure of every [[Row]] of the provided RDD matches
* the provided schema. Otherwise, there will be runtime exception.
*
@@ -453,7 +453,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: DeveloperApi ::
- * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema.
+ * Creates a `DataFrame` from a [[java.util.List]] containing [[Row]]s using the given schema.
* It is important to make sure that the structure of every [[Row]] of the provided List matches
* the provided schema. Otherwise, there will be runtime exception.
*
@@ -504,7 +504,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Returns a [[DataFrameReader]] that can be used to read non-streaming data in as a
- * [[DataFrame]].
+ * `DataFrame`.
* {{{
* sqlContext.read.parquet("/path/to/file.parquet")
* sqlContext.read.schema(schema).json("/path/to/file.json")
@@ -518,7 +518,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Returns a [[DataStreamReader]] that can be used to read streaming data in as a [[DataFrame]].
+ * Returns a [[DataStreamReader]] that can be used to read streaming data in as a `DataFrame`.
* {{{
* sparkSession.readStream.parquet("/path/to/directory/of/parquet/files")
* sparkSession.readStream.schema(schema).json("/path/to/directory/of/json/files")
@@ -617,7 +617,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Registers the given [[DataFrame]] as a temporary table in the catalog. Temporary tables exist
+ * Registers the given `DataFrame` as a temporary table in the catalog. Temporary tables exist
* only during the lifetime of this instance of SQLContext.
*/
private[sql] def registerDataFrameAsTable(df: DataFrame, tableName: String): Unit = {
@@ -638,7 +638,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements
+ * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements
* in a range from 0 to `end` (exclusive) with step value 1.
*
* @since 1.4.1
@@ -650,7 +650,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements
+ * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with step value 1.
*
* @since 1.4.0
@@ -662,7 +662,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements
+ * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with a step value.
*
* @since 2.0.0
@@ -676,7 +676,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements
+ * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements
* in an range from `start` to `end` (exclusive) with an step value, with partition number
* specified.
*
@@ -690,7 +690,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Executes a SQL query using Spark, returning the result as a [[DataFrame]]. The dialect that is
+ * Executes a SQL query using Spark, returning the result as a `DataFrame`. The dialect that is
* used for SQL parsing can be configured with 'spark.sql.dialect'.
*
* @group basic
@@ -699,7 +699,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
def sql(sqlText: String): DataFrame = sparkSession.sql(sqlText)
/**
- * Returns the specified table as a [[DataFrame]].
+ * Returns the specified table as a `DataFrame`.
*
* @group ddl_ops
* @since 1.3.0
@@ -709,7 +709,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Returns a [[DataFrame]] containing names of existing tables in the current database.
+ * Returns a `DataFrame` containing names of existing tables in the current database.
* The returned DataFrame has two columns, tableName and isTemporary (a Boolean
* indicating if a table is a temporary one or not).
*
@@ -721,7 +721,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Returns a [[DataFrame]] containing names of existing tables in the given database.
+ * Returns a `DataFrame` containing names of existing tables in the given database.
* The returned DataFrame has two columns, tableName and isTemporary (a Boolean
* indicating if a table is a temporary one or not).
*
@@ -799,8 +799,8 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty
- * [[DataFrame]] if no paths are passed in.
+ * Loads a Parquet file, returning the result as a `DataFrame`. This function returns an empty
+ * `DataFrame` if no paths are passed in.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().parquet()`.
@@ -816,7 +816,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Loads a JSON file (one object per line), returning the result as a [[DataFrame]].
+ * Loads a JSON file (one object per line), returning the result as a `DataFrame`.
* It goes through the entire dataset once to determine the schema.
*
* @group specificdata
@@ -829,7 +829,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads a JSON file (one object per line) and applies the given schema,
- * returning the result as a [[DataFrame]].
+ * returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -850,7 +850,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
- * [[DataFrame]].
+ * `DataFrame`.
* It goes through the entire dataset once to determine the schema.
*
* @group specificdata
@@ -861,7 +861,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
- * [[DataFrame]].
+ * `DataFrame`.
* It goes through the entire dataset once to determine the schema.
*
* @group specificdata
@@ -872,7 +872,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema,
- * returning the result as a [[DataFrame]].
+ * returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -884,7 +884,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads an JavaRDD<String> storing JSON objects (one object per record) and applies the given
- * schema, returning the result as a [[DataFrame]].
+ * schema, returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -896,7 +896,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads an RDD[String] storing JSON objects (one object per record) inferring the
- * schema, returning the result as a [[DataFrame]].
+ * schema, returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -908,7 +908,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the
- * schema, returning the result as a [[DataFrame]].
+ * schema, returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -995,7 +995,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
+ * Construct a `DataFrame` representing the database table accessible via JDBC URL
* url named table.
*
* @group specificdata
@@ -1007,7 +1007,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
+ * Construct a `DataFrame` representing the database table accessible via JDBC URL
* url named table. Partitions of the table will be retrieved in parallel based on the parameters
* passed to this function.
*
@@ -1031,10 +1031,10 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
+ * Construct a `DataFrame` representing the database table accessible via JDBC URL
* url named table. The theParts parameter gives a list expressions
* suitable for inclusion in WHERE clauses; each one defines one partition
- * of the [[DataFrame]].
+ * of the `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().jdbc()`.
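Most of the SQLContext hunks simply switch `[[DataFrame]]` links to plain `DataFrame`, so a brief reminder of how this legacy entry point is driven may help; SQLContext is kept for compatibility and SparkSession is the 2.0+ front door:
{{{
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

val sc = new SparkContext(new SparkConf().setAppName("sqlcontext-sketch").setMaster("local[*]"))
val sqlContext = new SQLContext(sc)

// A DataFrame with a single LongType column named "id", as the range() docs describe.
val ids = sqlContext.range(0, 10)
ids.createOrReplaceTempView("ids")

val evens = sqlContext.sql("SELECT id FROM ids WHERE id % 2 = 0")
evens.show()
}}}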
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala
index b9dbfcf773..cdb755edc7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala
@@ -69,7 +69,8 @@ object FrequentItems extends Logging {
/**
* Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp, Schenker,
+ * and Papadimitriou.
* The `support` should be greater than 1e-4.
* For Internal use only.
*
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index c02b154987..2b2e706125 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -41,8 +41,8 @@ object StatFunctions extends Logging {
*
* This method implements a variation of the Greenwald-Khanna algorithm (with some speed
* optimizations).
- * The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670 Space-efficient
- * Online Computation of Quantile Summaries]] by Greenwald and Khanna.
+ * The algorithm was first presented in <a href="http://dx.doi.org/10.1145/375663.375670">
+ * Space-efficient Online Computation of Quantile Summaries</a> by Greenwald and Khanna.
*
* @param df the dataframe
* @param cols numerical columns of the dataframe
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala
index eea9841400..058c38c8cb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression
/**
* :: Experimental ::
- * A base class for user-defined aggregations, which can be used in [[Dataset]] operations to take
+ * A base class for user-defined aggregations, which can be used in `Dataset` operations to take
* all of the elements of a group and reduce them to a single value.
*
* For example, the following aggregator extracts an `int` from a specific class and adds them up:
@@ -80,19 +80,19 @@ abstract class Aggregator[-IN, BUF, OUT] extends Serializable {
def finish(reduction: BUF): OUT
/**
- * Specifies the [[Encoder]] for the intermediate value type.
+ * Specifies the `Encoder` for the intermediate value type.
* @since 2.0.0
*/
def bufferEncoder: Encoder[BUF]
/**
- * Specifies the [[Encoder]] for the final ouput value type.
+ * Specifies the `Encoder` for the final output value type.
* @since 2.0.0
*/
def outputEncoder: Encoder[OUT]
/**
- * Returns this `Aggregator` as a [[TypedColumn]] that can be used in [[Dataset]].
+ * Returns this `Aggregator` as a `TypedColumn` that can be used in `Dataset`
* operations.
* @since 1.6.0
*/
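A compact Aggregator sketch along the lines the class doc describes, showing where bufferEncoder and outputEncoder fit; the Data case class and object name are hypothetical:
{{{
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.expressions.Aggregator

case class Data(i: Int)

// Sums the `i` field of Data; Long is both the intermediate buffer and the output type.
object SumOfI extends Aggregator[Data, Long, Long] {
  def zero: Long = 0L
  def reduce(b: Long, a: Data): Long = b + a.i
  def merge(b1: Long, b2: Long): Long = b1 + b2
  def finish(reduction: Long): Long = reduction
  def bufferEncoder: Encoder[Long] = Encoders.scalaLong
  def outputEncoder: Encoder[Long] = Encoders.scalaLong
}

// Usage, given ds: Dataset[Data]:  ds.select(SumOfI.toColumn)
}}}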
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
index 36dd5f78ac..b13fe70160 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.functions
import org.apache.spark.sql.types.DataType
/**
- * A user-defined function. To create one, use the `udf` functions in [[functions]].
+ * A user-defined function. To create one, use the `udf` functions in `functions`.
*
* As an example:
* {{{
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
index 327bc379d4..f3cf3052ea 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
@@ -117,8 +117,8 @@ object Window {
* "current row", while "-1" means the row before the current row, and "5" means the fifth row
* after the current row.
*
- * We recommend users use [[Window.unboundedPreceding]], [[Window.unboundedFollowing]],
- * and [[Window.currentRow]] to specify special boundary values, rather than using integral
+ * We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A row based boundary is based on the position of the row within the partition.
@@ -148,9 +148,9 @@ object Window {
* }}}
*
* @param start boundary start, inclusive. The frame is unbounded if this is
- * the minimum long value ([[Window.unboundedPreceding]]).
+ * the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value ([[Window.unboundedFollowing]]).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 2.1.0
*/
// Note: when updating the doc for this method, also update WindowSpec.rowsBetween.
@@ -166,8 +166,8 @@ object Window {
* while "-1" means one off before the current row, and "5" means the five off after the
* current row.
*
- * We recommend users use [[Window.unboundedPreceding]], [[Window.unboundedFollowing]],
- * and [[Window.currentRow]] to specify special boundary values, rather than using integral
+ * We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A range based boundary is based on the actual value of the ORDER BY
@@ -200,9 +200,9 @@ object Window {
* }}}
*
* @param start boundary start, inclusive. The frame is unbounded if this is
- * the minimum long value ([[Window.unboundedPreceding]]).
+ * the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value ([[Window.unboundedFollowing]]).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 2.1.0
*/
// Note: when updating the doc for this method, also update WindowSpec.rangeBetween.
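A sketch of the boundary constants the revised docs recommend over raw Long values; the column names are illustrative:
{{{
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

// Running total per key: every row from the start of the partition up to the current row,
// written with the named constants instead of Long.MinValue / 0.
val runningTotal = Window
  .partitionBy("key")
  .orderBy("ts")
  .rowsBetween(Window.unboundedPreceding, Window.currentRow)

// df.withColumn("running_sum", sum("value").over(runningTotal))
}}}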
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
index 4a8ce695bd..de7d7a1772 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
@@ -85,8 +85,8 @@ class WindowSpec private[sql](
* "current row", while "-1" means the row before the current row, and "5" means the fifth row
* after the current row.
*
- * We recommend users use [[Window.unboundedPreceding]], [[Window.unboundedFollowing]],
- * and [[Window.currentRow]] to specify special boundary values, rather than using integral
+ * We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A row based boundary is based on the position of the row within the partition.
@@ -116,9 +116,9 @@ class WindowSpec private[sql](
* }}}
*
* @param start boundary start, inclusive. The frame is unbounded if this is
- * the minimum long value ([[Window.unboundedPreceding]]).
+ * the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value ([[Window.unboundedFollowing]]).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 1.4.0
*/
// Note: when updating the doc for this method, also update Window.rowsBetween.
@@ -133,8 +133,8 @@ class WindowSpec private[sql](
* while "-1" means one off before the current row, and "5" means the five off after the
* current row.
*
- * We recommend users use [[Window.unboundedPreceding]], [[Window.unboundedFollowing]],
- * and [[Window.currentRow]] to specify special boundary values, rather than using integral
+ * We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A range based boundary is based on the actual value of the ORDER BY
@@ -167,9 +167,9 @@ class WindowSpec private[sql](
* }}}
*
* @param start boundary start, inclusive. The frame is unbounded if this is
- * the minimum long value ([[Window.unboundedPreceding]]).
+ * the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value ([[Window.unboundedFollowing]]).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 1.4.0
*/
// Note: when updating the doc for this method, also update Window.rangeBetween.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
index aa71cb9e3b..650ffd4586 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.execution.aggregate._
/**
* :: Experimental ::
- * Type-safe functions available for [[Dataset]] operations in Scala.
+ * Type-safe functions available for `Dataset` operations in Scala.
*
* Java users should use [[org.apache.spark.sql.expressions.javalang.typed]].
*
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
index bc9788d81f..4976b875fa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
@@ -32,9 +32,9 @@ import org.apache.spark.sql.types._
abstract class UserDefinedAggregateFunction extends Serializable {
/**
- * A [[StructType]] represents data types of input arguments of this aggregate function.
+ * A `StructType` represents data types of input arguments of this aggregate function.
* For example, if a [[UserDefinedAggregateFunction]] expects two input arguments
- * with type of [[DoubleType]] and [[LongType]], the returned [[StructType]] will look like
+ * with type of `DoubleType` and `LongType`, the returned `StructType` will look like
*
* ```
* new StructType()
@@ -42,7 +42,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
* .add("longInput", LongType)
* ```
*
- * The name of a field of this [[StructType]] is only used to identify the corresponding
+ * The name of a field of this `StructType` is only used to identify the corresponding
* input argument. Users can choose names to identify the input arguments.
*
* @since 1.5.0
@@ -50,10 +50,10 @@ abstract class UserDefinedAggregateFunction extends Serializable {
def inputSchema: StructType
/**
- * A [[StructType]] represents data types of values in the aggregation buffer.
+ * A `StructType` represents data types of values in the aggregation buffer.
* For example, if a [[UserDefinedAggregateFunction]]'s buffer has two values
- * (i.e. two intermediate values) with type of [[DoubleType]] and [[LongType]],
- * the returned [[StructType]] will look like
+ * (i.e. two intermediate values) with type of `DoubleType` and `LongType`,
+ * the returned `StructType` will look like
*
* ```
* new StructType()
@@ -61,7 +61,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
* .add("longInput", LongType)
* ```
*
- * The name of a field of this [[StructType]] is only used to identify the corresponding
+ * The name of a field of this `StructType` is only used to identify the corresponding
* buffer value. Users can choose names to identify the input arguments.
*
* @since 1.5.0
@@ -69,7 +69,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
def bufferSchema: StructType
/**
- * The [[DataType]] of the returned value of this [[UserDefinedAggregateFunction]].
+ * The `DataType` of the returned value of this [[UserDefinedAggregateFunction]].
*
* @since 1.5.0
*/
@@ -121,7 +121,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
def evaluate(buffer: Row): Any
/**
- * Creates a [[Column]] for this UDAF using given [[Column]]s as input arguments.
+ * Creates a `Column` for this UDAF using given `Column`s as input arguments.
*
* @since 1.5.0
*/
@@ -136,8 +136,8 @@ abstract class UserDefinedAggregateFunction extends Serializable {
}
/**
- * Creates a [[Column]] for this UDAF using the distinct values of the given
- * [[Column]]s as input arguments.
+ * Creates a `Column` for this UDAF using the distinct values of the given
+ * `Column`s as input arguments.
*
* @since 1.5.0
*/
@@ -153,7 +153,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
}
/**
- * A [[Row]] representing a mutable aggregation buffer.
+ * A `Row` representing a mutable aggregation buffer.
*
* This is not meant to be extended outside of Spark.
*
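A minimal UserDefinedAggregateFunction following the schema-driven contract described above (a simple average); all names are made up:
{{{
import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

object MyAverage extends UserDefinedAggregateFunction {
  // One DoubleType input argument; the field name only identifies the argument.
  def inputSchema: StructType = new StructType().add("doubleInput", DoubleType)
  // The aggregation buffer holds a running sum and a count.
  def bufferSchema: StructType = new StructType().add("sum", DoubleType).add("count", LongType)
  def dataType: DataType = DoubleType
  def deterministic: Boolean = true

  def initialize(buffer: MutableAggregationBuffer): Unit = {
    buffer(0) = 0.0
    buffer(1) = 0L
  }

  def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    if (!input.isNullAt(0)) {
      buffer(0) = buffer.getDouble(0) + input.getDouble(0)
      buffer(1) = buffer.getLong(1) + 1L
    }
  }

  def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
    buffer1(0) = buffer1.getDouble(0) + buffer2.getDouble(0)
    buffer1(1) = buffer1.getLong(1) + buffer2.getLong(1)
  }

  def evaluate(buffer: Row): Any =
    if (buffer.getLong(1) == 0L) null else buffer.getDouble(0) / buffer.getLong(1)
}

// Usage: df.select(MyAverage(df("value")))
}}}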
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index 7c64e28d24..83857c322a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -40,7 +40,7 @@ case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int)
* SQL dialect of a certain database or jdbc driver.
* Lots of databases define types that aren't explicitly supported
* by the JDBC spec. Some JDBC drivers also report inaccurate
- * information---for instance, BIT(n>1) being reported as a BIT type is quite
+ * information---for instance, BIT(n&gt;1) being reported as a BIT type is quite
* common, even though BIT in JDBC is meant for single-bit values. Also, there
* does not appear to be a standard name for an unbounded string or binary
* type; we use BLOB and CLOB by default but override with database-specific
@@ -134,7 +134,7 @@ abstract class JdbcDialect extends Serializable {
/**
* :: DeveloperApi ::
- * Registry of dialects that apply to every new jdbc [[org.apache.spark.sql.DataFrame]].
+ * Registry of dialects that apply to every new jdbc `org.apache.spark.sql.DataFrame`.
*
* If multiple matching dialects are registered then all matching ones will be
* tried in reverse order. A user-added dialect will thus be applied first,
@@ -148,7 +148,7 @@ abstract class JdbcDialect extends Serializable {
object JdbcDialects {
/**
- * Register a dialect for use on all new matching jdbc [[org.apache.spark.sql.DataFrame]].
+ * Register a dialect for use on all new matching jdbc `org.apache.spark.sql.DataFrame`.
* Reading an existing dialect will cause a move-to-front.
*
* @param dialect The new dialect.
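A hedged sketch of registering a custom dialect as the docs above describe; the URL prefix and type mapping below are invented for illustration:
{{{
import java.sql.Types

import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType}
import org.apache.spark.sql.types.{DataType, StringType}

// Hypothetical dialect for a database whose JDBC URLs start with "jdbc:mydb:".
object MyDbDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb:")

  // Map Spark's StringType to TEXT instead of the default CLOB.
  override def getJDBCType(dt: DataType): Option[JdbcType] = dt match {
    case StringType => Some(JdbcType("TEXT", Types.CLOB))
    case _ => None
  }
}

// Registered dialects are tried in reverse registration order, so this one takes
// precedence over the built-ins for matching URLs.
JdbcDialects.registerDialect(MyDbDialect)
}}}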
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index 40b482e4c0..c50733534e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -27,8 +27,8 @@ import org.apache.spark.sql.execution.streaming.StreamingRelation
import org.apache.spark.sql.types.StructType
/**
- * Interface used to load a streaming [[Dataset]] from external storage systems (e.g. file systems,
- * key-value stores, etc). Use [[SparkSession.readStream]] to access this.
+ * Interface used to load a streaming `Dataset` from external storage systems (e.g. file systems,
+ * key-value stores, etc). Use `SparkSession.readStream` to access this.
*
* @since 2.0.0
*/
@@ -109,7 +109,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
/**
- * Loads input data stream in as a [[DataFrame]], for data streams that don't require a path
+ * Loads input data stream in as a `DataFrame`, for data streams that don't require a path
* (e.g. external key-value stores).
*
* @since 2.0.0
@@ -125,7 +125,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
}
/**
- * Loads input in as a [[DataFrame]], for data streams that read from some path.
+ * Loads input in as a `DataFrame`, for data streams that read from some path.
*
* @since 2.0.0
*/
@@ -134,8 +134,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
}
/**
- * Loads a JSON file stream ([[http://jsonlines.org/ JSON Lines text format or newline-delimited
- * JSON]]) and returns the result as a [[DataFrame]].
+ * Loads a JSON file stream (<a href="http://jsonlines.org/">JSON Lines text format or
+ * newline-delimited JSON</a>) and returns the result as a `DataFrame`.
*
* This function goes through the input once to determine the input schema. If you know the
* schema in advance, use the version that specifies the schema to avoid the extra scan.
@@ -181,7 +181,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
def json(path: String): DataFrame = format("json").load(path)
/**
- * Loads a CSV file stream and returns the result as a [[DataFrame]].
+ * Loads a CSV file stream and returns the result as a `DataFrame`.
*
* This function will go through the input once to determine the input schema if `inferSchema`
* is enabled. To avoid going through the entire data once, disable `inferSchema` option or
@@ -243,7 +243,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
def csv(path: String): DataFrame = format("csv").load(path)
/**
- * Loads a Parquet file stream, returning the result as a [[DataFrame]].
+ * Loads a Parquet file stream, returning the result as a `DataFrame`.
*
* You can set the following Parquet-specific option(s) for reading Parquet files:
* <ul>
@@ -262,7 +262,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
}
/**
- * Loads text files and returns a [[DataFrame]] whose schema starts with a string column named
+ * Loads text files and returns a `DataFrame` whose schema starts with a string column named
* "value", and followed by partitioned columns if there are any.
*
* Each line in the text files is a new row in the resulting DataFrame. For example:
@@ -285,7 +285,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
def text(path: String): DataFrame = format("text").load(path)
/**
- * Loads text file(s) and returns a [[Dataset]] of String. The underlying schema of the Dataset
+ * Loads text file(s) and returns a `Dataset` of String. The underlying schema of the Dataset
* contains a single string column named "value".
*
* If the directory structure of the text files contains partitioning information, those are
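A short sketch of the streaming JSON reader discussed above; the schema and directory are hypothetical:
{{{
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{StringType, StructType, TimestampType}

val spark = SparkSession.builder().appName("stream-read").master("local[*]").getOrCreate()

// File-based streaming sources such as JSON generally need a user-supplied schema.
val schema = new StructType()
  .add("event", StringType)
  .add("ts", TimestampType)

val events = spark.readStream.schema(schema).json("/tmp/events")  // hypothetical directory
println(events.isStreaming)  // true
}}}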
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
index daed1dcb77..b3c600ae53 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
@@ -26,8 +26,8 @@ import org.apache.spark.sql.execution.streaming.{ForeachSink, MemoryPlan, Memory
/**
* :: Experimental ::
- * Interface used to write a streaming [[Dataset]] to external storage systems (e.g. file systems,
- * key-value stores, etc). Use [[Dataset.writeStream]] to access this.
+ * Interface used to write a streaming `Dataset` to external storage systems (e.g. file systems,
+ * key-value stores, etc). Use `Dataset.writeStream` to access this.
*
* @since 2.0.0
*/
@@ -273,8 +273,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) {
/**
* Starts the execution of the streaming query, which will continually send results to the given
- * [[ForeachWriter]] as as new data arrives. The [[ForeachWriter]] can be used to send the data
- * generated by the [[DataFrame]]/[[Dataset]] to an external system.
+ * `ForeachWriter` as new data arrives. The `ForeachWriter` can be used to send the data
+ * generated by the `DataFrame`/`Dataset` to an external system.
*
* Scala example:
* {{{
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala
index 0a85414451..374313f2ca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala
@@ -31,7 +31,7 @@ trait StreamingQuery {
/**
* Returns the name of the query. This name is unique across all active queries. This can be
- * set in the [[org.apache.spark.sql.DataStreamWriter DataStreamWriter]] as
+ * set in the `org.apache.spark.sql.streaming.DataStreamWriter` as
* `dataframe.writeStream.queryName("query").start()`.
* @since 2.0.0
*/
@@ -45,7 +45,7 @@ trait StreamingQuery {
def id: Long
/**
- * Returns the [[SparkSession]] associated with `this`.
+ * Returns the `SparkSession` associated with `this`.
* @since 2.0.0
*/
def sparkSession: SparkSession
@@ -90,10 +90,11 @@ trait StreamingQuery {
* immediately (if the query was terminated by `stop()`), or throw the exception
* immediately (if the query has terminated with exception).
*
- * @throws StreamingQueryException, if `this` query has terminated with an exception.
+ * @throws StreamingQueryException if the query has terminated with an exception.
*
* @since 2.0.0
*/
+ @throws[StreamingQueryException]
def awaitTermination(): Unit
/**
@@ -106,10 +107,11 @@ trait StreamingQuery {
* `true` immediately (if the query was terminated by `stop()`), or throw the exception
* immediately (if the query has terminated with exception).
*
- * @throws StreamingQueryException, if `this` query has terminated with an exception
+ * @throws StreamingQueryException if the query has terminated with an exception
*
* @since 2.0.0
*/
+ @throws[StreamingQueryException]
def awaitTermination(timeoutMs: Long): Boolean
/**
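Given the @throws annotations added here, a sketch of handling the declared exception; query construction is elided and `query` is assumed to be an already started StreamingQuery:
{{{
import org.apache.spark.sql.streaming.{StreamingQuery, StreamingQueryException}

def waitFor(query: StreamingQuery): Unit = {
  try {
    query.awaitTermination()  // blocks until stop() is called or the query fails
  } catch {
    case e: StreamingQueryException =>
      // The query terminated with an error; log, inspect, or rethrow as appropriate.
      println(s"Query failed: ${e.getMessage}")
  }
}
}}}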
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala
index bba7bc753e..53968a82d8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala
@@ -31,7 +31,7 @@ import org.apache.spark.util.{Clock, SystemClock, Utils}
/**
* :: Experimental ::
- * A class to manage all the [[StreamingQuery]] active on a [[SparkSession]].
+ * A class to manage all the [[StreamingQuery]] active on a `SparkSession`.
*
* @since 2.0.0
*/
@@ -81,10 +81,11 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) {
* users need to stop all of them after any of them terminates with exception, and then check the
* `query.exception()` for each query.
*
- * @throws StreamingQueryException, if any query has terminated with an exception
+ * @throws StreamingQueryException if any query has terminated with an exception
*
* @since 2.0.0
*/
+ @throws[StreamingQueryException]
def awaitAnyTermination(): Unit = {
awaitTerminationLock.synchronized {
while (lastTerminatedQuery == null) {
@@ -113,10 +114,11 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) {
* users need to stop all of them after any of them terminates with exception, and then check the
* `query.exception()` for each query.
*
- * @throws StreamingQueryException, if any query has terminated with an exception
+ * @throws StreamingQueryException if any query has terminated with an exception
*
* @since 2.0.0
*/
+ @throws[StreamingQueryException]
def awaitAnyTermination(timeoutMs: Long): Boolean = {
val startTime = System.currentTimeMillis
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
index 4504582187..26ad0eadd9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
@@ -68,7 +68,7 @@ trait QueryExecutionListener {
/**
* :: Experimental ::
*
- * Manager for [[QueryExecutionListener]]. See [[org.apache.spark.sql.SQLContext.listenerManager]].
+ * Manager for [[QueryExecutionListener]]. See `org.apache.spark.sql.SQLContext.listenerManager`.
*/
@Experimental
@InterfaceStability.Evolving
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index e333fc7feb..a2d64da001 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -57,9 +57,9 @@ import org.apache.spark.util.SerializableJobConf
* @param partition a map from the partition key to the partition value (optional). If the partition
* value is optional, dynamic partition insert will be performed.
* As an example, `INSERT INTO tbl PARTITION (a=1, b=2) AS ...` would have
- * Map('a' -> Some('1'), 'b' -> Some('2')),
+ * Map('a' -&gt; Some('1'), 'b' -&gt; Some('2')),
* and `INSERT INTO tbl PARTITION (a=1, b) AS ...`
- * would have Map('a' -> Some('1'), 'b' -> None).
+ * would have Map('a' -&gt; Some('1'), 'b' -&gt; None).
* @param child the logical plan representing data to write to.
* @param overwrite overwrite existing table or partitions.
* @param ifNotExists If true, only write if the table or partition does not exist.
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 42c92ed5ca..0a7631f782 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -42,8 +42,8 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration
/**
- * [[FileFormat]] for reading ORC files. If this is moved or renamed, please update
- * [[DataSource]]'s backwardCompatibilityMap.
+ * `FileFormat` for reading ORC files. If this is moved or renamed, please update
+ * `DataSource`'s backwardCompatibilityMap.
*/
class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
index f5db73b715..3f1f86c278 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
@@ -38,7 +38,7 @@ private[orc] object OrcFileOperator extends Logging {
* 1. Retrieving file metadata (schema and compression codecs, etc.)
* 2. Read the actual file content (in this case, the given path should point to the target file)
*
- * @note As recorded by SPARK-8501, ORC writes an empty schema (<code>struct&lt;&gt;</code) to an
+ * @note As recorded by SPARK-8501, ORC writes an empty schema (<code>struct&lt;&gt;</code>) to an
* ORC file if the file contains zero rows. This is OK for Hive since the schema of the
* table is managed by metastore. But this becomes a problem when reading ORC files
* directly from HDFS via Spark SQL, because we have to discover the schema from raw ORC