author     Reynold Xin <rxin@databricks.com>  2015-05-12 18:37:02 -0700
committer  Reynold Xin <rxin@databricks.com>  2015-05-12 18:37:02 -0700
commit     8fd55358b7fc1c7545d823bef7b39769f731c1ee (patch)
tree       593ec8df076fcdc8d8ad952e3d2df97c58773378
parent     1b9e434b6c19f23a01e9875a3c1966cd03ce8e2d (diff)
[SPARK-7588] Document all SQL/DataFrame public methods with @since tag
This pull request adds a @since tag to all public methods/classes in SQL/DataFrame to indicate the version in which each method/class was first added.

Author: Reynold Xin <rxin@databricks.com>

Closes #6101 from rxin/tbc and squashes the following commits:

ed55e11 [Reynold Xin] Add since version to all DataFrame methods.
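The pattern applied throughout the patch is the standard Scaladoc @since tag, placed after any @group tag in each public member's doc comment. A representative excerpt, taken from the DataFrame.scala changes further down in this diff:

  /**
   * Returns a new [[DataFrame]] sorted by the given expressions.
   * This is an alias of the `sort` function.
   * @group dfops
   * @since 1.3.0
   */
  @scala.annotation.varargs
  def orderBy(sortExprs: Column*): DataFrame = sort(sortExprs :_*)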
-rw-r--r--  sql/core/src/main/java/org/apache/spark/sql/SaveMode.java                  |  10
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Column.scala                  | 145
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala               | 103
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala         |   2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala    |  44
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala  |  18
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala     |   4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala             |  22
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala       |   3
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala              |  94
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala          |   1
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala         |  49
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala     |   4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/functions.scala               | 140
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala         |  30
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala      |  57
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala        |   6
17 files changed, 706 insertions, 26 deletions
diff --git a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java
index a40be526d0..9665c3c46f 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java
@@ -18,28 +18,38 @@ package org.apache.spark.sql;
/**
* SaveMode is used to specify the expected behavior of saving a DataFrame to a data source.
+ *
+ * @since 1.3.0
*/
public enum SaveMode {
/**
* Append mode means that when saving a DataFrame to a data source, if data/table already exists,
* contents of the DataFrame are expected to be appended to existing data.
+ *
+ * @since 1.3.0
*/
Append,
/**
* Overwrite mode means that when saving a DataFrame to a data source,
* if data/table already exists, existing data is expected to be overwritten by the contents of
* the DataFrame.
+ *
+ * @since 1.3.0
*/
Overwrite,
/**
* ErrorIfExists mode means that when saving a DataFrame to a data source, if data already exists,
* an exception is expected to be thrown.
+ *
+ * @since 1.3.0
*/
ErrorIfExists,
/**
* Ignore mode means that when saving a DataFrame to a data source, if data already exists,
* the save operation is expected to not save the contents of the DataFrame and to not
* change the existing data.
+ *
+ * @since 1.3.0
*/
Ignore
}
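As a usage illustration (a minimal sketch, not part of this commit; `df` is an assumed existing DataFrame and the output path is a placeholder), these modes are passed to the DataFrame save overloads that appear later in this diff:

  import org.apache.spark.sql.SaveMode

  // Append to whatever already exists at the path instead of failing.
  df.save("/tmp/people", SaveMode.Append)

  // Without an explicit mode, the default behavior is SaveMode.ErrorIfExists.
  df.save("/tmp/people")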
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 4d50821620..4773dedf72 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -45,6 +45,8 @@ private[sql] object Column {
* @groupname expr_ops Expression operators.
* @groupname df_ops DataFrame functions.
* @groupname Ungrouped Support functions for DataFrames.
+ *
+ * @since 1.3.0
*/
@Experimental
class Column(protected[sql] val expr: Expression) extends Logging {
@@ -77,6 +79,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* of every struct in that array, and return an Array of fields
*
* @group expr_ops
+ * @since 1.4.0
*/
def apply(extraction: Any): Column = UnresolvedExtractValue(expr, lit(extraction).expr)
@@ -92,6 +95,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def unary_- : Column = UnaryMinus(expr)
@@ -107,6 +111,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def unary_! : Column = Not(expr)
@@ -122,6 +127,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def === (other: Any): Column = {
val right = lit(other).expr
@@ -145,6 +151,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def equalTo(other: Any): Column = this === other
@@ -161,6 +168,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def !== (other: Any): Column = Not(EqualTo(expr, lit(other).expr))
@@ -177,6 +185,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def notEqual(other: Any): Column = Not(EqualTo(expr, lit(other).expr))
@@ -192,6 +201,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def > (other: Any): Column = GreaterThan(expr, lit(other).expr)
@@ -207,6 +217,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def gt(other: Any): Column = this > other
@@ -221,6 +232,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def < (other: Any): Column = LessThan(expr, lit(other).expr)
@@ -235,6 +247,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def lt(other: Any): Column = this < other
@@ -249,6 +262,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def <= (other: Any): Column = LessThanOrEqual(expr, lit(other).expr)
@@ -263,6 +277,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def leq(other: Any): Column = this <= other
@@ -277,6 +292,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def >= (other: Any): Column = GreaterThanOrEqual(expr, lit(other).expr)
@@ -291,6 +307,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def geq(other: Any): Column = this >= other
@@ -298,6 +315,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* Equality test that is safe for null values.
*
* @group expr_ops
+ * @since 1.3.0
*/
def <=> (other: Any): Column = EqualNullSafe(expr, lit(other).expr)
@@ -305,6 +323,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* Equality test that is safe for null values.
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def eqNullSafe(other: Any): Column = this <=> other
@@ -312,6 +331,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* True if the current column is between the lower bound and upper bound, inclusive.
*
* @group java_expr_ops
+ * @since 1.4.0
*/
def between(lowerBound: Any, upperBound: Any): Column = {
(this >= lowerBound) && (this <= upperBound)
@@ -321,6 +341,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* True if the current expression is null.
*
* @group expr_ops
+ * @since 1.3.0
*/
def isNull: Column = IsNull(expr)
@@ -328,6 +349,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* True if the current expression is NOT null.
*
* @group expr_ops
+ * @since 1.3.0
*/
def isNotNull: Column = IsNotNull(expr)
@@ -342,6 +364,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def || (other: Any): Column = Or(expr, lit(other).expr)
@@ -356,6 +379,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def or(other: Column): Column = this || other
@@ -370,6 +394,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def && (other: Any): Column = And(expr, lit(other).expr)
@@ -384,6 +409,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def and(other: Column): Column = this && other
@@ -398,6 +424,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def + (other: Any): Column = Add(expr, lit(other).expr)
@@ -412,6 +439,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def plus(other: Any): Column = this + other
@@ -426,6 +454,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def - (other: Any): Column = Subtract(expr, lit(other).expr)
@@ -440,6 +469,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def minus(other: Any): Column = this - other
@@ -454,6 +484,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def * (other: Any): Column = Multiply(expr, lit(other).expr)
@@ -468,6 +499,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def multiply(other: Any): Column = this * other
@@ -482,6 +514,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def / (other: Any): Column = Divide(expr, lit(other).expr)
@@ -496,6 +529,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def divide(other: Any): Column = this / other
@@ -503,6 +537,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* Modulo (a.k.a. remainder) expression.
*
* @group expr_ops
+ * @since 1.3.0
*/
def % (other: Any): Column = Remainder(expr, lit(other).expr)
@@ -510,6 +545,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* Modulo (a.k.a. remainder) expression.
*
* @group java_expr_ops
+ * @since 1.3.0
*/
def mod(other: Any): Column = this % other
@@ -518,6 +554,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* by the evaluated values of the arguments.
*
* @group expr_ops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def in(list: Column*): Column = In(expr, list.map(_.expr))
@@ -526,6 +563,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* SQL like expression.
*
* @group expr_ops
+ * @since 1.3.0
*/
def like(literal: String): Column = Like(expr, lit(literal).expr)
@@ -533,6 +571,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* SQL RLIKE expression (LIKE with Regex).
*
* @group expr_ops
+ * @since 1.3.0
*/
def rlike(literal: String): Column = RLike(expr, lit(literal).expr)
@@ -541,6 +580,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* or gets a value by key `key` in a [[MapType]].
*
* @group expr_ops
+ * @since 1.3.0
*/
def getItem(key: Any): Column = UnresolvedExtractValue(expr, Literal(key))
@@ -548,6 +588,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* An expression that gets a field by name in a [[StructType]].
*
* @group expr_ops
+ * @since 1.3.0
*/
def getField(fieldName: String): Column = UnresolvedExtractValue(expr, Literal(fieldName))
@@ -557,6 +598,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* @param len expression for the length of the substring.
*
* @group expr_ops
+ * @since 1.3.0
*/
def substr(startPos: Column, len: Column): Column = Substring(expr, startPos.expr, len.expr)
@@ -566,6 +608,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* @param len length of the substring.
*
* @group expr_ops
+ * @since 1.3.0
*/
def substr(startPos: Int, len: Int): Column = Substring(expr, lit(startPos).expr, lit(len).expr)
@@ -573,6 +616,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* Contains the other element.
*
* @group expr_ops
+ * @since 1.3.0
*/
def contains(other: Any): Column = Contains(expr, lit(other).expr)
@@ -580,6 +624,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* String starts with.
*
* @group expr_ops
+ * @since 1.3.0
*/
def startsWith(other: Column): Column = StartsWith(expr, lit(other).expr)
@@ -587,6 +632,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* String starts with another string literal.
*
* @group expr_ops
+ * @since 1.3.0
*/
def startsWith(literal: String): Column = this.startsWith(lit(literal))
@@ -594,6 +640,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* String ends with.
*
* @group expr_ops
+ * @since 1.3.0
*/
def endsWith(other: Column): Column = EndsWith(expr, lit(other).expr)
@@ -601,6 +648,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* String ends with another string literal.
*
* @group expr_ops
+ * @since 1.3.0
*/
def endsWith(literal: String): Column = this.endsWith(lit(literal))
@@ -612,6 +660,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def as(alias: String): Column = Alias(expr, alias)()
@@ -623,6 +672,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def as(alias: Symbol): Column = Alias(expr, alias.name)()
@@ -634,6 +684,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def as(alias: String, metadata: Metadata): Column = {
Alias(expr, alias)(explicitMetadata = Some(metadata))
@@ -651,6 +702,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def cast(to: DataType): Column = expr match {
// Lift alias out of cast so we can support col.as("name").cast(IntegerType)
@@ -668,6 +720,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def cast(to: String): Column = cast(DataTypeParser.parse(to))
@@ -682,6 +735,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def desc: Column = SortOrder(expr, Descending)
@@ -696,6 +750,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.3.0
*/
def asc: Column = SortOrder(expr, Ascending)
@@ -703,6 +758,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* Prints the expression to the console for debugging purpose.
*
* @group df_ops
+ * @since 1.3.0
*/
def explain(extended: Boolean): Unit = {
if (extended) {
@@ -719,6 +775,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.4.0
*/
def bitwiseOR(other: Any): Column = BitwiseOr(expr, lit(other).expr)
@@ -729,6 +786,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.4.0
*/
def bitwiseAND(other: Any): Column = BitwiseAnd(expr, lit(other).expr)
@@ -739,6 +797,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* }}}
*
* @group expr_ops
+ * @since 1.4.0
*/
def bitwiseXOR(other: Any): Column = BitwiseXor(expr, lit(other).expr)
@@ -748,61 +807,115 @@ class Column(protected[sql] val expr: Expression) extends Logging {
/**
* :: Experimental ::
* A convenient class used for constructing schema.
+ *
+ * @since 1.3.0
*/
@Experimental
class ColumnName(name: String) extends Column(name) {
- /** Creates a new AttributeReference of type boolean */
+ /**
+ * Creates a new [[StructField]] of type boolean.
+ * @since 1.3.0
+ */
def boolean: StructField = StructField(name, BooleanType)
- /** Creates a new AttributeReference of type byte */
+ /**
+ * Creates a new [[StructField]] of type byte.
+ * @since 1.3.0
+ */
def byte: StructField = StructField(name, ByteType)
- /** Creates a new AttributeReference of type short */
+ /**
+ * Creates a new [[StructField]] of type short.
+ * @since 1.3.0
+ */
def short: StructField = StructField(name, ShortType)
- /** Creates a new AttributeReference of type int */
+ /**
+ * Creates a new [[StructField]] of type int.
+ * @since 1.3.0
+ */
def int: StructField = StructField(name, IntegerType)
- /** Creates a new AttributeReference of type long */
+ /**
+ * Creates a new [[StructField]] of type long.
+ * @since 1.3.0
+ */
def long: StructField = StructField(name, LongType)
- /** Creates a new AttributeReference of type float */
+ /**
+ * Creates a new [[StructField]] of type float.
+ * @since 1.3.0
+ */
def float: StructField = StructField(name, FloatType)
- /** Creates a new AttributeReference of type double */
+ /**
+ * Creates a new [[StructField]] of type double.
+ * @since 1.3.0
+ */
def double: StructField = StructField(name, DoubleType)
- /** Creates a new AttributeReference of type string */
+ /**
+ * Creates a new [[StructField]] of type string.
+ * @since 1.3.0
+ */
def string: StructField = StructField(name, StringType)
- /** Creates a new AttributeReference of type date */
+ /**
+ * Creates a new [[StructField]] of type date.
+ * @since 1.3.0
+ */
def date: StructField = StructField(name, DateType)
- /** Creates a new AttributeReference of type decimal */
+ /**
+ * Creates a new [[StructField]] of type decimal.
+ * @since 1.3.0
+ */
def decimal: StructField = StructField(name, DecimalType.Unlimited)
- /** Creates a new AttributeReference of type decimal */
+ /**
+ * Creates a new [[StructField]] of type decimal.
+ * @since 1.3.0
+ */
def decimal(precision: Int, scale: Int): StructField =
StructField(name, DecimalType(precision, scale))
- /** Creates a new AttributeReference of type timestamp */
+ /**
+ * Creates a new [[StructField]] of type timestamp.
+ * @since 1.3.0
+ */
def timestamp: StructField = StructField(name, TimestampType)
- /** Creates a new AttributeReference of type binary */
+ /**
+ * Creates a new [[StructField]] of type binary.
+ * @since 1.3.0
+ */
def binary: StructField = StructField(name, BinaryType)
- /** Creates a new AttributeReference of type array */
+ /**
+ * Creates a new [[StructField]] of type array.
+ * @since 1.3.0
+ */
def array(dataType: DataType): StructField = StructField(name, ArrayType(dataType))
- /** Creates a new AttributeReference of type map */
+ /**
+ * Creates a new [[StructField]] of type map.
+ * @since 1.3.0
+ */
def map(keyType: DataType, valueType: DataType): StructField =
map(MapType(keyType, valueType))
def map(mapType: MapType): StructField = StructField(name, mapType)
- /** Creates a new AttributeReference of type struct */
+ /**
+ * Creates a new [[StructField]] of type struct.
+ * @since 1.3.0
+ */
def struct(fields: StructField*): StructField = struct(StructType(fields))
+ /**
+ * Creates a new [[StructField]] of type struct.
+ * @since 1.3.0
+ */
def struct(structType: StructType): StructField = StructField(name, structType)
}
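A brief hedged sketch of how the Column operators documented above compose (df, "age", and "name" refer to an assumed DataFrame and its columns; none of this is part of the commit):

  import org.apache.spark.sql.functions.lit

  // Comparison and boolean operators return new Columns that can be combined.
  val adults   = df.filter(df("age") >= 18 && df("name").isNotNull)

  // between is inclusive on both bounds (added in 1.4.0).
  val twenties = df.filter(df("age").between(20, 29))

  // cast changes the column type and as renames it inside a select.
  val reshaped = df.select(df("age").cast("double").as("age_d"), lit(1).as("one"))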
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 01fd432cc8..c820a67357 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -114,6 +114,7 @@ private[sql] object DataFrame {
* @groupname rdd RDD Operations
* @groupname output Output Operations
* @groupname action Actions
+ * @since 1.3.0
*/
// TODO: Improve documentation.
@Experimental
@@ -233,6 +234,7 @@ class DataFrame private[sql](
/**
* Returns the object itself.
* @group basic
+ * @since 1.3.0
*/
// This is declared with parentheses to prevent the Scala compiler from treating
// `rdd.toDF("1")` as invoking this toDF and then apply on the returned DataFrame.
@@ -247,6 +249,7 @@ class DataFrame private[sql](
* rdd.toDF("id", "name") // this creates a DataFrame with column name "id" and "name"
* }}}
* @group basic
+ * @since 1.3.0
*/
@scala.annotation.varargs
def toDF(colNames: String*): DataFrame = {
@@ -264,12 +267,14 @@ class DataFrame private[sql](
/**
* Returns the schema of this [[DataFrame]].
* @group basic
+ * @since 1.3.0
*/
def schema: StructType = queryExecution.analyzed.schema
/**
* Returns all column names and their data types as an array.
* @group basic
+ * @since 1.3.0
*/
def dtypes: Array[(String, String)] = schema.fields.map { field =>
(field.name, field.dataType.toString)
@@ -278,18 +283,21 @@ class DataFrame private[sql](
/**
* Returns all column names as an array.
* @group basic
+ * @since 1.3.0
*/
def columns: Array[String] = schema.fields.map(_.name)
/**
* Prints the schema to the console in a nice tree format.
* @group basic
+ * @since 1.3.0
*/
def printSchema(): Unit = println(schema.treeString)
/**
* Prints the plans (logical and physical) to the console for debugging purposes.
* @group basic
+ * @since 1.3.0
*/
def explain(extended: Boolean): Unit = {
ExplainCommand(
@@ -302,6 +310,7 @@ class DataFrame private[sql](
/**
* Only prints the physical plan to the console for debugging purposes.
* @group basic
+ * @since 1.3.0
*/
def explain(): Unit = explain(extended = false)
@@ -309,6 +318,7 @@ class DataFrame private[sql](
* Returns true if the `collect` and `take` methods can be run locally
* (without any Spark executors).
* @group basic
+ * @since 1.3.0
*/
def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation]
@@ -325,12 +335,14 @@ class DataFrame private[sql](
* @param numRows Number of rows to show
*
* @group action
+ * @since 1.3.0
*/
def show(numRows: Int): Unit = println(showString(numRows))
/**
* Displays the top 20 rows of [[DataFrame]] in a tabular form.
* @group action
+ * @since 1.3.0
*/
def show(): Unit = show(20)
@@ -342,6 +354,7 @@ class DataFrame private[sql](
* }}}
*
* @group dfops
+ * @since 1.3.1
*/
def na: DataFrameNaFunctions = new DataFrameNaFunctions(this)
@@ -353,6 +366,7 @@ class DataFrame private[sql](
* }}}
*
* @group dfops
+ * @since 1.4.0
*/
def stat: DataFrameStatFunctions = new DataFrameStatFunctions(this)
@@ -363,6 +377,7 @@ class DataFrame private[sql](
*
* @param right Right side of the join operation.
* @group dfops
+ * @since 1.3.0
*/
def join(right: DataFrame): DataFrame = {
Join(logicalPlan, right.logicalPlan, joinType = Inner, None)
@@ -386,6 +401,7 @@ class DataFrame private[sql](
* @param right Right side of the join operation.
* @param usingColumn Name of the column to join on. This column must exist on both sides.
* @group dfops
+ * @since 1.4.0
*/
def join(right: DataFrame, usingColumn: String): DataFrame = {
// Analyze the self join. The assumption is that the analyzer will disambiguate left vs right
@@ -416,6 +432,7 @@ class DataFrame private[sql](
* df1.join(df2).where($"df1Key" === $"df2Key")
* }}}
* @group dfops
+ * @since 1.3.0
*/
def join(right: DataFrame, joinExprs: Column): DataFrame = join(right, joinExprs, "inner")
@@ -437,6 +454,7 @@ class DataFrame private[sql](
* @param joinExprs Join expression.
* @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`.
* @group dfops
+ * @since 1.3.0
*/
def join(right: DataFrame, joinExprs: Column, joinType: String): DataFrame = {
// Note that in this function, we introduce a hack in the case of self-join to automatically
@@ -483,6 +501,7 @@ class DataFrame private[sql](
* df.sort($"sortcol".asc)
* }}}
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def sort(sortCol: String, sortCols: String*): DataFrame = {
@@ -495,6 +514,7 @@ class DataFrame private[sql](
* df.sort($"col1", $"col2".desc)
* }}}
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def sort(sortExprs: Column*): DataFrame = {
@@ -513,6 +533,7 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] sorted by the given expressions.
* This is an alias of the `sort` function.
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def orderBy(sortCol: String, sortCols: String*): DataFrame = sort(sortCol, sortCols :_*)
@@ -521,6 +542,7 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] sorted by the given expressions.
* This is an alias of the `sort` function.
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def orderBy(sortExprs: Column*): DataFrame = sort(sortExprs :_*)
@@ -528,12 +550,14 @@ class DataFrame private[sql](
/**
* Selects column based on the column name and return it as a [[Column]].
* @group dfops
+ * @since 1.3.0
*/
def apply(colName: String): Column = col(colName)
/**
* Selects column based on the column name and return it as a [[Column]].
* @group dfops
+ * @since 1.3.0
*/
def col(colName: String): Column = colName match {
case "*" =>
@@ -546,12 +570,14 @@ class DataFrame private[sql](
/**
* Returns a new [[DataFrame]] with an alias set.
* @group dfops
+ * @since 1.3.0
*/
def as(alias: String): DataFrame = Subquery(alias, logicalPlan)
/**
* (Scala-specific) Returns a new [[DataFrame]] with an alias set.
* @group dfops
+ * @since 1.3.0
*/
def as(alias: Symbol): DataFrame = as(alias.name)
@@ -561,6 +587,7 @@ class DataFrame private[sql](
* df.select($"colA", $"colB" + 1)
* }}}
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def select(cols: Column*): DataFrame = {
@@ -583,6 +610,7 @@ class DataFrame private[sql](
* df.select($"colA", $"colB")
* }}}
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def select(col: String, cols: String*): DataFrame = select((col +: cols).map(Column(_)) :_*)
@@ -595,6 +623,7 @@ class DataFrame private[sql](
* df.selectExpr("colA", "colB as newName", "abs(colC)")
* }}}
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def selectExpr(exprs: String*): DataFrame = {
@@ -612,6 +641,7 @@ class DataFrame private[sql](
* peopleDf($"age" > 15)
* }}}
* @group dfops
+ * @since 1.3.0
*/
def filter(condition: Column): DataFrame = Filter(condition.expr, logicalPlan)
@@ -621,6 +651,7 @@ class DataFrame private[sql](
* peopleDf.filter("age > 15")
* }}}
* @group dfops
+ * @since 1.3.0
*/
def filter(conditionExpr: String): DataFrame = {
filter(Column(new SqlParser().parseExpression(conditionExpr)))
@@ -635,6 +666,7 @@ class DataFrame private[sql](
* peopleDf($"age" > 15)
* }}}
* @group dfops
+ * @since 1.3.0
*/
def where(condition: Column): DataFrame = filter(condition)
@@ -653,6 +685,7 @@ class DataFrame private[sql](
* ))
* }}}
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr))
@@ -675,6 +708,7 @@ class DataFrame private[sql](
* ))
* }}}
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def groupBy(col1: String, cols: String*): GroupedData = {
@@ -690,6 +724,7 @@ class DataFrame private[sql](
* df.groupBy().agg("age" -> "max", "salary" -> "avg")
* }}}
* @group dfops
+ * @since 1.3.0
*/
def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = {
groupBy().agg(aggExpr, aggExprs :_*)
@@ -703,6 +738,7 @@ class DataFrame private[sql](
* df.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
* }}}
* @group dfops
+ * @since 1.3.0
*/
def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs)
@@ -714,6 +750,7 @@ class DataFrame private[sql](
* df.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
* }}}
* @group dfops
+ * @since 1.3.0
*/
def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs)
@@ -725,6 +762,7 @@ class DataFrame private[sql](
* df.groupBy().agg(max($"age"), avg($"salary"))
* }}}
* @group dfops
+ * @since 1.3.0
*/
@scala.annotation.varargs
def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs :_*)
@@ -733,6 +771,7 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] by taking the first `n` rows. The difference between this function
* and `head` is that `head` returns an array while `limit` returns a new [[DataFrame]].
* @group dfops
+ * @since 1.3.0
*/
def limit(n: Int): DataFrame = Limit(Literal(n), logicalPlan)
@@ -740,6 +779,7 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] containing union of rows in this frame and another frame.
* This is equivalent to `UNION ALL` in SQL.
* @group dfops
+ * @since 1.3.0
*/
def unionAll(other: DataFrame): DataFrame = Union(logicalPlan, other.logicalPlan)
@@ -747,6 +787,7 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] containing rows only in both this frame and another frame.
* This is equivalent to `INTERSECT` in SQL.
* @group dfops
+ * @since 1.3.0
*/
def intersect(other: DataFrame): DataFrame = Intersect(logicalPlan, other.logicalPlan)
@@ -754,6 +795,7 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] containing rows in this frame but not in another frame.
* This is equivalent to `EXCEPT` in SQL.
* @group dfops
+ * @since 1.3.0
*/
def except(other: DataFrame): DataFrame = Except(logicalPlan, other.logicalPlan)
@@ -764,6 +806,7 @@ class DataFrame private[sql](
* @param fraction Fraction of rows to generate.
* @param seed Seed for sampling.
* @group dfops
+ * @since 1.3.0
*/
def sample(withReplacement: Boolean, fraction: Double, seed: Long): DataFrame = {
Sample(0.0, fraction, withReplacement, seed, logicalPlan)
@@ -775,6 +818,7 @@ class DataFrame private[sql](
* @param withReplacement Sample with replacement or not.
* @param fraction Fraction of rows to generate.
* @group dfops
+ * @since 1.3.0
*/
def sample(withReplacement: Boolean, fraction: Double): DataFrame = {
sample(withReplacement, fraction, Utils.random.nextLong)
@@ -786,6 +830,7 @@ class DataFrame private[sql](
* @param weights weights for splits, will be normalized if they don't sum to 1.
* @param seed Seed for sampling.
* @group dfops
+ * @since 1.4.0
*/
def randomSplit(weights: Array[Double], seed: Long): Array[DataFrame] = {
val sum = weights.sum
@@ -800,6 +845,7 @@ class DataFrame private[sql](
*
* @param weights weights for splits, will be normalized if they don't sum to 1.
* @group dfops
+ * @since 1.4.0
*/
def randomSplit(weights: Array[Double]): Array[DataFrame] = {
randomSplit(weights, Utils.random.nextLong)
@@ -836,6 +882,7 @@ class DataFrame private[sql](
* val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title"))
* }}}
* @group dfops
+ * @since 1.3.0
*/
def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = {
val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType]
@@ -860,6 +907,7 @@ class DataFrame private[sql](
* df.explode("words", "word")(words: String => words.split(" "))
* }}}
* @group dfops
+ * @since 1.3.0
*/
def explode[A, B : TypeTag](inputColumn: String, outputColumn: String)(f: A => TraversableOnce[B])
: DataFrame = {
@@ -883,6 +931,7 @@ class DataFrame private[sql](
/**
* Returns a new [[DataFrame]] by adding a column.
* @group dfops
+ * @since 1.3.0
*/
def withColumn(colName: String, col: Column): DataFrame = {
val resolver = sqlContext.analyzer.resolver
@@ -902,6 +951,7 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] with a column renamed.
* This is a no-op if schema doesn't contain existingName.
* @group dfops
+ * @since 1.3.0
*/
def withColumnRenamed(existingName: String, newName: String): DataFrame = {
val resolver = sqlContext.analyzer.resolver
@@ -921,6 +971,7 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] with a column dropped.
* This is a no-op if schema doesn't contain column name.
* @group dfops
+ * @since 1.4.0
*/
def drop(colName: String): DataFrame = {
val resolver = sqlContext.analyzer.resolver
@@ -940,6 +991,7 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]].
* This is an alias for `distinct`.
* @group dfops
+ * @since 1.4.0
*/
def dropDuplicates(): DataFrame = dropDuplicates(this.columns)
@@ -948,6 +1000,7 @@ class DataFrame private[sql](
* the subset of columns.
*
* @group dfops
+ * @since 1.4.0
*/
def dropDuplicates(colNames: Seq[String]): DataFrame = {
val groupCols = colNames.map(resolve)
@@ -967,6 +1020,7 @@ class DataFrame private[sql](
* the subset of columns.
*
* @group dfops
+ * @since 1.4.0
*/
def dropDuplicates(colNames: Array[String]): DataFrame = dropDuplicates(colNames.toSeq)
@@ -991,6 +1045,7 @@ class DataFrame private[sql](
* }}}
*
* @group action
+ * @since 1.3.1
*/
@scala.annotation.varargs
def describe(cols: String*): DataFrame = {
@@ -1034,24 +1089,28 @@ class DataFrame private[sql](
/**
* Returns the first `n` rows.
* @group action
+ * @since 1.3.0
*/
def head(n: Int): Array[Row] = limit(n).collect()
/**
* Returns the first row.
* @group action
+ * @since 1.3.0
*/
def head(): Row = head(1).head
/**
* Returns the first row. Alias for head().
* @group action
+ * @since 1.3.0
*/
override def first(): Row = head()
/**
* Returns a new RDD by applying a function to all rows of this DataFrame.
* @group rdd
+ * @since 1.3.0
*/
override def map[R: ClassTag](f: Row => R): RDD[R] = rdd.map(f)
@@ -1059,12 +1118,14 @@ class DataFrame private[sql](
* Returns a new RDD by first applying a function to all rows of this [[DataFrame]],
* and then flattening the results.
* @group rdd
+ * @since 1.3.0
*/
override def flatMap[R: ClassTag](f: Row => TraversableOnce[R]): RDD[R] = rdd.flatMap(f)
/**
* Returns a new RDD by applying a function to each partition of this DataFrame.
* @group rdd
+ * @since 1.3.0
*/
override def mapPartitions[R: ClassTag](f: Iterator[Row] => Iterator[R]): RDD[R] = {
rdd.mapPartitions(f)
@@ -1073,42 +1134,49 @@ class DataFrame private[sql](
/**
* Applies a function `f` to all rows.
* @group rdd
+ * @since 1.3.0
*/
override def foreach(f: Row => Unit): Unit = rdd.foreach(f)
/**
* Applies a function f to each partition of this [[DataFrame]].
* @group rdd
+ * @since 1.3.0
*/
override def foreachPartition(f: Iterator[Row] => Unit): Unit = rdd.foreachPartition(f)
/**
* Returns the first `n` rows in the [[DataFrame]].
* @group action
+ * @since 1.3.0
*/
override def take(n: Int): Array[Row] = head(n)
/**
* Returns an array that contains all of [[Row]]s in this [[DataFrame]].
* @group action
+ * @since 1.3.0
*/
override def collect(): Array[Row] = queryExecution.executedPlan.executeCollect()
/**
* Returns a Java list that contains all of [[Row]]s in this [[DataFrame]].
* @group action
+ * @since 1.3.0
*/
override def collectAsList(): java.util.List[Row] = java.util.Arrays.asList(rdd.collect() :_*)
/**
* Returns the number of rows in the [[DataFrame]].
* @group action
+ * @since 1.3.0
*/
override def count(): Long = groupBy().count().collect().head.getLong(0)
/**
* Returns a new [[DataFrame]] that has exactly `numPartitions` partitions.
* @group rdd
+ * @since 1.3.0
*/
override def repartition(numPartitions: Int): DataFrame = {
Repartition(numPartitions, shuffle = true, logicalPlan)
@@ -1120,6 +1188,7 @@ class DataFrame private[sql](
* if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of
* the 100 new partitions will claim 10 of the current partitions.
* @group rdd
+ * @since 1.4.0
*/
override def coalesce(numPartitions: Int): DataFrame = {
Repartition(numPartitions, shuffle = false, logicalPlan)
@@ -1129,11 +1198,13 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]].
* This is an alias for `dropDuplicates`.
* @group dfops
+ * @since 1.3.0
*/
override def distinct: DataFrame = Distinct(logicalPlan)
/**
* @group basic
+ * @since 1.3.0
*/
override def persist(): this.type = {
sqlContext.cacheManager.cacheQuery(this)
@@ -1142,11 +1213,13 @@ class DataFrame private[sql](
/**
* @group basic
+ * @since 1.3.0
*/
override def cache(): this.type = persist()
/**
* @group basic
+ * @since 1.3.0
*/
override def persist(newLevel: StorageLevel): this.type = {
sqlContext.cacheManager.cacheQuery(this, None, newLevel)
@@ -1155,6 +1228,7 @@ class DataFrame private[sql](
/**
* @group basic
+ * @since 1.3.0
*/
override def unpersist(blocking: Boolean): this.type = {
sqlContext.cacheManager.tryUncacheQuery(this, blocking)
@@ -1163,6 +1237,7 @@ class DataFrame private[sql](
/**
* @group basic
+ * @since 1.3.0
*/
override def unpersist(): this.type = unpersist(blocking = false)
@@ -1175,6 +1250,7 @@ class DataFrame private[sql](
* memoized. Once called, it won't change even if you change any query planning related Spark SQL
* configurations (e.g. `spark.sql.shuffle.partitions`).
* @group rdd
+ * @since 1.3.0
*/
lazy val rdd: RDD[Row] = {
// use a local variable to make sure the map closure doesn't capture the whole DataFrame
@@ -1188,12 +1264,14 @@ class DataFrame private[sql](
/**
* Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s.
* @group rdd
+ * @since 1.3.0
*/
def toJavaRDD: JavaRDD[Row] = rdd.toJavaRDD()
/**
* Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s.
* @group rdd
+ * @since 1.3.0
*/
def javaRDD: JavaRDD[Row] = toJavaRDD
@@ -1202,6 +1280,7 @@ class DataFrame private[sql](
* temporary table is tied to the [[SQLContext]] that was used to create this DataFrame.
*
* @group basic
+ * @since 1.3.0
*/
def registerTempTable(tableName: String): Unit = {
sqlContext.registerDataFrameAsTable(this, tableName)
@@ -1212,6 +1291,7 @@ class DataFrame private[sql](
* Files that are written out using this method can be read back in as a [[DataFrame]]
* using the `parquetFile` function in [[SQLContext]].
* @group output
+ * @since 1.3.0
*/
def saveAsParquetFile(path: String): Unit = {
if (sqlContext.conf.parquetUseDataSourceApi) {
@@ -1235,6 +1315,7 @@ class DataFrame private[sql](
* Also note that while this function can persist the table metadata into Hive's metastore,
* the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
* @group output
+ * @since 1.3.0
*/
@Experimental
def saveAsTable(tableName: String): Unit = {
@@ -1254,6 +1335,7 @@ class DataFrame private[sql](
* Also note that while this function can persist the table metadata into Hive's metastore,
* the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
* @group output
+ * @since 1.3.0
*/
@Experimental
def saveAsTable(tableName: String, mode: SaveMode): Unit = {
@@ -1281,6 +1363,7 @@ class DataFrame private[sql](
* Also note that while this function can persist the table metadata into Hive's metastore,
* the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
* @group output
+ * @since 1.3.0
*/
@Experimental
def saveAsTable(tableName: String, source: String): Unit = {
@@ -1300,6 +1383,7 @@ class DataFrame private[sql](
* Also note that while this function can persist the table metadata into Hive's metastore,
* the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
* @group output
+ * @since 1.3.0
*/
@Experimental
def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = {
@@ -1319,6 +1403,7 @@ class DataFrame private[sql](
* Also note that while this function can persist the table metadata into Hive's metastore,
* the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
* @group output
+ * @since 1.3.0
*/
@Experimental
def saveAsTable(
@@ -1340,6 +1425,7 @@ class DataFrame private[sql](
* an RDD out to a parquet file, and then register that file as a table. This "table" can then
* be the target of an `insertInto`.
* @group output
+ * @since 1.4.0
*/
@Experimental
def saveAsTable(
@@ -1365,6 +1451,7 @@ class DataFrame private[sql](
* Also note that while this function can persist the table metadata into Hive's metastore,
* the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
* @group output
+ * @since 1.3.0
*/
@Experimental
def saveAsTable(
@@ -1396,6 +1483,7 @@ class DataFrame private[sql](
* an RDD out to a parquet file, and then register that file as a table. This "table" can then
* be the target of an `insertInto`.
* @group output
+ * @since 1.4.0
*/
@Experimental
def saveAsTable(
@@ -1421,6 +1509,7 @@ class DataFrame private[sql](
* using the default data source configured by spark.sql.sources.default and
* [[SaveMode.ErrorIfExists]] as the save mode.
* @group output
+ * @since 1.3.0
*/
@Experimental
def save(path: String): Unit = {
@@ -1432,6 +1521,7 @@ class DataFrame private[sql](
* Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode,
* using the default data source configured by spark.sql.sources.default.
* @group output
+ * @since 1.3.0
*/
@Experimental
def save(path: String, mode: SaveMode): Unit = {
@@ -1444,6 +1534,7 @@ class DataFrame private[sql](
* Saves the contents of this DataFrame to the given path based on the given data source,
* using [[SaveMode.ErrorIfExists]] as the save mode.
* @group output
+ * @since 1.3.0
*/
@Experimental
def save(path: String, source: String): Unit = {
@@ -1455,6 +1546,7 @@ class DataFrame private[sql](
* Saves the contents of this DataFrame to the given path based on the given data source and
* [[SaveMode]] specified by mode.
* @group output
+ * @since 1.3.0
*/
@Experimental
def save(path: String, source: String, mode: SaveMode): Unit = {
@@ -1466,6 +1558,7 @@ class DataFrame private[sql](
* Saves the contents of this DataFrame based on the given data source,
* [[SaveMode]] specified by mode, and a set of options.
* @group output
+ * @since 1.3.0
*/
@Experimental
def save(
@@ -1480,6 +1573,7 @@ class DataFrame private[sql](
* Saves the contents of this DataFrame to the given path based on the given data source,
* [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`.
* @group output
+ * @since 1.4.0
*/
@Experimental
def save(
@@ -1496,6 +1590,7 @@ class DataFrame private[sql](
* Saves the contents of this DataFrame based on the given data source,
* [[SaveMode]] specified by mode, and a set of options
* @group output
+ * @since 1.3.0
*/
@Experimental
def save(
@@ -1510,6 +1605,7 @@ class DataFrame private[sql](
* Saves the contents of this DataFrame to the given path based on the given data source,
* [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`.
* @group output
+ * @since 1.4.0
*/
@Experimental
def save(
@@ -1524,6 +1620,7 @@ class DataFrame private[sql](
* :: Experimental ::
* Adds the rows from this RDD to the specified table, optionally overwriting the existing data.
* @group output
+ * @since 1.3.0
*/
@Experimental
def insertInto(tableName: String, overwrite: Boolean): Unit = {
@@ -1536,6 +1633,7 @@ class DataFrame private[sql](
* Adds the rows from this RDD to the specified table.
* Throws an exception if the table already exists.
* @group output
+ * @since 1.3.0
*/
@Experimental
def insertInto(tableName: String): Unit = insertInto(tableName, overwrite = false)
@@ -1543,6 +1641,7 @@ class DataFrame private[sql](
/**
* Returns the content of the [[DataFrame]] as a RDD of JSON strings.
* @group rdd
+ * @since 1.3.0
*/
def toJSON: RDD[String] = {
val rowSchema = this.schema
@@ -1581,6 +1680,7 @@ class DataFrame private[sql](
* given name; if you pass `false`, it will throw if the table already
* exists.
* @group output
+ * @since 1.3.0
*/
def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = {
createJDBCTable(url, table, allowExisting, new Properties())
@@ -1594,6 +1694,7 @@ class DataFrame private[sql](
* given name; if you pass `false`, it will throw if the table already
* exists.
* @group output
+ * @since 1.4.0
*/
def createJDBCTable(
url: String,
@@ -1626,6 +1727,7 @@ class DataFrame private[sql](
* the RDD in order via the simple statement
* `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail.
* @group output
+ * @since 1.3.0
*/
def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = {
insertIntoJDBC(url, table, overwrite, new Properties())
@@ -1643,6 +1745,7 @@ class DataFrame private[sql](
* the RDD in order via the simple statement
* `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail.
* @group output
+ * @since 1.4.0
*/
def insertIntoJDBC(
url: String,
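To tie several of the DataFrame operations above together, a minimal hedged sketch (people and dept are assumed DataFrames with the referenced columns; not part of this commit):

  // Join on an expression, then group and aggregate with the string-based API.
  val joined = people.join(dept, people("deptId") === dept("id"), "left_outer")
  val byDept = joined.groupBy("deptName").agg(Map("salary" -> "avg", "age" -> "max"))

  // Sort, limit, and trigger execution with an action.
  people.orderBy(people("age").desc).limit(10).show()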
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala
index a3187fe323..b87efb58d5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql
/**
* A container for a [[DataFrame]], used for implicit conversions.
+ *
+ * @since 1.3.0
*/
private[sql] case class DataFrameHolder(df: DataFrame) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
index 4a54120ba8..b4c2daa055 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
@@ -30,12 +30,16 @@ import org.apache.spark.sql.types._
/**
* :: Experimental ::
* Functionality for working with missing data in [[DataFrame]]s.
+ *
+ * @since 1.3.1
*/
@Experimental
final class DataFrameNaFunctions private[sql](df: DataFrame) {
/**
* Returns a new [[DataFrame]] that drops rows containing any null values.
+ *
+ * @since 1.3.1
*/
def drop(): DataFrame = drop("any", df.columns)
@@ -44,18 +48,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
*
* If `how` is "any", then drop rows containing any null values.
* If `how` is "all", then drop rows only if every column is null for that row.
+ *
+ * @since 1.3.1
*/
def drop(how: String): DataFrame = drop(how, df.columns)
/**
* Returns a new [[DataFrame]] that drops rows containing any null values
* in the specified columns.
+ *
+ * @since 1.3.1
*/
def drop(cols: Array[String]): DataFrame = drop(cols.toSeq)
/**
* (Scala-specific) Returns a new [[DataFrame ]] that drops rows containing any null values
* in the specified columns.
+ *
+ * @since 1.3.1
*/
def drop(cols: Seq[String]): DataFrame = drop(cols.size, cols)
@@ -65,6 +75,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
*
* If `how` is "any", then drop rows containing any null values in the specified columns.
* If `how` is "all", then drop rows only if every specified column is null for that row.
+ *
+ * @since 1.3.1
*/
def drop(how: String, cols: Array[String]): DataFrame = drop(how, cols.toSeq)
@@ -74,6 +86,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
*
* If `how` is "any", then drop rows containing any null values in the specified columns.
* If `how` is "all", then drop rows only if every specified column is null for that row.
+ *
+ * @since 1.3.1
*/
def drop(how: String, cols: Seq[String]): DataFrame = {
how.toLowerCase match {
@@ -85,18 +99,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
/**
* Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null values.
+ *
+ * @since 1.3.1
*/
def drop(minNonNulls: Int): DataFrame = drop(minNonNulls, df.columns)
/**
* Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null
* values in the specified columns.
+ *
+ * @since 1.3.1
*/
def drop(minNonNulls: Int, cols: Array[String]): DataFrame = drop(minNonNulls, cols.toSeq)
/**
* (Scala-specific) Returns a new [[DataFrame]] that drops rows containing less than
* `minNonNulls` non-null values in the specified columns.
+ *
+ * @since 1.3.1
*/
def drop(minNonNulls: Int, cols: Seq[String]): DataFrame = {
// Filtering condition -- only keep the row if it has at least `minNonNulls` non-null values.
@@ -106,23 +126,31 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
/**
* Returns a new [[DataFrame]] that replaces null values in numeric columns with `value`.
+ *
+ * @since 1.3.1
*/
def fill(value: Double): DataFrame = fill(value, df.columns)
/**
* Returns a new [[DataFrame ]] that replaces null values in string columns with `value`.
+ *
+ * @since 1.3.1
*/
def fill(value: String): DataFrame = fill(value, df.columns)
/**
* Returns a new [[DataFrame]] that replaces null values in specified numeric columns.
* If a specified column is not a numeric column, it is ignored.
+ *
+ * @since 1.3.1
*/
def fill(value: Double, cols: Array[String]): DataFrame = fill(value, cols.toSeq)
/**
* (Scala-specific) Returns a new [[DataFrame]] that replaces null values in specified
* numeric columns. If a specified column is not a numeric column, it is ignored.
+ *
+ * @since 1.3.1
*/
def fill(value: Double, cols: Seq[String]): DataFrame = {
val columnEquals = df.sqlContext.analyzer.resolver
@@ -140,12 +168,16 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
/**
* Returns a new [[DataFrame]] that replaces null values in specified string columns.
* If a specified column is not a string column, it is ignored.
+ *
+ * @since 1.3.1
*/
def fill(value: String, cols: Array[String]): DataFrame = fill(value, cols.toSeq)
/**
* (Scala-specific) Returns a new [[DataFrame]] that replaces null values in
* specified string columns. If a specified column is not a string column, it is ignored.
+ *
+ * @since 1.3.1
*/
def fill(value: String, cols: Seq[String]): DataFrame = {
val columnEquals = df.sqlContext.analyzer.resolver
@@ -172,6 +204,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
* import com.google.common.collect.ImmutableMap;
* df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0));
* }}}
+ *
+ * @since 1.3.1
*/
def fill(valueMap: java.util.Map[String, Any]): DataFrame = fill0(valueMap.toSeq)
@@ -189,6 +223,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
* "B" -> 1.0
* ))
* }}}
+ *
+ * @since 1.3.1
*/
def fill(valueMap: Map[String, Any]): DataFrame = fill0(valueMap.toSeq)
@@ -212,6 +248,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
*
* @param col name of the column to apply the value replacement
* @param replacement value replacement map, as explained above
+ *
+ * @since 1.3.1
*/
def replace[T](col: String, replacement: java.util.Map[T, T]): DataFrame = {
replace[T](col, replacement.toMap : Map[T, T])
@@ -233,6 +271,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
*
* @param cols list of columns to apply the value replacement
* @param replacement value replacement map, as explained above
+ *
+ * @since 1.3.1
*/
def replace[T](cols: Array[String], replacement: java.util.Map[T, T]): DataFrame = {
replace(cols.toSeq, replacement.toMap)
@@ -256,6 +296,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
*
* @param col name of the column to apply the value replacement
* @param replacement value replacement map, as explained above
+ *
+ * @since 1.3.1
*/
def replace[T](col: String, replacement: Map[T, T]): DataFrame = {
if (col == "*") {
@@ -279,6 +321,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
*
* @param cols list of columns to apply the value replacement
* @param replacement value replacement map, as explained above
+ *
+ * @since 1.3.1
*/
def replace[T](cols: Seq[String], replacement: Map[T, T]): DataFrame = replace0(cols, replacement)
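A hedged usage sketch of the missing-data helpers above (df and its column names are assumptions, not part of this commit):

  // Drop rows where every listed column is null, then require at least two non-null values per row.
  val cleaned = df.na.drop("all", Seq("age", "height")).na.drop(2)

  // Fill numeric nulls in one column and use a per-column map for the rest.
  val filled = cleaned.na.fill(0.0, Seq("age")).na.fill(Map("name" -> "unknown", "height" -> 1.75))

  // Replace a sentinel value in a single column.
  val fixed = filled.na.replace("height", Map(999.0 -> Double.NaN))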
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index a1e74470af..5d106c1ac2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -23,6 +23,8 @@ import org.apache.spark.sql.execution.stat._
/**
* :: Experimental ::
* Statistic functions for [[DataFrame]]s.
+ *
+ * @since 1.4.0
*/
@Experimental
final class DataFrameStatFunctions private[sql](df: DataFrame) {
@@ -32,6 +34,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
* @param col1 the name of the first column
* @param col2 the name of the second column
* @return the covariance of the two columns.
+ *
+ * @since 1.4.0
*/
def cov(col1: String, col2: String): Double = {
StatFunctions.calculateCov(df, Seq(col1, col2))
@@ -45,6 +49,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
* @param col1 the name of the column
* @param col2 the name of the column to calculate the correlation against
* @return The Pearson Correlation Coefficient as a Double.
+ *
+ * @since 1.4.0
*/
def corr(col1: String, col2: String, method: String): Double = {
require(method == "pearson", "Currently only the calculation of the Pearson Correlation " +
@@ -58,6 +64,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
* @param col1 the name of the column
* @param col2 the name of the column to calculate the correlation against
* @return The Pearson Correlation Coefficient as a Double.
+ *
+ * @since 1.4.0
*/
def corr(col1: String, col2: String): Double = {
corr(col1, col2, "pearson")
@@ -76,6 +84,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
* @param col2 The name of the second column. Distinct items will make the column names
* of the DataFrame.
* @return A DataFrame containing for the contingency table.
+ *
+ * @since 1.4.0
*/
def crosstab(col1: String, col2: String): DataFrame = {
StatFunctions.crossTabulate(df, col1, col2)
@@ -91,6 +101,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
* @param support The minimum frequency for an item to be considered `frequent`. Should be greater
* than 1e-4.
* @return A Local DataFrame with the Array of frequent items for each column.
+ *
+ * @since 1.4.0
*/
def freqItems(cols: Array[String], support: Double): DataFrame = {
FrequentItems.singlePassFreqItems(df, cols, support)
@@ -104,6 +116,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
*
* @param cols the names of the columns to search frequent items in.
* @return A Local DataFrame with the Array of frequent items for each column.
+ *
+ * @since 1.4.0
*/
def freqItems(cols: Array[String]): DataFrame = {
FrequentItems.singlePassFreqItems(df, cols, 0.01)
@@ -116,6 +130,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
*
* @param cols the names of the columns to search frequent items in.
* @return A Local DataFrame with the Array of frequent items for each column.
+ *
+ * @since 1.4.0
*/
def freqItems(cols: Seq[String], support: Double): DataFrame = {
FrequentItems.singlePassFreqItems(df, cols, support)
@@ -129,6 +145,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
*
* @param cols the names of the columns to search frequent items in.
* @return A Local DataFrame with the Array of frequent items for each column.
+ *
+ * @since 1.4.0
*/
def freqItems(cols: Seq[String]): DataFrame = {
FrequentItems.singlePassFreqItems(df, cols, 0.01)
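A hedged sketch of the statistics helpers above (df and its columns are assumptions; not part of this commit):

  // Covariance and Pearson correlation between two numeric columns.
  val cov  = df.stat.cov("height", "weight")
  val corr = df.stat.corr("height", "weight")   // same as corr("height", "weight", "pearson")

  // Contingency table of two categorical columns, and frequent items with 40% minimum support.
  val table = df.stat.crosstab("gender", "country")
  val freq  = df.stat.freqItems(Seq("country"), 0.4)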
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index d5d7e35a6b..717709e4f9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -27,6 +27,8 @@ import org.apache.spark.annotation.Experimental
* {{{
* sqlContext.experimental.extraStrategies += ...
* }}}
+ *
+ * @since 1.3.0
*/
@Experimental
class ExperimentalMethods protected[sql](sqlContext: SQLContext) {
@@ -34,6 +36,8 @@ class ExperimentalMethods protected[sql](sqlContext: SQLContext) {
/**
* Allows extra strategies to be injected into the query planner at runtime. Note this API
* should be consider experimental and is not intended to be stable across releases.
+ *
+ * @since 1.3.0
*/
@Experimental
var extraStrategies: Seq[Strategy] = Nil
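As a hedged illustration of the hook above (myStrategy stands in for a hypothetical Strategy implementation; it is not part of this commit):

  // Inject an extra planning strategy at runtime; the API is experimental and may change.
  sqlContext.experimental.extraStrategies = sqlContext.experimental.extraStrategies :+ myStrategy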
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
index 543320e471..1381b9f1a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
@@ -30,6 +30,8 @@ import org.apache.spark.sql.types.NumericType
/**
* :: Experimental ::
* A set of methods for aggregations on a [[DataFrame]], created by [[DataFrame.groupBy]].
+ *
+ * @since 1.3.0
*/
@Experimental
class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) {
@@ -94,6 +96,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
* "expense" -> "sum"
* )
* }}}
+ *
+ * @since 1.3.0
*/
def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = {
agg((aggExpr +: aggExprs).toMap)
@@ -111,6 +115,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
* "expense" -> "sum"
* ))
* }}}
+ *
+ * @since 1.3.0
*/
def agg(exprs: Map[String, String]): DataFrame = {
exprs.map { case (colName, expr) =>
@@ -129,6 +135,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
* import com.google.common.collect.ImmutableMap;
* df.groupBy("department").agg(ImmutableMap.of("age", "max", "expense", "sum"));
* }}}
+ *
+ * @since 1.3.0
*/
def agg(exprs: java.util.Map[String, String]): DataFrame = {
agg(exprs.toMap)
@@ -162,6 +170,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
* // Java, 1.3.x:
* df.groupBy("department").agg(col("department"), max("age"), sum("expense"));
* }}}
+ *
+ * @since 1.3.0
*/
@scala.annotation.varargs
def agg(expr: Column, exprs: Column*): DataFrame = {
@@ -183,6 +193,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
/**
* Count the number of rows for each group.
* The resulting [[DataFrame]] will also contain the grouping columns.
+ *
+ * @since 1.3.0
*/
def count(): DataFrame = Seq(Alias(Count(Literal(1)), "count")())
@@ -190,6 +202,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
* Compute the average value for each numeric column for each group. This is an alias for `avg`.
* The resulting [[DataFrame]] will also contain the grouping columns.
* When specified columns are given, only compute the average values for them.
+ *
+ * @since 1.3.0
*/
@scala.annotation.varargs
def mean(colNames: String*): DataFrame = {
@@ -200,6 +214,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
* Compute the max value for each numeric column for each group.
* The resulting [[DataFrame]] will also contain the grouping columns.
* When specified columns are given, only compute the max values for them.
+ *
+ * @since 1.3.0
*/
@scala.annotation.varargs
def max(colNames: String*): DataFrame = {
@@ -210,6 +226,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
* Compute the mean value for each numeric column for each group.
* The resulting [[DataFrame]] will also contain the grouping columns.
* When specified columns are given, only compute the mean values for them.
+ *
+ * @since 1.3.0
*/
@scala.annotation.varargs
def avg(colNames: String*): DataFrame = {
@@ -220,6 +238,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
* Compute the min value for each numeric column for each group.
* The resulting [[DataFrame]] will also contain the grouping columns.
* When specified columns are given, only compute the min values for them.
+ *
+ * @since 1.3.0
*/
@scala.annotation.varargs
def min(colNames: String*): DataFrame = {
@@ -230,6 +250,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
* Compute the sum for each numeric column for each group.
* The resulting [[DataFrame]] will also contain the grouping columns.
* When specified columns are given, only compute the sum for them.
+ *
+ * @since 1.3.0
*/
@scala.annotation.varargs
def sum(colNames: String*): DataFrame = {
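
Taken together, a hedged sketch of the GroupedData aggregations above ("department", "age" and "expense" are assumed column names):

    import org.apache.spark.sql.functions.{avg, sum}

    df.groupBy("department").max("age", "expense")              // convenience aggregate
    df.groupBy("department").agg(avg("age"), sum("expense"))    // general form via Column expressions
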
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala b/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala
index db484c5f50..1ec874f796 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala
@@ -21,11 +21,12 @@ import java.beans.Introspector
import java.lang.{Iterable => JIterable}
import java.util.{Iterator => JIterator, Map => JMap}
+import scala.language.existentials
+
import com.google.common.reflect.TypeToken
import org.apache.spark.sql.types._
-import scala.language.existentials
/**
* Type-inference utilities for POJOs and Java collections.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 70ba8985d6..975498c11f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -95,6 +95,8 @@ private[spark] class DefaultParserDialect extends ParserDialect {
* @groupname config Configuration
* @groupname dataframes Custom DataFrame Creation
* @groupname Ungrouped Support functions for language integrated queries.
+ *
+ * @since 1.0.0
*/
class SQLContext(@transient val sparkContext: SparkContext)
extends org.apache.spark.Logging
@@ -113,6 +115,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Set Spark SQL configuration properties.
*
* @group config
+ * @since 1.0.0
*/
def setConf(props: Properties): Unit = conf.setConf(props)
@@ -120,6 +123,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Set the given Spark SQL configuration property.
*
* @group config
+ * @since 1.0.0
*/
def setConf(key: String, value: String): Unit = conf.setConf(key, value)
@@ -127,6 +131,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Return the value of Spark SQL configuration property for the given key.
*
* @group config
+ * @since 1.0.0
*/
def getConf(key: String): String = conf.getConf(key)
@@ -135,6 +140,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* yet, return `defaultValue`.
*
* @group config
+ * @since 1.0.0
*/
def getConf(key: String, defaultValue: String): String = conf.getConf(key, defaultValue)
@@ -143,6 +149,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* This creates a new copy of the config properties in the form of a Map.
*
* @group config
+ * @since 1.0.0
*/
def getAllConfs: immutable.Map[String, String] = conf.getAllConfs
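
A small sketch of the configuration accessors (the key is a standard Spark SQL setting; the value is illustrative):

    sqlContext.setConf("spark.sql.shuffle.partitions", "10")
    val partitions = sqlContext.getConf("spark.sql.shuffle.partitions", "200")  // falls back to "200" if unset
    val all = sqlContext.getAllConfs
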
@@ -228,6 +235,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* the query planner for advanced functionality.
*
* @group basic
+ * @since 1.3.0
*/
@Experimental
@transient
@@ -238,6 +246,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Returns a [[DataFrame]] with no rows or columns.
*
* @group basic
+ * @since 1.3.0
*/
@Experimental
@transient
@@ -270,6 +279,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* }}}
*
* @group basic
+ * @since 1.3.0
* TODO move to SQLSession?
*/
@transient
@@ -278,23 +288,27 @@ class SQLContext(@transient val sparkContext: SparkContext)
/**
* Returns true if the table is currently cached in-memory.
* @group cachemgmt
+ * @since 1.3.0
*/
def isCached(tableName: String): Boolean = cacheManager.isCached(tableName)
/**
* Caches the specified table in-memory.
* @group cachemgmt
+ * @since 1.3.0
*/
def cacheTable(tableName: String): Unit = cacheManager.cacheTable(tableName)
/**
* Removes the specified table from the in-memory cache.
* @group cachemgmt
+ * @since 1.3.0
*/
def uncacheTable(tableName: String): Unit = cacheManager.uncacheTable(tableName)
/**
* Removes all cached tables from the in-memory cache.
+ * @since 1.3.0
*/
def clearCache(): Unit = cacheManager.clearCache()
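
Sketch of the cache-management methods (assumes a DataFrame already registered under the temporary table name "people"):

    sqlContext.cacheTable("people")
    assert(sqlContext.isCached("people"))
    sqlContext.uncacheTable("people")
    sqlContext.clearCache()   // drops everything still cached
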
@@ -311,27 +325,40 @@ class SQLContext(@transient val sparkContext: SparkContext)
* }}}
*
* @group basic
+ * @since 1.3.0
*/
@Experimental
object implicits extends Serializable {
// scalastyle:on
- /** Converts $"col name" into an [[Column]]. */
+ /**
+ * Converts $"col name" into an [[Column]].
+ * @since 1.3.0
+ */
implicit class StringToColumn(val sc: StringContext) {
def $(args: Any*): ColumnName = {
new ColumnName(sc.s(args :_*))
}
}
- /** An implicit conversion that turns a Scala `Symbol` into a [[Column]]. */
+ /**
+ * An implicit conversion that turns a Scala `Symbol` into a [[Column]].
+ * @since 1.3.0
+ */
implicit def symbolToColumn(s: Symbol): ColumnName = new ColumnName(s.name)
- /** Creates a DataFrame from an RDD of case classes or tuples. */
+ /**
+ * Creates a DataFrame from an RDD of case classes or tuples.
+ * @since 1.3.0
+ */
implicit def rddToDataFrameHolder[A <: Product : TypeTag](rdd: RDD[A]): DataFrameHolder = {
DataFrameHolder(self.createDataFrame(rdd))
}
- /** Creates a DataFrame from a local Seq of Product. */
+ /**
+ * Creates a DataFrame from a local Seq of Product.
+ * @since 1.3.0
+ */
implicit def localSeqToDataFrameHolder[A <: Product : TypeTag](data: Seq[A]): DataFrameHolder =
{
DataFrameHolder(self.createDataFrame(data))
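
A minimal sketch of these implicits in use (sc is an existing SparkContext; in compiled code the case class should be declared at the top level so a TypeTag is available):

    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    import sqlContext.implicits._

    case class Person(name: String, age: Int)
    val people = Seq(Person("Alice", 30), Person("Bob", 25)).toDF()  // via localSeqToDataFrameHolder
    people.select($"age" + 1)                                        // via StringToColumn
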
@@ -341,7 +368,10 @@ class SQLContext(@transient val sparkContext: SparkContext)
// making existing implicit conversions ambiguous. In particular, RDD[Double] is dangerous
// because of [[DoubleRDDFunctions]].
- /** Creates a single column DataFrame from an RDD[Int]. */
+ /**
+ * Creates a single column DataFrame from an RDD[Int].
+ * @since 1.3.0
+ */
implicit def intRddToDataFrameHolder(data: RDD[Int]): DataFrameHolder = {
val dataType = IntegerType
val rows = data.mapPartitions { iter =>
@@ -354,7 +384,10 @@ class SQLContext(@transient val sparkContext: SparkContext)
DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil)))
}
- /** Creates a single column DataFrame from an RDD[Long]. */
+ /**
+ * Creates a single column DataFrame from an RDD[Long].
+ * @since 1.3.0
+ */
implicit def longRddToDataFrameHolder(data: RDD[Long]): DataFrameHolder = {
val dataType = LongType
val rows = data.mapPartitions { iter =>
@@ -367,7 +400,10 @@ class SQLContext(@transient val sparkContext: SparkContext)
DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil)))
}
- /** Creates a single column DataFrame from an RDD[String]. */
+ /**
+ * Creates a single column DataFrame from an RDD[String].
+ * @since 1.3.0
+ */
implicit def stringRddToDataFrameHolder(data: RDD[String]): DataFrameHolder = {
val dataType = StringType
val rows = data.mapPartitions { iter =>
@@ -386,6 +422,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Creates a DataFrame from an RDD of case classes.
*
* @group dataframes
+ * @since 1.3.0
*/
@Experimental
def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = {
@@ -401,6 +438,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Creates a DataFrame from a local Seq of Product.
*
* @group dataframes
+ * @since 1.3.0
*/
@Experimental
def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = {
@@ -414,6 +452,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]].
*
* @group dataframes
+ * @since 1.3.0
*/
def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = {
DataFrame(this, LogicalRelation(baseRelation))
@@ -449,6 +488,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* }}}
*
* @group dataframes
+ * @since 1.3.0
*/
@DeveloperApi
def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = {
@@ -480,6 +520,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* the provided schema. Otherwise, there will be a runtime exception.
*
* @group dataframes
+ * @since 1.3.0
*/
@DeveloperApi
def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
@@ -492,6 +533,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
* SELECT * queries will return the columns in an undefined order.
* @group dataframes
+ * @since 1.3.0
*/
def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
val attributeSeq = getSchema(beanClass)
@@ -520,6 +562,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
* SELECT * queries will return the columns in an undefined order.
* @group dataframes
+ * @since 1.3.0
*/
def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
createDataFrame(rdd.rdd, beanClass)
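
For the explicit-schema variant above, a hedged sketch (sc and the data are assumptions):

    import org.apache.spark.sql.Row
    import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

    val schema = StructType(Seq(
      StructField("name", StringType, nullable = true),
      StructField("age", IntegerType, nullable = true)))
    val rowRDD = sc.parallelize(Seq(Row("Alice", 30), Row("Bob", 25)))
    val people = sqlContext.createDataFrame(rowRDD, schema)
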
@@ -591,6 +634,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* [[DataFrame]] if no paths are passed in.
*
* @group specificdata
+ * @since 1.3.0
*/
@scala.annotation.varargs
def parquetFile(paths: String*): DataFrame = {
@@ -609,6 +653,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* It goes through the entire dataset once to determine the schema.
*
* @group specificdata
+ * @since 1.3.0
*/
def jsonFile(path: String): DataFrame = jsonFile(path, 1.0)
@@ -618,6 +663,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* returning the result as a [[DataFrame]].
*
* @group specificdata
+ * @since 1.3.0
*/
@Experimental
def jsonFile(path: String, schema: StructType): DataFrame =
@@ -626,6 +672,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
/**
* :: Experimental ::
* @group specificdata
+ * @since 1.3.0
*/
@Experimental
def jsonFile(path: String, samplingRatio: Double): DataFrame =
@@ -637,6 +684,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* It goes through the entire dataset once to determine the schema.
*
* @group specificdata
+ * @since 1.3.0
*/
def jsonRDD(json: RDD[String]): DataFrame = jsonRDD(json, 1.0)
@@ -647,6 +695,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* It goes through the entire dataset once to determine the schema.
*
* @group specificdata
+ * @since 1.3.0
*/
def jsonRDD(json: JavaRDD[String]): DataFrame = jsonRDD(json.rdd, 1.0)
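
Sketch of JSON ingestion from an RDD (the records are illustrative):

    val json = sc.parallelize(Seq(
      """{"name":"Alice","age":30}""",
      """{"name":"Bob","age":25}"""))
    val df = sqlContext.jsonRDD(json)   // scans the data once to infer the schema
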
@@ -656,6 +705,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* returning the result as a [[DataFrame]].
*
* @group specificdata
+ * @since 1.3.0
*/
@Experimental
def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
@@ -678,6 +728,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* schema, returning the result as a [[DataFrame]].
*
* @group specificdata
+ * @since 1.3.0
*/
@Experimental
def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
@@ -690,6 +741,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* schema, returning the result as a [[DataFrame]].
*
* @group specificdata
+ * @since 1.3.0
*/
@Experimental
def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
@@ -711,6 +763,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* schema, returning the result as a [[DataFrame]].
*
* @group specificdata
+ * @since 1.3.0
*/
@Experimental
def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
@@ -723,6 +776,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* using the default data source configured by spark.sql.sources.default.
*
* @group genericdata
+ * @since 1.3.0
*/
@Experimental
def load(path: String): DataFrame = {
@@ -735,6 +789,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Returns the dataset stored at path as a DataFrame, using the given data source.
*
* @group genericdata
+ * @since 1.3.0
*/
@Experimental
def load(path: String, source: String): DataFrame = {
@@ -747,6 +802,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* a set of options as a DataFrame.
*
* @group genericdata
+ * @since 1.3.0
*/
@Experimental
def load(source: String, options: java.util.Map[String, String]): DataFrame = {
@@ -759,6 +815,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* a set of options as a DataFrame.
*
* @group genericdata
+ * @since 1.3.0
*/
@Experimental
def load(source: String, options: Map[String, String]): DataFrame = {
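
Sketch of the generic load variants above (paths and options are illustrative):

    val parquetDF = sqlContext.load("/tmp/users.parquet")                       // default source
    val jsonDF    = sqlContext.load("/tmp/users.json", "json")                  // named source
    val optsDF    = sqlContext.load("json", Map("path" -> "/tmp/users.json"))   // source plus options
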
@@ -772,6 +829,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
*
* @group genericdata
+ * @since 1.3.0
*/
@Experimental
def load(
@@ -787,6 +845,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
*
* @group genericdata
+ * @since 1.3.0
*/
@Experimental
def load(
@@ -802,6 +861,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* (Scala-specific) Returns the dataset specified by the given data source and
* a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
* @group genericdata
+ * @since 1.3.0
*/
@Experimental
def load(
@@ -817,6 +877,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* (Scala-specific) Returns the dataset specified by the given data source and
* a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
* @group genericdata
+ * @since 1.3.0
*/
@Experimental
def load(
@@ -834,6 +895,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* It will use the default data source configured by spark.sql.sources.default.
*
* @group ddl_ops
+ * @since 1.3.0
*/
@Experimental
def createExternalTable(tableName: String, path: String): DataFrame = {
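
Hedged sketch (the table name and path are illustrative; the call also registers the table in the catalog):

    val users = sqlContext.createExternalTable("users", "/tmp/users.parquet")
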
@@ -847,6 +909,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* and returns the corresponding DataFrame.
*
* @group ddl_ops
+ * @since 1.3.0
*/
@Experimental
def createExternalTable(
@@ -862,6 +925,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Then, returns the corresponding DataFrame.
*
* @group ddl_ops
+ * @since 1.3.0
*/
@Experimental
def createExternalTable(
@@ -878,6 +942,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Then, returns the corresponding DataFrame.
*
* @group ddl_ops
+ * @since 1.3.0
*/
@Experimental
def createExternalTable(
@@ -903,6 +968,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* a set of options. Then, returns the corresponding DataFrame.
*
* @group ddl_ops
+ * @since 1.3.0
*/
@Experimental
def createExternalTable(
@@ -920,6 +986,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* a set of options. Then, returns the corresponding DataFrame.
*
* @group ddl_ops
+ * @since 1.3.0
*/
@Experimental
def createExternalTable(
@@ -946,6 +1013,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* url named table.
*
* @group specificdata
+ * @since 1.3.0
*/
@Experimental
def jdbc(url: String, table: String): DataFrame = {
@@ -958,6 +1026,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* url named table and connection properties.
*
* @group specificdata
+ * @since 1.4.0
*/
@Experimental
def jdbc(url: String, table: String, properties: Properties): DataFrame = {
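
Sketch of the connection-properties variant added in 1.4.0 (URL, table and credentials are illustrative; the JDBC driver must be on the classpath):

    import java.util.Properties

    val props = new Properties()
    props.setProperty("user", "sa")
    props.setProperty("password", "")
    val people = sqlContext.jdbc("jdbc:postgresql://host/db", "people", props)
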
@@ -976,6 +1045,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* @param numPartitions the number of partitions. The range `minValue`-`maxValue` will be split
* evenly into this many partitions
* @group specificdata
+ * @since 1.3.0
*/
@Experimental
def jdbc(
@@ -1001,6 +1071,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* evenly into this many partitions
* @param properties connection properties
* @group specificdata
+ * @since 1.4.0
*/
@Experimental
def jdbc(
@@ -1024,6 +1095,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* of the [[DataFrame]].
*
* @group specificdata
+ * @since 1.3.0
*/
@Experimental
def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {
@@ -1038,6 +1110,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* of the [[DataFrame]].
*
* @group specificdata
+ * @since 1.4.0
*/
@Experimental
def jdbc(
@@ -1075,6 +1148,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* @param tableName the name of the table to be unregistered.
*
* @group basic
+ * @since 1.3.0
*/
def dropTempTable(tableName: String): Unit = {
cacheManager.tryUncacheQuery(table(tableName))
@@ -1086,6 +1160,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* used for SQL parsing can be configured with 'spark.sql.dialect'.
*
* @group basic
+ * @since 1.3.0
*/
def sql(sqlText: String): DataFrame = {
DataFrame(this, parseSql(sqlText))
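
Typical usage sketch (df is an existing DataFrame; the table name is an assumption):

    df.registerTempTable("people")
    val adults = sqlContext.sql("SELECT name FROM people WHERE age >= 18")
    val same   = sqlContext.table("people")   // the same data, looked up by name
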
@@ -1095,6 +1170,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Returns the specified table as a [[DataFrame]].
*
* @group ddl_ops
+ * @since 1.3.0
*/
def table(tableName: String): DataFrame =
DataFrame(this, catalog.lookupRelation(Seq(tableName)))
@@ -1105,6 +1181,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* indicating if a table is a temporary one or not).
*
* @group ddl_ops
+ * @since 1.3.0
*/
def tables(): DataFrame = {
DataFrame(this, ShowTablesCommand(None))
@@ -1116,6 +1193,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* indicating if a table is a temporary one or not).
*
* @group ddl_ops
+ * @since 1.3.0
*/
def tables(databaseName: String): DataFrame = {
DataFrame(this, ShowTablesCommand(Some(databaseName)))
@@ -1125,6 +1203,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Returns the names of tables in the current database as an array.
*
* @group ddl_ops
+ * @since 1.3.0
*/
def tableNames(): Array[String] = {
catalog.getTables(None).map {
@@ -1136,6 +1215,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* Returns the names of tables in the given database as an array.
*
* @group ddl_ops
+ * @since 1.3.0
*/
def tableNames(databaseName: String): Array[String] = {
catalog.getTables(Some(databaseName)).map {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala
index 5921eaf5e6..6b1ae81972 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql
-
import scala.util.parsing.combinator.RegexParsers
import org.apache.spark.sql.catalyst.AbstractSparkSQLParser
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
index b97aaf7352..dc3389c41b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
@@ -33,6 +33,8 @@ import org.apache.spark.sql.types.DataType
/**
* Functions for registering user-defined functions. Use [[SQLContext.udf]] to access this.
+ *
+ * @since 1.3.0
*/
class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
@@ -87,6 +89,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of ${x} arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[$typeTags](name: String, func: Function$x[$types]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -104,6 +107,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
println(s"""
|/**
| * Register a user-defined function with ${i} arguments.
+ | * @since 1.3.0
| */
|def register(name: String, f: UDF$i[$extTypeArgs, _], returnType: DataType) = {
| functionRegistry.registerFunction(
@@ -116,6 +120,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 0 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag](name: String, func: Function0[RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -127,6 +132,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 1 argument as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag](name: String, func: Function1[A1, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
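
A short sketch of registering and calling a Scala UDF ("people" is an assumed temporary table):

    sqlContext.udf.register("strLen", (s: String) => s.length)
    sqlContext.sql("SELECT strLen(name) FROM people")
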
@@ -138,6 +144,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 2 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag](name: String, func: Function2[A1, A2, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -149,6 +156,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 3 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](name: String, func: Function3[A1, A2, A3, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -160,6 +168,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 4 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](name: String, func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -171,6 +180,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 5 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](name: String, func: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -182,6 +192,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 6 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](name: String, func: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -193,6 +204,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 7 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](name: String, func: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -204,6 +216,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 8 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](name: String, func: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -215,6 +228,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 9 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](name: String, func: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -226,6 +240,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 10 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](name: String, func: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -237,6 +252,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 11 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag](name: String, func: Function11[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -248,6 +264,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 12 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag](name: String, func: Function12[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -259,6 +276,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 13 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag](name: String, func: Function13[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -270,6 +288,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 14 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag](name: String, func: Function14[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -281,6 +300,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 15 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag](name: String, func: Function15[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -292,6 +312,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 16 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag](name: String, func: Function16[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -303,6 +324,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 17 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag](name: String, func: Function17[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -314,6 +336,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 18 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag](name: String, func: Function18[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -325,6 +348,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 19 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag](name: String, func: Function19[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -336,6 +360,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 20 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag](name: String, func: Function20[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -347,6 +372,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 21 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag](name: String, func: Function21[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -358,6 +384,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a Scala closure of 22 arguments as user-defined function (UDF).
* @tparam RT return type of UDF.
+ * @since 1.3.0
*/
def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag, A22: TypeTag](name: String, func: Function22[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, RT]): UserDefinedFunction = {
val dataType = ScalaReflection.schemaFor[RT].dataType
@@ -371,6 +398,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 1 argument.
+ * @since 1.3.0
*/
def register(name: String, f: UDF1[_, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -380,6 +408,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 2 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF2[_, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -389,6 +418,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 3 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF3[_, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -398,6 +428,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 4 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -407,6 +438,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 5 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -416,6 +448,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 6 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -425,6 +458,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 7 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -434,6 +468,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 8 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -443,6 +478,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 9 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -452,6 +488,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 10 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -461,6 +498,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 11 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -470,6 +508,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 12 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -479,6 +518,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 13 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -488,6 +528,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 14 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -497,6 +538,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 15 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -506,6 +548,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 16 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -515,6 +558,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 17 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -524,6 +568,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 18 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -533,6 +578,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 19 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -542,6 +588,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 20 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -551,6 +598,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 21 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
@@ -560,6 +608,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging {
/**
* Register a user-defined function with 22 arguments.
+ * @since 1.3.0
*/
def register(name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = {
functionRegistry.registerFunction(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala
index 295db539ad..505ab1301e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql
import java.util.{List => JList, Map => JMap}
import org.apache.spark.Accumulator
+import org.apache.spark.annotation.Experimental
import org.apache.spark.api.python.PythonBroadcast
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.catalyst.expressions.ScalaUdf
@@ -36,7 +37,10 @@ import org.apache.spark.sql.types.DataType
* // Projects a column that adds a prediction column based on the score column.
* df.select( predict(df("score")) )
* }}}
+ *
+ * @since 1.3.0
*/
+@Experimental
case class UserDefinedFunction protected[sql] (f: AnyRef, dataType: DataType) {
def apply(exprs: Column*): Column = {
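
Construction normally goes through functions.udf rather than this constructor; a hedged sketch (df and the threshold are assumptions):

    import org.apache.spark.sql.functions.udf

    val predict = udf((score: Double) => score > 0.5)
    df.select(predict(df("score")))
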
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index fae4bd0fd2..215787e40b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -38,6 +38,7 @@ import org.apache.spark.util.Utils
* @groupname normal_funcs Non-aggregate functions
* @groupname math_funcs Math functions
* @groupname Ungrouped Support functions for DataFrames.
+ * @since 1.3.0
*/
@Experimental
// scalastyle:off
@@ -50,6 +51,7 @@ object functions {
* Returns a [[Column]] based on the given column name.
*
* @group normal_funcs
+ * @since 1.3.0
*/
def col(colName: String): Column = Column(colName)
@@ -57,6 +59,7 @@ object functions {
* Returns a [[Column]] based on the given column name. Alias of [[col]].
*
* @group normal_funcs
+ * @since 1.3.0
*/
def column(colName: String): Column = Column(colName)
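
Sketch of col/column together with lit (df and the column names are assumptions):

    import org.apache.spark.sql.functions.{col, column, lit}

    df.select(col("name"), column("age"), lit(1).as("one"))
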
@@ -68,6 +71,7 @@ object functions {
* Otherwise, a new [[Column]] is created to represent the literal value.
*
* @group normal_funcs
+ * @since 1.3.0
*/
def lit(literal: Any): Column = {
literal match {
@@ -92,6 +96,7 @@ object functions {
* }}}
*
* @group sort_funcs
+ * @since 1.3.0
*/
def asc(columnName: String): Column = Column(columnName).asc
@@ -103,6 +108,7 @@ object functions {
* }}}
*
* @group sort_funcs
+ * @since 1.3.0
*/
def desc(columnName: String): Column = Column(columnName).desc
@@ -114,6 +120,7 @@ object functions {
* Aggregate function: returns the sum of all values in the expression.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def sum(e: Column): Column = Sum(e.expr)
@@ -121,6 +128,7 @@ object functions {
* Aggregate function: returns the sum of all values in the given column.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def sum(columnName: String): Column = sum(Column(columnName))
@@ -128,6 +136,7 @@ object functions {
* Aggregate function: returns the sum of distinct values in the expression.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def sumDistinct(e: Column): Column = SumDistinct(e.expr)
@@ -135,6 +144,7 @@ object functions {
* Aggregate function: returns the sum of distinct values in the expression.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def sumDistinct(columnName: String): Column = sumDistinct(Column(columnName))
@@ -142,6 +152,7 @@ object functions {
* Aggregate function: returns the number of items in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def count(e: Column): Column = e.expr match {
// Turn count(*) into count(1)
@@ -153,6 +164,7 @@ object functions {
* Aggregate function: returns the number of items in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def count(columnName: String): Column = count(Column(columnName))
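
Sketch of the aggregate functions in DataFrame expressions (df, "amount" and "userId" are assumed):

    import org.apache.spark.sql.functions.{count, sum}

    df.agg(sum("amount"), count("userId"))
    df.groupBy("userId").agg(sum("amount").as("total"))
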
@@ -160,6 +172,7 @@ object functions {
* Aggregate function: returns the number of distinct items in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
@scala.annotation.varargs
def countDistinct(expr: Column, exprs: Column*): Column =
@@ -169,6 +182,7 @@ object functions {
* Aggregate function: returns the number of distinct items in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
@scala.annotation.varargs
def countDistinct(columnName: String, columnNames: String*): Column =
@@ -178,6 +192,7 @@ object functions {
* Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def approxCountDistinct(e: Column): Column = ApproxCountDistinct(e.expr)
@@ -185,6 +200,7 @@ object functions {
* Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName))
@@ -192,6 +208,7 @@ object functions {
* Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd)
@@ -199,6 +216,7 @@ object functions {
* Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def approxCountDistinct(columnName: String, rsd: Double): Column = {
approxCountDistinct(Column(columnName), rsd)
@@ -208,6 +226,7 @@ object functions {
* Aggregate function: returns the average of the values in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def avg(e: Column): Column = Average(e.expr)
@@ -215,6 +234,7 @@ object functions {
* Aggregate function: returns the average of the values in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def avg(columnName: String): Column = avg(Column(columnName))
@@ -222,6 +242,7 @@ object functions {
* Aggregate function: returns the first value in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def first(e: Column): Column = First(e.expr)
@@ -229,6 +250,7 @@ object functions {
* Aggregate function: returns the first value of a column in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def first(columnName: String): Column = first(Column(columnName))
@@ -236,6 +258,7 @@ object functions {
* Aggregate function: returns the last value in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def last(e: Column): Column = Last(e.expr)
@@ -243,6 +266,7 @@ object functions {
* Aggregate function: returns the last value of the column in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def last(columnName: String): Column = last(Column(columnName))
@@ -251,6 +275,7 @@ object functions {
* Alias for avg.
*
* @group agg_funcs
+ * @since 1.4.0
*/
def mean(e: Column): Column = avg(e)
@@ -259,6 +284,7 @@ object functions {
* Alias for avg.
*
* @group agg_funcs
+ * @since 1.4.0
*/
def mean(columnName: String): Column = avg(columnName)
@@ -266,6 +292,7 @@ object functions {
* Aggregate function: returns the minimum value of the expression in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def min(e: Column): Column = Min(e.expr)
@@ -273,6 +300,7 @@ object functions {
* Aggregate function: returns the minimum value of the column in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def min(columnName: String): Column = min(Column(columnName))
@@ -280,6 +308,7 @@ object functions {
* Aggregate function: returns the maximum value of the expression in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def max(e: Column): Column = Max(e.expr)
@@ -287,6 +316,7 @@ object functions {
* Aggregate function: returns the maximum value of the column in a group.
*
* @group agg_funcs
+ * @since 1.3.0
*/
def max(columnName: String): Column = max(Column(columnName))
@@ -298,6 +328,7 @@ object functions {
* Computes the absolute value.
*
* @group normal_funcs
+ * @since 1.3.0
*/
def abs(e: Column): Column = Abs(e.expr)
@@ -305,6 +336,7 @@ object functions {
* Creates a new array column. The input columns must all have the same data type.
*
* @group normal_funcs
+ * @since 1.4.0
*/
@scala.annotation.varargs
def array(cols: Column*): Column = CreateArray(cols.map(_.expr))
@@ -313,6 +345,7 @@ object functions {
* Creates a new array column. The input columns must all have the same data type.
*
* @group normal_funcs
+ * @since 1.4.0
*/
def array(colName: String, colNames: String*): Column = {
array((colName +: colNames).map(col) : _*)
@@ -325,6 +358,7 @@ object functions {
* }}}
*
* @group normal_funcs
+ * @since 1.3.0
*/
@scala.annotation.varargs
def coalesce(e: Column*): Column = Coalesce(e.map(_.expr))
@@ -333,6 +367,7 @@ object functions {
* Converts a string expression to lower case.
*
* @group normal_funcs
+ * @since 1.3.0
*/
def lower(e: Column): Column = Lower(e.expr)
@@ -349,6 +384,7 @@ object functions {
* 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594.
*
* @group normal_funcs
+ * @since 1.4.0
*/
def monotonicallyIncreasingId(): Column = execution.expressions.MonotonicallyIncreasingID()
@@ -364,6 +400,7 @@ object functions {
* }}}
*
* @group normal_funcs
+ * @since 1.3.0
*/
def negate(e: Column): Column = -e
@@ -378,6 +415,7 @@ object functions {
* }}}
*
* @group normal_funcs
+ * @since 1.3.0
*/
def not(e: Column): Column = !e
@@ -385,6 +423,7 @@ object functions {
* Generate a random column with i.i.d. samples from U[0.0, 1.0].
*
* @group normal_funcs
+ * @since 1.4.0
*/
def rand(seed: Long): Column = Rand(seed)
@@ -392,6 +431,7 @@ object functions {
* Generate a random column with i.i.d. samples from U[0.0, 1.0].
*
* @group normal_funcs
+ * @since 1.4.0
*/
def rand(): Column = rand(Utils.random.nextLong)
@@ -399,6 +439,7 @@ object functions {
* Generate a column with i.i.d. samples from the standard normal distribution.
*
* @group normal_funcs
+ * @since 1.4.0
*/
def randn(seed: Long): Column = Randn(seed)
@@ -406,6 +447,7 @@ object functions {
* Generate a column with i.i.d. samples from the standard normal distribution.
*
* @group normal_funcs
+ * @since 1.4.0
*/
def randn(): Column = randn(Utils.random.nextLong)
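
Sketch combining the ID and random column generators above (df is an assumption):

    import org.apache.spark.sql.functions.{monotonicallyIncreasingId, rand, randn}

    df.withColumn("id", monotonicallyIncreasingId())
      .withColumn("u", rand(42L))   // uniform samples in [0.0, 1.0]
      .withColumn("g", randn())     // standard normal samples
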
@@ -415,6 +457,7 @@ object functions {
* Note that this is non-deterministic because it depends on data partitioning and task scheduling.
*
* @group normal_funcs
+ * @since 1.4.0
*/
def sparkPartitionId(): Column = execution.expressions.SparkPartitionID
@@ -422,6 +465,7 @@ object functions {
* Computes the square root of the specified float value.
*
* @group normal_funcs
+ * @since 1.3.0
*/
def sqrt(e: Column): Column = Sqrt(e.expr)
@@ -430,6 +474,7 @@ object functions {
* a derived column expression that is named (i.e. aliased).
*
* @group normal_funcs
+ * @since 1.4.0
*/
@scala.annotation.varargs
def struct(cols: Column*): Column = {
@@ -442,6 +487,7 @@ object functions {
* Creates a new struct column that composes multiple input columns.
*
* @group normal_funcs
+ * @since 1.4.0
*/
def struct(colName: String, colNames: String*): Column = {
struct((colName +: colNames).map(col) : _*)
@@ -451,14 +497,15 @@ object functions {
* Converts a string expression to upper case.
*
* @group normal_funcs
+ * @since 1.3.0
*/
def upper(e: Column): Column = Upper(e.expr)
-
/**
* Computes bitwise NOT.
*
* @group normal_funcs
+ * @since 1.4.0
*/
def bitwiseNOT(e: Column): Column = BitwiseNot(e.expr)
@@ -471,6 +518,7 @@ object functions {
* 0.0 through pi.
*
* @group math_funcs
+ * @since 1.4.0
*/
def acos(e: Column): Column = Acos(e.expr)
@@ -479,6 +527,7 @@ object functions {
* 0.0 through pi.
*
* @group math_funcs
+ * @since 1.4.0
*/
def acos(columnName: String): Column = acos(Column(columnName))
@@ -487,6 +536,7 @@ object functions {
* -pi/2 through pi/2.
*
* @group math_funcs
+ * @since 1.4.0
*/
def asin(e: Column): Column = Asin(e.expr)
@@ -495,6 +545,7 @@ object functions {
* -pi/2 through pi/2.
*
* @group math_funcs
+ * @since 1.4.0
*/
def asin(columnName: String): Column = asin(Column(columnName))
@@ -502,6 +553,7 @@ object functions {
* Computes the tangent inverse of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan(e: Column): Column = Atan(e.expr)
@@ -509,6 +561,7 @@ object functions {
* Computes the tangent inverse of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan(columnName: String): Column = atan(Column(columnName))
@@ -517,6 +570,7 @@ object functions {
* polar coordinates (r, theta).
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan2(l: Column, r: Column): Column = Atan2(l.expr, r.expr)
@@ -525,6 +579,7 @@ object functions {
* polar coordinates (r, theta).
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan2(l: Column, rightName: String): Column = atan2(l, Column(rightName))
@@ -533,6 +588,7 @@ object functions {
* polar coordinates (r, theta).
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan2(leftName: String, r: Column): Column = atan2(Column(leftName), r)
@@ -541,6 +597,7 @@ object functions {
* polar coordinates (r, theta).
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan2(leftName: String, rightName: String): Column =
atan2(Column(leftName), Column(rightName))
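
Sketch converting assumed Cartesian columns "x" and "y" to polar coordinates (hypot is defined further below):

    import org.apache.spark.sql.functions.{atan2, col, hypot}

    df.select(hypot(col("x"), col("y")).as("r"), atan2(col("y"), col("x")).as("theta"))
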
@@ -550,6 +607,7 @@ object functions {
* polar coordinates (r, theta).
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan2(l: Column, r: Double): Column = atan2(l, lit(r).expr)
@@ -558,6 +616,7 @@ object functions {
* polar coordinates (r, theta).
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan2(leftName: String, r: Double): Column = atan2(Column(leftName), r)
@@ -566,6 +625,7 @@ object functions {
* polar coordinates (r, theta).
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan2(l: Double, r: Column): Column = atan2(lit(l).expr, r)
@@ -574,6 +634,7 @@ object functions {
* polar coordinates (r, theta).
*
* @group math_funcs
+ * @since 1.4.0
*/
def atan2(l: Double, rightName: String): Column = atan2(l, Column(rightName))
@@ -581,6 +642,7 @@ object functions {
* Computes the cube-root of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def cbrt(e: Column): Column = Cbrt(e.expr)
@@ -588,6 +650,7 @@ object functions {
* Computes the cube-root of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def cbrt(columnName: String): Column = cbrt(Column(columnName))
@@ -595,6 +658,7 @@ object functions {
* Computes the ceiling of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def ceil(e: Column): Column = Ceil(e.expr)
@@ -602,6 +666,7 @@ object functions {
* Computes the ceiling of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def ceil(columnName: String): Column = ceil(Column(columnName))
@@ -609,6 +674,7 @@ object functions {
* Computes the cosine of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def cos(e: Column): Column = Cos(e.expr)
@@ -616,6 +682,7 @@ object functions {
* Computes the cosine of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def cos(columnName: String): Column = cos(Column(columnName))
@@ -623,6 +690,7 @@ object functions {
* Computes the hyperbolic cosine of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def cosh(e: Column): Column = Cosh(e.expr)
@@ -630,6 +698,7 @@ object functions {
* Computes the hyperbolic cosine of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def cosh(columnName: String): Column = cosh(Column(columnName))
@@ -637,6 +706,7 @@ object functions {
* Computes the exponential of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def exp(e: Column): Column = Exp(e.expr)
@@ -644,6 +714,7 @@ object functions {
* Computes the exponential of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def exp(columnName: String): Column = exp(Column(columnName))
@@ -651,6 +722,7 @@ object functions {
* Computes the exponential of the given value minus one.
*
* @group math_funcs
+ * @since 1.4.0
*/
def expm1(e: Column): Column = Expm1(e.expr)
@@ -658,6 +730,7 @@ object functions {
  * Computes the exponential of the given column minus one.
*
* @group math_funcs
+ * @since 1.4.0
*/
def expm1(columnName: String): Column = expm1(Column(columnName))
@@ -665,6 +738,7 @@ object functions {
* Computes the floor of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def floor(e: Column): Column = Floor(e.expr)
@@ -672,6 +746,7 @@ object functions {
* Computes the floor of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def floor(columnName: String): Column = floor(Column(columnName))
@@ -679,6 +754,7 @@ object functions {
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
+ * @since 1.4.0
*/
def hypot(l: Column, r: Column): Column = Hypot(l.expr, r.expr)
@@ -686,6 +762,7 @@ object functions {
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
+ * @since 1.4.0
*/
def hypot(l: Column, rightName: String): Column = hypot(l, Column(rightName))
@@ -693,6 +770,7 @@ object functions {
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
+ * @since 1.4.0
*/
def hypot(leftName: String, r: Column): Column = hypot(Column(leftName), r)
@@ -700,6 +778,7 @@ object functions {
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
+ * @since 1.4.0
*/
def hypot(leftName: String, rightName: String): Column =
hypot(Column(leftName), Column(rightName))
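Combined with atan2, hypot converts Cartesian coordinates to polar ones without intermediate overflow; a sketch assuming columns "x" and "y" on `df`:

    import org.apache.spark.sql.functions._
    df.select(hypot(col("x"), col("y")).as("r"), atan2(col("y"), col("x")).as("theta"))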
@@ -708,6 +787,7 @@ object functions {
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
+ * @since 1.4.0
*/
def hypot(l: Column, r: Double): Column = hypot(l, lit(r).expr)
@@ -715,6 +795,7 @@ object functions {
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
+ * @since 1.4.0
*/
def hypot(leftName: String, r: Double): Column = hypot(Column(leftName), r)
@@ -722,6 +803,7 @@ object functions {
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
+ * @since 1.4.0
*/
def hypot(l: Double, r: Column): Column = hypot(lit(l).expr, r)
@@ -729,6 +811,7 @@ object functions {
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
+ * @since 1.4.0
*/
def hypot(l: Double, rightName: String): Column = hypot(l, Column(rightName))
@@ -736,6 +819,7 @@ object functions {
* Computes the natural logarithm of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def log(e: Column): Column = Log(e.expr)
@@ -743,6 +827,7 @@ object functions {
* Computes the natural logarithm of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def log(columnName: String): Column = log(Column(columnName))
@@ -750,6 +835,7 @@ object functions {
* Computes the logarithm of the given value in Base 10.
*
* @group math_funcs
+ * @since 1.4.0
*/
def log10(e: Column): Column = Log10(e.expr)
@@ -757,6 +843,7 @@ object functions {
  * Computes the logarithm of the given column in Base 10.
*
* @group math_funcs
+ * @since 1.4.0
*/
def log10(columnName: String): Column = log10(Column(columnName))
@@ -764,6 +851,7 @@ object functions {
* Computes the natural logarithm of the given value plus one.
*
* @group math_funcs
+ * @since 1.4.0
*/
def log1p(e: Column): Column = Log1p(e.expr)
@@ -771,6 +859,7 @@ object functions {
* Computes the natural logarithm of the given column plus one.
*
* @group math_funcs
+ * @since 1.4.0
*/
def log1p(columnName: String): Column = log1p(Column(columnName))
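A hedged sketch of the logarithm variants, assuming a numeric column "v" on `df`; log1p and expm1 are the numerically stable choices for values near zero:

    import org.apache.spark.sql.functions._
    df.select(log(col("v")), log10("v"), log1p(col("v")), expm1(col("v")))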
@@ -778,6 +867,7 @@ object functions {
* Returns the value of the first argument raised to the power of the second argument.
*
* @group math_funcs
+ * @since 1.4.0
*/
def pow(l: Column, r: Column): Column = Pow(l.expr, r.expr)
@@ -785,6 +875,7 @@ object functions {
* Returns the value of the first argument raised to the power of the second argument.
*
* @group math_funcs
+ * @since 1.4.0
*/
def pow(l: Column, rightName: String): Column = pow(l, Column(rightName))
@@ -792,6 +883,7 @@ object functions {
* Returns the value of the first argument raised to the power of the second argument.
*
* @group math_funcs
+ * @since 1.4.0
*/
def pow(leftName: String, r: Column): Column = pow(Column(leftName), r)
@@ -799,6 +891,7 @@ object functions {
* Returns the value of the first argument raised to the power of the second argument.
*
* @group math_funcs
+ * @since 1.4.0
*/
def pow(leftName: String, rightName: String): Column = pow(Column(leftName), Column(rightName))
@@ -806,6 +899,7 @@ object functions {
* Returns the value of the first argument raised to the power of the second argument.
*
* @group math_funcs
+ * @since 1.4.0
*/
def pow(l: Column, r: Double): Column = pow(l, lit(r).expr)
@@ -813,6 +907,7 @@ object functions {
* Returns the value of the first argument raised to the power of the second argument.
*
* @group math_funcs
+ * @since 1.4.0
*/
def pow(leftName: String, r: Double): Column = pow(Column(leftName), r)
@@ -820,6 +915,7 @@ object functions {
* Returns the value of the first argument raised to the power of the second argument.
*
* @group math_funcs
+ * @since 1.4.0
*/
def pow(l: Double, r: Column): Column = pow(lit(l).expr, r)
@@ -827,6 +923,7 @@ object functions {
* Returns the value of the first argument raised to the power of the second argument.
*
* @group math_funcs
+ * @since 1.4.0
*/
def pow(l: Double, rightName: String): Column = pow(l, Column(rightName))
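As with atan2 and hypot, pow mixes Columns, column names, and Double literals; a sketch assuming columns "base" and "exp" on `df`:

    import org.apache.spark.sql.functions._
    df.select(pow(col("base"), col("exp")), pow(col("base"), 2.0), pow(2.0, "exp"))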
@@ -835,6 +932,7 @@ object functions {
* is equal to a mathematical integer.
*
* @group math_funcs
+ * @since 1.4.0
*/
def rint(e: Column): Column = Rint(e.expr)
@@ -843,6 +941,7 @@ object functions {
* is equal to a mathematical integer.
*
* @group math_funcs
+ * @since 1.4.0
*/
def rint(columnName: String): Column = rint(Column(columnName))
@@ -850,6 +949,7 @@ object functions {
* Computes the signum of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def signum(e: Column): Column = Signum(e.expr)
@@ -857,6 +957,7 @@ object functions {
* Computes the signum of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def signum(columnName: String): Column = signum(Column(columnName))
@@ -864,6 +965,7 @@ object functions {
* Computes the sine of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def sin(e: Column): Column = Sin(e.expr)
@@ -871,6 +973,7 @@ object functions {
* Computes the sine of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def sin(columnName: String): Column = sin(Column(columnName))
@@ -878,6 +981,7 @@ object functions {
* Computes the hyperbolic sine of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def sinh(e: Column): Column = Sinh(e.expr)
@@ -885,6 +989,7 @@ object functions {
* Computes the hyperbolic sine of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def sinh(columnName: String): Column = sinh(Column(columnName))
@@ -892,6 +997,7 @@ object functions {
* Computes the tangent of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def tan(e: Column): Column = Tan(e.expr)
@@ -899,6 +1005,7 @@ object functions {
* Computes the tangent of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def tan(columnName: String): Column = tan(Column(columnName))
@@ -906,6 +1013,7 @@ object functions {
* Computes the hyperbolic tangent of the given value.
*
* @group math_funcs
+ * @since 1.4.0
*/
def tanh(e: Column): Column = Tanh(e.expr)
@@ -913,6 +1021,7 @@ object functions {
* Computes the hyperbolic tangent of the given column.
*
* @group math_funcs
+ * @since 1.4.0
*/
def tanh(columnName: String): Column = tanh(Column(columnName))
@@ -920,6 +1029,7 @@ object functions {
* Converts an angle measured in radians to an approximately equivalent angle measured in degrees.
*
* @group math_funcs
+ * @since 1.4.0
*/
def toDegrees(e: Column): Column = ToDegrees(e.expr)
@@ -927,6 +1037,7 @@ object functions {
* Converts an angle measured in radians to an approximately equivalent angle measured in degrees.
*
* @group math_funcs
+ * @since 1.4.0
*/
def toDegrees(columnName: String): Column = toDegrees(Column(columnName))
@@ -934,6 +1045,7 @@ object functions {
* Converts an angle measured in degrees to an approximately equivalent angle measured in radians.
*
* @group math_funcs
+ * @since 1.4.0
*/
def toRadians(e: Column): Column = ToRadians(e.expr)
@@ -941,6 +1053,7 @@ object functions {
* Converts an angle measured in degrees to an approximately equivalent angle measured in radians.
*
* @group math_funcs
+ * @since 1.4.0
*/
def toRadians(columnName: String): Column = toRadians(Column(columnName))
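A brief sketch, assuming a column "angle" holding radians; the two conversions are approximate inverses of each other:

    import org.apache.spark.sql.functions._
    df.select(toDegrees(col("angle")).as("deg"), toRadians(toDegrees(col("angle"))).as("roundtrip"))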
@@ -960,6 +1073,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -976,6 +1090,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function$x[$fTypes], returnType: DataType${if (args.length > 0) ", " + args else ""}): Column = {
ScalaUdf(f, returnType, Seq($argsInUdf))
@@ -988,6 +1103,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag](f: Function0[RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -998,6 +1114,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag](f: Function1[A1, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
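A hedged sketch of the typed udf API, assuming an Int column "value" on `df`; the return type is inferred from the closure's signature:

    import org.apache.spark.sql.functions._
    val squared = udf((x: Int) => x * x)
    df.select(col("value"), squared(col("value")).as("value_squared"))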
@@ -1008,6 +1125,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag](f: Function2[A1, A2, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -1018,6 +1136,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](f: Function3[A1, A2, A3, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -1028,6 +1147,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](f: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -1038,6 +1158,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](f: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -1048,6 +1169,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](f: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -1058,6 +1180,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](f: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -1068,6 +1191,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](f: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -1078,6 +1202,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](f: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -1088,6 +1213,7 @@ object functions {
* The data types are automatically inferred based on the function's signature.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](f: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType)
@@ -1100,6 +1226,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function0[_], returnType: DataType): Column = {
ScalaUdf(f, returnType, Seq())
@@ -1110,6 +1237,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function1[_, _], returnType: DataType, arg1: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr))
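A sketch of the untyped variant, where the return DataType is supplied explicitly rather than inferred (assuming the same "value" column):

    import org.apache.spark.sql.functions._
    import org.apache.spark.sql.types.IntegerType
    df.select(callUDF((x: Int) => x * x, IntegerType, col("value")))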
@@ -1120,6 +1248,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function2[_, _, _], returnType: DataType, arg1: Column, arg2: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr))
@@ -1130,6 +1259,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function3[_, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr))
@@ -1140,6 +1270,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function4[_, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr))
@@ -1150,6 +1281,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function5[_, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr))
@@ -1160,6 +1292,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function6[_, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr))
@@ -1170,6 +1303,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function7[_, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr))
@@ -1180,6 +1314,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function8[_, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr))
@@ -1190,6 +1325,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function9[_, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr))
@@ -1200,6 +1336,7 @@ object functions {
* you to specify the return data type.
*
* @group udf_funcs
+ * @since 1.3.0
*/
def callUDF(f: Function10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column, arg10: Column): Column = {
ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr, arg10.expr))
@@ -1220,6 +1357,7 @@ object functions {
* }}}
*
* @group udf_funcs
+ * @since 1.4.0
*/
def callUdf(udfName: String, cols: Column*): Column = {
UnresolvedFunction(udfName, cols.map(_.expr))
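A sketch of calling a UDF by its registered name, assuming a SQLContext `sqlContext` and a string column "name" on `df`:

    import org.apache.spark.sql.functions._
    sqlContext.udf.register("strLen", (s: String) => s.length)
    df.select(callUdf("strLen", col("name")))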
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala
index 791046e007..24e86ca415 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala
@@ -19,83 +19,113 @@ package org.apache.spark.sql.sources
/**
* A filter predicate for data sources.
+ *
+ * @since 1.3.0
*/
abstract class Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to a value
* equal to `value`.
+ *
+ * @since 1.3.0
*/
case class EqualTo(attribute: String, value: Any) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to a value
* greater than `value`.
+ *
+ * @since 1.3.0
*/
case class GreaterThan(attribute: String, value: Any) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to a value
* greater than or equal to `value`.
+ *
+ * @since 1.3.0
*/
case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to a value
* less than `value`.
+ *
+ * @since 1.3.0
*/
case class LessThan(attribute: String, value: Any) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to a value
* less than or equal to `value`.
+ *
+ * @since 1.3.0
*/
case class LessThanOrEqual(attribute: String, value: Any) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to one of the values in the array.
+ *
+ * @since 1.3.0
*/
case class In(attribute: String, values: Array[Any]) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to null.
+ *
+ * @since 1.3.0
*/
case class IsNull(attribute: String) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to a non-null value.
+ *
+ * @since 1.3.0
*/
case class IsNotNull(attribute: String) extends Filter
/**
  * A filter that evaluates to `true` iff both `left` and `right` evaluate to `true`.
+ *
+ * @since 1.3.0
*/
case class And(left: Filter, right: Filter) extends Filter
/**
* A filter that evaluates to `true` iff at least one of `left` or `right` evaluates to `true`.
+ *
+ * @since 1.3.0
*/
case class Or(left: Filter, right: Filter) extends Filter
/**
  * A filter that evaluates to `true` iff `child` evaluates to `false`.
+ *
+ * @since 1.3.0
*/
case class Not(child: Filter) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to
* a string that starts with `value`.
+ *
+ * @since 1.3.1
*/
case class StringStartsWith(attribute: String, value: String) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to
  * a string that ends with `value`.
+ *
+ * @since 1.3.1
*/
case class StringEndsWith(attribute: String, value: String) extends Filter
/**
* A filter that evaluates to `true` iff the attribute evaluates to
* a string that contains the string `value`.
+ *
+ * @since 1.3.1
*/
case class StringContains(attribute: String, value: String) extends Filter
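For illustration, a minimal sketch of how a custom data source might evaluate a subset of these pushed-down filters against a Map-backed record; the `evaluate` helper is hypothetical, and unhandled filter types simply fall through (Spark re-evaluates all filters anyway):

    import org.apache.spark.sql.sources._

    def evaluate(filter: Filter, record: Map[String, Any]): Boolean = filter match {
      case EqualTo(attr, value) => record.get(attr).exists(_ == value)
      case IsNull(attr)         => record.get(attr).forall(_ == null)
      case IsNotNull(attr)      => record.get(attr).exists(_ != null)
      case In(attr, values)     => record.get(attr).exists(v => values.contains(v))
      case And(left, right)     => evaluate(left, record) && evaluate(right, record)
      case Or(left, right)      => evaluate(left, record) || evaluate(right, record)
      case Not(child)           => !evaluate(child, record)
      case _                    => true // conservatively keep rows for unhandled filter types
    }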
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index 5e010d2112..6f315305c1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -43,6 +43,8 @@ import org.apache.spark.sql.types.{StructField, StructType}
* data source 'org.apache.spark.sql.json.DefaultSource'
*
  * A new instance of this class will be instantiated each time a DDL call is made.
+ *
+ * @since 1.3.0
*/
@DeveloperApi
trait RelationProvider {
@@ -72,6 +74,8 @@ trait RelationProvider {
* users need to provide a schema when using a SchemaRelationProvider.
  * A relation provider can inherit both [[RelationProvider]] and [[SchemaRelationProvider]]
* if it can support both schema inference and user-specified schemas.
+ *
+ * @since 1.3.0
*/
@DeveloperApi
trait SchemaRelationProvider {
@@ -106,6 +110,8 @@ trait SchemaRelationProvider {
  * using a SchemaRelationProvider. A relation provider can inherit both [[RelationProvider]]
* and [[FSBasedRelationProvider]] if it can support schema inference, user-specified
* schemas, and accessing partitioned relations.
+ *
+ * @since 1.4.0
*/
trait FSBasedRelationProvider {
/**
@@ -121,6 +127,9 @@ trait FSBasedRelationProvider {
parameters: Map[String, String]): FSBasedRelation
}
+/**
+ * @since 1.3.0
+ */
@DeveloperApi
trait CreatableRelationProvider {
/**
@@ -134,6 +143,8 @@ trait CreatableRelationProvider {
* existing data is expected to be overwritten by the contents of the DataFrame.
* ErrorIfExists mode means that when saving a DataFrame to a data source,
* if data already exists, an exception is expected to be thrown.
+ *
+ * @since 1.3.0
*/
def createRelation(
sqlContext: SQLContext,
@@ -152,6 +163,8 @@ trait CreatableRelationProvider {
  * BaseRelations must also define an equality function that only returns true when the two
* instances will return the same data. This equality function is used when determining when
* it is safe to substitute cached results for a given relation.
+ *
+ * @since 1.3.0
*/
@DeveloperApi
abstract class BaseRelation {
@@ -167,6 +180,8 @@ abstract class BaseRelation {
*
* Note that it is always better to overestimate size than underestimate, because underestimation
  * could lead to execution plans that are suboptimal (e.g. broadcasting a very large table).
+ *
+ * @since 1.3.0
*/
def sizeInBytes: Long = sqlContext.conf.defaultSizeInBytes
@@ -177,6 +192,8 @@ abstract class BaseRelation {
*
* Note: The internal representation is not stable across releases and thus data sources outside
* of Spark SQL should leave this as true.
+ *
+ * @since 1.4.0
*/
def needConversion: Boolean = true
}
@@ -184,6 +201,8 @@ abstract class BaseRelation {
/**
* ::DeveloperApi::
* A BaseRelation that can produce all of its tuples as an RDD of Row objects.
+ *
+ * @since 1.3.0
*/
@DeveloperApi
trait TableScan {
@@ -194,6 +213,8 @@ trait TableScan {
* ::DeveloperApi::
* A BaseRelation that can eliminate unneeded columns before producing an RDD
* containing all of its tuples as Row objects.
+ *
+ * @since 1.3.0
*/
@DeveloperApi
trait PrunedScan {
@@ -211,6 +232,8 @@ trait PrunedScan {
* The pushed down filters are currently purely an optimization as they will all be evaluated
* again. This means it is safe to use them with methods that produce false positives such
* as filtering partitions based on a bloom filter.
+ *
+ * @since 1.3.0
*/
@DeveloperApi
trait PrunedFilteredScan {
@@ -232,6 +255,8 @@ trait PrunedFilteredScan {
* 3. It assumes that fields of the data provided in the insert method are nullable.
* If a data source needs to check the actual nullability of a field, it needs to do it in the
* insert method.
+ *
+ * @since 1.3.0
*/
@DeveloperApi
trait InsertableRelation {
@@ -245,6 +270,8 @@ trait InsertableRelation {
* [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. Unlike the other APIs this
* interface is NOT designed to be binary compatible across releases and thus should only be used
* for experimentation.
+ *
+ * @since 1.3.0
*/
@Experimental
trait CatalystScan {
@@ -257,6 +284,8 @@ trait CatalystScan {
* underlying file system. Subclasses of [[OutputWriter]] must provide a zero-argument constructor.
* An [[OutputWriter]] instance is created and initialized when a new output file is opened on
  * the executor side. This instance is used to persist rows to this single output file.
+ *
+ * @since 1.4.0
*/
@Experimental
abstract class OutputWriter {
@@ -270,6 +299,8 @@ abstract class OutputWriter {
* @param dataSchema Schema of the rows to be written. Partition columns are not included in the
* schema if the corresponding relation is partitioned.
* @param context The Hadoop MapReduce task context.
+ *
+ * @since 1.4.0
*/
def init(
path: String,
@@ -279,12 +310,16 @@ abstract class OutputWriter {
/**
* Persists a single row. Invoked on the executor side. When writing to dynamically partitioned
* tables, dynamic partition columns are not included in rows to be written.
+ *
+ * @since 1.4.0
*/
def write(row: Row): Unit
/**
* Closes the [[OutputWriter]]. Invoked on the executor side after all rows are persisted, before
* the task output is committed.
+ *
+ * @since 1.4.0
*/
def close(): Unit
}
@@ -310,6 +345,8 @@ abstract class OutputWriter {
* directories of all partition directories.
* @param maybePartitionSpec An [[FSBasedRelation]] can be created with an optional
* [[PartitionSpec]], so that partition discovery can be skipped.
+ *
+ * @since 1.4.0
*/
@Experimental
abstract class FSBasedRelation private[sql](
@@ -323,6 +360,8 @@ abstract class FSBasedRelation private[sql](
  * @param paths Base paths of this relation. For partitioned relations, it should be the root
* directories of all partition directories.
* @param partitionColumns Partition columns of this relation.
+ *
+ * @since 1.4.0
*/
def this(paths: Array[String], partitionColumns: StructType) =
this(paths, {
@@ -335,6 +374,8 @@ abstract class FSBasedRelation private[sql](
*
* @param paths Base paths of this relation. For partitioned relations, it should be root
* directories of all partition directories.
+ *
+ * @since 1.4.0
*/
def this(paths: Array[String]) = this(paths, None)
@@ -356,6 +397,8 @@ abstract class FSBasedRelation private[sql](
/**
* Partition columns. Note that they are always nullable.
+ *
+ * @since 1.4.0
*/
def partitionColumns: StructType = partitionSpec.partitionColumns
@@ -385,6 +428,8 @@ abstract class FSBasedRelation private[sql](
/**
* Schema of this relation. It consists of columns appearing in [[dataSchema]] and all partition
* columns not appearing in [[dataSchema]].
+ *
+ * @since 1.4.0
*/
override lazy val schema: StructType = {
val dataSchemaColumnNames = dataSchema.map(_.name.toLowerCase).toSet
@@ -396,6 +441,8 @@ abstract class FSBasedRelation private[sql](
/**
  * Specifies schema of actual data files. For partitioned relations, if one or more partition
* columns are contained in the data files, they should also appear in `dataSchema`.
+ *
+ * @since 1.4.0
*/
def dataSchema: StructType
@@ -407,6 +454,8 @@ abstract class FSBasedRelation private[sql](
* @param inputPaths For a non-partitioned relation, it contains paths of all data files in the
* relation. For a partitioned relation, it contains paths of all data files in a single
* selected partition.
+ *
+ * @since 1.4.0
*/
def buildScan(inputPaths: Array[String]): RDD[Row] = {
throw new RuntimeException(
@@ -422,6 +471,8 @@ abstract class FSBasedRelation private[sql](
* @param inputPaths For a non-partitioned relation, it contains paths of all data files in the
* relation. For a partitioned relation, it contains paths of all data files in a single
* selected partition.
+ *
+ * @since 1.4.0
*/
def buildScan(requiredColumns: Array[String], inputPaths: Array[String]): RDD[Row] = {
// Yeah, to workaround serialization...
@@ -458,6 +509,8 @@ abstract class FSBasedRelation private[sql](
* @param inputPaths For a non-partitioned relation, it contains paths of all data files in the
* relation. For a partitioned relation, it contains paths of all data files in a single
* selected partition.
+ *
+ * @since 1.4.0
*/
def buildScan(
requiredColumns: Array[String],
@@ -473,12 +526,16 @@ abstract class FSBasedRelation private[sql](
  * Note that the only side effect expected here is mutating `job` via its setters. In particular,
  * because Spark SQL caches [[BaseRelation]] instances for performance, mutating relation internal
  * state may cause unexpected behavior.
+ *
+ * @since 1.4.0
*/
def prepareForWrite(job: Job): Unit = ()
/**
* This method is responsible for producing a new [[OutputWriter]] for each newly opened output
* file on the executor side.
+ *
+ * @since 1.4.0
*/
def outputWriterClass: Class[_ <: OutputWriter]
}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 61e8c154e8..766c42d040 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -63,6 +63,8 @@ private[hive] class HiveQLDialect extends ParserDialect {
/**
* An instance of the Spark SQL execution engine that integrates with data stored in Hive.
* Configuration for Hive is read from hive-site.xml on the classpath.
+ *
+ * @since 1.0.0
*/
class HiveContext(sc: SparkContext) extends SQLContext(sc) {
self =>
@@ -225,6 +227,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
* Spark SQL or the external data source library it uses might cache certain metadata about a
* table, such as the location of blocks. When those change outside of Spark SQL, users should
* call this function to invalidate the cache.
+ *
+ * @since 1.3.0
*/
def refreshTable(tableName: String): Unit = {
// TODO: Database support...
@@ -242,6 +246,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
*
* Right now, it only supports Hive tables and it only updates the size of a Hive table
* in the Hive metastore.
+ *
+ * @since 1.2.0
*/
@Experimental
def analyze(tableName: String) {
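A hedged usage sketch, assuming a HiveContext named `hiveContext` and an existing Hive table "logs":

    hiveContext.refreshTable("logs") // invalidate cached metadata after out-of-band changes
    hiveContext.analyze("logs")      // update the table size statistic in the Hive metastore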