aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-04-10 11:46:45 -0700
committerYin Huai <yhuai@databricks.com>2016-04-10 11:46:45 -0700
commita7ce473bd0520c71154ed028f295dab64a7485fe (patch)
treea4eb37c52e78c14136c859083444a6e9a4ed0708 /sql
parentb5c785629acb9afa5a62de3da472ec2184a31e3d (diff)
downloadspark-a7ce473bd0520c71154ed028f295dab64a7485fe.tar.gz
spark-a7ce473bd0520c71154ed028f295dab64a7485fe.tar.bz2
spark-a7ce473bd0520c71154ed028f295dab64a7485fe.zip
[SPARK-14415][SQL] All functions should show usages by command `DESC FUNCTION`
## What changes were proposed in this pull request? Currently, many functions do not show usages like the following. ``` scala> sql("desc function extended `sin`").collect().foreach(println) [Function: sin] [Class: org.apache.spark.sql.catalyst.expressions.Sin] [Usage: To be added.] [Extended Usage: To be added.] ``` This PR adds descriptions for functions and adds a testcase to prevent adding functions without usage. ``` scala> sql("desc function extended `sin`").collect().foreach(println); [Function: sin] [Class: org.apache.spark.sql.catalyst.expressions.Sin] [Usage: sin(x) - Returns the sine of x.] [Extended Usage: > SELECT sin(0); 0.0] ``` The only exceptions are `cube`, `grouping`, `grouping_id`, `rollup`, `window`. ## How was this patch tested? Pass the Jenkins tests (including new testcases.) Author: Dongjoon Hyun <dongjoon@apache.org> Closes #12185 from dongjoon-hyun/SPARK-14415.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala14
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala6
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala5
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala7
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala23
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala12
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala10
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala10
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala13
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala82
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala6
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala129
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala11
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala29
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala14
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala106
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala8
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala2
28 files changed, 489 insertions, 25 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala
index 94ac4bf09b..ff70774847 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala
@@ -23,6 +23,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the mean calculated from values of a group.")
case class Average(child: Expression) extends DeclarativeAggregate {
override def prettyName: String = "avg"
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
index 9d2db45144..17a7c6dce8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
@@ -130,6 +130,10 @@ abstract class CentralMomentAgg(child: Expression) extends DeclarativeAggregate
}
// Compute the population standard deviation of a column
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the population standard deviation calculated from values of a group.")
+// scalastyle:on line.size.limit
case class StddevPop(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 2
@@ -143,6 +147,8 @@ case class StddevPop(child: Expression) extends CentralMomentAgg(child) {
}
// Compute the sample standard deviation of a column
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the sample standard deviation calculated from values of a group.")
case class StddevSamp(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 2
@@ -157,6 +163,8 @@ case class StddevSamp(child: Expression) extends CentralMomentAgg(child) {
}
// Compute the population variance of a column
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the population variance calculated from values of a group.")
case class VariancePop(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 2
@@ -170,6 +178,8 @@ case class VariancePop(child: Expression) extends CentralMomentAgg(child) {
}
// Compute the sample variance of a column
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the sample variance calculated from values of a group.")
case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 2
@@ -183,6 +193,8 @@ case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) {
override def prettyName: String = "var_samp"
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the Skewness value calculated from values of a group.")
case class Skewness(child: Expression) extends CentralMomentAgg(child) {
override def prettyName: String = "skewness"
@@ -196,6 +208,8 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) {
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the Kurtosis value calculated from values of a group.")
case class Kurtosis(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 4
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala
index e6b8214ef2..e29265e2f4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala
@@ -28,6 +28,8 @@ import org.apache.spark.sql.types._
* Definition of Pearson correlation can be found at
* http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient
*/
+@ExpressionDescription(
+ usage = "_FUNC_(x,y) - Returns Pearson coefficient of correlation between a set of number pairs.")
case class Corr(x: Expression, y: Expression) extends DeclarativeAggregate {
override def children: Seq[Expression] = Seq(x, y)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
index 663c69e799..17ae012af7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
@@ -21,6 +21,12 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = """_FUNC_(*) - Returns the total number of retrieved rows, including rows containing NULL values.
+ _FUNC_(expr) - Returns the number of rows for which the supplied expression is non-NULL.
+ _FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-NULL.""")
+// scalastyle:on line.size.limit
case class Count(children: Seq[Expression]) extends DeclarativeAggregate {
override def nullable: Boolean = false
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala
index c175a8c4c7..d80afbebf7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala
@@ -76,6 +76,8 @@ abstract class Covariance(x: Expression, y: Expression) extends DeclarativeAggre
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs.")
case class CovPopulation(left: Expression, right: Expression) extends Covariance(left, right) {
override val evaluateExpression: Expression = {
If(n === Literal(0.0), Literal.create(null, DoubleType),
@@ -85,6 +87,8 @@ case class CovPopulation(left: Expression, right: Expression) extends Covariance
}
+@ExpressionDescription(
+ usage = "_FUNC_(x,y) - Returns the sample covariance of a set of number pairs.")
case class CovSample(left: Expression, right: Expression) extends Covariance(left, right) {
override val evaluateExpression: Expression = {
If(n === Literal(0.0), Literal.create(null, DoubleType),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
index 35f57426fe..b8ab0364dd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
@@ -28,6 +28,11 @@ import org.apache.spark.sql.types._
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
*/
+@ExpressionDescription(
+ usage = """_FUNC_(expr) - Returns the first value of `child` for a group of rows.
+ _FUNC_(expr,isIgnoreNull=false) - Returns the first value of `child` for a group of rows.
+ If isIgnoreNull is true, returns only non-null values.
+ """)
case class First(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate {
def this(child: Expression) = this(child, Literal.create(false, BooleanType))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala
index b6bd56cff6..1d218da6db 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala
@@ -20,8 +20,6 @@ package org.apache.spark.sql.catalyst.expressions.aggregate
import java.lang.{Long => JLong}
import java.util
-import com.clearspring.analytics.hash.MurmurHash
-
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
@@ -48,6 +46,11 @@ import org.apache.spark.sql.types._
* @param relativeSD the maximum estimation error allowed.
*/
// scalastyle:on
+@ExpressionDescription(
+ usage = """_FUNC_(expr) - Returns the estimated cardinality by HyperLogLog++.
+ _FUNC_(expr, relativeSD=0.05) - Returns the estimated cardinality by HyperLogLog++
+ with relativeSD, the maximum estimation error allowed.
+ """)
case class HyperLogLogPlusPlus(
child: Expression,
relativeSD: Double = 0.05,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
index be7e12d7a2..b05d74b49b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
@@ -28,6 +28,8 @@ import org.apache.spark.sql.types._
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
*/
+@ExpressionDescription(
+ usage = "_FUNC_(expr,isIgnoreNull) - Returns the last value of `child` for a group of rows.")
case class Last(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate {
def this(child: Expression) = this(child, Literal.create(false, BooleanType))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
index 906003188d..c534fe495f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
@@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
+@ExpressionDescription(
+ usage = "_FUNC_(expr) - Returns the maximum value of expr.")
case class Max(child: Expression) extends DeclarativeAggregate {
override def children: Seq[Expression] = child :: Nil
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
index 39f7afbd08..35289b4681 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
@@ -22,7 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
-
+@ExpressionDescription(
+ usage = "_FUNC_(expr) - Returns the minimum value of expr.")
case class Min(child: Expression) extends DeclarativeAggregate {
override def children: Seq[Expression] = child :: Nil
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala
index 08a67ea3df..ad217f25b5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala
@@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the sum calculated from values of a group.")
case class Sum(child: Expression) extends DeclarativeAggregate {
override def children: Seq[Expression] = child :: Nil
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index b388091538..f3d42fc0b2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -23,7 +23,8 @@ import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval
-
+@ExpressionDescription(
+ usage = "_FUNC_(a) - Returns -a.")
case class UnaryMinus(child: Expression) extends UnaryExpression
with ExpectsInputTypes with NullIntolerant {
@@ -59,6 +60,8 @@ case class UnaryMinus(child: Expression) extends UnaryExpression
override def sql: String = s"(-${child.sql})"
}
+@ExpressionDescription(
+ usage = "_FUNC_(a) - Returns a.")
case class UnaryPositive(child: Expression)
extends UnaryExpression with ExpectsInputTypes with NullIntolerant {
override def prettyName: String = "positive"
@@ -79,8 +82,8 @@ case class UnaryPositive(child: Expression)
* A function that get the absolute value of the numeric value.
*/
@ExpressionDescription(
- usage = "_FUNC_(expr) - Returns the absolute value of the numeric value",
- extended = "> SELECT _FUNC_('-1');\n1")
+ usage = "_FUNC_(expr) - Returns the absolute value of the numeric value.",
+ extended = "> SELECT _FUNC_('-1');\n 1")
case class Abs(child: Expression)
extends UnaryExpression with ExpectsInputTypes with NullIntolerant {
@@ -126,6 +129,8 @@ private[sql] object BinaryArithmetic {
def unapply(e: BinaryArithmetic): Option[(Expression, Expression)] = Some((e.left, e.right))
}
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Returns a+b.")
case class Add(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant {
override def inputType: AbstractDataType = TypeCollection.NumericAndInterval
@@ -155,6 +160,8 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic wit
}
}
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Returns a-b.")
case class Subtract(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {
@@ -185,6 +192,8 @@ case class Subtract(left: Expression, right: Expression)
}
}
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Multiplies a by b.")
case class Multiply(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {
@@ -198,6 +207,9 @@ case class Multiply(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = numeric.times(input1, input2)
}
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Divides a by b.",
+ extended = "> SELECT 3 _FUNC_ 2;\n 1.5")
case class Divide(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {
@@ -275,6 +287,8 @@ case class Divide(left: Expression, right: Expression)
}
}
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Returns the remainder when dividing a by b.")
case class Remainder(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {
@@ -464,6 +478,9 @@ case class MinOf(left: Expression, right: Expression)
override def symbol: String = "min"
}
+@ExpressionDescription(
+ usage = "_FUNC_(a, b) - Returns the positive modulo",
+ extended = "> SELECT _FUNC_(10,3);\n 1")
case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant {
override def toString: String = s"pmod($left, $right)"
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
index 4c90b3f7d3..a7e1cd66f2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
@@ -26,6 +26,9 @@ import org.apache.spark.sql.types._
*
* Code generation inherited from BinaryArithmetic.
*/
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Bitwise AND.",
+ extended = "> SELECT 3 _FUNC_ 5; 1")
case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic {
override def inputType: AbstractDataType = IntegralType
@@ -51,6 +54,9 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme
*
* Code generation inherited from BinaryArithmetic.
*/
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Bitwise OR.",
+ extended = "> SELECT 3 _FUNC_ 5; 7")
case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic {
override def inputType: AbstractDataType = IntegralType
@@ -76,6 +82,9 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet
*
* Code generation inherited from BinaryArithmetic.
*/
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Bitwise exclusive OR.",
+ extended = "> SELECT 3 _FUNC_ 5; 2")
case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic {
override def inputType: AbstractDataType = IntegralType
@@ -99,6 +108,9 @@ case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithme
/**
* A function that calculates bitwise not(~) of a number.
*/
+@ExpressionDescription(
+ usage = "_FUNC_ b - Bitwise NOT.",
+ extended = "> SELECT _FUNC_ 0; -1")
case class BitwiseNot(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index e36c985249..ab790cf372 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -26,6 +26,8 @@ import org.apache.spark.sql.types._
/**
* Given an array or map, returns its size.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(expr) - Returns the size of an array or a map.")
case class Size(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(ArrayType, MapType))
@@ -44,6 +46,11 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType
* Sorts the input array in ascending / descending order according to the natural ordering of
* the array elements and returns it.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(array(obj1, obj2,...)) - Sorts the input array in ascending order according to the natural ordering of the array elements.",
+ extended = " > SELECT _FUNC_(array('b', 'd', 'c', 'a'));\n 'a', 'b', 'c', 'd'")
+// scalastyle:on line.size.limit
case class SortArray(base: Expression, ascendingOrder: Expression)
extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
@@ -125,6 +132,9 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
/**
* Checks if the array (left) has the element (right)
*/
+@ExpressionDescription(
+ usage = "_FUNC_(array, value) - Returns TRUE if the array contains value.",
+ extended = " > SELECT _FUNC_(array(1, 2, 3), 2);\n true")
case class ArrayContains(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
index c299586dde..74de4a776d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -27,6 +27,8 @@ import org.apache.spark.unsafe.types.UTF8String
/**
* Returns an Array containing the evaluation of all children expressions.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(n0, ...) - Returns an array with the given elements.")
case class CreateArray(children: Seq[Expression]) extends Expression {
override def foldable: Boolean = children.forall(_.foldable)
@@ -73,6 +75,8 @@ case class CreateArray(children: Seq[Expression]) extends Expression {
* Returns a catalyst Map containing the evaluation of all children expressions as keys and values.
* The children are a flatted sequence of kv pairs, e.g. (key1, value1, key2, value2, ...)
*/
+@ExpressionDescription(
+ usage = "_FUNC_(key0, value0, key1, value1...) - Creates a map with the given key/value pairs.")
case class CreateMap(children: Seq[Expression]) extends Expression {
private[sql] lazy val keys = children.indices.filter(_ % 2 == 0).map(children)
private[sql] lazy val values = children.indices.filter(_ % 2 != 0).map(children)
@@ -153,6 +157,8 @@ case class CreateMap(children: Seq[Expression]) extends Expression {
/**
* Returns a Row containing the evaluation of all children expressions.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values.")
case class CreateStruct(children: Seq[Expression]) extends Expression {
override def foldable: Boolean = children.forall(_.foldable)
@@ -204,6 +210,10 @@ case class CreateStruct(children: Seq[Expression]) extends Expression {
*
* @param children Seq(name1, val1, name2, val2, ...)
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(name1, val1, name2, val2, ...) - Creates a struct with the given field names and values.")
+// scalastyle:on line.size.limit
case class CreateNamedStruct(children: Seq[Expression]) extends Expression {
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
index 35a7b46020..ae6a94842f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
@@ -23,7 +23,10 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
-
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(expr1,expr2,expr3) - If expr1 is TRUE then IF() returns expr2; otherwise it returns expr3.")
+// scalastyle:on line.size.limit
case class If(predicate: Expression, trueValue: Expression, falseValue: Expression)
extends Expression {
@@ -85,6 +88,10 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi
* @param branches seq of (branch condition, branch value)
* @param elseValue optional value for the else branch
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END - When a = true, returns b; when c = true, return d; else return e.")
+// scalastyle:on line.size.limit
case class CaseWhen(branches: Seq[(Expression, Expression)], elseValue: Option[Expression] = None)
extends Expression with CodegenFallback {
@@ -256,6 +263,8 @@ object CaseKeyWhen {
* A function that returns the least value of all parameters, skipping null values.
* It takes at least 2 parameters, and returns null iff all parameters are null.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(n1, ...) - Returns the least value of all parameters, skipping null values.")
case class Least(children: Seq[Expression]) extends Expression {
override def nullable: Boolean = children.forall(_.nullable)
@@ -315,6 +324,8 @@ case class Least(children: Seq[Expression]) extends Expression {
* A function that returns the greatest value of all parameters, skipping null values.
* It takes at least 2 parameters, and returns null iff all parameters are null.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(n1, ...) - Returns the greatest value of all parameters, skipping null values.")
case class Greatest(children: Seq[Expression]) extends Expression {
override def nullable: Boolean = children.forall(_.nullable)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 1d0ea68d7a..9135753041 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -35,6 +35,8 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
*
* There is no code generation since this expression should get constant folded by the optimizer.
*/
+@ExpressionDescription(
+ usage = "_FUNC_() - Returns the current date at the start of query evaluation.")
case class CurrentDate() extends LeafExpression with CodegenFallback {
override def foldable: Boolean = true
override def nullable: Boolean = false
@@ -54,6 +56,8 @@ case class CurrentDate() extends LeafExpression with CodegenFallback {
*
* There is no code generation since this expression should get constant folded by the optimizer.
*/
+@ExpressionDescription(
+ usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.")
case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
override def foldable: Boolean = true
override def nullable: Boolean = false
@@ -70,6 +74,9 @@ case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
/**
* Adds a number of days to startdate.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(start_date, num_days) - Returns the date that is num_days after start_date.",
+ extended = "> SELECT _FUNC_('2016-07-30', 1);\n '2016-07-31'")
case class DateAdd(startDate: Expression, days: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -96,6 +103,9 @@ case class DateAdd(startDate: Expression, days: Expression)
/**
* Subtracts a number of days to startdate.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(start_date, num_days) - Returns the date that is num_days before start_date.",
+ extended = "> SELECT _FUNC_('2016-07-30', 1);\n '2016-07-29'")
case class DateSub(startDate: Expression, days: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
override def left: Expression = startDate
@@ -118,6 +128,9 @@ case class DateSub(startDate: Expression, days: Expression)
override def prettyName: String = "date_sub"
}
+@ExpressionDescription(
+ usage = "_FUNC_(param) - Returns the hour component of the string/timestamp/interval.",
+ extended = "> SELECT _FUNC_('2009-07-30 12:58:59');\n 12")
case class Hour(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
@@ -134,6 +147,9 @@ case class Hour(child: Expression) extends UnaryExpression with ImplicitCastInpu
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(param) - Returns the minute component of the string/timestamp/interval.",
+ extended = "> SELECT _FUNC_('2009-07-30 12:58:59');\n 58")
case class Minute(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
@@ -150,6 +166,9 @@ case class Minute(child: Expression) extends UnaryExpression with ImplicitCastIn
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(param) - Returns the second component of the string/timestamp/interval.",
+ extended = "> SELECT _FUNC_('2009-07-30 12:58:59');\n 59")
case class Second(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
@@ -166,6 +185,9 @@ case class Second(child: Expression) extends UnaryExpression with ImplicitCastIn
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(param) - Returns the day of year of date/timestamp.",
+ extended = "> SELECT _FUNC_('2016-04-09');\n 100")
case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@@ -182,7 +204,9 @@ case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCas
}
}
-
+@ExpressionDescription(
+ usage = "_FUNC_(param) - Returns the year component of the date/timestamp/interval.",
+ extended = "> SELECT _FUNC_('2016-07-30');\n 2016")
case class Year(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@@ -199,6 +223,8 @@ case class Year(child: Expression) extends UnaryExpression with ImplicitCastInpu
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(param) - Returns the quarter of the year for date, in the range 1 to 4.")
case class Quarter(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@@ -215,6 +241,9 @@ case class Quarter(child: Expression) extends UnaryExpression with ImplicitCastI
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(param) - Returns the month component of the date/timestamp/interval.",
+ extended = "> SELECT _FUNC_('2016-07-30');\n 7")
case class Month(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@@ -231,6 +260,9 @@ case class Month(child: Expression) extends UnaryExpression with ImplicitCastInp
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(param) - Returns the day of month of date/timestamp, or the day of interval.",
+ extended = "> SELECT _FUNC_('2009-07-30');\n 30")
case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@@ -247,6 +279,9 @@ case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCa
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(param) - Returns the week of the year of the given date.",
+ extended = "> SELECT _FUNC_('2008-02-20');\n 8")
case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@@ -283,6 +318,11 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa
}
}
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(date/timestamp/string, fmt) - Converts a date/timestamp/string to a value of string in the format specified by the date format fmt.",
+ extended = "> SELECT _FUNC_('2016-04-08', 'y');\n '2016'")
+// scalastyle:on line.size.limit
case class DateFormatClass(left: Expression, right: Expression) extends BinaryExpression
with ImplicitCastInputTypes {
@@ -310,6 +350,8 @@ case class DateFormatClass(left: Expression, right: Expression) extends BinaryEx
* Converts time string with given pattern.
* Deterministic version of [[UnixTimestamp]], must have at least one parameter.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp of the given time.")
case class ToUnixTimestamp(timeExp: Expression, format: Expression) extends UnixTime {
override def left: Expression = timeExp
override def right: Expression = format
@@ -331,6 +373,8 @@ case class ToUnixTimestamp(timeExp: Expression, format: Expression) extends Unix
* If the first parameter is a Date or Timestamp instead of String, we will ignore the
* second parameter.
*/
+@ExpressionDescription(
+ usage = "_FUNC_([date[, pattern]]) - Returns the UNIX timestamp of current or specified time.")
case class UnixTimestamp(timeExp: Expression, format: Expression) extends UnixTime {
override def left: Expression = timeExp
override def right: Expression = format
@@ -459,6 +503,9 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
* format. If the format is missing, using format like "1970-01-01 00:00:00".
* Note that hive Language Manual says it returns 0 if fail, but in fact it returns null.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(unix_time, format) - Returns unix_time in the specified format.",
+ extended = "> SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss');\n '1970-01-01 00:00:00'")
case class FromUnixTime(sec: Expression, format: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -544,6 +591,9 @@ case class FromUnixTime(sec: Expression, format: Expression)
/**
* Returns the last day of the month which the date belongs to.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(date) - Returns the last day of the month which the date belongs to.",
+ extended = "> SELECT _FUNC_('2009-01-12');\n '2009-01-31'")
case class LastDay(startDate: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def child: Expression = startDate
@@ -570,6 +620,11 @@ case class LastDay(startDate: Expression) extends UnaryExpression with ImplicitC
*
* Allowed "dayOfWeek" is defined in [[DateTimeUtils.getDayOfWeekFromString]].
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(start_date, day_of_week) - Returns the first date which is later than start_date and named as indicated.",
+ extended = "> SELECT _FUNC_('2015-01-14', 'TU');\n '2015-01-20'")
+// scalastyle:on line.size.limit
case class NextDay(startDate: Expression, dayOfWeek: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -654,6 +709,10 @@ case class TimeAdd(start: Expression, interval: Expression)
/**
* Assumes given timestamp is UTC and converts to given timezone.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(timestamp, string timezone) - Assumes given timestamp is UTC and converts to given timezone.")
+// scalastyle:on line.size.limit
case class FromUTCTimestamp(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -729,6 +788,9 @@ case class TimeSub(start: Expression, interval: Expression)
/**
* Returns the date that is num_months after start_date.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(start_date, num_months) - Returns the date that is num_months after start_date.",
+ extended = "> SELECT _FUNC_('2016-08-31', 1);\n '2016-09-30'")
case class AddMonths(startDate: Expression, numMonths: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -756,6 +818,9 @@ case class AddMonths(startDate: Expression, numMonths: Expression)
/**
* Returns number of months between dates date1 and date2.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(date1, date2) - Returns the number of months between dates date1 and date2.",
+ extended = "> SELECT _FUNC_('1997-02-28 10:30:00', '1996-10-30');\n 3.94959677")
case class MonthsBetween(date1: Expression, date2: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -783,6 +848,10 @@ case class MonthsBetween(date1: Expression, date2: Expression)
/**
* Assumes given timestamp is in given timezone and converts to UTC.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(timestamp, string timezone) - Assumes given timestamp is in given timezone and converts to UTC.")
+// scalastyle:on line.size.limit
case class ToUTCTimestamp(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -830,6 +899,9 @@ case class ToUTCTimestamp(left: Expression, right: Expression)
/**
* Returns the date part of a timestamp or string.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(expr) - Extracts the date part of the date or datetime expression expr.",
+ extended = "> SELECT _FUNC_('2009-07-30 04:17:52');\n '2009-07-30'")
case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
// Implicit casting of spark will accept string in both date and timestamp format, as
@@ -850,6 +922,11 @@ case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastIn
/**
* Returns date truncated to the unit specified by the format.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(date, fmt) - Returns date with the time portion of the day truncated to the unit specified by the format model fmt.",
+ extended = "> SELECT _FUNC_('2009-02-12', 'MM');\n '2009-02-01'\n> SELECT _FUNC_('2015-10-27', 'YEAR');\n '2015-01-01'")
+// scalastyle:on line.size.limit
case class TruncDate(date: Expression, format: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
override def left: Expression = date
@@ -921,6 +998,9 @@ case class TruncDate(date: Expression, format: Expression)
/**
* Returns the number of days from startDate to endDate.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(date1, date2) - Returns the number of days between date1 and date2.",
+ extended = "> SELECT _FUNC_('2009-07-31', '2009-07-30');\n 1")
case class DateDiff(endDate: Expression, startDate: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
index e7ef21aa85..65d7a1d5a0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
@@ -99,6 +99,10 @@ case class UserDefinedGenerator(
/**
* Given an input array produces a sequence of rows for each value in the array.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(a) - Separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns.")
+// scalastyle:on line.size.limit
case class Explode(child: Expression) extends UnaryExpression with Generator with CodegenFallback {
override def children: Seq[Expression] = child :: Nil
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index 72b323587c..ecd09b7083 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -106,6 +106,8 @@ private[this] object SharedFactory {
* Extracts json object from a json string based on json path specified, and returns json string
* of the extracted json object. It will return null if the input json string is invalid.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(json_txt, path) - Extract a json object from path")
case class GetJsonObject(json: Expression, path: Expression)
extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
@@ -319,6 +321,10 @@ case class GetJsonObject(json: Expression, path: Expression)
}
}
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and returns a tuple. All the input parameters and output column types are string.")
+// scalastyle:on line.size.limit
case class JsonTuple(children: Seq[Expression])
extends Generator with CodegenFallback {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index e3d1bc127d..c8a28e8477 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -50,6 +50,7 @@ abstract class LeafMathExpression(c: Double, name: String)
/**
* A unary expression specifically for math functions. Math Functions expect a specific type of
* input format, therefore these functions extend `ExpectsInputTypes`.
+ *
* @param f The math function.
* @param name The short name of the function
*/
@@ -103,6 +104,7 @@ abstract class UnaryLogExpression(f: Double => Double, name: String)
/**
* A binary expression specifically for math functions that take two `Double`s as input and returns
* a `Double`.
+ *
* @param f The math function.
* @param name The short name of the function
*/
@@ -136,12 +138,18 @@ abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String)
* Euler's number. Note that there is no code generation because this is only
* evaluated by the optimizer during constant folding.
*/
+@ExpressionDescription(
+ usage = "_FUNC_() - Returns Euler's number, E.",
+ extended = "> SELECT _FUNC_();\n 2.718281828459045")
case class EulerNumber() extends LeafMathExpression(math.E, "E")
/**
* Pi. Note that there is no code generation because this is only
* evaluated by the optimizer during constant folding.
*/
+@ExpressionDescription(
+ usage = "_FUNC_() - Returns PI.",
+ extended = "> SELECT _FUNC_();\n 3.141592653589793")
case class Pi() extends LeafMathExpression(math.Pi, "PI")
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -150,14 +158,29 @@ case class Pi() extends LeafMathExpression(math.Pi, "PI")
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the arc cosine of x if -1<=x<=1 or NaN otherwise.",
+ extended = "> SELECT _FUNC_(1);\n 0.0\n> SELECT _FUNC_(2);\n NaN")
case class Acos(child: Expression) extends UnaryMathExpression(math.acos, "ACOS")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the arc sin of x if -1<=x<=1 or NaN otherwise.",
+ extended = "> SELECT _FUNC_(0);\n 0.0\n> SELECT _FUNC_(2);\n NaN")
case class Asin(child: Expression) extends UnaryMathExpression(math.asin, "ASIN")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the arc tangent.",
+ extended = "> SELECT _FUNC_(0);\n 0.0")
case class Atan(child: Expression) extends UnaryMathExpression(math.atan, "ATAN")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the cube root of a double value.",
+ extended = "> SELECT _FUNC_(27.0);\n 3.0")
case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, "CBRT")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the smallest integer not smaller than x.",
+ extended = "> SELECT _FUNC_(-0.1);\n 0\n> SELECT _FUNC_(5);\n 5")
case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL") {
override def dataType: DataType = child.dataType match {
case dt @ DecimalType.Fixed(_, 0) => dt
@@ -184,16 +207,26 @@ case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL"
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the cosine of x.",
+ extended = "> SELECT _FUNC_(0);\n 1.0")
case class Cos(child: Expression) extends UnaryMathExpression(math.cos, "COS")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the hyperbolic cosine of x.",
+ extended = "> SELECT _FUNC_(0);\n 1.0")
case class Cosh(child: Expression) extends UnaryMathExpression(math.cosh, "COSH")
/**
* Convert a num from one base to another
+ *
* @param numExpr the number to be converted
* @param fromBaseExpr from which base
* @param toBaseExpr to which base
*/
+@ExpressionDescription(
+ usage = "_FUNC_(num, from_base, to_base) - Convert num from from_base to to_base.",
+ extended = "> SELECT _FUNC_('100', 2, 10);\n '4'\n> SELECT _FUNC_(-10, 16, -10);\n '-16'")
case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@@ -222,10 +255,19 @@ case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expre
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns e to the power of x.",
+ extended = "> SELECT _FUNC_(0);\n 1.0")
case class Exp(child: Expression) extends UnaryMathExpression(math.exp, "EXP")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns exp(x) - 1.",
+ extended = "> SELECT _FUNC_(0);\n 0.0")
case class Expm1(child: Expression) extends UnaryMathExpression(math.expm1, "EXPM1")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the largest integer not greater than x.",
+ extended = "> SELECT _FUNC_(-0.1);\n -1\n> SELECT _FUNC_(5);\n 5")
case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLOOR") {
override def dataType: DataType = child.dataType match {
case dt @ DecimalType.Fixed(_, 0) => dt
@@ -283,6 +325,9 @@ object Factorial {
)
}
+@ExpressionDescription(
+ usage = "_FUNC_(n) - Returns n factorial if n is in [0..20]. Otherwise, NULL.",
+ extended = "> SELECT _FUNC_(5);\n 120")
case class Factorial(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[DataType] = Seq(IntegerType)
@@ -315,8 +360,14 @@ case class Factorial(child: Expression) extends UnaryExpression with ImplicitCas
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the natural logarithm of x with base e.",
+ extended = "> SELECT _FUNC_(1);\n 0.0")
case class Log(child: Expression) extends UnaryLogExpression(math.log, "LOG")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the logarithm of x with base 2.",
+ extended = "> SELECT _FUNC_(2);\n 1.0")
case class Log2(child: Expression)
extends UnaryLogExpression((x: Double) => math.log(x) / math.log(2), "LOG2") {
override def genCode(ctx: CodegenContext, ev: ExprCode): String = {
@@ -332,36 +383,72 @@ case class Log2(child: Expression)
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the logarithm of x with base 10.",
+ extended = "> SELECT _FUNC_(10);\n 1.0")
case class Log10(child: Expression) extends UnaryLogExpression(math.log10, "LOG10")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns log(1 + x).",
+ extended = "> SELECT _FUNC_(0);\n 0.0")
case class Log1p(child: Expression) extends UnaryLogExpression(math.log1p, "LOG1P") {
protected override val yAsymptote: Double = -1.0
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the double value closest to x that is a mathematical integer.",
+ extended = "> SELECT _FUNC_(12.3456);\n 12.0")
case class Rint(child: Expression) extends UnaryMathExpression(math.rint, "ROUND") {
override def funcName: String = "rint"
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the sign of x.",
+ extended = "> SELECT _FUNC_(40);\n 1.0")
case class Signum(child: Expression) extends UnaryMathExpression(math.signum, "SIGNUM")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the sine of x.",
+ extended = "> SELECT _FUNC_(0);\n 0.0")
case class Sin(child: Expression) extends UnaryMathExpression(math.sin, "SIN")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the hyperbolic sine of x.",
+ extended = "> SELECT _FUNC_(0);\n 0.0")
case class Sinh(child: Expression) extends UnaryMathExpression(math.sinh, "SINH")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the square root of x.",
+ extended = "> SELECT _FUNC_(4);\n 2.0")
case class Sqrt(child: Expression) extends UnaryMathExpression(math.sqrt, "SQRT")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the tangent of x.",
+ extended = "> SELECT _FUNC_(0);\n 0.0")
case class Tan(child: Expression) extends UnaryMathExpression(math.tan, "TAN")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns the hyperbolic tangent of x.",
+ extended = "> SELECT _FUNC_(0);\n 0.0")
case class Tanh(child: Expression) extends UnaryMathExpression(math.tanh, "TANH")
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Converts radians to degrees.",
+ extended = "> SELECT _FUNC_(3.141592653589793);\n 180.0")
case class ToDegrees(child: Expression) extends UnaryMathExpression(math.toDegrees, "DEGREES") {
override def funcName: String = "toDegrees"
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Converts degrees to radians.",
+ extended = "> SELECT _FUNC_(180);\n 3.141592653589793")
case class ToRadians(child: Expression) extends UnaryMathExpression(math.toRadians, "RADIANS") {
override def funcName: String = "toRadians"
}
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Returns x in binary.",
+ extended = "> SELECT _FUNC_(13);\n '1101'")
case class Bin(child: Expression)
extends UnaryExpression with Serializable with ImplicitCastInputTypes {
@@ -453,6 +540,9 @@ object Hex {
* Otherwise if the number is a STRING, it converts each character into its hex representation
* and returns the resulting STRING. Negative numbers would be treated as two's complement.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Convert the argument to hexadecimal.",
+ extended = "> SELECT _FUNC_(17);\n '11'\n> SELECT _FUNC_('Spark SQL');\n '537061726B2053514C'")
case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] =
@@ -481,6 +571,9 @@ case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInput
* Performs the inverse operation of HEX.
* Resulting characters are returned as a byte array.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(x) - Converts hexadecimal argument to binary.",
+ extended = "> SELECT decode(_FUNC_('537061726B2053514C'),'UTF-8');\n 'Spark SQL'")
case class Unhex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
@@ -509,7 +602,9 @@ case class Unhex(child: Expression) extends UnaryExpression with ImplicitCastInp
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
-
+@ExpressionDescription(
+ usage = "_FUNC_(x,y) - Returns the arc tangent2.",
+ extended = "> SELECT _FUNC_(0, 0);\n 0.0")
case class Atan2(left: Expression, right: Expression)
extends BinaryMathExpression(math.atan2, "ATAN2") {
@@ -523,6 +618,9 @@ case class Atan2(left: Expression, right: Expression)
}
}
+@ExpressionDescription(
+ usage = "_FUNC_(x1, x2) - Raise x1 to the power of x2.",
+ extended = "> SELECT _FUNC_(2, 3);\n 8.0")
case class Pow(left: Expression, right: Expression)
extends BinaryMathExpression(math.pow, "POWER") {
override def genCode(ctx: CodegenContext, ev: ExprCode): String = {
@@ -532,10 +630,14 @@ case class Pow(left: Expression, right: Expression)
/**
- * Bitwise unsigned left shift.
+ * Bitwise left shift.
+ *
* @param left the base number to shift.
* @param right number of bits to left shift.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(a, b) - Bitwise left shift.",
+ extended = "> SELECT _FUNC_(2, 1);\n 4")
case class ShiftLeft(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -558,10 +660,14 @@ case class ShiftLeft(left: Expression, right: Expression)
/**
- * Bitwise unsigned left shift.
+ * Bitwise right shift.
+ *
* @param left the base number to shift.
- * @param right number of bits to left shift.
+ * @param right number of bits to right shift.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(a, b) - Bitwise right shift.",
+ extended = "> SELECT _FUNC_(4, 1);\n 2")
case class ShiftRight(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -585,9 +691,13 @@ case class ShiftRight(left: Expression, right: Expression)
/**
* Bitwise unsigned right shift, for integer and long data type.
+ *
* @param left the base number.
* @param right the number of bits to right shift.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(a, b) - Bitwise unsigned right shift.",
+ extended = "> SELECT _FUNC_(4, 1);\n 2")
case class ShiftRightUnsigned(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -608,16 +718,22 @@ case class ShiftRightUnsigned(left: Expression, right: Expression)
}
}
-
+@ExpressionDescription(
+ usage = "_FUNC_(a, b) - Returns sqrt(a**2 + b**2).",
+ extended = "> SELECT _FUNC_(3, 4);\n 5.0")
case class Hypot(left: Expression, right: Expression)
extends BinaryMathExpression(math.hypot, "HYPOT")
/**
* Computes the logarithm of a number.
+ *
* @param left the logarithm base, default to e.
* @param right the number to compute the logarithm of.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(b, x) - Returns the logarithm of x with base b.",
+ extended = "> SELECT _FUNC_(10, 100);\n 2.0")
case class Logarithm(left: Expression, right: Expression)
extends BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG") {
@@ -674,6 +790,9 @@ case class Logarithm(left: Expression, right: Expression)
* @param child expr to be round, all [[NumericType]] is allowed as Input
* @param scale new scale to be round to, this should be a constant int at runtime
*/
+@ExpressionDescription(
+ usage = "_FUNC_(x, d) - Round x to d decimal places.",
+ extended = "> SELECT _FUNC_(12.3456, 1);\n 12.3")
case class Round(child: Expression, scale: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index eb8dc1423a..4bd918ed01 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -438,6 +438,8 @@ abstract class InterpretedHashFunction {
* We should use this hash function for both shuffle and bucket, so that we can guarantee shuffle
* and bucketing have same data distribution.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(a1, a2, ...) - Returns a hash value of the arguments.")
case class Murmur3Hash(children: Seq[Expression], seed: Int) extends HashExpression[Int] {
def this(arguments: Seq[Expression]) = this(arguments, 42)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
index e22026d584..6a45249943 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
@@ -34,6 +34,9 @@ import org.apache.spark.sql.types._
* coalesce(null, null, null) => null
* }}}
*/
+@ExpressionDescription(
+ usage = "_FUNC_(a1, a2, ...) - Returns the first non-null argument if exists. Otherwise, NULL.",
+ extended = "> SELECT _FUNC_(NULL, 1, NULL);\n 1")
case class Coalesce(children: Seq[Expression]) extends Expression {
/** Coalesce is nullable if all of its children are nullable, or if it has no children. */
@@ -89,6 +92,8 @@ case class Coalesce(children: Seq[Expression]) extends Expression {
/**
* Evaluates to `true` iff it's NaN.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(a) - Returns true if a is NaN and false otherwise.")
case class IsNaN(child: Expression) extends UnaryExpression
with Predicate with ImplicitCastInputTypes {
@@ -126,6 +131,8 @@ case class IsNaN(child: Expression) extends UnaryExpression
* An Expression evaluates to `left` iff it's not NaN, or evaluates to `right` otherwise.
* This Expression is useful for mapping NaN values to null.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(a,b) - Returns a iff it's not NaN, or b otherwise.")
case class NaNvl(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -180,6 +187,8 @@ case class NaNvl(left: Expression, right: Expression)
/**
* An expression that is evaluated to true if the input is null.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(a) - Returns true if a is NULL and false otherwise.")
case class IsNull(child: Expression) extends UnaryExpression with Predicate {
override def nullable: Boolean = false
@@ -201,6 +210,8 @@ case class IsNull(child: Expression) extends UnaryExpression with Predicate {
/**
* An expression that is evaluated to true if the input is not null.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(a) - Returns true if a is not NULL and false otherwise.")
case class IsNotNull(child: Expression) extends UnaryExpression with Predicate {
override def nullable: Boolean = false
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 4eb33258ac..38f1210a4e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -88,7 +88,8 @@ trait PredicateHelper {
expr.references.subsetOf(plan.outputSet)
}
-
+@ExpressionDescription(
+ usage = "_FUNC_ a - Logical not")
case class Not(child: Expression)
extends UnaryExpression with Predicate with ImplicitCastInputTypes with NullIntolerant {
@@ -109,6 +110,8 @@ case class Not(child: Expression)
/**
* Evaluates to `true` if `list` contains `value`.
*/
+@ExpressionDescription(
+ usage = "expr _FUNC_(val1, val2, ...) - Returns true if expr equals any valN.")
case class In(value: Expression, list: Seq[Expression]) extends Predicate
with ImplicitCastInputTypes {
@@ -243,6 +246,8 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with
}
}
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Logical AND.")
case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate {
override def inputType: AbstractDataType = BooleanType
@@ -306,7 +311,8 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with
}
}
-
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Logical OR.")
case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate {
override def inputType: AbstractDataType = BooleanType
@@ -401,7 +407,8 @@ private[sql] object Equality {
}
}
-
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Returns TRUE if a equals b and false otherwise.")
case class EqualTo(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {
@@ -426,7 +433,9 @@ case class EqualTo(left: Expression, right: Expression)
}
}
-
+@ExpressionDescription(
+ usage = """a _FUNC_ b - Returns the same result as the EQUAL(=) operator for non-null operands,
+ but returns TRUE if both are NULL, FALSE if one of them is NULL.""")
case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison {
override def inputType: AbstractDataType = AnyDataType
@@ -467,7 +476,8 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp
}
}
-
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Returns TRUE if a is less than b.")
case class LessThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {
@@ -480,7 +490,8 @@ case class LessThan(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lt(input1, input2)
}
-
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Returns TRUE if a is not greater than b.")
case class LessThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {
@@ -493,7 +504,8 @@ case class LessThanOrEqual(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lteq(input1, input2)
}
-
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Returns TRUE if a is greater than b.")
case class GreaterThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {
@@ -506,7 +518,8 @@ case class GreaterThan(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.gt(input1, input2)
}
-
+@ExpressionDescription(
+ usage = "a _FUNC_ b - Returns TRUE if a is not smaller than b.")
case class GreaterThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala
index 6be3cbcae6..1ec092a5be 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala
@@ -55,6 +55,8 @@ abstract class RDG extends LeafExpression with Nondeterministic {
}
/** Generate a random column with i.i.d. uniformly distributed values in [0, 1). */
+@ExpressionDescription(
+ usage = "_FUNC_(a) - Returns a random column with i.i.d. uniformly distributed values in [0, 1).")
case class Rand(seed: Long) extends RDG {
override protected def evalInternal(input: InternalRow): Double = rng.nextDouble()
@@ -78,6 +80,8 @@ case class Rand(seed: Long) extends RDG {
}
/** Generate a random column with i.i.d. gaussian random distribution. */
+@ExpressionDescription(
+ usage = "_FUNC_(a) - Returns a random column with i.i.d. gaussian random distribution.")
case class Randn(seed: Long) extends RDG {
override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index b68009331b..85a5429263 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -67,6 +67,8 @@ trait StringRegexExpression extends ImplicitCastInputTypes {
/**
* Simple RegEx pattern matching function
*/
+@ExpressionDescription(
+ usage = "str _FUNC_ pattern - Returns true if str matches pattern and false otherwise.")
case class Like(left: Expression, right: Expression)
extends BinaryExpression with StringRegexExpression {
@@ -117,7 +119,8 @@ case class Like(left: Expression, right: Expression)
}
}
-
+@ExpressionDescription(
+ usage = "str _FUNC_ regexp - Returns true if str matches regexp and false otherwise.")
case class RLike(left: Expression, right: Expression)
extends BinaryExpression with StringRegexExpression {
@@ -169,6 +172,9 @@ case class RLike(left: Expression, right: Expression)
/**
* Splits str around pat (pattern is a regular expression).
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str, regex) - Splits str around occurrences that match regex",
+ extended = "> SELECT _FUNC_('oneAtwoBthreeC', '[ABC]');\n ['one', 'two', 'three']")
case class StringSplit(str: Expression, pattern: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -198,6 +204,9 @@ case class StringSplit(str: Expression, pattern: Expression)
*
* NOTE: this expression is not THREAD-SAFE, as it has some internal mutable status.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str, regexp, rep) - replace all substrings of str that match regexp with rep.",
+ extended = "> SELECT _FUNC_('100-200', '(\\d+)', 'num');\n 'num-num'")
case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@@ -289,6 +298,9 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio
*
* NOTE: this expression is not THREAD-SAFE, as it has some internal mutable status.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str, regexp[, idx]) - extracts a group that matches regexp.",
+ extended = "> SELECT _FUNC_('100-200', '(\\d+)-(\\d+)', 1);\n '100'")
case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
def this(s: Expression, r: Expression) = this(s, r, Literal(1))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 7e0e7a833b..a17482697d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -35,6 +35,9 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String}
* An expression that concatenates multiple input strings into a single string.
* If any input is null, concat returns null.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str1, str2, ..., strN) - Returns the concatenation of str1, str2, ..., strN",
+ extended = "> SELECT _FUNC_('Spark','SQL');\n 'SparkSQL'")
case class Concat(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
@@ -70,6 +73,10 @@ case class Concat(children: Seq[Expression]) extends Expression with ImplicitCas
*
* Returns null if the separator is null. Otherwise, concat_ws skips all null values.
*/
+@ExpressionDescription(
+ usage =
+ "_FUNC_(sep, [str | array(str)]+) - Returns the concatenation of the strings separated by sep.",
+ extended = "> SELECT _FUNC_(' ', 'Spark', 'SQL');\n 'Spark SQL'")
case class ConcatWs(children: Seq[Expression])
extends Expression with ImplicitCastInputTypes {
@@ -188,7 +195,7 @@ case class Upper(child: Expression)
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Returns str with all characters changed to lowercase",
- extended = "> SELECT _FUNC_('SparkSql');\n'sparksql'")
+ extended = "> SELECT _FUNC_('SparkSql');\n 'sparksql'")
case class Lower(child: Expression) extends UnaryExpression with String2StringExpression {
override def convert(v: UTF8String): UTF8String = v.toLowerCase
@@ -270,6 +277,11 @@ object StringTranslate {
* The translate will happen when any character in the string matching with the character
* in the `matchingExpr`.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = """_FUNC_(input, from, to) - Translates the input string by replacing the characters present in the from string with the corresponding characters in the to string""",
+ extended = "> SELECT _FUNC_('AaBbCc', 'abc', '123');\n 'A1B2C3'")
+// scalastyle:on line.size.limit
case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replaceExpr: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@@ -325,6 +337,12 @@ case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replac
* delimited list (right). Returns 0, if the string wasn't found or if the given
* string (left) contains a comma.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = """_FUNC_(str, str_array) - Returns the index (1-based) of the given string (left) in the comma-delimited list (right).
+ Returns 0, if the string wasn't found or if the given string (left) contains a comma.""",
+ extended = "> SELECT _FUNC_('ab','abc,b,ab,c,def');\n 3")
+// scalastyle:on
case class FindInSet(left: Expression, right: Expression) extends BinaryExpression
with ImplicitCastInputTypes {
@@ -347,6 +365,9 @@ case class FindInSet(left: Expression, right: Expression) extends BinaryExpressi
/**
* A function that trim the spaces from both ends for the specified string.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str) - Removes the leading and trailing space characters from str.",
+ extended = "> SELECT _FUNC_(' SparkSQL ');\n 'SparkSQL'")
case class StringTrim(child: Expression)
extends UnaryExpression with String2StringExpression {
@@ -362,6 +383,9 @@ case class StringTrim(child: Expression)
/**
* A function that trim the spaces from left end for given string.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str) - Removes the leading space characters from str.",
+ extended = "> SELECT _FUNC_(' SparkSQL ');\n 'SparkSQL '")
case class StringTrimLeft(child: Expression)
extends UnaryExpression with String2StringExpression {
@@ -377,6 +401,9 @@ case class StringTrimLeft(child: Expression)
/**
* A function that trim the spaces from right end for given string.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str) - Removes the trailing space characters from str.",
+ extended = "> SELECT _FUNC_(' SparkSQL ');\n ' SparkSQL'")
case class StringTrimRight(child: Expression)
extends UnaryExpression with String2StringExpression {
@@ -396,6 +423,9 @@ case class StringTrimRight(child: Expression)
*
* NOTE: that this is not zero based, but 1-based index. The first character in str has index 1.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str, substr) - Returns the (1-based) index of the first occurrence of substr in str.",
+ extended = "> SELECT _FUNC_('SparkSQL', 'SQL');\n 6")
case class StringInstr(str: Expression, substr: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -422,6 +452,15 @@ case class StringInstr(str: Expression, substr: Expression)
* returned. If count is negative, every to the right of the final delimiter (counting from the
* right) is returned. substring_index performs a case-sensitive match when searching for delim.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = """_FUNC_(str, delim, count) - Returns the substring from str before count occurrences of the delimiter delim.
+ If count is positive, everything to the left of the final delimiter (counting from the
+ left) is returned. If count is negative, everything to the right of the final delimiter
+ (counting from the right) is returned. Substring_index performs a case-sensitive match
+ when searching for delim.""",
+ extended = "> SELECT _FUNC_('www.apache.org', '.', 2);\n 'www.apache'")
+// scalastyle:on line.size.limit
case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@@ -445,6 +484,12 @@ case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr:
* A function that returns the position of the first occurrence of substr
* in given string after position pos.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = """_FUNC_(substr, str[, pos]) - Returns the position of the first occurrence of substr in str after position pos.
+ The given pos and return value are 1-based.""",
+ extended = "> SELECT _FUNC_('bar', 'foobarbar', 5);\n 7")
+// scalastyle:on line.size.limit
case class StringLocate(substr: Expression, str: Expression, start: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@@ -510,6 +555,11 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression)
/**
* Returns str, left-padded with pad to a length of len.
*/
+@ExpressionDescription(
+ usage = """_FUNC_(str, len, pad) - Returns str, left-padded with pad to a length of len.
+ If str is longer than len, the return value is shortened to len characters.""",
+ extended = "> SELECT _FUNC_('hi', 5, '??');\n '???hi'\n" +
+ "> SELECT _FUNC_('hi', 1, '??');\n 'h'")
case class StringLPad(str: Expression, len: Expression, pad: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@@ -531,6 +581,11 @@ case class StringLPad(str: Expression, len: Expression, pad: Expression)
/**
* Returns str, right-padded with pad to a length of len.
*/
+@ExpressionDescription(
+ usage = """_FUNC_(str, len, pad) - Returns str, right-padded with pad to a length of len.
+ If str is longer than len, the return value is shortened to len characters.""",
+ extended = "> SELECT _FUNC_('hi', 5, '??');\n 'hi???'\n" +
+ "> SELECT _FUNC_('hi', 1, '??');\n 'h'")
case class StringRPad(str: Expression, len: Expression, pad: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@@ -552,6 +607,11 @@ case class StringRPad(str: Expression, len: Expression, pad: Expression)
/**
* Returns the input formatted according do printf-style format strings
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(String format, Obj... args) - Returns a formatted string from printf-style format strings.",
+ extended = "> SELECT _FUNC_(\"Hello World %d %s\", 100, \"days\");\n 'Hello World 100 days'")
+// scalastyle:on line.size.limit
case class FormatString(children: Expression*) extends Expression with ImplicitCastInputTypes {
require(children.nonEmpty, "format_string() should take at least 1 argument")
@@ -642,6 +702,9 @@ case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastI
/**
* Returns the string which repeat the given string value n times.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str, n) - Returns the string which repeats the given string value n times.",
+ extended = "> SELECT _FUNC_('123', 2);\n '123123'")
case class StringRepeat(str: Expression, times: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -664,6 +727,9 @@ case class StringRepeat(str: Expression, times: Expression)
/**
* Returns the reversed given string.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str) - Returns the reversed given string.",
+ extended = "> SELECT _FUNC_('Spark SQL');\n 'LQS krapS'")
case class StringReverse(child: Expression) extends UnaryExpression with String2StringExpression {
override def convert(v: UTF8String): UTF8String = v.reverse()
@@ -677,6 +743,9 @@ case class StringReverse(child: Expression) extends UnaryExpression with String2
/**
* Returns a n spaces string.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(n) - Returns a string consisting of n spaces.",
+ extended = "> SELECT _FUNC_(2);\n ' '")
case class StringSpace(child: Expression)
extends UnaryExpression with ImplicitCastInputTypes {
@@ -699,7 +768,14 @@ case class StringSpace(child: Expression)
/**
* A function that takes a substring of its first argument starting at a given position.
* Defined for String and Binary types.
+ *
+ * NOTE: that this is not zero based, but 1-based index. The first character in str has index 1.
*/
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(str, pos[, len]) - Returns the substring of str that starts at pos and is of length len or the slice of byte array that starts at pos and is of length len.",
+ extended = "> SELECT _FUNC_('Spark SQL', 5);\n 'k SQL'\n> SELECT _FUNC_('Spark SQL', -3);\n 'SQL'\n> SELECT _FUNC_('Spark SQL', 5, 1);\n 'k'")
+// scalastyle:on line.size.limit
case class Substring(str: Expression, pos: Expression, len: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@@ -737,6 +813,9 @@ case class Substring(str: Expression, pos: Expression, len: Expression)
/**
* A function that return the length of the given string or binary expression.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str | binary) - Returns the length of str or number of bytes in binary data.",
+ extended = "> SELECT _FUNC_('Spark SQL');\n 9")
case class Length(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))
@@ -757,6 +836,9 @@ case class Length(child: Expression) extends UnaryExpression with ExpectsInputTy
/**
* A function that return the Levenshtein distance between the two given strings.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str1, str2) - Returns the Levenshtein distance between the two given strings.",
+ extended = "> SELECT _FUNC_('kitten', 'sitting');\n 3")
case class Levenshtein(left: Expression, right: Expression) extends BinaryExpression
with ImplicitCastInputTypes {
@@ -775,6 +857,9 @@ case class Levenshtein(left: Expression, right: Expression) extends BinaryExpres
/**
* A function that return soundex code of the given string expression.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str) - Returns soundex code of the string.",
+ extended = "> SELECT _FUNC_('Miller');\n 'M460'")
case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = StringType
@@ -791,6 +876,10 @@ case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputT
/**
* Returns the numeric value of the first character of str.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str) - Returns the numeric value of the first character of str.",
+ extended = "> SELECT _FUNC_('222');\n 50\n" +
+ "> SELECT _FUNC_(2);\n 50")
case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = IntegerType
@@ -822,6 +911,8 @@ case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInp
/**
* Converts the argument from binary to a base 64 string.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(bin) - Convert the argument from binary to a base 64 string.")
case class Base64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = StringType
@@ -844,6 +935,8 @@ case class Base64(child: Expression) extends UnaryExpression with ImplicitCastIn
/**
* Converts the argument from a base 64 string to BINARY.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str) - Convert the argument from a base 64 string to binary.")
case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = BinaryType
@@ -865,6 +958,8 @@ case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCast
* (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
* If either argument is null, the result will also be null.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(bin, str) - Decode the first argument using the second argument character set.")
case class Decode(bin: Expression, charset: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -894,7 +989,9 @@ case class Decode(bin: Expression, charset: Expression)
* Encodes the first argument into a BINARY using the provided character set
* (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
* If either argument is null, the result will also be null.
-*/
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(str, str) - Encode the first argument using the second argument character set.")
case class Encode(value: Expression, charset: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@@ -924,6 +1021,11 @@ case class Encode(value: Expression, charset: Expression)
* and returns the result as a string. If D is 0, the result has no decimal point or
* fractional part.
*/
+@ExpressionDescription(
+ usage = """_FUNC_(X, D) - Formats the number X like '#,###,###.##', rounded to D decimal places.
+ If D is 0, the result has no decimal point or fractional part.
+ This is supposed to function like MySQL's FORMAT.""",
+ extended = "> SELECT _FUNC_(12332.123456, 4);\n '12,332.1235'")
case class FormatNumber(x: Expression, d: Expression)
extends BinaryExpression with ExpectsInputTypes {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index dd648cdb81..695dda269a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -89,6 +89,14 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
"Function: abcadf not found.")
}
+ test("SPARK-14415: All functions should have own descriptions") {
+ for (f <- sqlContext.sessionState.functionRegistry.listFunction()) {
+ if (!Seq("cube", "grouping", "grouping_id", "rollup", "window").contains(f)) {
+ checkExistence(sql(s"describe function `$f`"), false, "To be added.")
+ }
+ }
+ }
+
test("SPARK-6743: no columns from cache") {
Seq(
(83, 0, 38),
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index f3796a9966..b4886eba7a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -238,7 +238,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
checkExistence(sql("describe functioN `~`"), true,
"Function: ~",
"Class: org.apache.spark.sql.catalyst.expressions.BitwiseNot",
- "Usage: To be added.")
+ "Usage: ~ b - Bitwise NOT.")
// Hard coded describe functions
checkExistence(sql("describe function `<>`"), true,