From a7ce473bd0520c71154ed028f295dab64a7485fe Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 10 Apr 2016 11:46:45 -0700 Subject: [SPARK-14415][SQL] All functions should show usages by command `DESC FUNCTION` ## What changes were proposed in this pull request? Currently, many functions do not show usages like the following. ``` scala> sql("desc function extended `sin`").collect().foreach(println) [Function: sin] [Class: org.apache.spark.sql.catalyst.expressions.Sin] [Usage: To be added.] [Extended Usage: To be added.] ``` This PR adds descriptions for functions and adds a testcase to prevent adding functions without usage. ``` scala> sql("desc function extended `sin`").collect().foreach(println); [Function: sin] [Class: org.apache.spark.sql.catalyst.expressions.Sin] [Usage: sin(x) - Returns the sine of x.] [Extended Usage: > SELECT sin(0); 0.0] ``` The only exceptions are `cube`, `grouping`, `grouping_id`, `rollup`, `window`. ## How was this patch tested? Pass the Jenkins tests (including new testcases.) Author: Dongjoon Hyun Closes #12185 from dongjoon-hyun/SPARK-14415. 
--- .../catalyst/expressions/aggregate/Average.scala | 2 + .../expressions/aggregate/CentralMomentAgg.scala | 14 +++ .../sql/catalyst/expressions/aggregate/Corr.scala | 2 + .../sql/catalyst/expressions/aggregate/Count.scala | 6 + .../expressions/aggregate/Covariance.scala | 4 + .../sql/catalyst/expressions/aggregate/First.scala | 5 + .../aggregate/HyperLogLogPlusPlus.scala | 7 +- .../sql/catalyst/expressions/aggregate/Last.scala | 2 + .../sql/catalyst/expressions/aggregate/Max.scala | 2 + .../sql/catalyst/expressions/aggregate/Min.scala | 3 +- .../sql/catalyst/expressions/aggregate/Sum.scala | 2 + .../sql/catalyst/expressions/arithmetic.scala | 23 +++- .../catalyst/expressions/bitwiseExpressions.scala | 12 ++ .../expressions/collectionOperations.scala | 10 ++ .../catalyst/expressions/complexTypeCreator.scala | 10 ++ .../expressions/conditionalExpressions.scala | 13 ++- .../catalyst/expressions/datetimeExpressions.scala | 82 ++++++++++++- .../sql/catalyst/expressions/generators.scala | 4 + .../sql/catalyst/expressions/jsonExpressions.scala | 6 + .../sql/catalyst/expressions/mathExpressions.scala | 129 ++++++++++++++++++++- .../spark/sql/catalyst/expressions/misc.scala | 2 + .../sql/catalyst/expressions/nullExpressions.scala | 11 ++ .../sql/catalyst/expressions/predicates.scala | 29 +++-- .../catalyst/expressions/randomExpressions.scala | 4 + .../catalyst/expressions/regexpExpressions.scala | 14 ++- .../catalyst/expressions/stringExpressions.scala | 106 ++++++++++++++++- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 8 ++ .../spark/sql/hive/execution/SQLQuerySuite.scala | 2 +- 28 files changed, 489 insertions(+), 25 deletions(-) (limited to 'sql') diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala index 94ac4bf09b..ff70774847 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala @@ -23,6 +23,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the mean calculated from values of a group.") case class Average(child: Expression) extends DeclarativeAggregate { override def prettyName: String = "avg" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala index 9d2db45144..17a7c6dce8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala @@ -130,6 +130,10 @@ abstract class CentralMomentAgg(child: Expression) extends DeclarativeAggregate } // Compute the population standard deviation of a column +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the population standard deviation calculated from values of a group.") +// scalastyle:on line.size.limit case class StddevPop(child: Expression) extends CentralMomentAgg(child) { override protected def momentOrder = 2 @@ -143,6 +147,8 @@ case class StddevPop(child: Expression) extends CentralMomentAgg(child) { } // Compute the sample standard deviation of a column +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the sample standard deviation calculated from values of a group.") case class StddevSamp(child: Expression) extends CentralMomentAgg(child) { override protected def momentOrder = 2 @@ -157,6 +163,8 @@ case class StddevSamp(child: Expression) extends CentralMomentAgg(child) { } // 
Compute the population variance of a column +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the population variance calculated from values of a group.") case class VariancePop(child: Expression) extends CentralMomentAgg(child) { override protected def momentOrder = 2 @@ -170,6 +178,8 @@ case class VariancePop(child: Expression) extends CentralMomentAgg(child) { } // Compute the sample variance of a column +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the sample variance calculated from values of a group.") case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) { override protected def momentOrder = 2 @@ -183,6 +193,8 @@ case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) { override def prettyName: String = "var_samp" } +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the Skewness value calculated from values of a group.") case class Skewness(child: Expression) extends CentralMomentAgg(child) { override def prettyName: String = "skewness" @@ -196,6 +208,8 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) { } } +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the Kurtosis value calculated from values of a group.") case class Kurtosis(child: Expression) extends CentralMomentAgg(child) { override protected def momentOrder = 4 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala index e6b8214ef2..e29265e2f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala @@ -28,6 +28,8 @@ import org.apache.spark.sql.types._ * Definition of Pearson correlation can be found at * http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient */ +@ExpressionDescription( + usage = 
"_FUNC_(x,y) - Returns Pearson coefficient of correlation between a set of number pairs.") case class Corr(x: Expression, y: Expression) extends DeclarativeAggregate { override def children: Seq[Expression] = Seq(x, y) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala index 663c69e799..17ae012af7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala @@ -21,6 +21,12 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """_FUNC_(*) - Returns the total number of retrieved rows, including rows containing NULL values. + _FUNC_(expr) - Returns the number of rows for which the supplied expression is non-NULL. 
+ _FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-NULL.""") +// scalastyle:on line.size.limit case class Count(children: Seq[Expression]) extends DeclarativeAggregate { override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala index c175a8c4c7..d80afbebf7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala @@ -76,6 +76,8 @@ abstract class Covariance(x: Expression, y: Expression) extends DeclarativeAggre } } +@ExpressionDescription( + usage = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs.") case class CovPopulation(left: Expression, right: Expression) extends Covariance(left, right) { override val evaluateExpression: Expression = { If(n === Literal(0.0), Literal.create(null, DoubleType), @@ -85,6 +87,8 @@ case class CovPopulation(left: Expression, right: Expression) extends Covariance } +@ExpressionDescription( + usage = "_FUNC_(x,y) - Returns the sample covariance of a set of number pairs.") case class CovSample(left: Expression, right: Expression) extends Covariance(left, right) { override val evaluateExpression: Expression = { If(n === Literal(0.0), Literal.create(null, DoubleType), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala index 35f57426fe..b8ab0364dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala @@ -28,6 +28,11 
@@ import org.apache.spark.sql.types._ * is used) its result will not be deterministic (unless the input table is sorted and has * a single partition, and we use a single reducer to do the aggregation.). */ +@ExpressionDescription( + usage = """_FUNC_(expr) - Returns the first value of `child` for a group of rows. + _FUNC_(expr,isIgnoreNull=false) - Returns the first value of `child` for a group of rows. + If isIgnoreNull is true, returns only non-null values. + """) case class First(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate { def this(child: Expression) = this(child, Literal.create(false, BooleanType)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala index b6bd56cff6..1d218da6db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala @@ -20,8 +20,6 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import java.lang.{Long => JLong} import java.util -import com.clearspring.analytics.hash.MurmurHash - import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ @@ -48,6 +46,11 @@ import org.apache.spark.sql.types._ * @param relativeSD the maximum estimation error allowed. */ // scalastyle:on +@ExpressionDescription( + usage = """_FUNC_(expr) - Returns the estimated cardinality by HyperLogLog++. + _FUNC_(expr, relativeSD=0.05) - Returns the estimated cardinality by HyperLogLog++ + with relativeSD, the maximum estimation error allowed. 
+ """) case class HyperLogLogPlusPlus( child: Expression, relativeSD: Double = 0.05, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala index be7e12d7a2..b05d74b49b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala @@ -28,6 +28,8 @@ import org.apache.spark.sql.types._ * is used) its result will not be deterministic (unless the input table is sorted and has * a single partition, and we use a single reducer to do the aggregation.). */ +@ExpressionDescription( + usage = "_FUNC_(expr,isIgnoreNull) - Returns the last value of `child` for a group of rows.") case class Last(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate { def this(child: Expression) = this(child, Literal.create(false, BooleanType)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala index 906003188d..c534fe495f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala @@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the maximum value of expr.") case class Max(child: Expression) extends DeclarativeAggregate { override def children: Seq[Expression] = child :: Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala index 39f7afbd08..35289b4681 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala @@ -22,7 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ - +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the minimum value of expr.") case class Min(child: Expression) extends DeclarativeAggregate { override def children: Seq[Expression] = child :: Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala index 08a67ea3df..ad217f25b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala @@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the sum calculated from values of a group.") case class Sum(child: Expression) extends DeclarativeAggregate { override def children: Seq[Expression] = child :: Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index b388091538..f3d42fc0b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -23,7 +23,8 @@ import org.apache.spark.sql.catalyst.util.TypeUtils import 
org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval - +@ExpressionDescription( + usage = "_FUNC_(a) - Returns -a.") case class UnaryMinus(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -59,6 +60,8 @@ case class UnaryMinus(child: Expression) extends UnaryExpression override def sql: String = s"(-${child.sql})" } +@ExpressionDescription( + usage = "_FUNC_(a) - Returns a.") case class UnaryPositive(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { override def prettyName: String = "positive" @@ -79,8 +82,8 @@ case class UnaryPositive(child: Expression) * A function that get the absolute value of the numeric value. */ @ExpressionDescription( - usage = "_FUNC_(expr) - Returns the absolute value of the numeric value", - extended = "> SELECT _FUNC_('-1');\n1") + usage = "_FUNC_(expr) - Returns the absolute value of the numeric value.", + extended = "> SELECT _FUNC_('-1');\n 1") case class Abs(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -126,6 +129,8 @@ private[sql] object BinaryArithmetic { def unapply(e: BinaryArithmetic): Option[(Expression, Expression)] = Some((e.left, e.right)) } +@ExpressionDescription( + usage = "a _FUNC_ b - Returns a+b.") case class Add(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant { override def inputType: AbstractDataType = TypeCollection.NumericAndInterval @@ -155,6 +160,8 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic wit } } +@ExpressionDescription( + usage = "a _FUNC_ b - Returns a-b.") case class Subtract(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant { @@ -185,6 +192,8 @@ case class Subtract(left: Expression, right: Expression) } } +@ExpressionDescription( + usage = "a _FUNC_ b - Multiplies a by b.") case class Multiply(left: Expression, right: Expression) extends BinaryArithmetic 
with NullIntolerant { @@ -198,6 +207,9 @@ case class Multiply(left: Expression, right: Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = numeric.times(input1, input2) } +@ExpressionDescription( + usage = "a _FUNC_ b - Divides a by b.", + extended = "> SELECT 3 _FUNC_ 2;\n 1.5") case class Divide(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant { @@ -275,6 +287,8 @@ case class Divide(left: Expression, right: Expression) } } +@ExpressionDescription( + usage = "a _FUNC_ b - Returns the remainder when dividing a by b.") case class Remainder(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant { @@ -464,6 +478,9 @@ case class MinOf(left: Expression, right: Expression) override def symbol: String = "min" } +@ExpressionDescription( + usage = "_FUNC_(a, b) - Returns the positive modulo", + extended = "> SELECT _FUNC_(10,3);\n 1") case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant { override def toString: String = s"pmod($left, $right)" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index 4c90b3f7d3..a7e1cd66f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -26,6 +26,9 @@ import org.apache.spark.sql.types._ * * Code generation inherited from BinaryArithmetic. 
*/ +@ExpressionDescription( + usage = "a _FUNC_ b - Bitwise AND.", + extended = "> SELECT 3 _FUNC_ 5; 1") case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic { override def inputType: AbstractDataType = IntegralType @@ -51,6 +54,9 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme * * Code generation inherited from BinaryArithmetic. */ +@ExpressionDescription( + usage = "a _FUNC_ b - Bitwise OR.", + extended = "> SELECT 3 _FUNC_ 5; 7") case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic { override def inputType: AbstractDataType = IntegralType @@ -76,6 +82,9 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet * * Code generation inherited from BinaryArithmetic. */ +@ExpressionDescription( + usage = "a _FUNC_ b - Bitwise exclusive OR.", + extended = "> SELECT 3 _FUNC_ 5; 2") case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic { override def inputType: AbstractDataType = IntegralType @@ -99,6 +108,9 @@ case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithme /** * A function that calculates bitwise not(~) of a number. 
*/ +@ExpressionDescription( + usage = "_FUNC_ b - Bitwise NOT.", + extended = "> SELECT _FUNC_ 0; -1") case class BitwiseNot(child: Expression) extends UnaryExpression with ExpectsInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index e36c985249..ab790cf372 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -26,6 +26,8 @@ import org.apache.spark.sql.types._ /** * Given an array or map, returns its size. */ +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the size of an array or a map.") case class Size(child: Expression) extends UnaryExpression with ExpectsInputTypes { override def dataType: DataType = IntegerType override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(ArrayType, MapType)) @@ -44,6 +46,11 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType * Sorts the input array in ascending / descending order according to the natural ordering of * the array elements and returns it. 
*/ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array(obj1, obj2,...)) - Sorts the input array in ascending order according to the natural ordering of the array elements.", + extended = " > SELECT _FUNC_(array('b', 'd', 'c', 'a'));\n 'a', 'b', 'c', 'd'") +// scalastyle:on line.size.limit case class SortArray(base: Expression, ascendingOrder: Expression) extends BinaryExpression with ExpectsInputTypes with CodegenFallback { @@ -125,6 +132,9 @@ case class SortArray(base: Expression, ascendingOrder: Expression) /** * Checks if the array (left) has the element (right) */ +@ExpressionDescription( + usage = "_FUNC_(array, value) - Returns TRUE if the array contains value.", + extended = " > SELECT _FUNC_(array(1, 2, 3), 2);\n true") case class ArrayContains(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index c299586dde..74de4a776d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -27,6 +27,8 @@ import org.apache.spark.unsafe.types.UTF8String /** * Returns an Array containing the evaluation of all children expressions. */ +@ExpressionDescription( + usage = "_FUNC_(n0, ...) - Returns an array with the given elements.") case class CreateArray(children: Seq[Expression]) extends Expression { override def foldable: Boolean = children.forall(_.foldable) @@ -73,6 +75,8 @@ case class CreateArray(children: Seq[Expression]) extends Expression { * Returns a catalyst Map containing the evaluation of all children expressions as keys and values. * The children are a flatted sequence of kv pairs, e.g. (key1, value1, key2, value2, ...) 
*/ +@ExpressionDescription( + usage = "_FUNC_(key0, value0, key1, value1...) - Creates a map with the given key/value pairs.") case class CreateMap(children: Seq[Expression]) extends Expression { private[sql] lazy val keys = children.indices.filter(_ % 2 == 0).map(children) private[sql] lazy val values = children.indices.filter(_ % 2 != 0).map(children) @@ -153,6 +157,8 @@ case class CreateMap(children: Seq[Expression]) extends Expression { /** * Returns a Row containing the evaluation of all children expressions. */ +@ExpressionDescription( + usage = "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values.") case class CreateStruct(children: Seq[Expression]) extends Expression { override def foldable: Boolean = children.forall(_.foldable) @@ -204,6 +210,10 @@ case class CreateStruct(children: Seq[Expression]) extends Expression { * * @param children Seq(name1, val1, name2, val2, ...) */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(name1, val1, name2, val2, ...) 
- Creates a struct with the given field names and values.") +// scalastyle:on line.size.limit case class CreateNamedStruct(children: Seq[Expression]) extends Expression { /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index 35a7b46020..ae6a94842f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -23,7 +23,10 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ - +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(expr1,expr2,expr3) - If expr1 is TRUE then IF() returns expr2; otherwise it returns expr3.") +// scalastyle:on line.size.limit case class If(predicate: Expression, trueValue: Expression, falseValue: Expression) extends Expression { @@ -85,6 +88,10 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi * @param branches seq of (branch condition, branch value) * @param elseValue optional value for the else branch */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END - When a = true, returns b; when c = true, return d; else return e.") +// scalastyle:on line.size.limit case class CaseWhen(branches: Seq[(Expression, Expression)], elseValue: Option[Expression] = None) extends Expression with CodegenFallback { @@ -256,6 +263,8 @@ object CaseKeyWhen { * A function that returns the least value of all parameters, skipping null values. * It takes at least 2 parameters, and returns null iff all parameters are null. */ +@ExpressionDescription( + usage = "_FUNC_(n1, ...) 
- Returns the least value of all parameters, skipping null values.") case class Least(children: Seq[Expression]) extends Expression { override def nullable: Boolean = children.forall(_.nullable) @@ -315,6 +324,8 @@ case class Least(children: Seq[Expression]) extends Expression { * A function that returns the greatest value of all parameters, skipping null values. * It takes at least 2 parameters, and returns null iff all parameters are null. */ +@ExpressionDescription( + usage = "_FUNC_(n1, ...) - Returns the greatest value of all parameters, skipping null values.") case class Greatest(children: Seq[Expression]) extends Expression { override def nullable: Boolean = children.forall(_.nullable) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 1d0ea68d7a..9135753041 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -35,6 +35,8 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} * * There is no code generation since this expression should get constant folded by the optimizer. */ +@ExpressionDescription( + usage = "_FUNC_() - Returns the current date at the start of query evaluation.") case class CurrentDate() extends LeafExpression with CodegenFallback { override def foldable: Boolean = true override def nullable: Boolean = false @@ -54,6 +56,8 @@ case class CurrentDate() extends LeafExpression with CodegenFallback { * * There is no code generation since this expression should get constant folded by the optimizer. 
*/ +@ExpressionDescription( + usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.") case class CurrentTimestamp() extends LeafExpression with CodegenFallback { override def foldable: Boolean = true override def nullable: Boolean = false @@ -70,6 +74,9 @@ case class CurrentTimestamp() extends LeafExpression with CodegenFallback { /** * Adds a number of days to startdate. */ +@ExpressionDescription( + usage = "_FUNC_(start_date, num_days) - Returns the date that is num_days after start_date.", + extended = "> SELECT _FUNC_('2016-07-30', 1);\n '2016-07-31'") case class DateAdd(startDate: Expression, days: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -96,6 +103,9 @@ case class DateAdd(startDate: Expression, days: Expression) /** * Subtracts a number of days to startdate. */ +@ExpressionDescription( + usage = "_FUNC_(start_date, num_days) - Returns the date that is num_days before start_date.", + extended = "> SELECT _FUNC_('2016-07-30', 1);\n '2016-07-29'") case class DateSub(startDate: Expression, days: Expression) extends BinaryExpression with ImplicitCastInputTypes { override def left: Expression = startDate @@ -118,6 +128,9 @@ case class DateSub(startDate: Expression, days: Expression) override def prettyName: String = "date_sub" } +@ExpressionDescription( + usage = "_FUNC_(param) - Returns the hour component of the string/timestamp/interval.", + extended = "> SELECT _FUNC_('2009-07-30 12:58:59');\n 12") case class Hour(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) @@ -134,6 +147,9 @@ case class Hour(child: Expression) extends UnaryExpression with ImplicitCastInpu } } +@ExpressionDescription( + usage = "_FUNC_(param) - Returns the minute component of the string/timestamp/interval.", + extended = "> SELECT _FUNC_('2009-07-30 12:58:59');\n 58") case class Minute(child: Expression) extends UnaryExpression with 
ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) @@ -150,6 +166,9 @@ case class Minute(child: Expression) extends UnaryExpression with ImplicitCastIn } } +@ExpressionDescription( + usage = "_FUNC_(param) - Returns the second component of the string/timestamp/interval.", + extended = "> SELECT _FUNC_('2009-07-30 12:58:59');\n 59") case class Second(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) @@ -166,6 +185,9 @@ case class Second(child: Expression) extends UnaryExpression with ImplicitCastIn } } +@ExpressionDescription( + usage = "_FUNC_(param) - Returns the day of year of date/timestamp.", + extended = "> SELECT _FUNC_('2016-04-09');\n 100") case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DateType) @@ -182,7 +204,9 @@ case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCas } } - +@ExpressionDescription( + usage = "_FUNC_(param) - Returns the year component of the date/timestamp/interval.", + extended = "> SELECT _FUNC_('2016-07-30');\n 2016") case class Year(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DateType) @@ -199,6 +223,8 @@ case class Year(child: Expression) extends UnaryExpression with ImplicitCastInpu } } +@ExpressionDescription( + usage = "_FUNC_(param) - Returns the quarter of the year for date, in the range 1 to 4.") case class Quarter(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DateType) @@ -215,6 +241,9 @@ case class Quarter(child: Expression) extends UnaryExpression with ImplicitCastI } } +@ExpressionDescription( + usage = "_FUNC_(param) - Returns the month component of the date/timestamp/interval", + extended = "> SELECT 
_FUNC_('2016-07-30');\n 7") case class Month(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DateType) @@ -231,6 +260,9 @@ case class Month(child: Expression) extends UnaryExpression with ImplicitCastInp } } +@ExpressionDescription( + usage = "_FUNC_(param) - Returns the day of month of date/timestamp, or the day of interval.", + extended = "> SELECT _FUNC_('2009-07-30');\n 30") case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DateType) @@ -247,6 +279,9 @@ case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCa } } +@ExpressionDescription( + usage = "_FUNC_(param) - Returns the week of the year of the given date.", + extended = "> SELECT _FUNC_('2008-02-20');\n 8") case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DateType) @@ -283,6 +318,11 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa } } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(date/timestamp/string, fmt) - Converts a date/timestamp/string to a value of string in the format specified by the date format fmt.", + extended = "> SELECT _FUNC_('2016-04-08', 'y')\n '2016'") +// scalastyle:on line.size.limit case class DateFormatClass(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -310,6 +350,8 @@ case class DateFormatClass(left: Expression, right: Expression) extends BinaryEx * Converts time string with given pattern. * Deterministic version of [[UnixTimestamp]], must have at least one parameter. 
*/ +@ExpressionDescription( + usage = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp of the given time.") case class ToUnixTimestamp(timeExp: Expression, format: Expression) extends UnixTime { override def left: Expression = timeExp override def right: Expression = format @@ -331,6 +373,8 @@ * If the first parameter is a Date or Timestamp instead of String, we will ignore the * second parameter. */ +@ExpressionDescription( + usage = "_FUNC_([date[, pattern]]) - Returns the UNIX timestamp of current or specified time.") case class UnixTimestamp(timeExp: Expression, format: Expression) extends UnixTime { override def left: Expression = timeExp override def right: Expression = format @@ -459,6 +503,9 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { * format. If the format is missing, using format like "1970-01-01 00:00:00". * Note that hive Language Manual says it returns 0 if fail, but in fact it returns null. */ +@ExpressionDescription( + usage = "_FUNC_(unix_time, format) - Returns unix_time in the specified format", + extended = "> SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss');\n '1970-01-01 00:00:00'") case class FromUnixTime(sec: Expression, format: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -544,6 +591,9 @@ /** * Returns the last day of the month which the date belongs to. */ +@ExpressionDescription( + usage = "_FUNC_(date) - Returns the last day of the month which the date belongs to.", + extended = "> SELECT _FUNC_('2009-01-12');\n '2009-01-31'") case class LastDay(startDate: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def child: Expression = startDate @@ -570,6 +620,11 @@ * * Allowed "dayOfWeek" is defined in [[DateTimeUtils.getDayOfWeekFromString]].
*/ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(start_date, day_of_week) - Returns the first date which is later than start_date and named as indicated.", + extended = "> SELECT _FUNC_('2015-01-14', 'TU');\n '2015-01-20'") +// scalastyle:on line.size.limit case class NextDay(startDate: Expression, dayOfWeek: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -654,6 +709,10 @@ case class TimeAdd(start: Expression, interval: Expression) /** * Assumes given timestamp is UTC and converts to given timezone. */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(timestamp, string timezone) - Assumes given timestamp is UTC and converts to given timezone.") +// scalastyle:on line.size.limit case class FromUTCTimestamp(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -729,6 +788,9 @@ case class TimeSub(start: Expression, interval: Expression) /** * Returns the date that is num_months after start_date. */ +@ExpressionDescription( + usage = "_FUNC_(start_date, num_months) - Returns the date that is num_months after start_date.", + extended = "> SELECT _FUNC_('2016-08-31', 1);\n '2016-09-30'") case class AddMonths(startDate: Expression, numMonths: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -756,6 +818,9 @@ case class AddMonths(startDate: Expression, numMonths: Expression) /** * Returns number of months between dates date1 and date2. */ +@ExpressionDescription( + usage = "_FUNC_(date1, date2) - returns number of months between dates date1 and date2.", + extended = "> SELECT _FUNC_('1997-02-28 10:30:00', '1996-10-30');\n 3.94959677") case class MonthsBetween(date1: Expression, date2: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -783,6 +848,10 @@ case class MonthsBetween(date1: Expression, date2: Expression) /** * Assumes given timestamp is in given timezone and converts to UTC. 
*/ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(timestamp, string timezone) - Assumes given timestamp is in given timezone and converts to UTC.") +// scalastyle:on line.size.limit case class ToUTCTimestamp(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -830,6 +899,9 @@ case class ToUTCTimestamp(left: Expression, right: Expression) /** * Returns the date part of a timestamp or string. */ +@ExpressionDescription( + usage = "_FUNC_(expr) - Extracts the date part of the date or datetime expression expr.", + extended = "> SELECT _FUNC_('2009-07-30 04:17:52');\n '2009-07-30'") case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { // Implicit casting of spark will accept string in both date and timestamp format, as @@ -850,6 +922,11 @@ case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastIn /** * Returns date truncated to the unit specified by the format. */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(date, fmt) - Returns date with the time portion of the day truncated to the unit specified by the format model fmt.", + extended = "> SELECT _FUNC_('2009-02-12', 'MM')\n '2009-02-01'\n> SELECT _FUNC_('2015-10-27', 'YEAR');\n '2015-01-01'") +// scalastyle:on line.size.limit case class TruncDate(date: Expression, format: Expression) extends BinaryExpression with ImplicitCastInputTypes { override def left: Expression = date @@ -921,6 +998,9 @@ case class TruncDate(date: Expression, format: Expression) /** * Returns the number of days from startDate to endDate.
*/ +@ExpressionDescription( + usage = "_FUNC_(date1, date2) - Returns the number of days from date2 to date1.", + extended = "> SELECT _FUNC_('2009-07-31', '2009-07-30');\n 1") case class DateDiff(endDate: Expression, startDate: Expression) extends BinaryExpression with ImplicitCastInputTypes { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index e7ef21aa85..65d7a1d5a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -99,6 +99,10 @@ case class UserDefinedGenerator( /** * Given an input array produces a sequence of rows for each value in the array. */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(a) - Separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns.") +// scalastyle:on line.size.limit case class Explode(child: Expression) extends UnaryExpression with Generator with CodegenFallback { override def children: Seq[Expression] = child :: Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 72b323587c..ecd09b7083 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -106,6 +106,8 @@ private[this] object SharedFactory { * Extracts json object from a json string based on json path specified, and returns json string * of the extracted json object. It will return null if the input json string is invalid.
*/ +@ExpressionDescription( + usage = "_FUNC_(json_txt, path) - Extract a json object from path") case class GetJsonObject(json: Expression, path: Expression) extends BinaryExpression with ExpectsInputTypes with CodegenFallback { @@ -319,6 +321,10 @@ case class GetJsonObject(json: Expression, path: Expression) } } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. All the input parameters and output column types are string.") +// scalastyle:on line.size.limit case class JsonTuple(children: Seq[Expression]) extends Generator with CodegenFallback { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index e3d1bc127d..c8a28e8477 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -50,6 +50,7 @@ abstract class LeafMathExpression(c: Double, name: String) /** * A unary expression specifically for math functions. Math Functions expect a specific type of * input format, therefore these functions extend `ExpectsInputTypes`. + * * @param f The math function. * @param name The short name of the function */ @@ -103,6 +104,7 @@ abstract class UnaryLogExpression(f: Double => Double, name: String) /** * A binary expression specifically for math functions that take two `Double`s as input and returns * a `Double`. + * * @param f The math function. * @param name The short name of the function */ @@ -136,12 +138,18 @@ abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String) * Euler's number. Note that there is no code generation because this is only * evaluated by the optimizer during constant folding. 
*/ +@ExpressionDescription( + usage = "_FUNC_() - Returns Euler's number, E.", + extended = "> SELECT _FUNC_();\n 2.718281828459045") case class EulerNumber() extends LeafMathExpression(math.E, "E") /** * Pi. Note that there is no code generation because this is only * evaluated by the optimizer during constant folding. */ +@ExpressionDescription( + usage = "_FUNC_() - Returns PI.", + extended = "> SELECT _FUNC_();\n 3.141592653589793") case class Pi() extends LeafMathExpression(math.Pi, "PI") //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -150,14 +158,29 @@ case class Pi() extends LeafMathExpression(math.Pi, "PI") //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the arc cosine of x if -1<=x<=1 or NaN otherwise.", + extended = "> SELECT _FUNC_(1);\n 0.0\n> SELECT _FUNC_(2);\n NaN") case class Acos(child: Expression) extends UnaryMathExpression(math.acos, "ACOS") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the arc sin of x if -1<=x<=1 or NaN otherwise.", + extended = "> SELECT _FUNC_(0);\n 0.0\n> SELECT _FUNC_(2);\n NaN") case class Asin(child: Expression) extends UnaryMathExpression(math.asin, "ASIN") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the arc tangent.", + extended = "> SELECT _FUNC_(0);\n 0.0") case class Atan(child: Expression) extends UnaryMathExpression(math.atan, "ATAN") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the cube root of a double value.", + extended = "> SELECT _FUNC_(27.0);\n 3.0") case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, "CBRT") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the smallest integer not smaller than x.", + extended = "> SELECT _FUNC_(-0.1);\n 0\n> SELECT _FUNC_(5);\n 5") case class 
Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL") { override def dataType: DataType = child.dataType match { case dt @ DecimalType.Fixed(_, 0) => dt @@ -184,16 +207,26 @@ case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL" } } +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the cosine of x.", + extended = "> SELECT _FUNC_(0);\n 1.0") case class Cos(child: Expression) extends UnaryMathExpression(math.cos, "COS") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the hyperbolic cosine of x.", + extended = "> SELECT _FUNC_(0);\n 1.0") case class Cosh(child: Expression) extends UnaryMathExpression(math.cosh, "COSH") /** * Convert a num from one base to another + * * @param numExpr the number to be converted * @param fromBaseExpr from which base * @param toBaseExpr to which base */ +@ExpressionDescription( + usage = "_FUNC_(num, from_base, to_base) - Convert num from from_base to to_base.", + extended = "> SELECT _FUNC_('100', 2, 10);\n '4'\n> SELECT _FUNC_(-10, 16, -10);\n '16'") case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -222,10 +255,19 @@ case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expre } } +@ExpressionDescription( + usage = "_FUNC_(x) - Returns e to the power of x.", + extended = "> SELECT _FUNC_(0);\n 1.0") case class Exp(child: Expression) extends UnaryMathExpression(math.exp, "EXP") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns exp(x) - 1.", + extended = "> SELECT _FUNC_(0);\n 0.0") case class Expm1(child: Expression) extends UnaryMathExpression(math.expm1, "EXPM1") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the largest integer not greater than x.", + extended = "> SELECT _FUNC_(-0.1);\n -1\n> SELECT _FUNC_(5);\n 5") case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLOOR") { override def dataType: DataType = 
child.dataType match { case dt @ DecimalType.Fixed(_, 0) => dt @@ -283,6 +325,9 @@ object Factorial { ) } +@ExpressionDescription( + usage = "_FUNC_(n) - Returns n factorial for n is [0..20]. Otherwise, NULL.", + extended = "> SELECT _FUNC_(5);\n 120") case class Factorial(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[DataType] = Seq(IntegerType) @@ -315,8 +360,14 @@ case class Factorial(child: Expression) extends UnaryExpression with ImplicitCas } } +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the natural logarithm of x with base e.", + extended = "> SELECT _FUNC_(1);\n 0.0") case class Log(child: Expression) extends UnaryLogExpression(math.log, "LOG") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the logarithm of x with base 2.", + extended = "> SELECT _FUNC_(2);\n 1.0") case class Log2(child: Expression) extends UnaryLogExpression((x: Double) => math.log(x) / math.log(2), "LOG2") { override def genCode(ctx: CodegenContext, ev: ExprCode): String = { @@ -332,36 +383,72 @@ case class Log2(child: Expression) } } +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the logarithm of x with base 10.", + extended = "> SELECT _FUNC_(10);\n 1.0") case class Log10(child: Expression) extends UnaryLogExpression(math.log10, "LOG10") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns log(1 + x).", + extended = "> SELECT _FUNC_(0);\n 0.0") case class Log1p(child: Expression) extends UnaryLogExpression(math.log1p, "LOG1P") { protected override val yAsymptote: Double = -1.0 } +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the double value that is closest in value to x and is equal to a mathematical integer.", + extended = "> SELECT _FUNC_(12.3456);\n 12.0") case class Rint(child: Expression) extends UnaryMathExpression(math.rint, "ROUND") { override def funcName: String = "rint" } +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the sign of x.", + extended = "> SELECT _FUNC_(40);\n 1.0") case class Signum(child: Expression)
extends UnaryMathExpression(math.signum, "SIGNUM") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the sine of x.", + extended = "> SELECT _FUNC_(0);\n 0.0") case class Sin(child: Expression) extends UnaryMathExpression(math.sin, "SIN") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the hyperbolic sine of x.", + extended = "> SELECT _FUNC_(0);\n 0.0") case class Sinh(child: Expression) extends UnaryMathExpression(math.sinh, "SINH") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the square root of x.", + extended = "> SELECT _FUNC_(4);\n 2.0") case class Sqrt(child: Expression) extends UnaryMathExpression(math.sqrt, "SQRT") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the tangent of x.", + extended = "> SELECT _FUNC_(0);\n 0.0") case class Tan(child: Expression) extends UnaryMathExpression(math.tan, "TAN") +@ExpressionDescription( + usage = "_FUNC_(x) - Returns the hyperbolic tangent of x.", + extended = "> SELECT _FUNC_(0);\n 0.0") case class Tanh(child: Expression) extends UnaryMathExpression(math.tanh, "TANH") +@ExpressionDescription( + usage = "_FUNC_(x) - Converts radians to degrees.", + extended = "> SELECT _FUNC_(3.141592653589793);\n 180.0") case class ToDegrees(child: Expression) extends UnaryMathExpression(math.toDegrees, "DEGREES") { override def funcName: String = "toDegrees" } +@ExpressionDescription( + usage = "_FUNC_(x) - Converts degrees to radians.", + extended = "> SELECT _FUNC_(180);\n 3.141592653589793") case class ToRadians(child: Expression) extends UnaryMathExpression(math.toRadians, "RADIANS") { override def funcName: String = "toRadians" } +@ExpressionDescription( + usage = "_FUNC_(x) - Returns x in binary.", + extended = "> SELECT _FUNC_(13);\n '1101'") case class Bin(child: Expression) extends UnaryExpression with Serializable with ImplicitCastInputTypes { @@ -453,6 +540,9 @@ object Hex { * Otherwise if the number is a STRING, it converts each character into its hex representation * and returns the 
resulting STRING. Negative numbers would be treated as two's complement. */ +@ExpressionDescription( + usage = "_FUNC_(x) - Convert the argument to hexadecimal.", + extended = "> SELECT _FUNC_(17);\n '11'\n> SELECT _FUNC_('Spark SQL');\n '537061726B2053514C'") case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = @@ -481,6 +571,9 @@ case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInput * Performs the inverse operation of HEX. * Resulting characters are returned as a byte array. */ +@ExpressionDescription( + usage = "_FUNC_(x) - Converts hexadecimal argument to binary.", + extended = "> SELECT decode(_FUNC_('537061726B2053514C'),'UTF-8');\n 'Spark SQL'") case class Unhex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(StringType) @@ -509,7 +602,9 @@ case class Unhex(child: Expression) extends UnaryExpression with ImplicitCastInp //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// - +@ExpressionDescription( + usage = "_FUNC_(x,y) - Returns the arc tangent2.", + extended = "> SELECT _FUNC_(0, 0);\n 0.0") case class Atan2(left: Expression, right: Expression) extends BinaryMathExpression(math.atan2, "ATAN2") { @@ -523,6 +618,9 @@ case class Atan2(left: Expression, right: Expression) } } +@ExpressionDescription( + usage = "_FUNC_(x1, x2) - Raise x1 to the power of x2.", + extended = "> SELECT _FUNC_(2, 3);\n 8.0") case class Pow(left: Expression, right: Expression) extends BinaryMathExpression(math.pow, "POWER") { override def genCode(ctx: CodegenContext, ev: ExprCode): String = { @@ -532,10 +630,14 @@ case class Pow(left: Expression, right: Expression) /** - * Bitwise unsigned left shift. + * Bitwise left shift. 
+ * * @param left the base number to shift. * @param right number of bits to left shift. */ +@ExpressionDescription( + usage = "_FUNC_(a, b) - Bitwise left shift.", + extended = "> SELECT _FUNC_(2, 1);\n 4") case class ShiftLeft(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -558,10 +660,14 @@ case class ShiftLeft(left: Expression, right: Expression) /** - * Bitwise unsigned left shift. + * Bitwise right shift. + * * @param left the base number to shift. - * @param right number of bits to left shift. + * @param right number of bits to right shift. */ +@ExpressionDescription( + usage = "_FUNC_(a, b) - Bitwise right shift.", + extended = "> SELECT _FUNC_(4, 1);\n 2") case class ShiftRight(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -585,9 +691,13 @@ case class ShiftRight(left: Expression, right: Expression) /** * Bitwise unsigned right shift, for integer and long data type. + * * @param left the base number. * @param right the number of bits to right shift. */ +@ExpressionDescription( + usage = "_FUNC_(a, b) - Bitwise unsigned right shift.", + extended = "> SELECT _FUNC_(4, 1);\n 2") case class ShiftRightUnsigned(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -608,16 +718,22 @@ case class ShiftRightUnsigned(left: Expression, right: Expression) } } - +@ExpressionDescription( + usage = "_FUNC_(a, b) - Returns sqrt(a**2 + b**2).", + extended = "> SELECT _FUNC_(3, 4);\n 5.0") case class Hypot(left: Expression, right: Expression) extends BinaryMathExpression(math.hypot, "HYPOT") /** * Computes the logarithm of a number. + * * @param left the logarithm base, default to e. * @param right the number to compute the logarithm of. 
*/ +@ExpressionDescription( + usage = "_FUNC_(b, x) - Returns the logarithm of x with base b.", + extended = "> SELECT _FUNC_(10, 100);\n 2.0") case class Logarithm(left: Expression, right: Expression) extends BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG") { @@ -674,6 +790,9 @@ case class Logarithm(left: Expression, right: Expression) * @param child expr to be round, all [[NumericType]] is allowed as Input * @param scale new scale to be round to, this should be a constant int at runtime */ +@ExpressionDescription( + usage = "_FUNC_(x, d) - Round x to d decimal places.", + extended = "> SELECT _FUNC_(12.3456, 1);\n 12.3") case class Round(child: Expression, scale: Expression) extends BinaryExpression with ImplicitCastInputTypes { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index eb8dc1423a..4bd918ed01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -438,6 +438,8 @@ abstract class InterpretedHashFunction { * We should use this hash function for both shuffle and bucket, so that we can guarantee shuffle * and bucketing have same data distribution. */ +@ExpressionDescription( + usage = "_FUNC_(a1, a2, ...) 
- Returns a hash value of the arguments.") case class Murmur3Hash(children: Seq[Expression], seed: Int) extends HashExpression[Int] { def this(arguments: Seq[Expression]) = this(arguments, 42) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala index e22026d584..6a45249943 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala @@ -34,6 +34,9 @@ import org.apache.spark.sql.types._ * coalesce(null, null, null) => null * }}} */ +@ExpressionDescription( + usage = "_FUNC_(a1, a2, ...) - Returns the first non-null argument if exists. Otherwise, NULL.", + extended = "> SELECT _FUNC_(NULL, 1, NULL);\n 1") case class Coalesce(children: Seq[Expression]) extends Expression { /** Coalesce is nullable if all of its children are nullable, or if it has no children. */ @@ -89,6 +92,8 @@ case class Coalesce(children: Seq[Expression]) extends Expression { /** * Evaluates to `true` iff it's NaN. */ +@ExpressionDescription( + usage = "_FUNC_(a) - Returns true if a is NaN and false otherwise.") case class IsNaN(child: Expression) extends UnaryExpression with Predicate with ImplicitCastInputTypes { @@ -126,6 +131,8 @@ case class IsNaN(child: Expression) extends UnaryExpression * An Expression evaluates to `left` iff it's not NaN, or evaluates to `right` otherwise. * This Expression is useful for mapping NaN values to null. */ +@ExpressionDescription( + usage = "_FUNC_(a,b) - Returns a iff it's not NaN, or b otherwise.") case class NaNvl(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -180,6 +187,8 @@ case class NaNvl(left: Expression, right: Expression) /** * An expression that is evaluated to true if the input is null. 
*/ +@ExpressionDescription( + usage = "_FUNC_(a) - Returns true if a is NULL and false otherwise.") case class IsNull(child: Expression) extends UnaryExpression with Predicate { override def nullable: Boolean = false @@ -201,6 +210,8 @@ case class IsNull(child: Expression) extends UnaryExpression with Predicate { /** * An expression that is evaluated to true if the input is not null. */ +@ExpressionDescription( + usage = "_FUNC_(a) - Returns true if a is not NULL and false otherwise.") case class IsNotNull(child: Expression) extends UnaryExpression with Predicate { override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 4eb33258ac..38f1210a4e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -88,7 +88,8 @@ trait PredicateHelper { expr.references.subsetOf(plan.outputSet) } - +@ExpressionDescription( + usage = "_FUNC_ a - Logical not") case class Not(child: Expression) extends UnaryExpression with Predicate with ImplicitCastInputTypes with NullIntolerant { @@ -109,6 +110,8 @@ case class Not(child: Expression) /** * Evaluates to `true` if `list` contains `value`. */ +@ExpressionDescription( + usage = "expr _FUNC_(val1, val2, ...) 
- Returns true if expr equals to any valN.") case class In(value: Expression, list: Seq[Expression]) extends Predicate with ImplicitCastInputTypes { @@ -243,6 +246,8 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with } } +@ExpressionDescription( + usage = "a _FUNC_ b - Logical AND.") case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate { override def inputType: AbstractDataType = BooleanType @@ -306,7 +311,8 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with } } - +@ExpressionDescription( + usage = "a _FUNC_ b - Logical OR.") case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate { override def inputType: AbstractDataType = BooleanType @@ -401,7 +407,8 @@ private[sql] object Equality { } } - +@ExpressionDescription( + usage = "a _FUNC_ b - Returns TRUE if a equals b and false otherwise.") case class EqualTo(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -426,7 +433,8 @@ case class EqualTo(left: Expression, right: Expression) } } - +@ExpressionDescription( + usage = """a _FUNC_ b - Returns same result as the EQUAL(=) operator for non-null operands, + but returns TRUE if both are NULL, FALSE if one of them is NULL.""") case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison { override def inputType: AbstractDataType = AnyDataType @@ -467,7 +476,8 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp } } - +@ExpressionDescription( + usage = "a _FUNC_ b - Returns TRUE if a is less than b.") case class LessThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -480,7 +490,8 @@ case class LessThan(left: Expression, right: Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lt(input1, input2) } - +@ExpressionDescription( + usage = "a _FUNC_ b - Returns TRUE if a
is not greater than b.") case class LessThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -493,7 +504,8 @@ case class LessThanOrEqual(left: Expression, right: Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lteq(input1, input2) } - +@ExpressionDescription( + usage = "a _FUNC_ b - Returns TRUE if a is greater than b.") case class GreaterThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -506,7 +518,8 @@ case class GreaterThan(left: Expression, right: Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.gt(input1, input2) } - +@ExpressionDescription( + usage = "a _FUNC_ b - Returns TRUE if a is not smaller than b.") case class GreaterThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 6be3cbcae6..1ec092a5be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -55,6 +55,8 @@ abstract class RDG extends LeafExpression with Nondeterministic { } /** Generate a random column with i.i.d. uniformly distributed values in [0, 1). */ +@ExpressionDescription( + usage = "_FUNC_(a) - Returns a random column with i.i.d. uniformly distributed values in [0, 1).") case class Rand(seed: Long) extends RDG { override protected def evalInternal(input: InternalRow): Double = rng.nextDouble() @@ -78,6 +80,8 @@ case class Rand(seed: Long) extends RDG { } /** Generate a random column with i.i.d. gaussian random distribution. */ +@ExpressionDescription( + usage = "_FUNC_(a) - Returns a random column with i.i.d. 
gaussian random distribution.") case class Randn(seed: Long) extends RDG { override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index b68009331b..85a5429263 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -67,6 +67,8 @@ trait StringRegexExpression extends ImplicitCastInputTypes { /** * Simple RegEx pattern matching function */ +@ExpressionDescription( + usage = "str _FUNC_ pattern - Returns true if str matches pattern and false otherwise.") case class Like(left: Expression, right: Expression) extends BinaryExpression with StringRegexExpression { @@ -117,7 +119,8 @@ case class Like(left: Expression, right: Expression) } } - +@ExpressionDescription( + usage = "str _FUNC_ regexp - Returns true if str matches regexp and false otherwise.") case class RLike(left: Expression, right: Expression) extends BinaryExpression with StringRegexExpression { @@ -169,6 +172,9 @@ case class RLike(left: Expression, right: Expression) /** * Splits str around pat (pattern is a regular expression). */ +@ExpressionDescription( + usage = "_FUNC_(str, regex) - Splits str around occurrences that match regex", + extended = "> SELECT _FUNC_('oneAtwoBthreeC', '[ABC]');\n ['one', 'two', 'three']") case class StringSplit(str: Expression, pattern: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -198,6 +204,9 @@ case class StringSplit(str: Expression, pattern: Expression) * * NOTE: this expression is not THREAD-SAFE, as it has some internal mutable status. 
*/ +@ExpressionDescription( + usage = "_FUNC_(str, regexp, rep) - replace all substrings of str that match regexp with rep.", + extended = "> SELECT _FUNC_('100-200', '(\\d+)', 'num');\n 'num-num'") case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -289,6 +298,9 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio * * NOTE: this expression is not THREAD-SAFE, as it has some internal mutable status. */ +@ExpressionDescription( + usage = "_FUNC_(str, regexp[, idx]) - extracts a group that matches regexp.", + extended = "> SELECT _FUNC_('100-200', '(\\d+)-(\\d+)', 1);\n '100'") case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expression) extends TernaryExpression with ImplicitCastInputTypes { def this(s: Expression, r: Expression) = this(s, r, Literal(1)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 7e0e7a833b..a17482697d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -35,6 +35,9 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} * An expression that concatenates multiple input strings into a single string. * If any input is null, concat returns null. 
*/ +@ExpressionDescription( + usage = "_FUNC_(str1, str2, ..., strN) - Returns the concatenation of str1, str2, ..., strN", + extended = "> SELECT _FUNC_('Spark','SQL');\n 'SparkSQL'") case class Concat(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType) @@ -70,6 +73,10 @@ case class Concat(children: Seq[Expression]) extends Expression with ImplicitCas * * Returns null if the separator is null. Otherwise, concat_ws skips all null values. */ +@ExpressionDescription( + usage = + "_FUNC_(sep, [str | array(str)]+) - Returns the concatenation of the strings separated by sep.", + extended = "> SELECT _FUNC_(' ', 'Spark', 'SQL');\n 'Spark SQL'") case class ConcatWs(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes { @@ -188,7 +195,7 @@ case class Upper(child: Expression) */ @ExpressionDescription( usage = "_FUNC_(str) - Returns str with all characters changed to lowercase", - extended = "> SELECT _FUNC_('SparkSql');\n'sparksql'") + extended = "> SELECT _FUNC_('SparkSql');\n 'sparksql'") case class Lower(child: Expression) extends UnaryExpression with String2StringExpression { override def convert(v: UTF8String): UTF8String = v.toLowerCase @@ -270,6 +277,11 @@ object StringTranslate { * The translate will happen when any character in the string matching with the character * in the `matchingExpr`. 
*/ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """_FUNC_(input, from, to) - Translates the input string by replacing the characters present in the from string with the corresponding characters in the to string""", + extended = "> SELECT _FUNC_('AaBbCc', 'abc', '123');\n 'A1B2C3'") +// scalastyle:on line.size.limit case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replaceExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -325,6 +337,12 @@ case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replac * delimited list (right). Returns 0, if the string wasn't found or if the given * string (left) contains a comma. */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """_FUNC_(str, str_array) - Returns the index (1-based) of the given string (left) in the comma-delimited list (right). + Returns 0, if the string wasn't found or if the given string (left) contains a comma.""", + extended = "> SELECT _FUNC_('ab','abc,b,ab,c,def');\n 3") +// scalastyle:on case class FindInSet(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -347,6 +365,9 @@ case class FindInSet(left: Expression, right: Expression) extends BinaryExpressi /** * A function that trim the spaces from both ends for the specified string. */ +@ExpressionDescription( + usage = "_FUNC_(str) - Removes the leading and trailing space characters from str.", + extended = "> SELECT _FUNC_(' SparkSQL ');\n 'SparkSQL'") case class StringTrim(child: Expression) extends UnaryExpression with String2StringExpression { @@ -362,6 +383,9 @@ case class StringTrim(child: Expression) /** * A function that trim the spaces from left end for given string. 
*/ +@ExpressionDescription( + usage = "_FUNC_(str) - Removes the leading space characters from str.", + extended = "> SELECT _FUNC_(' SparkSQL ');\n 'SparkSQL '") case class StringTrimLeft(child: Expression) extends UnaryExpression with String2StringExpression { @@ -377,6 +401,9 @@ case class StringTrimLeft(child: Expression) /** * A function that trim the spaces from right end for given string. */ +@ExpressionDescription( + usage = "_FUNC_(str) - Removes the trailing space characters from str.", + extended = "> SELECT _FUNC_(' SparkSQL ');\n ' SparkSQL'") case class StringTrimRight(child: Expression) extends UnaryExpression with String2StringExpression { @@ -396,6 +423,9 @@ case class StringTrimRight(child: Expression) * * NOTE: that this is not zero based, but 1-based index. The first character in str has index 1. */ +@ExpressionDescription( + usage = "_FUNC_(str, substr) - Returns the (1-based) index of the first occurrence of substr in str.", + extended = "> SELECT _FUNC_('SparkSQL', 'SQL');\n 6") case class StringInstr(str: Expression, substr: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -422,6 +452,15 @@ case class StringInstr(str: Expression, substr: Expression) * returned. If count is negative, every to the right of the final delimiter (counting from the * right) is returned. substring_index performs a case-sensitive match when searching for delim. */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """_FUNC_(str, delim, count) - Returns the substring from str before count occurrences of the delimiter delim. + If count is positive, everything to the left of the final delimiter (counting from the + left) is returned. If count is negative, everything to the right of the final delimiter + (counting from the right) is returned. 
Substring_index performs a case-sensitive match + when searching for delim.""", + extended = "> SELECT _FUNC_('www.apache.org', '.', 2);\n 'www.apache'") +// scalastyle:on line.size.limit case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -445,6 +484,12 @@ case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: * A function that returns the position of the first occurrence of substr * in given string after position pos. */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """_FUNC_(substr, str[, pos]) - Returns the position of the first occurrence of substr in str after position pos. + The given pos and return value are 1-based.""", + extended = "> SELECT _FUNC_('bar', 'foobarbar', 5);\n 7") +// scalastyle:on line.size.limit case class StringLocate(substr: Expression, str: Expression, start: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -510,6 +555,11 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression) /** * Returns str, left-padded with pad to a length of len. */ +@ExpressionDescription( + usage = """_FUNC_(str, len, pad) - Returns str, left-padded with pad to a length of len. + If str is longer than len, the return value is shortened to len characters.""", + extended = "> SELECT _FUNC_('hi', 5, '??');\n '???hi'\n" + + "> SELECT _FUNC_('hi', 1, '??');\n 'h'") case class StringLPad(str: Expression, len: Expression, pad: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -531,6 +581,11 @@ case class StringLPad(str: Expression, len: Expression, pad: Expression) /** * Returns str, right-padded with pad to a length of len. */ +@ExpressionDescription( + usage = """_FUNC_(str, len, pad) - Returns str, right-padded with pad to a length of len. 
+ If str is longer than len, the return value is shortened to len characters.""", + extended = "> SELECT _FUNC_('hi', 5, '??');\n 'hi???'\n" + + "> SELECT _FUNC_('hi', 1, '??');\n 'h'") case class StringRPad(str: Expression, len: Expression, pad: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -552,6 +607,11 @@ case class StringRPad(str: Expression, len: Expression, pad: Expression) /** * Returns the input formatted according do printf-style format strings */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(String format, Obj... args) - Returns a formatted string from printf-style format strings.", + extended = "> SELECT _FUNC_(\"Hello World %d %s\", 100, \"days\");\n 'Hello World 100 days'") +// scalastyle:on line.size.limit case class FormatString(children: Expression*) extends Expression with ImplicitCastInputTypes { require(children.nonEmpty, "format_string() should take at least 1 argument") @@ -642,6 +702,9 @@ case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastI /** * Returns the string which repeat the given string value n times. */ +@ExpressionDescription( + usage = "_FUNC_(str, n) - Returns the string which repeat the given string value n times.", + extended = "> SELECT _FUNC_('123', 2);\n '123123'") case class StringRepeat(str: Expression, times: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -664,6 +727,9 @@ case class StringRepeat(str: Expression, times: Expression) /** * Returns the reversed given string. */ +@ExpressionDescription( + usage = "_FUNC_(str) - Returns the reversed given string.", + extended = "> SELECT _FUNC_('Spark SQL');\n 'LQS krapS'") case class StringReverse(child: Expression) extends UnaryExpression with String2StringExpression { override def convert(v: UTF8String): UTF8String = v.reverse() @@ -677,6 +743,9 @@ case class StringReverse(child: Expression) extends UnaryExpression with String2 /** * Returns a n spaces string. 
*/ +@ExpressionDescription( + usage = "_FUNC_(n) - Returns a n spaces string.", + extended = "> SELECT _FUNC_(2);\n ' '") case class StringSpace(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { @@ -699,7 +768,14 @@ case class StringSpace(child: Expression) /** * A function that takes a substring of its first argument starting at a given position. * Defined for String and Binary types. + * + * NOTE: that this is not zero based, but 1-based index. The first character in str has index 1. */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(str, pos[, len]) - Returns the substring of str that starts at pos and is of length len or the slice of byte array that starts at pos and is of length len.", + extended = "> SELECT _FUNC_('Spark SQL', 5);\n 'k SQL'\n> SELECT _FUNC_('Spark SQL', -3);\n 'SQL'\n> SELECT _FUNC_('Spark SQL', 5, 1);\n 'k'") +// scalastyle:on line.size.limit case class Substring(str: Expression, pos: Expression, len: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -737,6 +813,9 @@ case class Substring(str: Expression, pos: Expression, len: Expression) /** * A function that return the length of the given string or binary expression. */ +@ExpressionDescription( + usage = "_FUNC_(str | binary) - Returns the length of str or number of bytes in binary data.", + extended = "> SELECT _FUNC_('Spark SQL');\n 9") case class Length(child: Expression) extends UnaryExpression with ExpectsInputTypes { override def dataType: DataType = IntegerType override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType)) @@ -757,6 +836,9 @@ case class Length(child: Expression) extends UnaryExpression with ExpectsInputTy /** * A function that return the Levenshtein distance between the two given strings. 
*/ +@ExpressionDescription( + usage = "_FUNC_(str1, str2) - Returns the Levenshtein distance between the two given strings.", + extended = "> SELECT _FUNC_('kitten', 'sitting');\n 3") case class Levenshtein(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -775,6 +857,9 @@ case class Levenshtein(left: Expression, right: Expression) extends BinaryExpres /** * A function that return soundex code of the given string expression. */ +@ExpressionDescription( + usage = "_FUNC_(str) - Returns soundex code of the string.", + extended = "> SELECT _FUNC_('Miller');\n 'M460'") case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputTypes { override def dataType: DataType = StringType @@ -791,6 +876,10 @@ case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputT /** * Returns the numeric value of the first character of str. */ +@ExpressionDescription( + usage = "_FUNC_(str) - Returns the numeric value of the first character of str.", + extended = "> SELECT _FUNC_('222');\n 50\n" + + "> SELECT _FUNC_(2);\n 50") case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def dataType: DataType = IntegerType @@ -822,6 +911,8 @@ case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInp /** * Converts the argument from binary to a base 64 string. */ +@ExpressionDescription( + usage = "_FUNC_(bin) - Convert the argument from binary to a base 64 string.") case class Base64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def dataType: DataType = StringType @@ -844,6 +935,8 @@ case class Base64(child: Expression) extends UnaryExpression with ImplicitCastIn /** * Converts the argument from a base 64 string to BINARY. 
*/ +@ExpressionDescription( + usage = "_FUNC_(str) - Convert the argument from a base 64 string to binary.") case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def dataType: DataType = BinaryType @@ -865,6 +958,8 @@ case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCast * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). * If either argument is null, the result will also be null. */ +@ExpressionDescription( + usage = "_FUNC_(bin, str) - Decode the first argument using the second argument character set.") case class Decode(bin: Expression, charset: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -894,7 +989,9 @@ case class Decode(bin: Expression, charset: Expression) * Encodes the first argument into a BINARY using the provided character set * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). * If either argument is null, the result will also be null. -*/ + */ +@ExpressionDescription( + usage = "_FUNC_(str, str) - Encode the first argument using the second argument character set.") case class Encode(value: Expression, charset: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -924,6 +1021,11 @@ case class Encode(value: Expression, charset: Expression) * and returns the result as a string. If D is 0, the result has no decimal point or * fractional part. */ +@ExpressionDescription( + usage = """_FUNC_(X, D) - Formats the number X like '#,###,###.##', rounded to D decimal places. + If D is 0, the result has no decimal point or fractional part. 
+ This is supposed to function like MySQL's FORMAT.""", + extended = "> SELECT _FUNC_(12332.123456, 4);\n '12,332.1235'") case class FormatNumber(x: Expression, d: Expression) extends BinaryExpression with ExpectsInputTypes { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index dd648cdb81..695dda269a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -89,6 +89,14 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { "Function: abcadf not found.") } + test("SPARK-14415: All functions should have own descriptions") { + for (f <- sqlContext.sessionState.functionRegistry.listFunction()) { + if (!Seq("cube", "grouping", "grouping_id", "rollup", "window").contains(f)) { + checkExistence(sql(s"describe function `$f`"), false, "To be added.") + } + } + } + test("SPARK-6743: no columns from cache") { Seq( (83, 0, 38), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index f3796a9966..b4886eba7a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -238,7 +238,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkExistence(sql("describe functioN `~`"), true, "Function: ~", "Class: org.apache.spark.sql.catalyst.expressions.BitwiseNot", - "Usage: To be added.") + "Usage: ~ b - Bitwise NOT.") // Hard coded describe functions checkExistence(sql("describe function `<>`"), true, -- cgit v1.2.3