aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2015-08-02 11:36:11 -0700
committerReynold Xin <rxin@databricks.com>2015-08-02 11:36:11 -0700
commit8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01 (patch)
treec7235322a7fb4d1b4d7adb45ba0ace2dbe42b748 /sql
parent244016a95c43ce6db422378e85a9d527bfe59bf1 (diff)
downloadspark-8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01.tar.gz
spark-8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01.tar.bz2
spark-8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01.zip
[SPARK-9208][SQL] Sort DataFrame functions alphabetically.
Author: Reynold Xin <rxin@databricks.com> Closes #7861 from rxin/api-audit and squashes the following commits: 7200256 [Reynold Xin] [SPARK-9208][SQL] Sort DataFrame functions alphabetically.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/functions.scala634
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala20
2 files changed, 291 insertions, 363 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 197cd3de61..3595829907 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -70,15 +70,6 @@ object functions {
def column(colName: String): Column = Column(colName)
/**
- * Convert a number in string format from one base to another.
- *
- * @group math_funcs
- * @since 1.5.0
- */
- def conv(num: Column, fromBase: Int, toBase: Int): Column =
- Conv(num.expr, lit(fromBase).expr, lit(toBase).expr)
-
- /**
* Creates a [[Column]] of literal value.
*
* The passed in object is returned directly if it is already a [[Column]].
@@ -132,36 +123,54 @@ object functions {
//////////////////////////////////////////////////////////////////////////////////////////////
/**
- * Aggregate function: returns the sum of all values in the expression.
+ * Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def sum(e: Column): Column = Sum(e.expr)
+ def approxCountDistinct(e: Column): Column = ApproxCountDistinct(e.expr)
/**
- * Aggregate function: returns the sum of all values in the given column.
+ * Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def sum(columnName: String): Column = sum(Column(columnName))
+ def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName))
/**
- * Aggregate function: returns the sum of distinct values in the expression.
+ * Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def sumDistinct(e: Column): Column = SumDistinct(e.expr)
+ def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd)
/**
- * Aggregate function: returns the sum of distinct values in the expression.
+ * Aggregate function: returns the approximate number of distinct items in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def sumDistinct(columnName: String): Column = sumDistinct(Column(columnName))
+ def approxCountDistinct(columnName: String, rsd: Double): Column = {
+ approxCountDistinct(Column(columnName), rsd)
+ }
+
+ /**
+ * Aggregate function: returns the average of the values in a group.
+ *
+ * @group agg_funcs
+ * @since 1.3.0
+ */
+ def avg(e: Column): Column = Average(e.expr)
+
+ /**
+ * Aggregate function: returns the average of the values in a group.
+ *
+ * @group agg_funcs
+ * @since 1.3.0
+ */
+ def avg(columnName: String): Column = avg(Column(columnName))
/**
* Aggregate function: returns the number of items in a group.
@@ -204,140 +213,158 @@ object functions {
countDistinct(Column(columnName), columnNames.map(Column.apply) : _*)
/**
- * Aggregate function: returns the approximate number of distinct items in a group.
+ * Aggregate function: returns the first value in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def approxCountDistinct(e: Column): Column = ApproxCountDistinct(e.expr)
+ def first(e: Column): Column = First(e.expr)
/**
- * Aggregate function: returns the approximate number of distinct items in a group.
+ * Aggregate function: returns the first value of a column in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName))
+ def first(columnName: String): Column = first(Column(columnName))
/**
- * Aggregate function: returns the approximate number of distinct items in a group.
+ * Aggregate function: returns the last value in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd)
+ def last(e: Column): Column = Last(e.expr)
/**
- * Aggregate function: returns the approximate number of distinct items in a group.
+ * Aggregate function: returns the last value of the column in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def approxCountDistinct(columnName: String, rsd: Double): Column = {
- approxCountDistinct(Column(columnName), rsd)
- }
+ def last(columnName: String): Column = last(Column(columnName))
/**
- * Aggregate function: returns the average of the values in a group.
+ * Aggregate function: returns the maximum value of the expression in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def avg(e: Column): Column = Average(e.expr)
+ def max(e: Column): Column = Max(e.expr)
/**
- * Aggregate function: returns the average of the values in a group.
+ * Aggregate function: returns the maximum value of the column in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def avg(columnName: String): Column = avg(Column(columnName))
+ def max(columnName: String): Column = max(Column(columnName))
/**
- * Aggregate function: returns the first value in a group.
+ * Aggregate function: returns the average of the values in a group.
+ * Alias for avg.
*
* @group agg_funcs
- * @since 1.3.0
+ * @since 1.4.0
*/
- def first(e: Column): Column = First(e.expr)
+ def mean(e: Column): Column = avg(e)
/**
- * Aggregate function: returns the first value of a column in a group.
+ * Aggregate function: returns the average of the values in a group.
+ * Alias for avg.
*
* @group agg_funcs
- * @since 1.3.0
+ * @since 1.4.0
*/
- def first(columnName: String): Column = first(Column(columnName))
+ def mean(columnName: String): Column = avg(columnName)
/**
- * Aggregate function: returns the last value in a group.
+ * Aggregate function: returns the minimum value of the expression in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def last(e: Column): Column = Last(e.expr)
+ def min(e: Column): Column = Min(e.expr)
/**
- * Aggregate function: returns the last value of the column in a group.
+ * Aggregate function: returns the minimum value of the column in a group.
*
* @group agg_funcs
* @since 1.3.0
*/
- def last(columnName: String): Column = last(Column(columnName))
+ def min(columnName: String): Column = min(Column(columnName))
/**
- * Aggregate function: returns the average of the values in a group.
- * Alias for avg.
+ * Aggregate function: returns the sum of all values in the expression.
*
* @group agg_funcs
- * @since 1.4.0
+ * @since 1.3.0
*/
- def mean(e: Column): Column = avg(e)
+ def sum(e: Column): Column = Sum(e.expr)
/**
- * Aggregate function: returns the average of the values in a group.
- * Alias for avg.
+ * Aggregate function: returns the sum of all values in the given column.
*
* @group agg_funcs
- * @since 1.4.0
+ * @since 1.3.0
*/
- def mean(columnName: String): Column = avg(columnName)
+ def sum(columnName: String): Column = sum(Column(columnName))
/**
- * Aggregate function: returns the minimum value of the expression in a group.
+ * Aggregate function: returns the sum of distinct values in the expression.
*
* @group agg_funcs
* @since 1.3.0
*/
- def min(e: Column): Column = Min(e.expr)
+ def sumDistinct(e: Column): Column = SumDistinct(e.expr)
/**
- * Aggregate function: returns the minimum value of the column in a group.
+ * Aggregate function: returns the sum of distinct values in the expression.
*
* @group agg_funcs
* @since 1.3.0
*/
- def min(columnName: String): Column = min(Column(columnName))
+ def sumDistinct(columnName: String): Column = sumDistinct(Column(columnName))
+
+ //////////////////////////////////////////////////////////////////////////////////////////////
+ // Window functions
+ //////////////////////////////////////////////////////////////////////////////////////////////
/**
- * Aggregate function: returns the maximum value of the expression in a group.
+ * Window function: returns the cumulative distribution of values within a window partition,
+ * i.e. the fraction of rows that are below the current row.
*
- * @group agg_funcs
- * @since 1.3.0
+ * {{{
+ * N = total number of rows in the partition
+ * cumeDist(x) = number of values before (and including) x / N
+ * }}}
+ *
+ *
+ * This is equivalent to the CUME_DIST function in SQL.
+ *
+ * @group window_funcs
+ * @since 1.4.0
*/
- def max(e: Column): Column = Max(e.expr)
+ def cumeDist(): Column = {
+ UnresolvedWindowFunction("cume_dist", Nil)
+ }
/**
- * Aggregate function: returns the maximum value of the column in a group.
+ * Window function: returns the rank of rows within a window partition, without any gaps.
*
- * @group agg_funcs
- * @since 1.3.0
+ * The difference between rank and denseRank is that denseRank leaves no gaps in ranking
+ * sequence when there are ties. That is, if you were ranking a competition using denseRank
+ * and had three people tie for second place, you would say that all three were in second
+ * place and that the next person came in third.
+ *
+ * This is equivalent to the DENSE_RANK function in SQL.
+ *
+ * @group window_funcs
+ * @since 1.4.0
*/
- def max(columnName: String): Column = max(Column(columnName))
-
- //////////////////////////////////////////////////////////////////////////////////////////////
- // Window functions
- //////////////////////////////////////////////////////////////////////////////////////////////
+ def denseRank(): Column = {
+ UnresolvedWindowFunction("dense_rank", Nil)
+ }
/**
* Window function: returns the value that is `offset` rows before the current row, and
@@ -466,32 +493,20 @@ object functions {
}
/**
- * Window function: returns a sequential number starting at 1 within a window partition.
- *
- * This is equivalent to the ROW_NUMBER function in SQL.
- *
- * @group window_funcs
- * @since 1.4.0
- */
- def rowNumber(): Column = {
- UnresolvedWindowFunction("row_number", Nil)
- }
-
- /**
- * Window function: returns the rank of rows within a window partition, without any gaps.
+ * Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
*
- * The difference between rank and denseRank is that denseRank leaves no gaps in ranking
- * sequence when there are ties. That is, if you were ranking a competition using denseRank
- * and had three people tie for second place, you would say that all three were in second
- * place and that the next person came in third.
+ * This is computed by:
+ * {{{
+ * (rank of row in its partition - 1) / (number of rows in the partition - 1)
+ * }}}
*
- * This is equivalent to the DENSE_RANK function in SQL.
+ * This is equivalent to the PERCENT_RANK function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def denseRank(): Column = {
- UnresolvedWindowFunction("dense_rank", Nil)
+ def percentRank(): Column = {
+ UnresolvedWindowFunction("percent_rank", Nil)
}
/**
@@ -512,39 +527,15 @@ object functions {
}
/**
- * Window function: returns the cumulative distribution of values within a window partition,
- * i.e. the fraction of rows that are below the current row.
- *
- * {{{
- * N = total number of rows in the partition
- * cumeDist(x) = number of values before (and including) x / N
- * }}}
- *
- *
- * This is equivalent to the CUME_DIST function in SQL.
- *
- * @group window_funcs
- * @since 1.4.0
- */
- def cumeDist(): Column = {
- UnresolvedWindowFunction("cume_dist", Nil)
- }
-
- /**
- * Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
- *
- * This is computed by:
- * {{{
- * (rank of row in its partition - 1) / (number of rows in the partition - 1)
- * }}}
+ * Window function: returns a sequential number starting at 1 within a window partition.
*
- * This is equivalent to the PERCENT_RANK function in SQL.
+ * This is equivalent to the ROW_NUMBER function in SQL.
*
* @group window_funcs
* @since 1.4.0
*/
- def percentRank(): Column = {
- UnresolvedWindowFunction("percent_rank", Nil)
+ def rowNumber(): Column = {
+ UnresolvedWindowFunction("row_number", Nil)
}
//////////////////////////////////////////////////////////////////////////////////////////////
@@ -595,10 +586,10 @@ object functions {
}
/**
- * Returns the first column that is not null and not NaN.
- * {{{
- * df.select(coalesce(df("a"), df("b")))
- * }}}
+ * Returns the first column that is not null, or null if all inputs are null.
+ *
+ * For example, `coalesce(a, b, c)` will return a if a is not null,
+ * or b if a is null and b is not null, or c if both a and b are null but c is not null.
*
* @group normal_funcs
* @since 1.3.0
@@ -607,9 +598,11 @@ object functions {
def coalesce(e: Column*): Column = Coalesce(e.map(_.expr))
/**
- * Creates a new row for each element in the given array or map column.
+ * Creates a string column for the file name of the current Spark task.
+ *
+ * @group normal_funcs
*/
- def explode(e: Column): Column = Explode(e.expr)
+ def inputFileName(): Column = InputFileName()
/**
* Return true iff the column is NaN.
@@ -637,13 +630,14 @@ object functions {
def monotonicallyIncreasingId(): Column = MonotonicallyIncreasingID()
/**
- * Return an alternative value `r` if `l` is NaN.
- * This function is useful for mapping NaN values to null.
+ * Returns col1 if it is not NaN, or col2 if col1 is NaN.
+ *
+ * Both inputs should be floating point columns (DoubleType or FloatType).
*
* @group normal_funcs
* @since 1.5.0
*/
- def nanvl(l: Column, r: Column): Column = NaNvl(l.expr, r.expr)
+ def nanvl(col1: Column, col2: Column): Column = NaNvl(col1.expr, col2.expr)
/**
* Unary minus, i.e. negate the expression.
@@ -677,31 +671,6 @@ object functions {
def not(e: Column): Column = !e
/**
- * Evaluates a list of conditions and returns one of multiple possible result expressions.
- * If otherwise is not defined at the end, null is returned for unmatched conditions.
- *
- * {{{
- * // Example: encoding gender string column into integer.
- *
- * // Scala:
- * people.select(when(people("gender") === "male", 0)
- * .when(people("gender") === "female", 1)
- * .otherwise(2))
- *
- * // Java:
- * people.select(when(col("gender").equalTo("male"), 0)
- * .when(col("gender").equalTo("female"), 1)
- * .otherwise(2))
- * }}}
- *
- * @group normal_funcs
- * @since 1.4.0
- */
- def when(condition: Column, value: Any): Column = {
- CaseWhen(Seq(condition.expr, lit(value).expr))
- }
-
- /**
* Generate a random column with i.i.d. samples from U[0.0, 1.0].
*
* @group normal_funcs
@@ -744,15 +713,6 @@ object functions {
def sparkPartitionId(): Column = SparkPartitionID()
/**
- * The file name of the current Spark task
- *
- * Note that this is indeterministic becuase it depends on what is currently being read in.
- *
- * @group normal_funcs
- */
- def inputFileName(): Column = InputFileName()
-
- /**
* Computes the square root of the specified float value.
*
* @group math_funcs
@@ -794,6 +754,31 @@ object functions {
}
/**
+ * Evaluates a list of conditions and returns one of multiple possible result expressions.
+ * If otherwise is not defined at the end, null is returned for unmatched conditions.
+ *
+ * {{{
+ * // Example: encoding gender string column into integer.
+ *
+ * // Scala:
+ * people.select(when(people("gender") === "male", 0)
+ * .when(people("gender") === "female", 1)
+ * .otherwise(2))
+ *
+ * // Java:
+ * people.select(when(col("gender").equalTo("male"), 0)
+ * .when(col("gender").equalTo("female"), 1)
+ * .otherwise(2))
+ * }}}
+ *
+ * @group normal_funcs
+ * @since 1.4.0
+ */
+ def when(condition: Column, value: Any): Column = {
+ CaseWhen(Seq(condition.expr, lit(value).expr))
+ }
+
+ /**
* Computes bitwise NOT.
*
* @group normal_funcs
@@ -993,6 +978,15 @@ object functions {
def ceil(columnName: String): Column = ceil(Column(columnName))
/**
+ * Convert a number in a string column from one base to another.
+ *
+ * @group math_funcs
+ * @since 1.5.0
+ */
+ def conv(num: Column, fromBase: Int, toBase: Int): Column =
+ Conv(num.expr, lit(fromBase).expr, lit(toBase).expr)
+
+ /**
* Computes the cosine of the given value.
*
* @group math_funcs
@@ -1025,22 +1019,6 @@ object functions {
def cosh(columnName: String): Column = cosh(Column(columnName))
/**
- * Returns the current date.
- *
- * @group datetime_funcs
- * @since 1.5.0
- */
- def current_date(): Column = CurrentDate()
-
- /**
- * Returns the current timestamp.
- *
- * @group datetime_funcs
- * @since 1.5.0
- */
- def current_timestamp(): Column = CurrentTimestamp()
-
- /**
* Computes the exponential of the given value.
*
* @group math_funcs
@@ -1671,109 +1649,75 @@ object functions {
//////////////////////////////////////////////////////////////////////////////////////////////
/**
- * Concatenates input strings together into a single string.
- *
- * @group string_funcs
- * @since 1.5.0
- */
- @scala.annotation.varargs
- def concat(exprs: Column*): Column = Concat(exprs.map(_.expr))
-
- /**
- * Concatenates input strings together into a single string, using the given separator.
+ * Computes the numeric value of the first character of the string column, and returns the
+ * result as an int column.
*
* @group string_funcs
* @since 1.5.0
*/
- @scala.annotation.varargs
- def concat_ws(sep: String, exprs: Column*): Column = {
- ConcatWs(Literal.create(sep, StringType) +: exprs.map(_.expr))
- }
+ def ascii(e: Column): Column = Ascii(e.expr)
/**
- * Computes the length of a given string / binary value.
+ * Computes the BASE64 encoding of a binary column and returns it as a string column.
+ * This is the reverse of unbase64.
*
* @group string_funcs
* @since 1.5.0
*/
- def length(e: Column): Column = Length(e.expr)
-
- /**
- * Converts a string expression to lower case.
- *
- * @group string_funcs
- * @since 1.3.0
- */
- def lower(e: Column): Column = Lower(e.expr)
-
- /**
- * Converts a string expression to upper case.
- *
- * @group string_funcs
- * @since 1.3.0
- */
- def upper(e: Column): Column = Upper(e.expr)
+ def base64(e: Column): Column = Base64(e.expr)
/**
- * Formats the number X to a format like '#,###,###.##', rounded to d decimal places,
- * and returns the result as a string.
- * If d is 0, the result has no decimal point or fractional part.
- * If d < 0, the result will be null.
+ * Concatenates multiple input string columns together into a single string column.
*
* @group string_funcs
* @since 1.5.0
*/
- def format_number(x: Column, d: Int): Column = FormatNumber(x.expr, lit(d).expr)
+ @scala.annotation.varargs
+ def concat(exprs: Column*): Column = Concat(exprs.map(_.expr))
/**
- * Substring starts at `pos` and is of length `len` when str is String type or
- * returns the slice of byte array that starts at `pos` in byte and is of length `len`
- * when str is Binary type
+ * Concatenates multiple input string columns together into a single string column,
+ * using the given separator.
*
* @group string_funcs
* @since 1.5.0
*/
- def substring(str: Column, pos: Int, len: Int): Column =
- Substring(str.expr, lit(pos).expr, lit(len).expr)
-
- /**
- * Computes the Levenshtein distance of the two given string columns.
- * @group string_funcs
- * @since 1.5.0
- */
- def levenshtein(l: Column, r: Column): Column = Levenshtein(l.expr, r.expr)
+ @scala.annotation.varargs
+ def concat_ws(sep: String, exprs: Column*): Column = {
+ ConcatWs(Literal.create(sep, StringType) +: exprs.map(_.expr))
+ }
/**
- * Computes the numeric value of the first character of the specified string column.
+ * Computes the first argument into a string from a binary using the provided character set
+ * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
+ * If either argument is null, the result will also be null.
*
* @group string_funcs
* @since 1.5.0
*/
- def ascii(e: Column): Column = Ascii(e.expr)
+ def decode(value: Column, charset: String): Column = Decode(value.expr, lit(charset).expr)
/**
- * Trim the spaces from both ends for the specified string column.
+ * Computes the first argument into a binary from a string using the provided character set
+ * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
+ * If either argument is null, the result will also be null.
*
* @group string_funcs
* @since 1.5.0
*/
- def trim(e: Column): Column = StringTrim(e.expr)
+ def encode(value: Column, charset: String): Column = Encode(value.expr, lit(charset).expr)
/**
- * Trim the spaces from left end for the specified string value.
+ * Formats numeric column x to a format like '#,###,###.##', rounded to d decimal places,
+ * and returns the result as a string column.
*
- * @group string_funcs
- * @since 1.5.0
- */
- def ltrim(e: Column): Column = StringTrimLeft(e.expr)
-
- /**
- * Trim the spaces from right end for the specified string value.
+ * If d is 0, the result has no decimal point or fractional part.
+ * If d < 0, the result will be null.
*
* @group string_funcs
* @since 1.5.0
*/
- def rtrim(e: Column): Column = StringTrimRight(e.expr)
+ def format_number(x: Column, d: Int): Column = FormatNumber(x.expr, lit(d).expr)
/**
* Formats the arguments in printf-style and returns the result as a string column.
@@ -1787,9 +1731,11 @@ object functions {
}
/**
- * Returns string, with the first letter of each word in uppercase.
+ * Returns a new string column by converting the first letter of each word to uppercase.
* Words are delimited by whitespace.
*
+ * For example, "hello world" will become "Hello World".
+ *
* @group string_funcs
* @since 1.5.0
*/
@@ -1808,15 +1754,27 @@ object functions {
def instr(str: Column, substring: String): Column = StringInstr(str.expr, lit(substring).expr)
/**
- * Returns the substring from string str before count occurrences of the delimiter delim.
- * If count is positive, everything the left of the final delimiter (counting from left) is
- * returned. If count is negative, every to the right of the final delimiter (counting from the
- * right) is returned. substring_index performs a case-sensitive match when searching for delim.
+ * Computes the length of a given string or binary column.
*
* @group string_funcs
+ * @since 1.5.0
*/
- def substring_index(str: Column, delim: String, count: Int): Column =
- SubstringIndex(str.expr, lit(delim).expr, lit(count).expr)
+ def length(e: Column): Column = Length(e.expr)
+
+ /**
+ * Converts a string column to lower case.
+ *
+ * @group string_funcs
+ * @since 1.3.0
+ */
+ def lower(e: Column): Column = Lower(e.expr)
+
+ /**
+ * Computes the Levenshtein distance of the two given string columns.
+ * @group string_funcs
+ * @since 1.5.0
+ */
+ def levenshtein(l: Column, r: Column): Column = Levenshtein(l.expr, r.expr)
/**
* Locate the position of the first occurrence of substr.
@@ -1831,6 +1789,14 @@ object functions {
}
/**
+ * Trim the spaces from left end for the specified string value.
+ *
+ * @group string_funcs
+ * @since 1.5.0
+ */
+ def ltrim(e: Column): Column = StringTrimLeft(e.expr)
+
+ /**
* Locate the position of the first occurrence of substr in a string column, after position pos.
*
* NOTE: The position is not zero based, but 1 based index. returns 0 if substr
@@ -1843,6 +1809,15 @@ object functions {
StringLocate(lit(substr).expr, str.expr, lit(pos).expr)
}
+ /**
+ * Left-pad the string column with pad to a length of len.
+ *
+ * @group string_funcs
+ * @since 1.5.0
+ */
+ def lpad(str: Column, len: Int, pad: String): Column = {
+ StringLPad(str.expr, lit(len).expr, lit(pad).expr)
+ }
/**
* Extract a specific(idx) group identified by a java regex, from the specified string column.
@@ -1865,101 +1840,108 @@ object functions {
}
/**
- * Computes the BASE64 encoding of a binary column and returns it as a string column.
- * This is the reverse of unbase64.
+ * Decodes a BASE64 encoded string column and returns it as a binary column.
+ * This is the reverse of base64.
*
* @group string_funcs
* @since 1.5.0
*/
- def base64(e: Column): Column = Base64(e.expr)
+ def unbase64(e: Column): Column = UnBase64(e.expr)
/**
- * Decodes a BASE64 encoded string column and returns it as a binary column.
- * This is the reverse of base64.
+ * Right-padded with pad to a length of len.
*
* @group string_funcs
* @since 1.5.0
*/
- def unbase64(e: Column): Column = UnBase64(e.expr)
+ def rpad(str: Column, len: Int, pad: String): Column = {
+ StringRPad(str.expr, lit(len).expr, lit(pad).expr)
+ }
/**
- * Left-padded with pad to a length of len.
+ * Repeats a string column n times, and returns it as a new string column.
*
* @group string_funcs
* @since 1.5.0
*/
- def lpad(str: Column, len: Int, pad: String): Column = {
- StringLPad(str.expr, lit(len).expr, lit(pad).expr)
+ def repeat(str: Column, n: Int): Column = {
+ StringRepeat(str.expr, lit(n).expr)
}
/**
- * Computes the first argument into a binary from a string using the provided character set
- * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
- * If either argument is null, the result will also be null.
+ * Reverses the string column and returns it as a new string column.
*
* @group string_funcs
* @since 1.5.0
*/
- def encode(value: Column, charset: String): Column = Encode(value.expr, lit(charset).expr)
+ def reverse(str: Column): Column = {
+ StringReverse(str.expr)
+ }
/**
- * Computes the first argument into a string from a binary using the provided character set
- * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
- * If either argument is null, the result will also be null.
+ * Trim the spaces from right end for the specified string value.
*
* @group string_funcs
* @since 1.5.0
*/
- def decode(value: Column, charset: String): Column = Decode(value.expr, lit(charset).expr)
+ def rtrim(e: Column): Column = StringTrimRight(e.expr)
/**
- * Right-padded with pad to a length of len.
+ * Return the soundex code for the specified expression.
*
* @group string_funcs
* @since 1.5.0
*/
- def rpad(str: Column, len: Int, pad: String): Column = {
- StringRPad(str.expr, lit(len).expr, lit(pad).expr)
- }
+ def soundex(e: Column): Column = SoundEx(e.expr)
/**
- * Repeats a string column n times, and returns it as a new string column.
+ * Splits str around pattern (pattern is a regular expression).
+ * NOTE: pattern is a string representing the regular expression.
*
* @group string_funcs
* @since 1.5.0
*/
- def repeat(str: Column, n: Int): Column = {
- StringRepeat(str.expr, lit(n).expr)
+ def split(str: Column, pattern: String): Column = {
+ StringSplit(str.expr, lit(pattern).expr)
}
/**
- * * Return the soundex code for the specified expression.
+ * Substring starts at `pos` and is of length `len` when str is String type or
+ * returns the slice of byte array that starts at `pos` in byte and is of length `len`
+ * when str is Binary type
*
* @group string_funcs
* @since 1.5.0
*/
- def soundex(e: Column): Column = SoundEx(e.expr)
+ def substring(str: Column, pos: Int, len: Int): Column =
+ Substring(str.expr, lit(pos).expr, lit(len).expr)
/**
- * Splits str around pattern (pattern is a regular expression).
- * NOTE: pattern is a string represent the regular expression.
+ * Returns the substring from string str before count occurrences of the delimiter delim.
+ * If count is positive, everything to the left of the final delimiter (counting from left) is
+ * returned. If count is negative, everything to the right of the final delimiter (counting from the
+ * right) is returned. substring_index performs a case-sensitive match when searching for delim.
*
* @group string_funcs
- * @since 1.5.0
*/
- def split(str: Column, pattern: String): Column = {
- StringSplit(str.expr, lit(pattern).expr)
- }
+ def substring_index(str: Column, delim: String, count: Int): Column =
+ SubstringIndex(str.expr, lit(delim).expr, lit(count).expr)
/**
- * Reversed the string for the specified value.
+ * Trim the spaces from both ends for the specified string column.
*
* @group string_funcs
* @since 1.5.0
*/
- def reverse(str: Column): Column = {
- StringReverse(str.expr)
- }
+ def trim(e: Column): Column = StringTrim(e.expr)
+
+ /**
+ * Converts a string column to upper case.
+ *
+ * @group string_funcs
+ * @since 1.3.0
+ */
+ def upper(e: Column): Column = Upper(e.expr)
//////////////////////////////////////////////////////////////////////////////////////////////
// DateTime functions
@@ -1967,6 +1949,7 @@ object functions {
/**
* Returns the date that is numMonths after startDate.
+ *
* @group datetime_funcs
* @since 1.5.0
*/
@@ -1974,20 +1957,20 @@ object functions {
AddMonths(startDate.expr, Literal(numMonths))
/**
- * Converts a date/timestamp/string to a value of string in the format specified by the date
- * format given by the second argument.
+ * Returns the current date as a date column.
*
- * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All
- * pattern letters of [[java.text.SimpleDateFormat]] can be used.
- *
- * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a
- * specialized implementation.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def current_date(): Column = CurrentDate()
+
+ /**
+ * Returns the current timestamp as a timestamp column.
*
* @group datetime_funcs
* @since 1.5.0
*/
- def date_format(dateExpr: Column, format: String): Column =
- DateFormatClass(dateExpr.expr, Literal(format))
+ def current_timestamp(): Column = CurrentTimestamp()
/**
* Converts a date/timestamp/string to a value of string in the format specified by the date
@@ -2002,8 +1985,8 @@ object functions {
* @group datetime_funcs
* @since 1.5.0
*/
- def date_format(dateColumnName: String, format: String): Column =
- date_format(Column(dateColumnName), format)
+ def date_format(dateExpr: Column, format: String): Column =
+ DateFormatClass(dateExpr.expr, Literal(format))
/**
* Returns the date that is `days` days after `start`
@@ -2034,13 +2017,6 @@ object functions {
def year(e: Column): Column = Year(e.expr)
/**
- * Extracts the year as an integer from a given date/timestamp/string.
- * @group datetime_funcs
- * @since 1.5.0
- */
- def year(columnName: String): Column = year(Column(columnName))
-
- /**
* Extracts the quarter as an integer from a given date/timestamp/string.
* @group datetime_funcs
* @since 1.5.0
@@ -2048,13 +2024,6 @@ object functions {
def quarter(e: Column): Column = Quarter(e.expr)
/**
- * Extracts the quarter as an integer from a given date/timestamp/string.
- * @group datetime_funcs
- * @since 1.5.0
- */
- def quarter(columnName: String): Column = quarter(Column(columnName))
-
- /**
* Extracts the month as an integer from a given date/timestamp/string.
* @group datetime_funcs
* @since 1.5.0
@@ -2062,13 +2031,6 @@ object functions {
def month(e: Column): Column = Month(e.expr)
/**
- * Extracts the month as an integer from a given date/timestamp/string.
- * @group datetime_funcs
- * @since 1.5.0
- */
- def month(columnName: String): Column = month(Column(columnName))
-
- /**
* Extracts the day of the month as an integer from a given date/timestamp/string.
* @group datetime_funcs
* @since 1.5.0
@@ -2076,13 +2038,6 @@ object functions {
def dayofmonth(e: Column): Column = DayOfMonth(e.expr)
/**
- * Extracts the day of the month as an integer from a given date/timestamp/string.
- * @group datetime_funcs
- * @since 1.5.0
- */
- def dayofmonth(columnName: String): Column = dayofmonth(Column(columnName))
-
- /**
* Extracts the day of the year as an integer from a given date/timestamp/string.
* @group datetime_funcs
* @since 1.5.0
@@ -2090,13 +2045,6 @@ object functions {
def dayofyear(e: Column): Column = DayOfYear(e.expr)
/**
- * Extracts the day of the year as an integer from a given date/timestamp/string.
- * @group datetime_funcs
- * @since 1.5.0
- */
- def dayofyear(columnName: String): Column = dayofyear(Column(columnName))
-
- /**
* Extracts the hours as an integer from a given date/timestamp/string.
* @group datetime_funcs
* @since 1.5.0
@@ -2104,13 +2052,6 @@ object functions {
def hour(e: Column): Column = Hour(e.expr)
/**
- * Extracts the hours as an integer from a given date/timestamp/string.
- * @group datetime_funcs
- * @since 1.5.0
- */
- def hour(columnName: String): Column = hour(Column(columnName))
-
- /**
* Given a date column, returns the last day of the month which the given date belongs to.
* For example, input "2015-07-27" returns "2015-07-31" since July 31 is the last day of the
* month in July 2015.
@@ -2127,13 +2068,6 @@ object functions {
*/
def minute(e: Column): Column = Minute(e.expr)
- /**
- * Extracts the minutes as an integer from a given date/timestamp/string.
- * @group datetime_funcs
- * @since 1.5.0
- */
- def minute(columnName: String): Column = minute(Column(columnName))
-
/*
* Returns number of months between dates `date1` and `date2`.
* @group datetime_funcs
@@ -2164,13 +2098,6 @@ object functions {
def second(e: Column): Column = Second(e.expr)
/**
- * Extracts the seconds as an integer from a given date/timestamp/string.
- * @group datetime_funcs
- * @since 1.5.0
- */
- def second(columnName: String): Column = second(Column(columnName))
-
- /**
* Extracts the week number as an integer from a given date/timestamp/string.
* @group datetime_funcs
* @since 1.5.0
@@ -2178,13 +2105,6 @@ object functions {
def weekofyear(e: Column): Column = WeekOfYear(e.expr)
/**
- * Extracts the week number as an integer from a given date/timestamp/string.
- * @group datetime_funcs
- * @since 1.5.0
- */
- def weekofyear(columnName: String): Column = weekofyear(Column(columnName))
-
- /**
* Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
* representing the timestamp of that moment in the current system time zone in the given
* format.
@@ -2226,7 +2146,7 @@ object functions {
*/
def unix_timestamp(s: Column, p: String): Column = UnixTimestamp(s.expr, Literal(p))
- /*
+ /**
* Converts the column into DateType.
*
* @group datetime_funcs
@@ -2265,6 +2185,14 @@ object functions {
//////////////////////////////////////////////////////////////////////////////////////////////
/**
+ * Creates a new row for each element in the given array or map column.
+ *
+ * @group collection_funcs
+ * @since 1.3.0
+ */
+ def explode(e: Column): Column = Explode(e.expr)
+
+ /**
* Returns length of array or map.
*
* @group collection_funcs
@@ -2279,7 +2207,7 @@ object functions {
* @group collection_funcs
* @since 1.5.0
*/
- def sort_array(e: Column): Column = sort_array(e, true)
+ def sort_array(e: Column): Column = sort_array(e, asc = true)
/**
* Sorts the input array for the given column in ascending / descending order,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index 0850f5cf77..17897caf95 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -89,7 +89,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(date_format("a", "y"), date_format("b", "y"), date_format("c", "y")),
+ df.select(date_format($"a", "y"), date_format($"b", "y"), date_format($"c", "y")),
Row("2015", "2015", "2013"))
checkAnswer(
@@ -101,7 +101,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(year("a"), year("b"), year("c")),
+ df.select(year($"a"), year($"b"), year($"c")),
Row(2015, 2015, 2013))
checkAnswer(
@@ -115,7 +115,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(quarter("a"), quarter("b"), quarter("c")),
+ df.select(quarter($"a"), quarter($"b"), quarter($"c")),
Row(2, 2, 4))
checkAnswer(
@@ -127,7 +127,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(month("a"), month("b"), month("c")),
+ df.select(month($"a"), month($"b"), month($"c")),
Row(4, 4, 4))
checkAnswer(
@@ -139,7 +139,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(dayofmonth("a"), dayofmonth("b"), dayofmonth("c")),
+ df.select(dayofmonth($"a"), dayofmonth($"b"), dayofmonth($"c")),
Row(8, 8, 8))
checkAnswer(
@@ -151,7 +151,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(dayofyear("a"), dayofyear("b"), dayofyear("c")),
+ df.select(dayofyear($"a"), dayofyear($"b"), dayofyear($"c")),
Row(98, 98, 98))
checkAnswer(
@@ -163,7 +163,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(hour("a"), hour("b"), hour("c")),
+ df.select(hour($"a"), hour($"b"), hour($"c")),
Row(0, 13, 13))
checkAnswer(
@@ -175,7 +175,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(minute("a"), minute("b"), minute("c")),
+ df.select(minute($"a"), minute($"b"), minute($"c")),
Row(0, 10, 10))
checkAnswer(
@@ -187,7 +187,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(second("a"), second("b"), second("c")),
+ df.select(second($"a"), second($"b"), second($"c")),
Row(0, 15, 15))
checkAnswer(
@@ -199,7 +199,7 @@ class DateFunctionsSuite extends QueryTest {
val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
checkAnswer(
- df.select(weekofyear("a"), weekofyear("b"), weekofyear("c")),
+ df.select(weekofyear($"a"), weekofyear($"b"), weekofyear($"c")),
Row(15, 15, 15))
checkAnswer(