[SPARK-9208][SQL] Sort DataFrame functions alphabetically.

Author: Reynold Xin <rxin@databricks.com> Closes #7861 from rxin/api-audit and squashes the following commits: 7200256 [Reynold Xin] [SPARK-9208][SQL] Sort DataFrame functions alphabetically.
author: Reynold Xin <rxin@databricks.com> 2015-08-02 11:36:11 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-08-02 11:36:11 -0700
commit: 8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01 (patch)
tree: c7235322a7fb4d1b4d7adb45ba0ace2dbe42b748
parent: 244016a95c43ce6db422378e85a9d527bfe59bf1 (diff)
download: spark-8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01.tar.gz
spark-8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01.tar.bz2
spark-8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01.zip
2 files changed, 291 insertions, 363 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 197cd3de61..3595829907 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -70,15 +70,6 @@ object functions {
   def column(colName: String): Column = Column(colName)
 
   /**
-   * Convert a number in string format from one base to another.
-   *
-   * @group math_funcs
-   * @since 1.5.0
-   */
-  def conv(num: Column, fromBase: Int, toBase: Int): Column =
-    Conv(num.expr, lit(fromBase).expr, lit(toBase).expr)
-
-  /**
    * Creates a [[Column]] of literal value.
    *
    * The passed in object is returned directly if it is already a [[Column]].
@@ -132,36 +123,54 @@ object functions {
   //////////////////////////////////////////////////////////////////////////////////////////////
 
   /**
-   * Aggregate function: returns the sum of all values in the expression.
+   * Aggregate function: returns the approximate number of distinct items in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def sum(e: Column): Column = Sum(e.expr)
+  def approxCountDistinct(e: Column): Column = ApproxCountDistinct(e.expr)
 
   /**
-   * Aggregate function: returns the sum of all values in the given column.
+   * Aggregate function: returns the approximate number of distinct items in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def sum(columnName: String): Column = sum(Column(columnName))
+  def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName))
 
   /**
-   * Aggregate function: returns the sum of distinct values in the expression.
+   * Aggregate function: returns the approximate number of distinct items in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def sumDistinct(e: Column): Column = SumDistinct(e.expr)
+  def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd)
 
   /**
-   * Aggregate function: returns the sum of distinct values in the expression.
+   * Aggregate function: returns the approximate number of distinct items in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def sumDistinct(columnName: String): Column = sumDistinct(Column(columnName))
+  def approxCountDistinct(columnName: String, rsd: Double): Column = {
+    approxCountDistinct(Column(columnName), rsd)
+  }
+
+  /**
+   * Aggregate function: returns the average of the values in a group.
+   *
+   * @group agg_funcs
+   * @since 1.3.0
+   */
+  def avg(e: Column): Column = Average(e.expr)
+
+  /**
+   * Aggregate function: returns the average of the values in a group.
+   *
+   * @group agg_funcs
+   * @since 1.3.0
+   */
+  def avg(columnName: String): Column = avg(Column(columnName))
 
   /**
    * Aggregate function: returns the number of items in a group.
@@ -204,140 +213,158 @@ object functions {
     countDistinct(Column(columnName), columnNames.map(Column.apply) : _*)
 
   /**
-   * Aggregate function: returns the approximate number of distinct items in a group.
+   * Aggregate function: returns the first value in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def approxCountDistinct(e: Column): Column = ApproxCountDistinct(e.expr)
+  def first(e: Column): Column = First(e.expr)
 
   /**
-   * Aggregate function: returns the approximate number of distinct items in a group.
+   * Aggregate function: returns the first value of a column in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName))
+  def first(columnName: String): Column = first(Column(columnName))
 
   /**
-   * Aggregate function: returns the approximate number of distinct items in a group.
+   * Aggregate function: returns the last value in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd)
+  def last(e: Column): Column = Last(e.expr)
 
   /**
-   * Aggregate function: returns the approximate number of distinct items in a group.
+   * Aggregate function: returns the last value of the column in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def approxCountDistinct(columnName: String, rsd: Double): Column = {
-    approxCountDistinct(Column(columnName), rsd)
-  }
+  def last(columnName: String): Column = last(Column(columnName))
 
   /**
-   * Aggregate function: returns the average of the values in a group.
+   * Aggregate function: returns the maximum value of the expression in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def avg(e: Column): Column = Average(e.expr)
+  def max(e: Column): Column = Max(e.expr)
 
   /**
-   * Aggregate function: returns the average of the values in a group.
+   * Aggregate function: returns the maximum value of the column in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def avg(columnName: String): Column = avg(Column(columnName))
+  def max(columnName: String): Column = max(Column(columnName))
 
   /**
-   * Aggregate function: returns the first value in a group.
+   * Aggregate function: returns the average of the values in a group.
+   * Alias for avg.
    *
    * @group agg_funcs
-   * @since 1.3.0
+   * @since 1.4.0
    */
-  def first(e: Column): Column = First(e.expr)
+  def mean(e: Column): Column = avg(e)
 
   /**
-   * Aggregate function: returns the first value of a column in a group.
+   * Aggregate function: returns the average of the values in a group.
+   * Alias for avg.
    *
    * @group agg_funcs
-   * @since 1.3.0
+   * @since 1.4.0
    */
-  def first(columnName: String): Column = first(Column(columnName))
+  def mean(columnName: String): Column = avg(columnName)
 
   /**
-   * Aggregate function: returns the last value in a group.
+   * Aggregate function: returns the minimum value of the expression in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def last(e: Column): Column = Last(e.expr)
+  def min(e: Column): Column = Min(e.expr)
 
   /**
-   * Aggregate function: returns the last value of the column in a group.
+   * Aggregate function: returns the minimum value of the column in a group.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def last(columnName: String): Column = last(Column(columnName))
+  def min(columnName: String): Column = min(Column(columnName))
 
   /**
-   * Aggregate function: returns the average of the values in a group.
-   * Alias for avg.
+   * Aggregate function: returns the sum of all values in the expression.
    *
    * @group agg_funcs
-   * @since 1.4.0
+   * @since 1.3.0
    */
-  def mean(e: Column): Column = avg(e)
+  def sum(e: Column): Column = Sum(e.expr)
 
   /**
-   * Aggregate function: returns the average of the values in a group.
-   * Alias for avg.
+   * Aggregate function: returns the sum of all values in the given column.
    *
    * @group agg_funcs
-   * @since 1.4.0
+   * @since 1.3.0
    */
-  def mean(columnName: String): Column = avg(columnName)
+  def sum(columnName: String): Column = sum(Column(columnName))
 
   /**
-   * Aggregate function: returns the minimum value of the expression in a group.
+   * Aggregate function: returns the sum of distinct values in the expression.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def min(e: Column): Column = Min(e.expr)
+  def sumDistinct(e: Column): Column = SumDistinct(e.expr)
 
   /**
-   * Aggregate function: returns the minimum value of the column in a group.
+   * Aggregate function: returns the sum of distinct values in the expression.
    *
    * @group agg_funcs
    * @since 1.3.0
    */
-  def min(columnName: String): Column = min(Column(columnName))
+  def sumDistinct(columnName: String): Column = sumDistinct(Column(columnName))
+
+  //////////////////////////////////////////////////////////////////////////////////////////////
+  // Window functions
+  //////////////////////////////////////////////////////////////////////////////////////////////
 
   /**
-   * Aggregate function: returns the maximum value of the expression in a group.
+   * Window function: returns the cumulative distribution of values within a window partition,
+   * i.e. the fraction of rows that are below the current row.
    *
-   * @group agg_funcs
-   * @since 1.3.0
+   * {{{
+   *   N = total number of rows in the partition
+   *   cumeDist(x) = number of values before (and including) x / N
+   * }}}
+   *
+   *
+   * This is equivalent to the CUME_DIST function in SQL.
+   *
+   * @group window_funcs
+   * @since 1.4.0
    */
-  def max(e: Column): Column = Max(e.expr)
+  def cumeDist(): Column = {
+    UnresolvedWindowFunction("cume_dist", Nil)
+  }
 
   /**
-   * Aggregate function: returns the maximum value of the column in a group.
+   * Window function: returns the rank of rows within a window partition, without any gaps.
    *
-   * @group agg_funcs
-   * @since 1.3.0
+   * The difference between rank and denseRank is that denseRank leaves no gaps in ranking
+   * sequence when there are ties. That is, if you were ranking a competition using denseRank
+   * and had three people tie for second place, you would say that all three were in second
+   * place and that the next person came in third.
+   *
+   * This is equivalent to the DENSE_RANK function in SQL.
+   *
+   * @group window_funcs
+   * @since 1.4.0
    */
-  def max(columnName: String): Column = max(Column(columnName))
-
-  //////////////////////////////////////////////////////////////////////////////////////////////
-  // Window functions
-  //////////////////////////////////////////////////////////////////////////////////////////////
+  def denseRank(): Column = {
+    UnresolvedWindowFunction("dense_rank", Nil)
+  }
 
   /**
    * Window function: returns the value that is `offset` rows before the current row, and
@@ -466,32 +493,20 @@ object functions {
   }
 
   /**
-   * Window function: returns a sequential number starting at 1 within a window partition.
-   *
-   * This is equivalent to the ROW_NUMBER function in SQL.
-   *
-   * @group window_funcs
-   * @since 1.4.0
-   */
-  def rowNumber(): Column = {
-    UnresolvedWindowFunction("row_number", Nil)
-  }
-
-  /**
-   * Window function: returns the rank of rows within a window partition, without any gaps.
+   * Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
    *
-   * The difference between rank and denseRank is that denseRank leaves no gaps in ranking
-   * sequence when there are ties. That is, if you were ranking a competition using denseRank
-   * and had three people tie for second place, you would say that all three were in second
-   * place and that the next person came in third.
+   * This is computed by:
+   * {{{
+   *   (rank of row in its partition - 1) / (number of rows in the partition - 1)
+   * }}}
    *
-   * This is equivalent to the DENSE_RANK function in SQL.
+   * This is equivalent to the PERCENT_RANK function in SQL.
    *
    * @group window_funcs
    * @since 1.4.0
    */
-  def denseRank(): Column = {
-    UnresolvedWindowFunction("dense_rank", Nil)
+  def percentRank(): Column = {
+    UnresolvedWindowFunction("percent_rank", Nil)
   }
 
   /**
@@ -512,39 +527,15 @@ object functions {
   }
 
   /**
-   * Window function: returns the cumulative distribution of values within a window partition,
-   * i.e. the fraction of rows that are below the current row.
-   *
-   * {{{
-   *   N = total number of rows in the partition
-   *   cumeDist(x) = number of values before (and including) x / N
-   * }}}
-   *
-   *
-   * This is equivalent to the CUME_DIST function in SQL.
-   *
-   * @group window_funcs
-   * @since 1.4.0
-   */
-  def cumeDist(): Column = {
-    UnresolvedWindowFunction("cume_dist", Nil)
-  }
-
-  /**
-   * Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
-   *
-   * This is computed by:
-   * {{{
-   *   (rank of row in its partition - 1) / (number of rows in the partition - 1)
-   * }}}
+   * Window function: returns a sequential number starting at 1 within a window partition.
    *
-   * This is equivalent to the PERCENT_RANK function in SQL.
+   * This is equivalent to the ROW_NUMBER function in SQL.
    *
    * @group window_funcs
    * @since 1.4.0
    */
-  def percentRank(): Column = {
-    UnresolvedWindowFunction("percent_rank", Nil)
+  def rowNumber(): Column = {
+    UnresolvedWindowFunction("row_number", Nil)
   }
 
   //////////////////////////////////////////////////////////////////////////////////////////////
@@ -595,10 +586,10 @@ object functions {
   }
 
   /**
-   * Returns the first column that is not null and not NaN.
-   * {{{
-   *   df.select(coalesce(df("a"), df("b")))
-   * }}}
+   * Returns the first column that is not null, or null if all inputs are null.
+   *
+   * For example, `coalesce(a, b, c)` will return a if a is not null,
+   * or b if a is null and b is not null, or c if both a and b are null but c is not null.
    *
    * @group normal_funcs
    * @since 1.3.0
@@ -607,9 +598,11 @@ object functions {
   def coalesce(e: Column*): Column = Coalesce(e.map(_.expr))
 
   /**
-   * Creates a new row for each element in the given array or map column.
+   * Creates a string column for the file name of the current Spark task.
+   *
+   * @group normal_funcs
    */
-  def explode(e: Column): Column = Explode(e.expr)
+  def inputFileName(): Column = InputFileName()
 
   /**
    * Return true iff the column is NaN.
@@ -637,13 +630,14 @@ object functions {
   def monotonicallyIncreasingId(): Column = MonotonicallyIncreasingID()
 
   /**
-   * Return an alternative value `r` if `l` is NaN.
-   * This function is useful for mapping NaN values to null.
+   * Returns col1 if it is not NaN, or col2 if col1 is NaN.
+   *
+   * Both inputs should be floating point columns (DoubleType or FloatType).
    *
    * @group normal_funcs
    * @since 1.5.0
    */
-  def nanvl(l: Column, r: Column): Column = NaNvl(l.expr, r.expr)
+  def nanvl(col1: Column, col2: Column): Column = NaNvl(col1.expr, col2.expr)
 
   /**
    * Unary minus, i.e. negate the expression.
@@ -677,31 +671,6 @@ object functions {
   def not(e: Column): Column = !e
 
   /**
-   * Evaluates a list of conditions and returns one of multiple possible result expressions.
-   * If otherwise is not defined at the end, null is returned for unmatched conditions.
-   *
-   * {{{
-   *   // Example: encoding gender string column into integer.
-   *
-   *   // Scala:
-   *   people.select(when(people("gender") === "male", 0)
-   *     .when(people("gender") === "female", 1)
-   *     .otherwise(2))
-   *
-   *   // Java:
-   *   people.select(when(col("gender").equalTo("male"), 0)
-   *     .when(col("gender").equalTo("female"), 1)
-   *     .otherwise(2))
-   * }}}
-   *
-   * @group normal_funcs
-   * @since 1.4.0
-   */
-  def when(condition: Column, value: Any): Column = {
-    CaseWhen(Seq(condition.expr, lit(value).expr))
-  }
-
-  /**
    * Generate a random column with i.i.d. samples from U[0.0, 1.0].
    *
    * @group normal_funcs
@@ -744,15 +713,6 @@ object functions {
   def sparkPartitionId(): Column = SparkPartitionID()
 
   /**
-   * The file name of the current Spark task
-   *
-   * Note that this is indeterministic becuase it depends on what is currently being read in.
-   *
-   * @group normal_funcs
-   */
-  def inputFileName(): Column = InputFileName()
-
-  /**
    * Computes the square root of the specified float value.
    *
    * @group math_funcs
@@ -794,6 +754,31 @@ object functions {
   }
 
   /**
+   * Evaluates a list of conditions and returns one of multiple possible result expressions.
+   * If otherwise is not defined at the end, null is returned for unmatched conditions.
+   *
+   * {{{
+   *   // Example: encoding gender string column into integer.
+   *
+   *   // Scala:
+   *   people.select(when(people("gender") === "male", 0)
+   *     .when(people("gender") === "female", 1)
+   *     .otherwise(2))
+   *
+   *   // Java:
+   *   people.select(when(col("gender").equalTo("male"), 0)
+   *     .when(col("gender").equalTo("female"), 1)
+   *     .otherwise(2))
+   * }}}
+   *
+   * @group normal_funcs
+   * @since 1.4.0
+   */
+  def when(condition: Column, value: Any): Column = {
+    CaseWhen(Seq(condition.expr, lit(value).expr))
+  }
+
+  /**
    * Computes bitwise NOT.
    *
    * @group normal_funcs
@@ -993,6 +978,15 @@ object functions {
   def ceil(columnName: String): Column = ceil(Column(columnName))
 
   /**
+   * Convert a number in a string column from one base to another.
+   *
+   * @group math_funcs
+   * @since 1.5.0
+   */
+  def conv(num: Column, fromBase: Int, toBase: Int): Column =
+    Conv(num.expr, lit(fromBase).expr, lit(toBase).expr)
+
+  /**
    * Computes the cosine of the given value.
    *
    * @group math_funcs
@@ -1025,22 +1019,6 @@ object functions {
   def cosh(columnName: String): Column = cosh(Column(columnName))
 
   /**
-   * Returns the current date.
-   *
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def current_date(): Column = CurrentDate()
-
-  /**
-   * Returns the current timestamp.
-   *
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def current_timestamp(): Column = CurrentTimestamp()
-
-  /**
    * Computes the exponential of the given value.
    *
    * @group math_funcs
@@ -1671,109 +1649,75 @@ object functions {
   //////////////////////////////////////////////////////////////////////////////////////////////
 
   /**
-   * Concatenates input strings together into a single string.
-   *
-   * @group string_funcs
-   * @since 1.5.0
-   */
-  @scala.annotation.varargs
-  def concat(exprs: Column*): Column = Concat(exprs.map(_.expr))
-
-  /**
-   * Concatenates input strings together into a single string, using the given separator.
+   * Computes the numeric value of the first character of the string column, and returns the
+   * result as an int column.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  @scala.annotation.varargs
-  def concat_ws(sep: String, exprs: Column*): Column = {
-    ConcatWs(Literal.create(sep, StringType) +: exprs.map(_.expr))
-  }
+  def ascii(e: Column): Column = Ascii(e.expr)
 
   /**
-   * Computes the length of a given string / binary value.
+   * Computes the BASE64 encoding of a binary column and returns it as a string column.
+   * This is the reverse of unbase64.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def length(e: Column): Column = Length(e.expr)
-
-  /**
-   * Converts a string expression to lower case.
-   *
-   * @group string_funcs
-   * @since 1.3.0
-   */
-  def lower(e: Column): Column = Lower(e.expr)
-
-  /**
-   * Converts a string expression to upper case.
-   *
-   * @group string_funcs
-   * @since 1.3.0
-   */
-  def upper(e: Column): Column = Upper(e.expr)
+  def base64(e: Column): Column = Base64(e.expr)
 
   /**
-   * Formats the number X to a format like '#,###,###.##', rounded to d decimal places,
-   * and returns the result as a string.
-   * If d is 0, the result has no decimal point or fractional part.
-   * If d < 0, the result will be null.
+   * Concatenates multiple input string columns together into a single string column.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def format_number(x: Column, d: Int): Column = FormatNumber(x.expr, lit(d).expr)
+  @scala.annotation.varargs
+  def concat(exprs: Column*): Column = Concat(exprs.map(_.expr))
 
   /**
-   * Substring starts at `pos` and is of length `len` when str is String type or
-   * returns the slice of byte array that starts at `pos` in byte and is of length `len`
-   * when str is Binary type
+   * Concatenates multiple input string columns together into a single string column,
+   * using the given separator.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def substring(str: Column, pos: Int, len: Int): Column =
-    Substring(str.expr, lit(pos).expr, lit(len).expr)
-
-  /**
-   * Computes the Levenshtein distance of the two given string columns.
-   * @group string_funcs
-   * @since 1.5.0
-   */
-  def levenshtein(l: Column, r: Column): Column = Levenshtein(l.expr, r.expr)
+  @scala.annotation.varargs
+  def concat_ws(sep: String, exprs: Column*): Column = {
+    ConcatWs(Literal.create(sep, StringType) +: exprs.map(_.expr))
+  }
 
   /**
-   * Computes the numeric value of the first character of the specified string column.
+   * Decodes the first argument from binary into a string using the provided character set
+   * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
+   * If either argument is null, the result will also be null.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def ascii(e: Column): Column = Ascii(e.expr)
+  def decode(value: Column, charset: String): Column = Decode(value.expr, lit(charset).expr)
 
   /**
-   * Trim the spaces from both ends for the specified string column.
+   * Encodes the first argument from a string into binary using the provided character set
+   * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
+   * If either argument is null, the result will also be null.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def trim(e: Column): Column = StringTrim(e.expr)
+  def encode(value: Column, charset: String): Column = Encode(value.expr, lit(charset).expr)
 
   /**
-   * Trim the spaces from left end for the specified string value.
+   * Formats numeric column x to a format like '#,###,###.##', rounded to d decimal places,
+   * and returns the result as a string column.
    *
-   * @group string_funcs
-   * @since 1.5.0
-   */
-  def ltrim(e: Column): Column = StringTrimLeft(e.expr)
-
-  /**
-   * Trim the spaces from right end for the specified string value.
+   * If d is 0, the result has no decimal point or fractional part.
+   * If d < 0, the result will be null.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def rtrim(e: Column): Column = StringTrimRight(e.expr)
+  def format_number(x: Column, d: Int): Column = FormatNumber(x.expr, lit(d).expr)
 
   /**
    * Formats the arguments in printf-style and returns the result as a string column.
@@ -1787,9 +1731,11 @@ object functions {
   }
 
   /**
-   * Returns string, with the first letter of each word in uppercase.
+   * Returns a new string column by converting the first letter of each word to uppercase.
    * Words are delimited by whitespace.
    *
+   * For example, "hello world" will become "Hello World".
+   *
    * @group string_funcs
    * @since 1.5.0
    */
@@ -1808,15 +1754,27 @@ object functions {
   def instr(str: Column, substring: String): Column = StringInstr(str.expr, lit(substring).expr)
 
   /**
-   * Returns the substring from string str before count occurrences of the delimiter delim.
-   * If count is positive, everything the left of the final delimiter (counting from left) is
-   * returned. If count is negative, every to the right of the final delimiter (counting from the
-   * right) is returned. substring_index performs a case-sensitive match when searching for delim.
+   * Computes the length of a given string or binary column.
    *
    * @group string_funcs
+   * @since 1.5.0
    */
-  def substring_index(str: Column, delim: String, count: Int): Column =
-    SubstringIndex(str.expr, lit(delim).expr, lit(count).expr)
+  def length(e: Column): Column = Length(e.expr)
+
+  /**
+   * Converts a string column to lower case.
+   *
+   * @group string_funcs
+   * @since 1.3.0
+   */
+  def lower(e: Column): Column = Lower(e.expr)
+
+  /**
+   * Computes the Levenshtein distance of the two given string columns.
+   * @group string_funcs
+   * @since 1.5.0
+   */
+  def levenshtein(l: Column, r: Column): Column = Levenshtein(l.expr, r.expr)
 
   /**
    * Locate the position of the first occurrence of substr.
@@ -1831,6 +1789,14 @@ object functions {
   }
 
   /**
+   * Trim the spaces from left end for the specified string value.
+   *
+   * @group string_funcs
+   * @since 1.5.0
+   */
+  def ltrim(e: Column): Column = StringTrimLeft(e.expr)
+
+  /**
    * Locate the position of the first occurrence of substr in a string column, after position pos.
    *
    * NOTE: The position is not zero based, but 1 based index. returns 0 if substr
@@ -1843,6 +1809,15 @@ object functions {
     StringLocate(lit(substr).expr, str.expr, lit(pos).expr)
   }
 
+  /**
+   * Left-pad the string column with pad to a length of len.
+   *
+   * @group string_funcs
+   * @since 1.5.0
+   */
+  def lpad(str: Column, len: Int, pad: String): Column = {
+    StringLPad(str.expr, lit(len).expr, lit(pad).expr)
+  }
 
   /**
    * Extract a specific(idx) group identified by a java regex, from the specified string column.
@@ -1865,101 +1840,108 @@ object functions {
   }
 
   /**
-   * Computes the BASE64 encoding of a binary column and returns it as a string column.
-   * This is the reverse of unbase64.
+   * Decodes a BASE64 encoded string column and returns it as a binary column.
+   * This is the reverse of base64.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def base64(e: Column): Column = Base64(e.expr)
+  def unbase64(e: Column): Column = UnBase64(e.expr)
 
   /**
-   * Decodes a BASE64 encoded string column and returns it as a binary column.
-   * This is the reverse of base64.
+   * Right-padded with pad to a length of len.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def unbase64(e: Column): Column = UnBase64(e.expr)
+  def rpad(str: Column, len: Int, pad: String): Column = {
+    StringRPad(str.expr, lit(len).expr, lit(pad).expr)
+  }
 
   /**
-   * Left-padded with pad to a length of len.
+   * Repeats a string column n times, and returns it as a new string column.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def lpad(str: Column, len: Int, pad: String): Column = {
-    StringLPad(str.expr, lit(len).expr, lit(pad).expr)
+  def repeat(str: Column, n: Int): Column = {
+    StringRepeat(str.expr, lit(n).expr)
   }
 
   /**
-   * Computes the first argument into a binary from a string using the provided character set
-   * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
-   * If either argument is null, the result will also be null.
+   * Reverses the string column and returns it as a new string column.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def encode(value: Column, charset: String): Column = Encode(value.expr, lit(charset).expr)
+  def reverse(str: Column): Column = {
+    StringReverse(str.expr)
+  }
 
   /**
-   * Computes the first argument into a string from a binary using the provided character set
-   * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
-   * If either argument is null, the result will also be null.
+   * Trim the spaces from right end for the specified string value.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def decode(value: Column, charset: String): Column = Decode(value.expr, lit(charset).expr)
+  def rtrim(e: Column): Column = StringTrimRight(e.expr)
 
   /**
-   * Right-padded with pad to a length of len.
+   * Return the soundex code for the specified expression.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def rpad(str: Column, len: Int, pad: String): Column = {
-    StringRPad(str.expr, lit(len).expr, lit(pad).expr)
-  }
+  def soundex(e: Column): Column = SoundEx(e.expr)
 
   /**
-   * Repeats a string column n times, and returns it as a new string column.
+   * Splits str around pattern (pattern is a regular expression).
+   * NOTE: pattern is a string representing the regular expression.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def repeat(str: Column, n: Int): Column = {
-    StringRepeat(str.expr, lit(n).expr)
+  def split(str: Column, pattern: String): Column = {
+    StringSplit(str.expr, lit(pattern).expr)
   }
 
   /**
-   * * Return the soundex code for the specified expression.
+   * Substring starts at `pos` and is of length `len` when str is String type or
+   * returns the slice of byte array that starts at `pos` in byte and is of length `len`
+   * when str is Binary type
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def soundex(e: Column): Column = SoundEx(e.expr)
+  def substring(str: Column, pos: Int, len: Int): Column =
+    Substring(str.expr, lit(pos).expr, lit(len).expr)
 
   /**
-   * Splits str around pattern (pattern is a regular expression).
-   * NOTE: pattern is a string represent the regular expression.
+   * Returns the substring from string str before count occurrences of the delimiter delim.
+   * If count is positive, everything left of the final delimiter (counting from left) is
+   * returned. If count is negative, everything right of the final delimiter (counting from the
+   * right) is returned. substring_index performs a case-sensitive match when searching for delim.
    *
    * @group string_funcs
-   * @since 1.5.0
    */
-  def split(str: Column, pattern: String): Column = {
-    StringSplit(str.expr, lit(pattern).expr)
-  }
+  def substring_index(str: Column, delim: String, count: Int): Column =
+    SubstringIndex(str.expr, lit(delim).expr, lit(count).expr)
 
   /**
-   * Reversed the string for the specified value.
+   * Trim the spaces from both ends for the specified string column.
    *
    * @group string_funcs
    * @since 1.5.0
    */
-  def reverse(str: Column): Column = {
-    StringReverse(str.expr)
-  }
+  def trim(e: Column): Column = StringTrim(e.expr)
+
+  /**
+   * Converts a string column to upper case.
+   *
+   * @group string_funcs
+   * @since 1.3.0
+   */
+  def upper(e: Column): Column = Upper(e.expr)
 
   //////////////////////////////////////////////////////////////////////////////////////////////
   // DateTime functions
@@ -1967,6 +1949,7 @@ object functions {
 
   /**
    * Returns the date that is numMonths after startDate.
+   *
    * @group datetime_funcs
    * @since 1.5.0
    */
@@ -1974,20 +1957,20 @@ object functions {
     AddMonths(startDate.expr, Literal(numMonths))
 
   /**
-   * Converts a date/timestamp/string to a value of string in the format specified by the date
-   * format given by the second argument.
+   * Returns the current date as a date column.
    *
-   * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All
-   * pattern letters of [[java.text.SimpleDateFormat]] can be used.
-   *
-   * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a
-   * specialized implementation.
+   * @group datetime_funcs
+   * @since 1.5.0
+   */
+  def current_date(): Column = CurrentDate()
+
+  /**
+   * Returns the current timestamp as a timestamp column.
    *
    * @group datetime_funcs
    * @since 1.5.0
    */
-  def date_format(dateExpr: Column, format: String): Column =
-    DateFormatClass(dateExpr.expr, Literal(format))
+  def current_timestamp(): Column = CurrentTimestamp()
 
   /**
    * Converts a date/timestamp/string to a value of string in the format specified by the date
@@ -2002,8 +1985,8 @@ object functions {
    * @group datetime_funcs
    * @since 1.5.0
    */
-  def date_format(dateColumnName: String, format: String): Column =
-    date_format(Column(dateColumnName), format)
+  def date_format(dateExpr: Column, format: String): Column =
+    DateFormatClass(dateExpr.expr, Literal(format))
 
   /**
    * Returns the date that is `days` days after `start`
@@ -2034,13 +2017,6 @@ object functions {
   def year(e: Column): Column = Year(e.expr)
 
   /**
-   * Extracts the year as an integer from a given date/timestamp/string.
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def year(columnName: String): Column = year(Column(columnName))
-
-  /**
    * Extracts the quarter as an integer from a given date/timestamp/string.
    * @group datetime_funcs
    * @since 1.5.0
@@ -2048,13 +2024,6 @@ object functions {
   def quarter(e: Column): Column = Quarter(e.expr)
 
   /**
-   * Extracts the quarter as an integer from a given date/timestamp/string.
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def quarter(columnName: String): Column = quarter(Column(columnName))
-
-  /**
    * Extracts the month as an integer from a given date/timestamp/string.
    * @group datetime_funcs
    * @since 1.5.0
@@ -2062,13 +2031,6 @@ object functions {
   def month(e: Column): Column = Month(e.expr)
 
   /**
-   * Extracts the month as an integer from a given date/timestamp/string.
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def month(columnName: String): Column = month(Column(columnName))
-
-  /**
    * Extracts the day of the month as an integer from a given date/timestamp/string.
    * @group datetime_funcs
    * @since 1.5.0
@@ -2076,13 +2038,6 @@ object functions {
   def dayofmonth(e: Column): Column = DayOfMonth(e.expr)
 
   /**
-   * Extracts the day of the month as an integer from a given date/timestamp/string.
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def dayofmonth(columnName: String): Column = dayofmonth(Column(columnName))
-
-  /**
    * Extracts the day of the year as an integer from a given date/timestamp/string.
    * @group datetime_funcs
    * @since 1.5.0
@@ -2090,13 +2045,6 @@ object functions {
   def dayofyear(e: Column): Column = DayOfYear(e.expr)
 
   /**
-   * Extracts the day of the year as an integer from a given date/timestamp/string.
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def dayofyear(columnName: String): Column = dayofyear(Column(columnName))
-
-  /**
    * Extracts the hours as an integer from a given date/timestamp/string.
    * @group datetime_funcs
    * @since 1.5.0
@@ -2104,13 +2052,6 @@ object functions {
   def hour(e: Column): Column = Hour(e.expr)
 
   /**
-   * Extracts the hours as an integer from a given date/timestamp/string.
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def hour(columnName: String): Column = hour(Column(columnName))
-
-  /**
    * Given a date column, returns the last day of the month which the given date belongs to.
    * For example, input "2015-07-27" returns "2015-07-31" since July 31 is the last day of the
    * month in July 2015.
@@ -2127,13 +2068,6 @@ object functions {
    */
   def minute(e: Column): Column = Minute(e.expr)
 
-  /**
-   * Extracts the minutes as an integer from a given date/timestamp/string.
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def minute(columnName: String): Column = minute(Column(columnName))
-
   /*
    * Returns number of months between dates `date1` and `date2`.
    * @group datetime_funcs
@@ -2164,13 +2098,6 @@ object functions {
   def second(e: Column): Column = Second(e.expr)
 
   /**
-   * Extracts the seconds as an integer from a given date/timestamp/string.
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def second(columnName: String): Column = second(Column(columnName))
-
-  /**
    * Extracts the week number as an integer from a given date/timestamp/string.
    * @group datetime_funcs
    * @since 1.5.0
@@ -2178,13 +2105,6 @@ object functions {
   def weekofyear(e: Column): Column = WeekOfYear(e.expr)
 
   /**
-   * Extracts the week number as an integer from a given date/timestamp/string.
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def weekofyear(columnName: String): Column = weekofyear(Column(columnName))
-
-  /**
    * Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
    * representing the timestamp of that moment in the current system time zone in the given
    * format.
@@ -2226,7 +2146,7 @@ object functions {
    */
   def unix_timestamp(s: Column, p: String): Column = UnixTimestamp(s.expr, Literal(p))
 
-  /*
+  /**
    * Converts the column into DateType.
    *
    * @group datetime_funcs
@@ -2265,6 +2185,14 @@ object functions {
   //////////////////////////////////////////////////////////////////////////////////////////////
 
   /**
+   * Creates a new row for each element in the given array or map column.
+   *
+   * @group collection_funcs
+   * @since 1.3.0
+   */
+  def explode(e: Column): Column = Explode(e.expr)
+
+  /**
    * Returns length of array or map.
    *
    * @group collection_funcs
@@ -2279,7 +2207,7 @@ object functions {
    * @group collection_funcs
    * @since 1.5.0
    */
-  def sort_array(e: Column): Column = sort_array(e, true)
+  def sort_array(e: Column): Column = sort_array(e, asc = true)
 
   /**
    * Sorts the input array for the given column in ascending / descending order,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index 0850f5cf77..17897caf95 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -89,7 +89,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(date_format("a", "y"), date_format("b", "y"), date_format("c", "y")),
+      df.select(date_format($"a", "y"), date_format($"b", "y"), date_format($"c", "y")),
       Row("2015", "2015", "2013"))
 
     checkAnswer(
@@ -101,7 +101,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(year("a"), year("b"), year("c")),
+      df.select(year($"a"), year($"b"), year($"c")),
       Row(2015, 2015, 2013))
 
     checkAnswer(
@@ -115,7 +115,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(quarter("a"), quarter("b"), quarter("c")),
+      df.select(quarter($"a"), quarter($"b"), quarter($"c")),
       Row(2, 2, 4))
 
     checkAnswer(
@@ -127,7 +127,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(month("a"), month("b"), month("c")),
+      df.select(month($"a"), month($"b"), month($"c")),
       Row(4, 4, 4))
 
     checkAnswer(
@@ -139,7 +139,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(dayofmonth("a"), dayofmonth("b"), dayofmonth("c")),
+      df.select(dayofmonth($"a"), dayofmonth($"b"), dayofmonth($"c")),
       Row(8, 8, 8))
 
     checkAnswer(
@@ -151,7 +151,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(dayofyear("a"), dayofyear("b"), dayofyear("c")),
+      df.select(dayofyear($"a"), dayofyear($"b"), dayofyear($"c")),
       Row(98, 98, 98))
 
     checkAnswer(
@@ -163,7 +163,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(hour("a"), hour("b"), hour("c")),
+      df.select(hour($"a"), hour($"b"), hour($"c")),
       Row(0, 13, 13))
 
     checkAnswer(
@@ -175,7 +175,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(minute("a"), minute("b"), minute("c")),
+      df.select(minute($"a"), minute($"b"), minute($"c")),
       Row(0, 10, 10))
 
     checkAnswer(
@@ -187,7 +187,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(second("a"), second("b"), second("c")),
+      df.select(second($"a"), second($"b"), second($"c")),
       Row(0, 15, 15))
 
     checkAnswer(
@@ -199,7 +199,7 @@ class DateFunctionsSuite extends QueryTest {
     val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
-      df.select(weekofyear("a"), weekofyear("b"), weekofyear("c")),
+      df.select(weekofyear($"a"), weekofyear($"b"), weekofyear($"c")),
       Row(15, 15, 15))
 
     checkAnswer(
author	Reynold Xin <rxin@databricks.com>	2015-08-02 11:36:11 -0700
committer	Reynold Xin <rxin@databricks.com>	2015-08-02 11:36:11 -0700
commit	8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01 (patch)
tree	c7235322a7fb4d1b4d7adb45ba0ace2dbe42b748
parent	244016a95c43ce6db422378e85a9d527bfe59bf1 (diff)
download	spark-8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01.tar.gz spark-8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01.tar.bz2 spark-8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01.zip