path: root/sql
author    Reynold Xin <rxin@databricks.com>    2015-11-24 21:30:53 -0800
committer Reynold Xin <rxin@databricks.com>    2015-11-24 21:30:53 -0800
commit    151d7c2baf18403e6e59e97c80c8bcded6148038 (patch)
tree      d6b60910109f12e5d91737370ac6ec1e2671241c /sql
parent    a5d988763319f63a8e2b58673dd4f9098f17c835 (diff)
[SPARK-10621][SQL] Consistent naming for functions in SQL, Python, Scala
Author: Reynold Xin <rxin@databricks.com>

Closes #9948 from rxin/SPARK-10621.
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 124
1 file changed, 102 insertions(+), 22 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 77dd5bc725..276c5dfc8b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -473,6 +473,13 @@ object functions extends LegacyFunctions {
//////////////////////////////////////////////////////////////////////////////////////////////
/**
+ * @group window_funcs
+ * @deprecated As of 1.6.0, replaced by `cume_dist`. This will be removed in Spark 2.0.
+ */
+ @deprecated("Use cume_dist. This will be removed in Spark 2.0.", "1.6.0")
+ def cumeDist(): Column = cume_dist()
+
+ /**
* Window function: returns the cumulative distribution of values within a window partition,
* i.e. the fraction of rows that are below the current row.
*
@@ -481,13 +488,17 @@ object functions extends LegacyFunctions {
 * {{{
 *   N = total number of rows in the partition
 *   cumeDist(x) = number of values before (and including) x / N
* }}}
*
- *
- * This is equivalent to the CUME_DIST function in SQL.
- *
* @group window_funcs
- * @since 1.4.0
+ * @since 1.6.0
*/
- def cumeDist(): Column = withExpr { UnresolvedWindowFunction("cume_dist", Nil) }
+ def cume_dist(): Column = withExpr { UnresolvedWindowFunction("cume_dist", Nil) }
+
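For context, a minimal usage sketch of the renamed function (not part of this commit; the DataFrame df and its dept/salary columns are hypothetical):

    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions._

    // Fraction of rows at or below each salary within its department.
    val byDept = Window.partitionBy("dept").orderBy("salary")
    df.select(col("dept"), col("salary"), cume_dist().over(byDept).as("cume_dist"))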
+ /**
+ * @group window_funcs
+ * @deprecated As of 1.6.0, replaced by `dense_rank`. This will be removed in Spark 2.0.
+ */
+ @deprecated("Use dense_rank. This will be removed in Spark 2.0.", "1.6.0")
+ def denseRank(): Column = dense_rank()
/**
* Window function: returns the rank of rows within a window partition, without any gaps.
@@ -497,12 +508,10 @@ object functions extends LegacyFunctions {
* and had three people tie for second place, you would say that all three were in second
* place and that the next person came in third.
*
- * This is equivalent to the DENSE_RANK function in SQL.
- *
* @group window_funcs
- * @since 1.4.0
+ * @since 1.6.0
*/
- def denseRank(): Column = withExpr { UnresolvedWindowFunction("dense_rank", Nil) }
+ def dense_rank(): Column = withExpr { UnresolvedWindowFunction("dense_rank", Nil) }
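A sketch of dense_rank over a window, under the same hypothetical df:

    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions._

    // Ties share a rank and no ranks are skipped: 1, 2, 2, 3, ...
    val w = Window.partitionBy("dept").orderBy(col("salary").desc)
    df.select(col("name"), col("salary"), dense_rank().over(w).as("dense_rank"))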
/**
* Window function: returns the value that is `offset` rows before the current row, and
@@ -621,6 +630,13 @@ object functions extends LegacyFunctions {
def ntile(n: Int): Column = withExpr { UnresolvedWindowFunction("ntile", lit(n).expr :: Nil) }
/**
+ * @group window_funcs
+ * @deprecated As of 1.6.0, replaced by `percent_rank`. This will be removed in Spark 2.0.
+ */
+ @deprecated("Use percent_rank. This will be removed in Spark 2.0.", "1.6.0")
+ def percentRank(): Column = percent_rank()
+
+ /**
* Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
*
* This is computed by:
@@ -631,9 +647,9 @@ object functions extends LegacyFunctions {
* This is equivalent to the PERCENT_RANK function in SQL.
*
* @group window_funcs
- * @since 1.4.0
+ * @since 1.6.0
*/
- def percentRank(): Column = withExpr { UnresolvedWindowFunction("percent_rank", Nil) }
+ def percent_rank(): Column = withExpr { UnresolvedWindowFunction("percent_rank", Nil) }
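A usage sketch for the renamed percent_rank (hypothetical df again):

    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions._

    // percent_rank is (rank - 1) / (rows in partition - 1), so it spans [0, 1].
    val w = Window.partitionBy("dept").orderBy("salary")
    df.select(col("salary"), percent_rank().over(w).as("pct_rank"))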
/**
* Window function: returns the rank of rows within a window partition.
@@ -651,14 +667,19 @@ object functions extends LegacyFunctions {
def rank(): Column = withExpr { UnresolvedWindowFunction("rank", Nil) }
/**
+ * @group window_funcs
+ * @deprecated As of 1.6.0, replaced by `row_number`. This will be removed in Spark 2.0.
+ */
+ @deprecated("Use row_number. This will be removed in Spark 2.0.", "1.6.0")
+ def rowNumber(): Column = row_number()
+
+ /**
* Window function: returns a sequential number starting at 1 within a window partition.
*
- * This is equivalent to the ROW_NUMBER function in SQL.
- *
* @group window_funcs
- * @since 1.4.0
+ * @since 1.6.0
*/
- def rowNumber(): Column = withExpr { UnresolvedWindowFunction("row_number", Nil) }
+ def row_number(): Column = withExpr { UnresolvedWindowFunction("row_number", Nil) }
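A common use of row_number is top-N per group; a sketch under the same assumptions:

    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions._

    // Keep only the highest-paid row per department.
    val w = Window.partitionBy("dept").orderBy(col("salary").desc)
    df.withColumn("rn", row_number().over(w)).where(col("rn") === 1).drop("rn")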
//////////////////////////////////////////////////////////////////////////////////////////////
// Non-aggregate functions
@@ -721,19 +742,42 @@ object functions extends LegacyFunctions {
def coalesce(e: Column*): Column = withExpr { Coalesce(e.map(_.expr)) }
/**
+ * @group normal_funcs
+ * @deprecated As of 1.6.0, replaced by `input_file_name`. This will be removed in Spark 2.0.
+ */
+ @deprecated("Use input_file_name. This will be removed in Spark 2.0.", "1.6.0")
+ def inputFileName(): Column = input_file_name()
+
+ /**
* Creates a string column for the file name of the current Spark task.
*
* @group normal_funcs
+ * @since 1.6.0
*/
- def inputFileName(): Column = withExpr { InputFileName() }
+ def input_file_name(): Column = withExpr { InputFileName() }
+
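A sketch of input_file_name for provenance tracking (the path is hypothetical; assumes a sqlContext in scope):

    import org.apache.spark.sql.functions._

    // Tag each row with the file it was read from.
    val logs = sqlContext.read.json("/data/logs")
    logs.withColumn("source_file", input_file_name())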
+ /**
+ * @group normal_funcs
+ * @deprecated As of 1.6.0, replaced by `isnan`. This will be removed in Spark 2.0.
+ */
+ @deprecated("Use isnan. This will be removed in Spark 2.0.", "1.6.0")
+ def isNaN(e: Column): Column = isnan(e)
/**
* Return true iff the column is NaN.
*
* @group normal_funcs
- * @since 1.5.0
+ * @since 1.6.0
+ */
+ def isnan(e: Column): Column = withExpr { IsNaN(e.expr) }
+
+ /**
+ * Return true iff the column is null.
+ *
+ * @group normal_funcs
+ * @since 1.6.0
*/
- def isNaN(e: Column): Column = withExpr { IsNaN(e.expr) }
+ def isnull(e: Column): Column = withExpr { IsNull(e.expr) }
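A sketch combining the new isnan and isnull (the reading column is hypothetical):

    import org.apache.spark.sql.functions._

    // Keep only rows whose reading is a real, non-null number.
    df.where(!isnan(col("reading")) && !isnull(col("reading")))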
/**
* A column expression that generates monotonically increasing 64-bit integers.
@@ -750,7 +794,24 @@ object functions extends LegacyFunctions {
* @group normal_funcs
* @since 1.4.0
*/
- def monotonicallyIncreasingId(): Column = withExpr { MonotonicallyIncreasingID() }
+ def monotonicallyIncreasingId(): Column = monotonically_increasing_id()
+
+ /**
+ * A column expression that generates monotonically increasing 64-bit integers.
+ *
+ * The generated ID is guaranteed to be monotonically increasing and unique, but not consecutive.
+ * The current implementation puts the partition ID in the upper 31 bits, and the record number
+ * within each partition in the lower 33 bits. The assumption is that the data frame has
+ * less than 1 billion partitions, and each partition has less than 8 billion records.
+ *
+ * As an example, consider a [[DataFrame]] with two partitions, each with 3 records.
+ * This expression would return the following IDs:
+ * 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594.
+ *
+ * @group normal_funcs
+ * @since 1.6.0
+ */
+ def monotonically_increasing_id(): Column = withExpr { MonotonicallyIncreasingID() }
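A sketch showing the ID layout described above (hypothetical df):

    import org.apache.spark.sql.functions._

    // With two partitions of three rows each, ids come out as
    // 0, 1, 2 and then 8589934592 (1L << 33), 8589934593, 8589934594.
    df.withColumn("id", monotonically_increasing_id())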
/**
* Returns col1 if it is not NaN, or col2 if col1 is NaN.
@@ -826,14 +887,22 @@ object functions extends LegacyFunctions {
def randn(): Column = randn(Utils.random.nextLong)
/**
+ * @group normal_funcs
+ * @since 1.4.0
+ * @deprecated As of 1.6.0, replaced by `spark_partition_id`. This will be removed in Spark 2.0.
+ */
+ @deprecated("Use cume_dist. This will be removed in Spark 2.0.", "1.6.0")
+ def sparkPartitionId(): Column = spark_partition_id()
+
+ /**
* Partition ID of the Spark task.
*
* Note that this is non-deterministic because it depends on data partitioning and task scheduling.
*
* @group normal_funcs
- * @since 1.4.0
+ * @since 1.6.0
*/
- def sparkPartitionId(): Column = withExpr { SparkPartitionID() }
+ def spark_partition_id(): Column = withExpr { SparkPartitionID() }
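A sketch using spark_partition_id to inspect data distribution (hypothetical df; results vary with partitioning and scheduling):

    import org.apache.spark.sql.functions._

    // Row count per physical partition.
    df.groupBy(spark_partition_id().as("pid")).count()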
/**
* Computes the square root of the specified float value.
@@ -2306,6 +2375,17 @@ object functions extends LegacyFunctions {
def explode(e: Column): Column = withExpr { Explode(e.expr) }
/**
+ * Extracts a json object from a json string based on the specified json path, and returns the
+ * json string of the extracted json object. It will return null if the input json string is invalid.
+ *
+ * @group collection_funcs
+ * @since 1.6.0
+ */
+ def get_json_object(e: Column, path: String): Column = withExpr {
+ GetJsonObject(e.expr, lit(path).expr)
+ }
+
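A sketch of get_json_object with a JsonPath expression (the input path and field names are hypothetical; assumes a sqlContext):

    import org.apache.spark.sql.functions._

    // Pull a nested field out of a raw JSON string column.
    val events = sqlContext.read.text("/data/events").toDF("json")
    events.select(get_json_object(col("json"), "$.user.id").as("user_id"))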
+ /**
* Creates a new row for a json column according to the given field names.
*
* @group collection_funcs
@@ -2313,7 +2393,7 @@ object functions extends LegacyFunctions {
*/
@scala.annotation.varargs
def json_tuple(json: Column, fields: String*): Column = withExpr {
- require(fields.length > 0, "at least 1 field name should be given.")
+ require(fields.nonEmpty, "at least 1 field name should be given.")
JsonTuple(json.expr +: fields.map(Literal.apply))
}
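A sketch of json_tuple extracting several top-level fields in one pass (same hypothetical events DataFrame as above):

    import org.apache.spark.sql.functions._

    // Produces one output column per requested field (at least one is required).
    events.select(json_tuple(col("json"), "user", "ts"))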