aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorhyukjinkwon <gurwls223@gmail.com>2016-11-02 20:56:30 -0700
committergatorsmile <gatorsmile@gmail.com>2016-11-02 20:56:30 -0700
commit7eb2ca8e338e04034a662920261e028f56b07395 (patch)
tree60ba82749182efb7bc86408985dba150bf4e1b99 /sql/hive
parent3a1bc6f4780f8384c1211b1335e7394a4a28377e (diff)
downloadspark-7eb2ca8e338e04034a662920261e028f56b07395.tar.gz
spark-7eb2ca8e338e04034a662920261e028f56b07395.tar.bz2
spark-7eb2ca8e338e04034a662920261e028f56b07395.zip
[SPARK-17963][SQL][DOCUMENTATION] Add examples (extend) in each expression and improve documentation
## What changes were proposed in this pull request? This PR proposes to change the documentation for functions. Please refer the discussion from https://github.com/apache/spark/pull/15513 The changes include - Re-indent the documentation - Add examples/arguments in `extended` where the arguments are multiple or specific format (e.g. xml/ json). For examples, the documentation was updated as below: ### Functions with single line usage **Before** - `pow` ``` sql Usage: pow(x1, x2) - Raise x1 to the power of x2. Extended Usage: > SELECT pow(2, 3); 8.0 ``` - `current_timestamp` ``` sql Usage: current_timestamp() - Returns the current timestamp at the start of query evaluation. Extended Usage: No example for current_timestamp. ``` **After** - `pow` ``` sql Usage: pow(expr1, expr2) - Raises `expr1` to the power of `expr2`. Extended Usage: Examples: > SELECT pow(2, 3); 8.0 ``` - `current_timestamp` ``` sql Usage: current_timestamp() - Returns the current timestamp at the start of query evaluation. Extended Usage: No example/argument for current_timestamp. ``` ### Functions with (already) multiple line usage **Before** - `approx_count_distinct` ``` sql Usage: approx_count_distinct(expr) - Returns the estimated cardinality by HyperLogLog++. approx_count_distinct(expr, relativeSD=0.05) - Returns the estimated cardinality by HyperLogLog++ with relativeSD, the maximum estimation error allowed. Extended Usage: No example for approx_count_distinct. ``` - `percentile_approx` ``` sql Usage: percentile_approx(col, percentage [, accuracy]) - Returns the approximate percentile value of numeric column `col` at the given percentage. The value of percentage must be between 0.0 and 1.0. The `accuracy` parameter (default: 10000) is a positive integer literal which controls approximation accuracy at the cost of memory. Higher value of `accuracy` yields better accuracy, `1.0/accuracy` is the relative error of the approximation. percentile_approx(col, array(percentage1 [, percentage2]...) 
[, accuracy]) - Returns the approximate percentile array of column `col` at the given percentage array. Each value of the percentage array must be between 0.0 and 1.0. The `accuracy` parameter (default: 10000) is a positive integer literal which controls approximation accuracy at the cost of memory. Higher value of `accuracy` yields better accuracy, `1.0/accuracy` is the relative error of the approximation. Extended Usage: No example for percentile_approx. ``` **After** - `approx_count_distinct` ``` sql Usage: approx_count_distinct(expr[, relativeSD]) - Returns the estimated cardinality by HyperLogLog++. `relativeSD` defines the maximum estimation error allowed. Extended Usage: No example/argument for approx_count_distinct. ``` - `percentile_approx` ``` sql Usage: percentile_approx(col, percentage [, accuracy]) - Returns the approximate percentile value of numeric column `col` at the given percentage. The value of percentage must be between 0.0 and 1.0. The `accuracy` parameter (default: 10000) is a positive numeric literal which controls approximation accuracy at the cost of memory. Higher value of `accuracy` yields better accuracy, `1.0/accuracy` is the relative error of the approximation. When `percentage` is an array, each value of the percentage array must be between 0.0 and 1.0. In this case, returns the approximate percentile array of column `col` at the given percentage array. Extended Usage: Examples: > SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100); [10.0,10.0,10.0] > SELECT percentile_approx(10.0, 0.5, 100); 10.0 ``` ## How was this patch tested? Manually tested **When examples are multiple** ``` sql spark-sql> describe function extended reflect; Function: reflect Class: org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection Usage: reflect(class, method[, arg1[, arg2 ..]]) - Calls a method with reflection. 
Extended Usage: Examples: > SELECT reflect('java.util.UUID', 'randomUUID'); c33fb387-8500-4bfa-81d2-6e0e3e930df2 > SELECT reflect('java.util.UUID', 'fromString', 'a5cf6c42-0c85-418f-af6c-3e4e5b1328f2'); a5cf6c42-0c85-418f-af6c-3e4e5b1328f2 ``` **When `Usage` is in single line** ``` sql spark-sql> describe function extended min; Function: min Class: org.apache.spark.sql.catalyst.expressions.aggregate.Min Usage: min(expr) - Returns the minimum value of `expr`. Extended Usage: No example/argument for min. ``` **When `Usage` is already in multiple lines** ``` sql spark-sql> describe function extended percentile_approx; Function: percentile_approx Class: org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile Usage: percentile_approx(col, percentage [, accuracy]) - Returns the approximate percentile value of numeric column `col` at the given percentage. The value of percentage must be between 0.0 and 1.0. The `accuracy` parameter (default: 10000) is a positive numeric literal which controls approximation accuracy at the cost of memory. Higher value of `accuracy` yields better accuracy, `1.0/accuracy` is the relative error of the approximation. When `percentage` is an array, each value of the percentage array must be between 0.0 and 1.0. In this case, returns the approximate percentile array of column `col` at the given percentage array. Extended Usage: Examples: > SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100); [10.0,10.0,10.0] > SELECT percentile_approx(10.0, 0.5, 100); 10.0 ``` **When example/argument is missing** ``` sql spark-sql> describe function extended rank; Function: rank Class: org.apache.spark.sql.catalyst.expressions.Rank Usage: rank() - Computes the rank of a value in a group of values. The result is one plus the number of rows preceding or equal to the current row in the ordering of the partition. The values will produce gaps in the sequence. Extended Usage: No example/argument for rank. 
``` Author: hyukjinkwon <gurwls223@gmail.com> Closes #15677 from HyukjinKwon/SPARK-17963-1.
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala24
1 file changed, 14 insertions, 10 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 3a597d6afb..ad70835d06 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -271,15 +271,16 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
checkKeywordsExist(sql("describe function extended upper"),
"Function: upper",
"Class: org.apache.spark.sql.catalyst.expressions.Upper",
- "Usage: upper(str) - Returns str with all characters changed to uppercase",
+ "Usage: upper(str) - Returns `str` with all characters changed to uppercase",
"Extended Usage:",
- "> SELECT upper('SparkSql')",
- "'SPARKSQL'")
+ "Examples:",
+ "> SELECT upper('SparkSql');",
+ "SPARKSQL")
checkKeywordsExist(sql("describe functioN Upper"),
"Function: upper",
"Class: org.apache.spark.sql.catalyst.expressions.Upper",
- "Usage: upper(str) - Returns str with all characters changed to uppercase")
+ "Usage: upper(str) - Returns `str` with all characters changed to uppercase")
checkKeywordsNotExist(sql("describe functioN Upper"),
"Extended Usage")
@@ -290,25 +291,28 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
checkKeywordsExist(sql("describe functioN `~`"),
"Function: ~",
"Class: org.apache.spark.sql.catalyst.expressions.BitwiseNot",
- "Usage: ~ b - Bitwise NOT.")
+ "Usage: ~ expr - Returns the result of bitwise NOT of `expr`.")
// Hard coded describe functions
checkKeywordsExist(sql("describe function `<>`"),
"Function: <>",
- "Usage: a <> b - Returns TRUE if a is not equal to b")
+ "Usage: expr1 <> expr2 - Returns true if `expr1` is not equal to `expr2`")
checkKeywordsExist(sql("describe function `!=`"),
"Function: !=",
- "Usage: a != b - Returns TRUE if a is not equal to b")
+ "Usage: expr1 != expr2 - Returns true if `expr1` is not equal to `expr2`")
checkKeywordsExist(sql("describe function `between`"),
"Function: between",
- "Usage: a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c")
+ "Usage: expr1 [NOT] BETWEEN expr2 AND expr3 - " +
+ "evaluate if `expr1` is [not] in between `expr2` and `expr3`")
checkKeywordsExist(sql("describe function `case`"),
"Function: case",
- "Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - " +
- "When a = b, returns c; when a = d, return e; else return f")
+ "Usage: CASE expr1 WHEN expr2 THEN expr3 " +
+ "[WHEN expr4 THEN expr5]* [ELSE expr6] END - " +
+ "When `expr1` = `expr2`, returns `expr3`; " +
+ "when `expr1` = `expr4`, return `expr5`; else return `expr6`")
}
test("describe functions - user defined functions") {