aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorYanbo Liang <ybliang8@gmail.com>2015-12-07 23:50:57 -0800
committerXiangrui Meng <meng@databricks.com>2015-12-07 23:50:57 -0800
commit4a39b5a1bee28cec792d509654f6236390cafdcb (patch)
tree1637657b13ee5294d74abf8f3f2f4c3f5bf9ba86 /mllib
parent7d05a624510f7299b3dd07f87c203db1ff7caa3e (diff)
downloadspark-4a39b5a1bee28cec792d509654f6236390cafdcb.tar.gz
spark-4a39b5a1bee28cec792d509654f6236390cafdcb.tar.bz2
spark-4a39b5a1bee28cec792d509654f6236390cafdcb.zip
[SPARK-11958][SPARK-11957][ML][DOC] SQLTransformer user guide and example code
Add ```SQLTransformer``` user guide, example code and make Scala API doc more clear. Author: Yanbo Liang <ybliang8@gmail.com> Closes #10006 from yanboliang/spark-11958.
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala11
1 files changed, 9 insertions, 2 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
index 3a735017ba..c09f4d076c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
@@ -27,9 +27,16 @@ import org.apache.spark.sql.types.StructType
/**
* :: Experimental ::
- * Implements the transforms which are defined by SQL statement.
- * Currently we only support SQL syntax like 'SELECT ... FROM __THIS__'
+ * Implements the transformations which are defined by a SQL statement.
+ * Currently we only support SQL syntax like 'SELECT ... FROM __THIS__ ...'
* where '__THIS__' represents the underlying table of the input dataset.
+ * The select clause specifies the fields, constants, and expressions to display in
+ * the output; it can be any select clause that Spark SQL supports. Users can also
+ * use Spark SQL built-in functions and UDFs to operate on these selected columns.
+ * For example, [[SQLTransformer]] supports statements like:
+ * - SELECT a, a + b AS a_b FROM __THIS__
+ * - SELECT a, SQRT(b) AS b_sqrt FROM __THIS__ WHERE a > 5
+ * - SELECT a, b, SUM(c) AS c_sum FROM __THIS__ GROUP BY a, b
*/
@Experimental
@Since("1.6.0")