aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala 11
1 files changed, 9 insertions, 2 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
index 3a735017ba..c09f4d076c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
@@ -27,9 +27,16 @@ import org.apache.spark.sql.types.StructType
/**
* :: Experimental ::
- * Implements the transforms which are defined by SQL statement.
- * Currently we only support SQL syntax like 'SELECT ... FROM __THIS__'
+ * Implements the transformations which are defined by a SQL statement.
+ * Currently we only support SQL syntax like 'SELECT ... FROM __THIS__ ...'
* where '__THIS__' represents the underlying table of the input dataset.
+ * The select clause specifies the fields, constants, and expressions to display in
+ * the output; it can be any select clause that Spark SQL supports. Users can also
+ * use Spark SQL built-in functions and UDFs to operate on these selected columns.
+ * For example, [[SQLTransformer]] supports statements like:
+ * - SELECT a, a + b AS a_b FROM __THIS__
+ * - SELECT a, SQRT(b) AS b_sqrt FROM __THIS__ WHERE a > 5
+ * - SELECT a, b, SUM(c) AS c_sum FROM __THIS__ GROUP BY a, b
*/
@Experimental
@Since("1.6.0")