aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2015-12-10 22:23:10 +0800
committerReynold Xin <rxin@databricks.com>2015-12-10 22:23:10 +0800
commit76540b6df5370b463277d3498097b2cc2d2e97a8 (patch)
tree93a195e733f17a6e30a22dacc80858a9f413316a /sql
parent21b3d2a75f679b252e293000d706741dca33624a (diff)
downloadspark-76540b6df5370b463277d3498097b2cc2d2e97a8.tar.gz
spark-76540b6df5370b463277d3498097b2cc2d2e97a8.tar.bz2
spark-76540b6df5370b463277d3498097b2cc2d2e97a8.zip
[SPARK-12242][SQL] Add DataFrame.transform method
Author: Reynold Xin <rxin@databricks.com> Closes #10226 from rxin/df-transform.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/Column.scala2
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala13
2 files changed, 14 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index d641fcac1c..297ef2299c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -84,7 +84,7 @@ class TypedColumn[-T, U](
* col("`a.column.with.dots`") // Escape `.` in column names.
* $"columnName" // Scala short hand for a named column.
* expr("a + 1") // A column that is constructed from a parsed SQL Expression.
- * lit("1") // A column that produces a literal (constant) value.
+ * lit("abc") // A column that produces a literal (constant) value.
* }}}
*
* [[Column]] objects can be composed to form complex expressions:
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 243a8c853f..da180a2ba0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -1422,6 +1422,19 @@ class DataFrame private[sql](
def first(): Row = head()
/**
+ * Applies the function `t` to this [[DataFrame]] and returns whatever `t` produces, which
+ * lets custom transformations be chained fluently. The type parameter `U` is not used.
+ * {{{
+ * def featurize(ds: DataFrame) = ...
+ *
+ * df
+ * .transform(featurize)
+ * .transform(...)
+ * }}}
+ * @since 1.6.0
+ */
+ def transform[U](t: DataFrame => DataFrame): DataFrame = t.apply(this)
+
+ /**
* Returns a new RDD by applying a function to all rows of this DataFrame.
* @group rdd
* @since 1.3.0