aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author Sameer Agarwal <sameer@databricks.com> 2016-05-25 19:10:57 -0700
committer Reynold Xin <rxin@databricks.com> 2016-05-25 19:10:57 -0700
commit 06ed1fa3e45adfc11b0f615cb8b97c99fadc735f (patch)
tree 1c307c0b71ebcdbd8922832c6800d19b864ad26b /sql
parent 527499b624e743583fe0f93ea0b487031891ac3a (diff)
download spark-06ed1fa3e45adfc11b0f615cb8b97c99fadc735f.tar.gz
spark-06ed1fa3e45adfc11b0f615cb8b97c99fadc735f.tar.bz2
spark-06ed1fa3e45adfc11b0f615cb8b97c99fadc735f.zip
[SPARK-15533][SQL] Deprecate Dataset.explode
## What changes were proposed in this pull request?

This patch deprecates `Dataset.explode` and documents appropriate workarounds to use `flatMap()` or `functions.explode()` instead.

## How was this patch tested?

N/A

Author: Sameer Agarwal <sameer@databricks.com>

Closes #13312 from sameeragarwal/deprecate.
Diffstat (limited to 'sql')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala 33
1 file changed, 22 insertions, 11 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 78a167eef2..e5140fcf13 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1556,30 +1556,33 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* (Scala-specific) Returns a new [[Dataset]] where each row has been expanded to zero or more
* rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. The columns of
* the input row are implicitly joined with each row that is output by the function.
*
- * The following example uses this function to count the number of books which contain
- * a given word:
+ * This method is deprecated. As an alternative, you can explode columns using either
+ * `functions.explode()` or `flatMap()`. The following example uses `functions.explode()` to
+ * count the number of books that contain a given word:
*
* {{{
* case class Book(title: String, words: String)
* val ds: Dataset[Book]
*
- * case class Word(word: String)
- * val allWords = ds.explode('words) {
- * case Row(words: String) => words.split(" ").map(Word(_))
- * }
+ * val allWords = ds.select('title, explode(split('words, " ")).as("word"))
*
* val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title"))
* }}}
*
+ * With `flatMap()`, the words can similarly be exploded as:
+ *
+ * {{{
+ * ds.flatMap(_.words.split(" "))
+ * }}}
+ *
* @group untypedrel
* @since 2.0.0
*/
- @Experimental
+ @deprecated("use flatMap() or select() with functions.explode() instead", "2.0.0")
def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = {
val elementSchema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType]
@@ -1596,19 +1599,27 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* (Scala-specific) Returns a new [[Dataset]] where a single column has been expanded to zero
* or more rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. All
* columns of the input row are implicitly joined with each value that is output by the function.
*
+ * This method is deprecated. As an alternative, you can explode columns using either
+ * `functions.explode()`:
+ *
+ * {{{
+ * ds.select(explode(split('words, " ")).as("word"))
+ * }}}
+ *
+ * or `flatMap()`:
+ *
* {{{
- * ds.explode("words", "word") {words: String => words.split(" ")}
+ * ds.flatMap(_.words.split(" "))
* }}}
*
* @group untypedrel
* @since 2.0.0
*/
- @Experimental
+ @deprecated("use flatMap() or select() with functions.explode() instead", "2.0.0")
def explode[A, B : TypeTag](inputColumn: String, outputColumn: String)(f: A => TraversableOnce[B])
: DataFrame = {
val dataType = ScalaReflection.schemaFor[B].dataType