[SPARK-15533][SQL] Deprecate Dataset.explode

## What changes were proposed in this pull request?

This patch deprecates `Dataset.explode` and documents `flatMap()` and `functions.explode()` as the appropriate alternatives to use instead.

## How was this patch tested?

N/A

Author: Sameer Agarwal <sameer@databricks.com>

Closes #13312 from sameeragarwal/deprecate.
author: Sameer Agarwal <sameer@databricks.com> 2016-05-25 19:10:57 -0700
committer: Reynold Xin <rxin@databricks.com> 2016-05-25 19:10:57 -0700
commit: 06ed1fa3e45adfc11b0f615cb8b97c99fadc735f (patch)
tree: 1c307c0b71ebcdbd8922832c6800d19b864ad26b /sql
parent: 527499b624e743583fe0f93ea0b487031891ac3a (diff)
download: spark-06ed1fa3e45adfc11b0f615cb8b97c99fadc735f.tar.gz
spark-06ed1fa3e45adfc11b0f615cb8b97c99fadc735f.tar.bz2
spark-06ed1fa3e45adfc11b0f615cb8b97c99fadc735f.zip
1 files changed, 22 insertions, 11 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 78a167eef2..e5140fcf13 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1556,30 +1556,33 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * :: Experimental ::
    * (Scala-specific) Returns a new [[Dataset]] where each row has been expanded to zero or more
    * rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. The columns of
    * the input row are implicitly joined with each row that is output by the function.
    *
-   * The following example uses this function to count the number of books which contain
-   * a given word:
+   * This method is deprecated. As an alternative, you can explode columns using either
+   * `functions.explode()` or `flatMap()`. The following example uses `functions.explode()` to
+   * count the number of books that contain a given word:
    *
    * {{{
    *   case class Book(title: String, words: String)
    *   val ds: Dataset[Book]
    *
-   *   case class Word(word: String)
-   *   val allWords = ds.explode('words) {
-   *     case Row(words: String) => words.split(" ").map(Word(_))
-   *   }
+   *   val allWords = ds.select('title, explode(split('words, " ")).as("word"))
    *
    *   val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title"))
    * }}}
    *
+   * Using `flatMap()`, this can similarly be exploded as:
+   *
+   * {{{
+   *   ds.flatMap(_.words.split(" "))
+   * }}}
+   *
    * @group untypedrel
    * @since 2.0.0
    */
-  @Experimental
+  @deprecated("use flatMap() or select() with functions.explode() instead", "2.0.0")
   def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = {
     val elementSchema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType]
 
@@ -1596,19 +1599,27 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * :: Experimental ::
    * (Scala-specific) Returns a new [[Dataset]] where a single column has been expanded to zero
    * or more rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. All
    * columns of the input row are implicitly joined with each value that is output by the function.
    *
+   * This method is deprecated. As an alternative, you can explode columns using either
+   * `functions.explode()`:
+   *
+   * {{{
+   *   ds.select(explode(split('words, " ")).as("word"))
+   * }}}
+   *
+   * or `flatMap()`:
+   *
    * {{{
-   *   ds.explode("words", "word") {words: String => words.split(" ")}
+   *   ds.flatMap(_.words.split(" "))
    * }}}
    *
    * @group untypedrel
    * @since 2.0.0
    */
-  @Experimental
+  @deprecated("use flatMap() or select() with functions.explode() instead", "2.0.0")
   def explode[A, B : TypeTag](inputColumn: String, outputColumn: String)(f: A => TraversableOnce[B])
     : DataFrame = {
     val dataType = ScalaReflection.schemaFor[B].dataType
author	Sameer Agarwal <sameer@databricks.com>	2016-05-25 19:10:57 -0700
committer	Reynold Xin <rxin@databricks.com>	2016-05-25 19:10:57 -0700
commit	06ed1fa3e45adfc11b0f615cb8b97c99fadc735f (patch)
tree	1c307c0b71ebcdbd8922832c6800d19b864ad26b /sql
parent	527499b624e743583fe0f93ea0b487031891ac3a (diff)
download	spark-06ed1fa3e45adfc11b0f615cb8b97c99fadc735f.tar.gz spark-06ed1fa3e45adfc11b0f615cb8b97c99fadc735f.tar.bz2 spark-06ed1fa3e45adfc11b0f615cb8b97c99fadc735f.zip