aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src
diff options
context:
space:
mode:
authorBurak Yavuz <brkyvz@gmail.com>2015-04-28 22:48:04 -0700
committerReynold Xin <rxin@databricks.com>2015-04-28 22:48:04 -0700
commit271c4c621d91d3f610ae89e5d2e5dab1a2009ca6 (patch)
tree47b00efa3b4d3daf1710e0495fc494bdb31661c1 /sql/catalyst/src
parent5ef006fc4d010905e02cb905c9115b95ba55282b (diff)
downloadspark-271c4c621d91d3f610ae89e5d2e5dab1a2009ca6.tar.gz
spark-271c4c621d91d3f610ae89e5d2e5dab1a2009ca6.tar.bz2
spark-271c4c621d91d3f610ae89e5d2e5dab1a2009ca6.zip
[SPARK-7215] made coalesce and repartition a part of the query plan
Coalesce and repartition now show up as part of the query plan, rather than resulting in a new `DataFrame`. cc rxin Author: Burak Yavuz <brkyvz@gmail.com> Closes #5762 from brkyvz/df-repartition and squashes the following commits: b1e76dd [Burak Yavuz] added documentation on repartitions 5807e35 [Burak Yavuz] renamed coalescepartitions fa4509f [Burak Yavuz] rename coalesce 2c349b5 [Burak Yavuz] address comments f2e6af1 [Burak Yavuz] add ticks 686c90b [Burak Yavuz] made coalesce and repartition a part of the query plan
Diffstat (limited to 'sql/catalyst/src')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala11
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/partitioning.scala8
2 files changed, 18 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index bbc94a7ab3..608e272da7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -311,6 +311,17 @@ case class Distinct(child: LogicalPlan) extends UnaryNode {
}
/**
+ * Return a new RDD that has exactly `numPartitions` partitions. Differs from
+ * [[RepartitionByExpression]] as this method is called directly by DataFrame's, because the user
+ * asked for `coalesce` or `repartition`. [[RepartitionByExpression]] is used when the consumer
+ * of the output requires some specific ordering or distribution of the data.
+ */
+case class Repartition(numPartitions: Int, shuffle: Boolean, child: LogicalPlan)
+ extends UnaryNode {
+ override def output: Seq[Attribute] = child.output
+}
+
+/**
* A relation with one row. This is used in "SELECT ..." without a from clause.
*/
case object OneRowRelation extends LeafNode {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/partitioning.scala
index e737418d9c..63df2c1ee7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/partitioning.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/partitioning.scala
@@ -32,5 +32,11 @@ abstract class RedistributeData extends UnaryNode {
case class SortPartitions(sortExpressions: Seq[SortOrder], child: LogicalPlan)
extends RedistributeData
-case class Repartition(partitionExpressions: Seq[Expression], child: LogicalPlan)
+/**
+ * This method repartitions data using [[Expression]]s, and receives information about the
+ * number of partitions during execution. Used when a specific ordering or distribution is
+ * expected by the consumer of the query result. Use [[Repartition]] for RDD-like
+ * `coalesce` and `repartition`.
+ */
+case class RepartitionByExpression(partitionExpressions: Seq[Expression], child: LogicalPlan)
extends RedistributeData