author    Reynold Xin <rxin@databricks.com>    2015-08-07 19:09:28 -0700
committer Reynold Xin <rxin@databricks.com>    2015-08-07 19:09:28 -0700
commit    998f4ff94df1d9db1c9e32c04091017c25cd4e81 (patch)
tree      f88d36691e51b97f66a227cbc460cdb0dc315ef3 /sql
parent    85be65b39ce669f937a898195a844844d757666b (diff)
[SPARK-9754][SQL] Remove TypeCheck in debug package.
TypeCheck no longer applies in the new "Tungsten" world.

Author: Reynold Xin <rxin@databricks.com>

Closes #8043 from rxin/SPARK-9754 and squashes the following commits:

4ec471e [Reynold Xin] [SPARK-9754][SQL] Remove TypeCheck in debug package.
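For context, a minimal usage sketch of the debug package after this patch; the sqlContext value and the range query are illustrative scaffolding, not part of the diff:

    import org.apache.spark.sql.execution.debug._   // brings the DebugQuery implicit into scope

    // Hypothetical query; any DataFrame works.
    val df = sqlContext.range(0, 100).toDF("id")

    df.debug()        // still supported: instruments each operator and logs tuple counts
    // df.typeCheck() // removed by this patch; no longer compiles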
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala   104
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala   4
2 files changed, 4 insertions(+), 104 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
index dd3858ea2b..74892e4e13 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
@@ -17,21 +17,16 @@
package org.apache.spark.sql.execution
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.unsafe.types.UTF8String
-
import scala.collection.mutable.HashSet
-import org.apache.spark.{AccumulatorParam, Accumulator, Logging}
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.trees.TreeNodeRef
-import org.apache.spark.sql.types._
+import org.apache.spark.{Accumulator, AccumulatorParam, Logging}
/**
- * :: DeveloperApi ::
* Contains methods for debugging query execution.
*
* Usage:
@@ -53,10 +48,8 @@ package object debug {
}
/**
- * :: DeveloperApi ::
* Augments [[DataFrame]]s with debug methods.
*/
- @DeveloperApi
implicit class DebugQuery(query: DataFrame) extends Logging {
def debug(): Unit = {
val plan = query.queryExecution.executedPlan
@@ -72,23 +65,6 @@ package object debug {
case _ =>
}
}
-
- def typeCheck(): Unit = {
- val plan = query.queryExecution.executedPlan
- val visited = new collection.mutable.HashSet[TreeNodeRef]()
- val debugPlan = plan transform {
- case s: SparkPlan if !visited.contains(new TreeNodeRef(s)) =>
- visited += new TreeNodeRef(s)
- TypeCheck(s)
- }
- try {
- logDebug(s"Results returned: ${debugPlan.execute().count()}")
- } catch {
- case e: Exception =>
- def unwrap(e: Throwable): Throwable = if (e.getCause == null) e else unwrap(e.getCause)
- logDebug(s"Deepest Error: ${unwrap(e)}")
- }
- }
}
private[sql] case class DebugNode(child: SparkPlan) extends UnaryNode {
@@ -148,76 +124,4 @@ package object debug {
}
}
}
-
- /**
- * Helper functions for checking that runtime types match a given schema.
- */
- private[sql] object TypeCheck {
- def typeCheck(data: Any, schema: DataType): Unit = (data, schema) match {
- case (null, _) =>
-
- case (row: InternalRow, s: StructType) =>
- row.toSeq(s).zip(s.map(_.dataType)).foreach { case(d, t) => typeCheck(d, t) }
- case (a: ArrayData, ArrayType(elemType, _)) =>
- a.foreach(elemType, (_, e) => {
- typeCheck(e, elemType)
- })
- case (m: MapData, MapType(keyType, valueType, _)) =>
- m.keyArray().foreach(keyType, (_, e) => {
- typeCheck(e, keyType)
- })
- m.valueArray().foreach(valueType, (_, e) => {
- typeCheck(e, valueType)
- })
-
- case (_: Long, LongType) =>
- case (_: Int, IntegerType) =>
- case (_: UTF8String, StringType) =>
- case (_: Float, FloatType) =>
- case (_: Byte, ByteType) =>
- case (_: Short, ShortType) =>
- case (_: Boolean, BooleanType) =>
- case (_: Double, DoubleType) =>
- case (_: Int, DateType) =>
- case (_: Long, TimestampType) =>
- case (v, udt: UserDefinedType[_]) => typeCheck(v, udt.sqlType)
-
- case (d, t) => sys.error(s"Invalid data found: got $d (${d.getClass}) expected $t")
- }
- }
-
- /**
- * Augments [[DataFrame]]s with debug methods.
- */
- private[sql] case class TypeCheck(child: SparkPlan) extends SparkPlan {
- import TypeCheck._
-
- override def nodeName: String = ""
-
- /* Only required when defining this class in a REPL.
- override def makeCopy(args: Array[Object]): this.type =
- TypeCheck(args(0).asInstanceOf[SparkPlan]).asInstanceOf[this.type]
- */
-
- def output: Seq[Attribute] = child.output
-
- def children: List[SparkPlan] = child :: Nil
-
- protected override def doExecute(): RDD[InternalRow] = {
- child.execute().map { row =>
- try typeCheck(row, child.schema) catch {
- case e: Exception =>
- sys.error(
- s"""
- |ERROR WHEN TYPE CHECKING QUERY
- |==============================
- |$e
- |======== BAD TREE ============
- |$child
- """.stripMargin)
- }
- row
- }
- }
- }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
index 8ec3985e00..239deb7973 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
@@ -25,8 +25,4 @@ class DebuggingSuite extends SparkFunSuite {
test("DataFrame.debug()") {
testData.debug()
}
-
- test("DataFrame.typeCheck()") {
- testData.typeCheck()
- }
}
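Why the removed check is obsolete under Tungsten: values are read through schema-driven, typed getters on InternalRow, so a row can no longer silently carry a boxed value whose runtime class disagrees with the declared DataType, which is exactly the mismatch typeCheck guarded against. A hedged sketch of that access pattern (the getters are the InternalRow API of this era; the readColumn wrapper itself is hypothetical):

    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.types._

    // The accessor is chosen from the schema's DataType, not from the runtime
    // class of a boxed value, so every read is typed by construction.
    def readColumn(row: InternalRow, schema: StructType, ordinal: Int): Any =
      if (row.isNullAt(ordinal)) null
      else schema.fields(ordinal).dataType match {
        case LongType    => row.getLong(ordinal)
        case IntegerType => row.getInt(ordinal)
        case StringType  => row.getUTF8String(ordinal)
        case DoubleType  => row.getDouble(ordinal)
        case other       => row.get(ordinal, other) // generic, schema-directed fallback
      }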