aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorDilip Biswal <dbiswal@us.ibm.com>2015-12-21 12:46:06 -0800
committerMichael Armbrust <michael@databricks.com>2015-12-21 12:46:06 -0800
commit474eb21a30f7ee898f76a625a5470c8245af1d22 (patch)
tree0dbc33fd1658edf2939731630f9781af8d628bcb /sql
parent1920d72a1f7b9844323d06e8094818347f413df6 (diff)
downloadspark-474eb21a30f7ee898f76a625a5470c8245af1d22.tar.gz
spark-474eb21a30f7ee898f76a625a5470c8245af1d22.tar.bz2
spark-474eb21a30f7ee898f76a625a5470c8245af1d22.zip
[SPARK-12398] Smart truncation of DataFrame / Dataset toString
When a DataFrame or Dataset has a long schema, we should intelligently truncate to avoid flooding the screen with unreadable information. // Standard output [a: int, b: int] // Truncate many top level fields [a: int, b: string ... 10 more fields] // Truncate long inner structs [a: struct<a: int ... 10 more fields>] Author: Dilip Biswal <dbiswal@us.ibm.com> Closes #10373 from dilipbiswal/spark-12398.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala17
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala15
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala39
4 files changed, 73 insertions, 1 deletion
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
index 4b54c31dcc..b0c43c4100 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -66,6 +66,9 @@ abstract class DataType extends AbstractDataType {
/** Readable string representation for the type. */
def simpleString: String = typeName
+ /** Readable string representation for the type with truncation */
+ private[sql] def simpleString(maxNumberFields: Int): String = simpleString
+
/**
* Check if `this` and `other` are the same data type when ignoring nullability
* (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index 9778df271d..d568022765 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -278,6 +278,23 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
s"struct<${fieldTypes.mkString(",")}>"
}
+ private[sql] override def simpleString(maxNumberFields: Int): String = {
+ val builder = new StringBuilder
+ val fieldTypes = fields.take(maxNumberFields).map {
+ case f => s"${f.name}: ${f.dataType.simpleString(maxNumberFields)}"
+ }
+ builder.append("struct<")
+ builder.append(fieldTypes.mkString(", "))
+ if (fields.length > 2) {
+ if (fields.length - fieldTypes.size == 1) {
+ builder.append(" ... 1 more field")
+ } else {
+ builder.append(" ... " + (fields.length - 2) + " more fields")
+ }
+ }
+ builder.append(">").toString()
+ }
+
/**
* Merges with another schema (`StructType`). For a struct field A from `this` and a struct field
* B from `that`,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala
index b397d42612..3f391fd9a9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala
@@ -31,7 +31,20 @@ private[sql] trait Queryable {
override def toString: String = {
try {
- schema.map(f => s"${f.name}: ${f.dataType.simpleString}").mkString("[", ", ", "]")
+ val builder = new StringBuilder
+ val fields = schema.take(2).map {
+ case f => s"${f.name}: ${f.dataType.simpleString(2)}"
+ }
+ builder.append("[")
+ builder.append(fields.mkString(", "))
+ if (schema.length > 2) {
+ if (schema.length - fields.size == 1) {
+ builder.append(" ... 1 more field")
+ } else {
+ builder.append(" ... " + (schema.length - 2) + " more fields")
+ }
+ }
+ builder.append("]").toString()
} catch {
case NonFatal(e) =>
s"Invalid tree; ${e.getMessage}:\n$queryExecution"
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 4c3e12af72..1a0f1b61cb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -1177,4 +1177,43 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
val primitiveUDF = udf((i: Int) => i * 2)
checkAnswer(df.select(primitiveUDF($"age")), Row(44) :: Row(null) :: Nil)
}
+
+ test("SPARK-12398 truncated toString") {
+ val df1 = Seq((1L, "row1")).toDF("id", "name")
+ assert(df1.toString() === "[id: bigint, name: string]")
+
+ val df2 = Seq((1L, "c2", false)).toDF("c1", "c2", "c3")
+ assert(df2.toString === "[c1: bigint, c2: string ... 1 more field]")
+
+ val df3 = Seq((1L, "c2", false, 10)).toDF("c1", "c2", "c3", "c4")
+ assert(df3.toString === "[c1: bigint, c2: string ... 2 more fields]")
+
+ val df4 = Seq((1L, Tuple2(1L, "val"))).toDF("c1", "c2")
+ assert(df4.toString === "[c1: bigint, c2: struct<_1: bigint, _2: string>]")
+
+ val df5 = Seq((1L, Tuple2(1L, "val"), 20.0)).toDF("c1", "c2", "c3")
+ assert(df5.toString === "[c1: bigint, c2: struct<_1: bigint, _2: string> ... 1 more field]")
+
+ val df6 = Seq((1L, Tuple2(1L, "val"), 20.0, 1)).toDF("c1", "c2", "c3", "c4")
+ assert(df6.toString === "[c1: bigint, c2: struct<_1: bigint, _2: string> ... 2 more fields]")
+
+ val df7 = Seq((1L, Tuple3(1L, "val", 2), 20.0, 1)).toDF("c1", "c2", "c3", "c4")
+ assert(
+ df7.toString ===
+ "[c1: bigint, c2: struct<_1: bigint, _2: string ... 1 more field> ... 2 more fields]")
+
+ val df8 = Seq((1L, Tuple7(1L, "val", 2, 3, 4, 5, 6), 20.0, 1)).toDF("c1", "c2", "c3", "c4")
+ assert(
+ df8.toString ===
+ "[c1: bigint, c2: struct<_1: bigint, _2: string ... 5 more fields> ... 2 more fields]")
+
+ val df9 =
+ Seq((1L, Tuple4(1L, Tuple4(1L, 2L, 3L, 4L), 2L, 3L), 20.0, 1)).toDF("c1", "c2", "c3", "c4")
+ assert(
+ df9.toString ===
+ "[c1: bigint, c2: struct<_1: bigint," +
+ " _2: struct<_1: bigint," +
+ " _2: bigint ... 2 more fields> ... 2 more fields> ... 2 more fields]")
+
+ }
}