diff options
author | Dilip Biswal <dbiswal@us.ibm.com> | 2015-12-21 12:46:06 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-12-21 12:46:06 -0800 |
commit | 474eb21a30f7ee898f76a625a5470c8245af1d22 (patch) | |
tree | 0dbc33fd1658edf2939731630f9781af8d628bcb /sql/catalyst | |
parent | 1920d72a1f7b9844323d06e8094818347f413df6 (diff) | |
download | spark-474eb21a30f7ee898f76a625a5470c8245af1d22.tar.gz spark-474eb21a30f7ee898f76a625a5470c8245af1d22.tar.bz2 spark-474eb21a30f7ee898f76a625a5470c8245af1d22.zip |
[SPARK-12398] Smart truncation of DataFrame / Dataset toString
When a DataFrame or Dataset has a long schema, we should intelligently truncate to avoid flooding the screen with unreadable information.
// Standard output
[a: int, b: int]
// Truncate many top level fields
[a: int, b: string ... 10 more fields]
// Truncate long inner structs
[a: struct<a: int ... 10 more fields>]
Author: Dilip Biswal <dbiswal@us.ibm.com>
Closes #10373 from dilipbiswal/spark-12398.
Diffstat (limited to 'sql/catalyst')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala | 3 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala | 17 |
2 files changed, 20 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
index 4b54c31dcc..b0c43c4100 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -66,6 +66,9 @@ abstract class DataType extends AbstractDataType {
   /** Readable string representation for the type. */
   def simpleString: String = typeName
 
+  /** Readable string representation for the type with truncation */
+  private[sql] def simpleString(maxNumberFields: Int): String = simpleString
+
   /**
    * Check if `this` and `other` are the same data type when ignoring nullability
    * (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index 9778df271d..d568022765 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -278,6 +278,23 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
     s"struct<${fieldTypes.mkString(",")}>"
   }
 
+  private[sql] override def simpleString(maxNumberFields: Int): String = {
+    val builder = new StringBuilder
+    val fieldTypes = fields.take(maxNumberFields).map {
+      case f => s"${f.name}: ${f.dataType.simpleString(maxNumberFields)}"
+    }
+    builder.append("struct<")
+    builder.append(fieldTypes.mkString(", "))
+    if (fields.length > 2) {
+      if (fields.length - fieldTypes.size == 1) {
+        builder.append(" ... 1 more field")
+      } else {
+        builder.append(" ... " + (fields.length - 2) + " more fields")
+      }
+    }
+    builder.append(">").toString()
+  }
+
   /**
    * Merges with another schema (`StructType`). For a struct field A from `this` and a struct field
    * B from `that`,