[SPARK-7993] [SQL] Improved DataFrame.show() output

Closes #6633

Author: akhilthatipamula <130050068@iitb.ac.in>
Author: zsxwing <zsxwing@gmail.com>

Closes #6784 from zsxwing/pr6633 and squashes the following commits:

5da1c51 [zsxwing] Address comments and add unit tests
17eab7b [akhilthatipamula] refactored code
19874b3 [akhilthatipamula] Update DataFrame.scala
0a76a5e [akhilthatipamula] Optimised showString()
e3dd03f [akhilthatipamula] Modified showString() method
a21012b [akhilthatipamula] improved the show()
4bb742f [akhilthatipamula] Modified dataframe.show() method
author: akhilthatipamula <130050068@iitb.ac.in> 2015-06-12 10:40:28 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-06-12 10:40:28 -0700
commit: 19834fa9184f0365a160bcb54bcd33eaa87c70dc (patch)
tree: 6a0ac6e63dc3d827e185c8040962797eaa992d98
parent: 71cc17bfa7ff32f820742fdc2c45237b624c5370 (diff)
download: spark-19834fa9184f0365a160bcb54bcd33eaa87c70dc.tar.gz
spark-19834fa9184f0365a160bcb54bcd33eaa87c70dc.tar.bz2
spark-19834fa9184f0365a160bcb54bcd33eaa87c70dc.zip
2 files changed, 75 insertions, 6 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 59f64dd4bc..f041fd397b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -169,23 +169,34 @@ class DataFrame private[sql](
 
   /**
    * Internal API for Python
-   * @param numRows Number of rows to show
+   * @param _numRows Number of rows to show
    */
-  private[sql] def showString(numRows: Int): String = {
+  private[sql] def showString(_numRows: Int): String = {
+    val numRows = _numRows.max(0)
     val sb = new StringBuilder
-    val data = take(numRows)
+    val takeResult = take(numRows + 1)
+    val hasMoreData = takeResult.length > numRows
+    val data = takeResult.take(numRows)
     val numCols = schema.fieldNames.length
 
+    // For array values, replace Seq and Array with square brackets
     // For cells that are beyond 20 characters, replace it with the first 17 and "..."
     val rows: Seq[Seq[String]] = schema.fieldNames.toSeq +: data.map { row =>
       row.toSeq.map { cell =>
-        val str = if (cell == null) "null" else cell.toString
+        val str = cell match {
+          case null => "null"
+          case array: Array[_] => array.mkString("[", ", ", "]")
+          case seq: Seq[_] => seq.mkString("[", ", ", "]")
+          case _ => cell.toString
+        }
         if (str.length > 20) str.substring(0, 17) + "..." else str
       }: Seq[String]
     }
 
+    // Initialise the width of each column to a minimum value of '3'
+    val colWidths = Array.fill(numCols)(3)
+
     // Compute the width of each column
-    val colWidths = Array.fill(numCols)(0)
     for (row <- rows) {
       for ((cell, i) <- row.zipWithIndex) {
         colWidths(i) = math.max(colWidths(i), cell.length)
@@ -197,7 +208,7 @@ class DataFrame private[sql](
 
     // column names
     rows.head.zipWithIndex.map { case (cell, i) =>
-      StringUtils.leftPad(cell.toString, colWidths(i))
+      StringUtils.leftPad(cell, colWidths(i))
     }.addString(sb, "|", "|", "|\n")
 
     sb.append(sep)
@@ -210,6 +221,13 @@ class DataFrame private[sql](
     }
 
     sb.append(sep)
+
+    // For Data that has more than "numRows" records
+    if (hasMoreData) {
+      val rowsString = if (numRows == 1) "row" else "rows"
+      sb.append(s"only showing top $numRows ${rowsString}\n")
+    }
+
     sb.toString()
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index bb8621abe6..84835c0db7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -469,12 +469,63 @@ class DataFrameSuite extends QueryTest {
     testData.select($"*").show(1000)
   }
 
+  test("showString(negative)") {
+    val expectedAnswer = """+---+-----+
+                           ||key|value|
+                           |+---+-----+
+                           |+---+-----+
+                           |only showing top 0 rows
+                           |""".stripMargin
+    assert(testData.select($"*").showString(-1) === expectedAnswer)
+  }
+
+  test("showString(0)") {
+    val expectedAnswer = """+---+-----+
+                           ||key|value|
+                           |+---+-----+
+                           |+---+-----+
+                           |only showing top 0 rows
+                           |""".stripMargin
+    assert(testData.select($"*").showString(0) === expectedAnswer)
+  }
+
+  test("showString: array") {
+    val df = Seq(
+      (Array(1, 2, 3), Array(1, 2, 3)),
+      (Array(2, 3, 4), Array(2, 3, 4))
+    ).toDF()
+    val expectedAnswer = """+---------+---------+
+                           ||       _1|       _2|
+                           |+---------+---------+
+                           ||[1, 2, 3]|[1, 2, 3]|
+                           ||[2, 3, 4]|[2, 3, 4]|
+                           |+---------+---------+
+                           |""".stripMargin
+    assert(df.showString(10) === expectedAnswer)
+  }
+
+  test("showString: minimum column width") {
+    val df = Seq(
+      (1, 1),
+      (2, 2)
+    ).toDF()
+    val expectedAnswer = """+---+---+
+                           || _1| _2|
+                           |+---+---+
+                           ||  1|  1|
+                           ||  2|  2|
+                           |+---+---+
+                           |""".stripMargin
+    assert(df.showString(10) === expectedAnswer)
+  }
+
   test("SPARK-7319 showString") {
     val expectedAnswer = """+---+-----+
                            ||key|value|
                            |+---+-----+
                            ||  1|    1|
                            |+---+-----+
+                           |only showing top 1 row
                            |""".stripMargin
     assert(testData.select($"*").showString(1) === expectedAnswer)
   }
author	akhilthatipamula <130050068@iitb.ac.in>	2015-06-12 10:40:28 -0700
committer	Reynold Xin <rxin@databricks.com>	2015-06-12 10:40:28 -0700
commit	19834fa9184f0365a160bcb54bcd33eaa87c70dc (patch)
tree	6a0ac6e63dc3d827e185c8040962797eaa992d98
parent	71cc17bfa7ff32f820742fdc2c45237b624c5370 (diff)
download	spark-19834fa9184f0365a160bcb54bcd33eaa87c70dc.tar.gz spark-19834fa9184f0365a160bcb54bcd33eaa87c70dc.tar.bz2 spark-19834fa9184f0365a160bcb54bcd33eaa87c70dc.zip