author     akhilthatipamula <130050068@iitb.ac.in>    2015-06-12 10:40:28 -0700
committer  Reynold Xin <rxin@databricks.com>          2015-06-12 10:40:28 -0700
commit     19834fa9184f0365a160bcb54bcd33eaa87c70dc (patch)
tree       6a0ac6e63dc3d827e185c8040962797eaa992d98
parent     71cc17bfa7ff32f820742fdc2c45237b624c5370 (diff)
[SPARK-7993] [SQL] Improved DataFrame.show() output
Closes #6633

Author: akhilthatipamula <130050068@iitb.ac.in>
Author: zsxwing <zsxwing@gmail.com>

Closes #6784 from zsxwing/pr6633 and squashes the following commits:

5da1c51 [zsxwing] Address comments and add unit tests
17eab7b [akhilthatipamula] refactored code
19874b3 [akhilthatipamula] Update DataFrame.scala
0a76a5e [akhilthatipamula] Optimised showString()
e3dd03f [akhilthatipamula] Modified showString() method
a21012b [akhilthatipamula] improved the show()
4bb742f [akhilthatipamula] Modified dataframe.show() method
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala       30
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala  51
2 files changed, 75 insertions, 6 deletions
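
As a reference for the diff below, the cell-formatting rules this patch adds to showString() can be tried in isolation. This is a minimal sketch (the object and method names are illustrative, not part of the patch): nulls render as "null", arrays and Seqs render with square brackets, and any cell longer than 20 characters is cut to its first 17 characters plus "...".

// Minimal sketch of the cell formatting that the patch adds to showString().
// `formatCell` is a hypothetical helper, not an API introduced by this commit.
object ShowStringFormatSketch {
  def formatCell(cell: Any): String = {
    val str = cell match {
      case null        => "null"
      case a: Array[_] => a.mkString("[", ", ", "]")
      case s: Seq[_]   => s.mkString("[", ", ", "]")
      case other       => other.toString
    }
    if (str.length > 20) str.substring(0, 17) + "..." else str
  }

  def main(args: Array[String]): Unit = {
    println(formatCell(null))            // prints: null
    println(formatCell(Array(1, 2, 3)))  // prints: [1, 2, 3]
    println(formatCell("a" * 25))        // prints: aaaaaaaaaaaaaaaaa...
  }
}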
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 59f64dd4bc..f041fd397b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -169,23 +169,34 @@ class DataFrame private[sql](
/**
* Internal API for Python
- * @param numRows Number of rows to show
+ * @param _numRows Number of rows to show
*/
- private[sql] def showString(numRows: Int): String = {
+ private[sql] def showString(_numRows: Int): String = {
+ val numRows = _numRows.max(0)
val sb = new StringBuilder
- val data = take(numRows)
+ val takeResult = take(numRows + 1)
+ val hasMoreData = takeResult.length > numRows
+ val data = takeResult.take(numRows)
val numCols = schema.fieldNames.length
+ // For array values, replace Seq and Array with square brackets
// For cells that are beyond 20 characters, replace it with the first 17 and "..."
val rows: Seq[Seq[String]] = schema.fieldNames.toSeq +: data.map { row =>
row.toSeq.map { cell =>
- val str = if (cell == null) "null" else cell.toString
+ val str = cell match {
+ case null => "null"
+ case array: Array[_] => array.mkString("[", ", ", "]")
+ case seq: Seq[_] => seq.mkString("[", ", ", "]")
+ case _ => cell.toString
+ }
if (str.length > 20) str.substring(0, 17) + "..." else str
}: Seq[String]
}
+ // Initialise the width of each column to a minimum value of '3'
+ val colWidths = Array.fill(numCols)(3)
+
// Compute the width of each column
- val colWidths = Array.fill(numCols)(0)
for (row <- rows) {
for ((cell, i) <- row.zipWithIndex) {
colWidths(i) = math.max(colWidths(i), cell.length)
@@ -197,7 +208,7 @@ class DataFrame private[sql](
// column names
rows.head.zipWithIndex.map { case (cell, i) =>
- StringUtils.leftPad(cell.toString, colWidths(i))
+ StringUtils.leftPad(cell, colWidths(i))
}.addString(sb, "|", "|", "|\n")
sb.append(sep)
@@ -210,6 +221,13 @@ class DataFrame private[sql](
}
sb.append(sep)
+
+ // For Data that has more than "numRows" records
+ if (hasMoreData) {
+ val rowsString = if (numRows == 1) "row" else "rows"
+ sb.append(s"only showing top $numRows ${rowsString}\n")
+ }
+
sb.toString()
}
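
The other two pieces of the DataFrame.scala change, the 3-character minimum column width and the "only showing top N rows" footer, can be sketched standalone as well. The helper names below (columnWidths, footer) are hypothetical and only mirror what the patched showString() does inline.

// Start every column at width 3 so that narrow columns such as "_1" still align.
def columnWidths(rows: Seq[Seq[String]], numCols: Int): Array[Int] = {
  val widths = Array.fill(numCols)(3)
  for (row <- rows; (cell, i) <- row.zipWithIndex) {
    widths(i) = math.max(widths(i), cell.length)
  }
  widths
}

// Append the footer only when take(numRows + 1) returned more rows than requested.
def footer(numRows: Int, hasMoreData: Boolean): String =
  if (hasMoreData) {
    val rowsString = if (numRows == 1) "row" else "rows"
    s"only showing top $numRows $rowsString\n"
  } else {
    ""
  }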
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index bb8621abe6..84835c0db7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -469,12 +469,63 @@ class DataFrameSuite extends QueryTest {
testData.select($"*").show(1000)
}
+ test("showString(negative)") {
+ val expectedAnswer = """+---+-----+
+ ||key|value|
+ |+---+-----+
+ |+---+-----+
+ |only showing top 0 rows
+ |""".stripMargin
+ assert(testData.select($"*").showString(-1) === expectedAnswer)
+ }
+
+ test("showString(0)") {
+ val expectedAnswer = """+---+-----+
+ ||key|value|
+ |+---+-----+
+ |+---+-----+
+ |only showing top 0 rows
+ |""".stripMargin
+ assert(testData.select($"*").showString(0) === expectedAnswer)
+ }
+
+ test("showString: array") {
+ val df = Seq(
+ (Array(1, 2, 3), Array(1, 2, 3)),
+ (Array(2, 3, 4), Array(2, 3, 4))
+ ).toDF()
+ val expectedAnswer = """+---------+---------+
+ || _1| _2|
+ |+---------+---------+
+ ||[1, 2, 3]|[1, 2, 3]|
+ ||[2, 3, 4]|[2, 3, 4]|
+ |+---------+---------+
+ |""".stripMargin
+ assert(df.showString(10) === expectedAnswer)
+ }
+
+ test("showString: minimum column width") {
+ val df = Seq(
+ (1, 1),
+ (2, 2)
+ ).toDF()
+ val expectedAnswer = """+---+---+
+ || _1| _2|
+ |+---+---+
+ || 1| 1|
+ || 2| 2|
+ |+---+---+
+ |""".stripMargin
+ assert(df.showString(10) === expectedAnswer)
+ }
+
test("SPARK-7319 showString") {
val expectedAnswer = """+---+-----+
||key|value|
|+---+-----+
|| 1| 1|
|+---+-----+
+ |only showing top 1 row
|""".stripMargin
assert(testData.select($"*").showString(1) === expectedAnswer)
}
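
For a quick end-to-end check, the new output can be seen from a Spark 1.4-era spark-shell. This is an illustrative session, assuming the predefined sqlContext and its implicits are in scope; the expected output follows from the formatting rules in the patch above.

// Illustrative spark-shell session; assumes `import sqlContext.implicits._`
// so that Seq(...).toDF() is available.
import sqlContext.implicits._

val df = Seq((Array(1, 2, 3), "a" * 25)).toDF("arr", "str")
df.show()
// +---------+--------------------+
// |      arr|                 str|
// +---------+--------------------+
// |[1, 2, 3]|aaaaaaaaaaaaaaaaa...|
// +---------+--------------------+

val nums = Seq((1, "a"), (2, "b"), (3, "c")).toDF("n", "s")
nums.show(1)
// +---+---+
// |  n|  s|
// +---+---+
// |  1|  a|
// +---+---+
// only showing top 1 row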