aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzsxwing <zsxwing@gmail.com>2015-06-29 23:44:11 -0700
committerReynold Xin <rxin@databricks.com>2015-06-29 23:44:11 -0700
commit12671dd5e468beedc2681ff2bdf95fba81f8f29c (patch)
tree17fbc6f7141b79134002b8d3bb13900d773fb6a8
parent6c5a6db4d53d6db8aa3464ea6713cf0d3a3bdfb5 (diff)
downloadspark-12671dd5e468beedc2681ff2bdf95fba81f8f29c.tar.gz
spark-12671dd5e468beedc2681ff2bdf95fba81f8f29c.tar.bz2
spark-12671dd5e468beedc2681ff2bdf95fba81f8f29c.zip
[SPARK-8434][SQL]Add a "pretty" parameter to the "show" method to display long strings
Sometimes the user may want to show the complete content of cells. Currently, `sql("set -v").show()` displays: ![screen shot 2015-06-18 at 4 34 51 pm](https://cloud.githubusercontent.com/assets/1000778/8227339/14d3c5ea-15d9-11e5-99b9-f00b7e93beef.png) The user needs to use something like `sql("set -v").collect().foreach(r => println(r.toSeq.mkString("\t")))` to show the complete content. This PR adds a `truncate` parameter (originally named `pretty`) to `show`. If `truncate` is false, `show` won't truncate strings or align cells right. ![screen shot 2015-06-18 at 4 21 44 pm](https://cloud.githubusercontent.com/assets/1000778/8227407/b6f8dcac-15d9-11e5-8219-8079280d76fc.png) Author: zsxwing <zsxwing@gmail.com> Closes #6877 from zsxwing/show and squashes the following commits: 22e28e9 [zsxwing] pretty -> truncate e582628 [zsxwing] Add pretty parameter to the show method in R a3cd55b [zsxwing] Fix calling showString in R 923cee4 [zsxwing] Add a "pretty" parameter to show to display long strings
-rw-r--r--R/pkg/R/DataFrame.R4
-rw-r--r--python/pyspark/sql/dataframe.py7
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala55
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala21
4 files changed, 76 insertions, 11 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 6feabf4189..60702824ac 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -169,8 +169,8 @@ setMethod("isLocal",
#'}
setMethod("showDF",
signature(x = "DataFrame"),
- function(x, numRows = 20) {
- s <- callJMethod(x@sdf, "showString", numToInt(numRows))
+ function(x, numRows = 20, truncate = TRUE) {
+ s <- callJMethod(x@sdf, "showString", numToInt(numRows), truncate)
cat(s)
})
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 152b87351d..4b9efa0a21 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -247,9 +247,12 @@ class DataFrame(object):
return self._jdf.isLocal()
@since(1.3)
- def show(self, n=20):
+ def show(self, n=20, truncate=True):
"""Prints the first ``n`` rows to the console.
+ :param n: Number of rows to show.
+ :param truncate: Whether to truncate long strings and align cells right.
+
>>> df
DataFrame[age: int, name: string]
>>> df.show()
@@ -260,7 +263,7 @@ class DataFrame(object):
| 5| Bob|
+---+-----+
"""
- print(self._jdf.showString(n))
+ print(self._jdf.showString(n, truncate))
def __repr__(self):
return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 986e591339..8fe1f7e34c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -169,8 +169,9 @@ class DataFrame private[sql](
/**
* Internal API for Python
* @param _numRows Number of rows to show
+ * @param truncate Whether to truncate long strings and align cells right
*/
- private[sql] def showString(_numRows: Int): String = {
+ private[sql] def showString(_numRows: Int, truncate: Boolean = true): String = {
val numRows = _numRows.max(0)
val sb = new StringBuilder
val takeResult = take(numRows + 1)
@@ -188,7 +189,7 @@ class DataFrame private[sql](
case seq: Seq[_] => seq.mkString("[", ", ", "]")
case _ => cell.toString
}
- if (str.length > 20) str.substring(0, 17) + "..." else str
+ if (truncate && str.length > 20) str.substring(0, 17) + "..." else str
}: Seq[String]
}
@@ -207,7 +208,11 @@ class DataFrame private[sql](
// column names
rows.head.zipWithIndex.map { case (cell, i) =>
- StringUtils.leftPad(cell, colWidths(i))
+ if (truncate) {
+ StringUtils.leftPad(cell, colWidths(i))
+ } else {
+ StringUtils.rightPad(cell, colWidths(i))
+ }
}.addString(sb, "|", "|", "|\n")
sb.append(sep)
@@ -215,7 +220,11 @@ class DataFrame private[sql](
// data
rows.tail.map {
_.zipWithIndex.map { case (cell, i) =>
- StringUtils.leftPad(cell.toString, colWidths(i))
+ if (truncate) {
+ StringUtils.leftPad(cell.toString, colWidths(i))
+ } else {
+ StringUtils.rightPad(cell.toString, colWidths(i))
+ }
}.addString(sb, "|", "|", "|\n")
}
@@ -331,7 +340,8 @@ class DataFrame private[sql](
def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation]
/**
- * Displays the [[DataFrame]] in a tabular form. For example:
+ * Displays the [[DataFrame]] in a tabular form. Strings longer than 20 characters will be
+ * truncated, and all cells will be aligned right. For example:
* {{{
* year month AVG('Adj Close) MAX('Adj Close)
* 1980 12 0.503218 0.595103
@@ -345,16 +355,47 @@ class DataFrame private[sql](
* @group action
* @since 1.3.0
*/
- def show(numRows: Int): Unit = println(showString(numRows))
+ def show(numRows: Int): Unit = show(numRows, true)
/**
- * Displays the top 20 rows of [[DataFrame]] in a tabular form.
+ * Displays the top 20 rows of [[DataFrame]] in a tabular form. Strings longer than 20 characters
+ * will be truncated, and all cells will be aligned right.
* @group action
* @since 1.3.0
*/
def show(): Unit = show(20)
/**
+ * Displays the top 20 rows of [[DataFrame]] in a tabular form.
+ *
+ * @param truncate Whether to truncate long strings. If true, strings longer than 20 characters
+ * will be truncated and all cells will be aligned right
+ *
+ * @group action
+ * @since 1.5.0
+ */
+ def show(truncate: Boolean): Unit = show(20, truncate)
+
+ /**
+ * Displays the [[DataFrame]] in a tabular form. For example:
+ * {{{
+ * year month AVG('Adj Close) MAX('Adj Close)
+ * 1980 12 0.503218 0.595103
+ * 1981 01 0.523289 0.570307
+ * 1982 02 0.436504 0.475256
+ * 1983 03 0.410516 0.442194
+ * 1984 04 0.450090 0.483521
+ * }}}
+ * @param numRows Number of rows to show
+ * @param truncate Whether to truncate long strings. If true, strings longer than 20 characters
+ * will be truncated and all cells will be aligned right
+ *
+ * @group action
+ * @since 1.5.0
+ */
+ def show(numRows: Int, truncate: Boolean): Unit = println(showString(numRows, truncate))
+
+ /**
* Returns a [[DataFrameNaFunctions]] for working with missing data.
* {{{
* // Dropping rows containing any null values.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index d06b9c5785..50d324c068 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -492,6 +492,27 @@ class DataFrameSuite extends QueryTest {
testData.select($"*").show(1000)
}
+ test("showString: truncate = [true, false]") {
+ val longString = Array.fill(21)("1").mkString
+ val df = ctx.sparkContext.parallelize(Seq("1", longString)).toDF()
+ val expectedAnswerForFalse = """+---------------------+
+ ||_1 |
+ |+---------------------+
+ ||1 |
+ ||111111111111111111111|
+ |+---------------------+
+ |""".stripMargin
+ assert(df.showString(10, false) === expectedAnswerForFalse)
+ val expectedAnswerForTrue = """+--------------------+
+ || _1|
+ |+--------------------+
+ || 1|
+ ||11111111111111111...|
+ |+--------------------+
+ |""".stripMargin
+ assert(df.showString(10, true) === expectedAnswerForTrue)
+ }
+
test("showString(negative)") {
val expectedAnswer = """+---+-----+
||key|value|