-rw-r--r--  .gitignore                                                         |  1
-rw-r--r--  R/pkg/R/DataFrame.R                                                | 11
-rw-r--r--  R/pkg/inst/tests/testthat/test_sparkSQL.R                          |  8
-rw-r--r--  python/pyspark/sql/dataframe.py                                    | 18
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala         | 47
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala  | 27
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala    |  2
7 files changed, 97 insertions(+), 17 deletions(-)
diff --git a/.gitignore b/.gitignore
index 9f8cd0b4cb..b4dd1d05a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,3 +77,4 @@ spark-warehouse/
# For R session data
.RData
.RHistory
+.Rhistory
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index f856979c2a..61d47a8c2d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -176,8 +176,8 @@ setMethod("isLocal",
#' @param x A SparkDataFrame
#' @param numRows The number of rows to print. Defaults to 20.
#' @param truncate Whether truncate long strings. If true, strings more than 20 characters will be
-#' truncated and all cells will be aligned right
-#'
+#' truncated. If set to a number greater than zero, truncates strings longer than
+#' `truncate` characters and all cells will be aligned right.
#' @family SparkDataFrame functions
#' @rdname showDF
#' @name showDF
@@ -193,7 +193,12 @@ setMethod("isLocal",
setMethod("showDF",
signature(x = "SparkDataFrame"),
function(x, numRows = 20, truncate = TRUE) {
- s <- callJMethod(x@sdf, "showString", numToInt(numRows), truncate)
+ if (is.logical(truncate) && truncate) {
+ s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(20))
+ } else {
+ truncate2 <- as.numeric(truncate)
+ s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(truncate2))
+ }
cat(s)
})
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 74def5ce42..7562fa95e3 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1582,7 +1582,15 @@ test_that("showDF()", {
"| 30| Andy|\n",
"| 19| Justin|\n",
"+----+-------+\n", sep = "")
+ expected2 <- paste("+---+----+\n",
+ "|age|name|\n",
+ "+---+----+\n",
+ "|nul| Mic|\n",
+ "| 30| And|\n",
+ "| 19| Jus|\n",
+ "+---+----+\n", sep = "")
expect_output(showDF(df), expected)
+ expect_output(showDF(df, truncate = 3), expected2)
})
test_that("isLocal()", {
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index acf9d08b23..a2443ed3d6 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -271,7 +271,9 @@ class DataFrame(object):
"""Prints the first ``n`` rows to the console.
:param n: Number of rows to show.
- :param truncate: Whether truncate long strings and align cells right.
+ :param truncate: If set to ``True``, truncates strings longer than 20 characters by default.
+ If set to a number greater than one, truncates long strings to length ``truncate``
+ and aligns cells right.
>>> df
DataFrame[age: int, name: string]
@@ -282,8 +284,18 @@ class DataFrame(object):
| 2|Alice|
| 5| Bob|
+---+-----+
- """
- print(self._jdf.showString(n, truncate))
+ >>> df.show(truncate=3)
+ +---+----+
+ |age|name|
+ +---+----+
+ | 2| Ali|
+ | 5| Bob|
+ +---+----+
+ """
+ if isinstance(truncate, bool) and truncate:
+ print(self._jdf.showString(n, 20))
+ else:
+ print(self._jdf.showString(n, int(truncate)))
def __repr__(self):
return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 85d060639c..9997162f7c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -232,16 +232,18 @@ class Dataset[T] private[sql](
* Compose the string representing rows for output
*
* @param _numRows Number of rows to show
- * @param truncate Whether truncate long strings and align cells right
+ * @param truncate If set to more than 0, truncates strings to `truncate` characters and
+ * all cells will be aligned right.
*/
- private[sql] def showString(_numRows: Int, truncate: Boolean = true): String = {
+ private[sql] def showString(_numRows: Int, truncate: Int = 20): String = {
val numRows = _numRows.max(0)
val takeResult = toDF().take(numRows + 1)
val hasMoreData = takeResult.length > numRows
val data = takeResult.take(numRows)
// For array values, replace Seq and Array with square brackets
- // For cells that are beyond 20 characters, replace it with the first 17 and "..."
+ // For cells that are beyond `truncate` characters, replace them with the
+ // first `truncate - 3` characters and "..."
val rows: Seq[Seq[String]] = schema.fieldNames.toSeq +: data.map { row =>
row.toSeq.map { cell =>
val str = cell match {
@@ -251,7 +253,13 @@ class Dataset[T] private[sql](
case seq: Seq[_] => seq.mkString("[", ", ", "]")
case _ => cell.toString
}
- if (truncate && str.length > 20) str.substring(0, 17) + "..." else str
+ if (truncate > 0 && str.length > truncate) {
+ // do not add ellipses when the truncate width is less than 4 characters.
+ if (truncate < 4) str.substring(0, truncate)
+ else str.substring(0, truncate - 3) + "..."
+ } else {
+ str
+ }
}: Seq[String]
}
@@ -273,7 +281,7 @@ class Dataset[T] private[sql](
// column names
rows.head.zipWithIndex.map { case (cell, i) =>
- if (truncate) {
+ if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
@@ -285,7 +293,7 @@ class Dataset[T] private[sql](
// data
rows.tail.map {
_.zipWithIndex.map { case (cell, i) =>
- if (truncate) {
+ if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
@@ -523,7 +531,32 @@ class Dataset[T] private[sql](
* @since 1.6.0
*/
// scalastyle:off println
- def show(numRows: Int, truncate: Boolean): Unit = println(showString(numRows, truncate))
+ def show(numRows: Int, truncate: Boolean): Unit = if (truncate) {
+ println(showString(numRows, truncate = 20))
+ } else {
+ println(showString(numRows, truncate = 0))
+ }
+ // scalastyle:on println
+
+ /**
+ * Displays the Dataset in a tabular form. For example:
+ * {{{
+ * year month AVG('Adj Close) MAX('Adj Close)
+ * 1980 12 0.503218 0.595103
+ * 1981 01 0.523289 0.570307
+ * 1982 02 0.436504 0.475256
+ * 1983 03 0.410516 0.442194
+ * 1984 04 0.450090 0.483521
+ * }}}
+ *
+ * @param numRows Number of rows to show
+ * @param truncate If set to more than 0, truncates strings to `truncate` characters and
+ * all cells will be aligned right.
+ * @group action
+ * @since 1.6.0
+ */
+ // scalastyle:off println
+ def show(numRows: Int, truncate: Int): Unit = println(showString(numRows, truncate))
// scalastyle:on println
/**
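The heart of the change is the new cell-truncation rule in showString: when `truncate` is greater than zero, a cell longer than `truncate` is cut to `truncate - 3` characters plus "...", and when `truncate` is less than 4 there is no room for the ellipsis, so the cell is simply cut to `truncate` characters. A minimal, self-contained Scala sketch of that rule follows; the names `TruncateDemo` and `truncateCell` are illustrative only, not part of the patch or of Spark's API.

// Sketch of the truncation rule introduced above.
// `truncateCell` is an illustrative helper, not Spark API.
object TruncateDemo {
  def truncateCell(str: String, truncate: Int): String = {
    if (truncate > 0 && str.length > truncate) {
      // No room for "..." when the target width is under 4 characters.
      if (truncate < 4) str.substring(0, truncate)
      else str.substring(0, truncate - 3) + "..."
    } else {
      str
    }
  }

  def main(args: Array[String]): Unit = {
    val longString = "1" * 21
    println(truncateCell(longString, 20)) // 11111111111111111...  (17 chars + "...")
    println(truncateCell(longString, 3))  // 111                   (no ellipsis)
    println(truncateCell(longString, 0))  // full 21-character string, untouched
  }
}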
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 1afee9f021..6a0a7df3f4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -723,7 +723,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
testData.select($"*").show(1000)
}
- test("showString: truncate = [true, false]") {
+ test("showString: truncate = [0, 20]") {
val longString = Array.fill(21)("1").mkString
val df = sparkContext.parallelize(Seq("1", longString)).toDF()
val expectedAnswerForFalse = """+---------------------+
@@ -733,7 +733,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
||111111111111111111111|
|+---------------------+
|""".stripMargin
- assert(df.showString(10, false) === expectedAnswerForFalse)
+ assert(df.showString(10, truncate = 0) === expectedAnswerForFalse)
val expectedAnswerForTrue = """+--------------------+
|| value|
|+--------------------+
@@ -741,7 +741,28 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
||11111111111111111...|
|+--------------------+
|""".stripMargin
- assert(df.showString(10, true) === expectedAnswerForTrue)
+ assert(df.showString(10, truncate = 20) === expectedAnswerForTrue)
+ }
+
+ test("showString: truncate = [3, 17]") {
+ val longString = Array.fill(21)("1").mkString
+ val df = sparkContext.parallelize(Seq("1", longString)).toDF()
+ val expectedAnswerForFalse = """+-----+
+ ||value|
+ |+-----+
+ || 1|
+ || 111|
+ |+-----+
+ |""".stripMargin
+ assert(df.showString(10, truncate = 3) === expectedAnswerForFalse)
+ val expectedAnswerForTrue = """+-----------------+
+ || value|
+ |+-----------------+
+ || 1|
+ ||11111111111111...|
+ |+-----------------+
+ |""".stripMargin
+ assert(df.showString(10, truncate = 17) === expectedAnswerForTrue)
}
test("showString(negative)") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 43cbc03b7a..0b6f40872f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -730,7 +730,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
private def checkShowString[T](ds: Dataset[T], expected: String): Unit = {
val numRows = expected.split("\n").length - 4
- val actual = ds.showString(numRows, truncate = true)
+ val actual = ds.showString(numRows, truncate = 20)
if (expected != actual) {
fail(