Diffstat (limited to 'core')
-rw-r--r--  core/src/main/scala/org/apache/spark/util/Utils.scala       47
-rw-r--r--  core/src/test/scala/org/apache/spark/util/UtilsSuite.scala   8
2 files changed, 55 insertions(+), 0 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 1a9dbcae8c..f9d05409e1 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -26,6 +26,7 @@ import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.util.{Locale, Properties, Random, UUID}
import java.util.concurrent._
+import java.util.concurrent.atomic.AtomicBoolean
import javax.net.ssl.HttpsURLConnection

import scala.annotation.tailrec
@@ -78,6 +79,52 @@ private[spark] object Utils extends Logging {
private val MAX_DIR_CREATION_ATTEMPTS: Int = 10
@volatile private var localRootDirs: Array[String] = null

+ /**
+ * The performance overhead of creating and logging strings for wide schemas can be large. To
+ * limit the impact, we bound the number of fields to include by default. This can be overridden
+ * by setting the 'spark.debug.maxToStringFields' conf in SparkEnv.
+ */
+ val DEFAULT_MAX_TO_STRING_FIELDS = 25
+
+ private def maxNumToStringFields = {
+ if (SparkEnv.get != null) {
+ SparkEnv.get.conf.getInt("spark.debug.maxToStringFields", DEFAULT_MAX_TO_STRING_FIELDS)
+ } else {
+ DEFAULT_MAX_TO_STRING_FIELDS
+ }
+ }
+
+ /** Whether we have warned about plan string truncation yet. */
+ private val truncationWarningPrinted = new AtomicBoolean(false)
+
+ /**
+ * Format a sequence with semantics similar to calling .mkString(). Any elements beyond
+ * maxNumFields will be dropped and replaced by a "... N more fields" placeholder.
+ *
+ * @return the trimmed and formatted string.
+ */
+ def truncatedString[T](
+ seq: Seq[T],
+ start: String,
+ sep: String,
+ end: String,
+ maxNumFields: Int = maxNumToStringFields): String = {
+ if (seq.length > maxNumFields) {
+ if (truncationWarningPrinted.compareAndSet(false, true)) {
+ logWarning(
+ "Truncated the string representation of a plan since it was too large. This " +
+ "behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.")
+ }
+ val numFields = math.max(0, maxNumFields - 1)
+ seq.take(numFields).mkString(
+ start, sep, sep + "... " + (seq.length - numFields) + " more fields" + end)
+ } else {
+ seq.mkString(start, sep, end)
+ }
+ }
+
+ /** Shorthand for calling truncatedString() without start or end strings. */
+ def truncatedString[T](seq: Seq[T], sep: String): String = truncatedString(seq, "", sep, "")

/** Serialize an object using Java serialization */
def serialize[T](o: T): Array[Byte] = {
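
The truncation semantics above are easy to see in isolation. The following standalone sketch copies the added logic, minus the SparkEnv lookup and the AtomicBoolean warn-once logging; TruncatedStringDemo and the sample inputs are hypothetical, for illustration only:

// Standalone sketch of truncatedString() as added in the patch above,
// with the config lookup and warn-once logging stripped out.
object TruncatedStringDemo {
  def truncatedString[T](
      seq: Seq[T],
      start: String,
      sep: String,
      end: String,
      maxNumFields: Int = 25): String = {
    if (seq.length > maxNumFields) {
      // Reserve one slot for the "... N more fields" placeholder.
      val numFields = math.max(0, maxNumFields - 1)
      seq.take(numFields).mkString(
        start, sep, sep + "... " + (seq.length - numFields) + " more fields" + end)
    } else {
      seq.mkString(start, sep, end)
    }
  }

  def main(args: Array[String]): Unit = {
    println(truncatedString(1 to 30, "[", ", ", "]", maxNumFields = 3))       // [1, 2, ... 28 more fields]
    println(truncatedString(Seq("a", "b"), "[", ", ", "]", maxNumFields = 3)) // [a, b]
  }
}

Note that the placeholder itself counts against the budget: with maxNumFields = 3, only two real elements are printed before "... 28 more fields".
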
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 6698749866..a5363f0bfd 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -40,6 +40,14 @@ import org.apache.spark.network.util.ByteUnit
class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
+ test("truncatedString") {
+ assert(Utils.truncatedString(Nil, "[", ", ", "]", 2) == "[]")
+ assert(Utils.truncatedString(Seq(1, 2), "[", ", ", "]", 2) == "[1, 2]")
+ assert(Utils.truncatedString(Seq(1, 2, 3), "[", ", ", "]", 2) == "[1, ... 2 more fields]")
+ assert(Utils.truncatedString(Seq(1, 2, 3), "[", ", ", "]", -5) == "[, ... 3 more fields]")
+ assert(Utils.truncatedString(Seq(1, 2, 3), ", ") == "1, 2, 3")
+ }
+
test("timeConversion") {
// Test -1
assert(Utils.timeStringAsSeconds("-1") === -1)
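
Because maxNumToStringFields reads 'spark.debug.maxToStringFields' from the active SparkEnv's conf on every call, the limit can be raised per application. A minimal sketch, assuming a local application (the app name and the value 100 are arbitrary demo choices):

import org.apache.spark.{SparkConf, SparkContext}

// Raise the truncation threshold for this application; truncatedString()
// picks the value up through SparkEnv.get.conf on each call.
val conf = new SparkConf()
  .setMaster("local[*]")
  .setAppName("wide-schema-debug")              // arbitrary demo name
  .set("spark.debug.maxToStringFields", "100")  // default is 25
val sc = new SparkContext(conf)
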