diff options
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala | 8 | ||||
-rw-r--r-- | docs/configuration.md | 8 |
2 files changed, 12 insertions, 4 deletions
diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index 8c23584d60..073a0a5029 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -84,7 +84,7 @@ extends Broadcast[T](id) with Logging with Serializable { val start = System.nanoTime logInfo("Started reading broadcast variable " + id) - // Master might send invalid values + // Initialize @transient variables that will receive garbage values from the master. resetWorkerVariables() if (receiveBroadcast(id)) { @@ -135,8 +135,9 @@ extends Broadcast[T](id) with Logging with Serializable { } attemptId -= 1 } - if (totalBlocks == -1) + if (totalBlocks == -1) { return false + } // Receive actual blocks val recvOrder = new Random().shuffle(Array.iterate(0, totalBlocks)(_ + 1).toList) @@ -151,8 +152,7 @@ extends Broadcast[T](id) with Logging with Serializable { pieceId, arrayOfBlocks(pid), StorageLevel.MEMORY_AND_DISK, true) case None => - throw new SparkException( - "Failed to get " + pieceId + " of " + broadcastId) + throw new SparkException("Failed to get " + pieceId + " of " + broadcastId) } } } diff --git a/docs/configuration.md b/docs/configuration.md index 7940d41a27..c5900d0e09 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -319,6 +319,14 @@ Apart from these, the following properties are also available, and may be useful Should be greater than or equal to 1. Number of allowed retries = this value - 1. </td> </tr> +<tr> + <td>spark.broadcast.blockSize</td> + <td>4096</td> + <td> + Size of each piece of a block in kilobytes for <code>TorrentBroadcastFactory</code>. + Too large a value decreases parallelism during broadcast (makes it slower); however, if it is too small, <code>BlockManager</code> might take a performance hit. + </td> +</tr> </table> |