Merge pull request #228 from pwendell/master

Document missing configs and set shuffle consolidation to false.
author: Patrick Wendell <pwendell@gmail.com> 2013-12-05 12:31:24 -0800
committer: Patrick Wendell <pwendell@gmail.com> 2013-12-05 12:31:24 -0800
commit: 5d460253d6080d871cb71efb112ea17be0873771 (patch)
tree: b6f2db0b07ed9d9e58384f0e3a48ab1cc26debe2
parent: 72b696156c8662cae2cef4b943520b4be86148ea (diff)
parent: 75d161b35702b6790aa66fff06b07f306442f5a3 (diff)
download: spark-5d460253d6080d871cb71efb112ea17be0873771.tar.gz
spark-5d460253d6080d871cb71efb112ea17be0873771.tar.bz2
spark-5d460253d6080d871cb71efb112ea17be0873771.zip
3 files changed, 49 insertions, 4 deletions
diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
index 2f1b049ce4..e828e1d1c5 100644
--- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
@@ -62,7 +62,7 @@ class ShuffleBlockManager(blockManager: BlockManager) {
   // Turning off shuffle file consolidation causes all shuffle Blocks to get their own file.
   // TODO: Remove this once the shuffle file consolidation feature is stable.
   val consolidateShuffleFiles =
-    System.getProperty("spark.shuffle.consolidateFiles", "true").toBoolean
+    System.getProperty("spark.shuffle.consolidateFiles", "false").toBoolean
 
   private val bufferSize = System.getProperty("spark.shuffle.file.buffer.kb", "100").toInt * 1024
 
diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
index 0b9056344c..ef4c4c0f14 100644
--- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
@@ -5,9 +5,9 @@ import java.io.{FileWriter, File}
 import scala.collection.mutable
 
 import com.google.common.io.Files
-import org.scalatest.{BeforeAndAfterEach, FunSuite}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
 
-class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach {
+class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll {
 
   val rootDir0 = Files.createTempDir()
   rootDir0.deleteOnExit()
@@ -16,6 +16,12 @@ class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach {
   val rootDirs = rootDir0.getName + "," + rootDir1.getName
   println("Created root dirs: " + rootDirs)
 
+  // This suite focuses primarily on consolidation features,
+  // so we coerce consolidation if not already enabled.
+  val consolidateProp = "spark.shuffle.consolidateFiles"
+  val oldConsolidate = Option(System.getProperty(consolidateProp))
+  System.setProperty(consolidateProp, "true")
+
   val shuffleBlockManager = new ShuffleBlockManager(null) {
     var idToSegmentMap = mutable.Map[ShuffleBlockId, FileSegment]()
     override def getBlockLocation(id: ShuffleBlockId) = idToSegmentMap(id)
@@ -23,6 +29,10 @@ class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach {
 
   var diskBlockManager: DiskBlockManager = _
 
+  override def afterAll() {  // Restore the pre-suite value; clear the property if it was unset.
+    oldConsolidate.map(System.setProperty(consolidateProp, _)).getOrElse(System.clearProperty(consolidateProp))
+  }
+
   override def beforeEach() {
     diskBlockManager = new DiskBlockManager(shuffleBlockManager, rootDirs)
     shuffleBlockManager.idToSegmentMap.clear()
diff --git a/docs/configuration.md b/docs/configuration.md
index 97183bafdb..22abe1c5a9 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -327,7 +327,42 @@ Apart from these, the following properties are also available, and may be useful
     Too large a value decreases parallelism during broadcast (makes it slower); however, if it is too small, <code>BlockManager</code> might take a performance hit.
   </td>
 </tr>
-
+<tr>
+  <td>spark.shuffle.consolidateFiles</td>
+  <td>false</td>
+  <td>
+    If set to "true", consolidates intermediate files created during a shuffle. Creating fewer files can improve filesystem performance if you run shuffles with large numbers of reduce tasks.
+  </td>
+</tr>
+
+<tr>
+  <td>spark.speculation</td>
+  <td>false</td>
+  <td>
+    If set to "true", performs speculative execution of tasks. This means if one or more tasks are running slowly in a stage, they will be re-launched.
+  </td>
+</tr>
+<tr>
+  <td>spark.speculation.interval</td>
+  <td>100</td>
+  <td>
+    How often Spark will check for tasks to speculate, in milliseconds.
+  </td>
+</tr>
+<tr>
+  <td>spark.speculation.quantile</td>
+  <td>0.75</td>
+  <td>
+    Fraction of tasks which must be complete before speculation is enabled for a particular stage.
+  </td>
+</tr>
+<tr>
+  <td>spark.speculation.multiplier</td>
+  <td>1.5</td>
+  <td>
+    How many times slower a task is than the median to be considered for speculation.
+  </td>
+</tr>
 </table>
 
 # Environment Variables
author	Patrick Wendell <pwendell@gmail.com>	2013-12-05 12:31:24 -0800
committer	Patrick Wendell <pwendell@gmail.com>	2013-12-05 12:31:24 -0800
commit	5d460253d6080d871cb71efb112ea17be0873771 (patch)
tree	b6f2db0b07ed9d9e58384f0e3a48ab1cc26debe2
parent	72b696156c8662cae2cef4b943520b4be86148ea (diff)
parent	75d161b35702b6790aa66fff06b07f306442f5a3 (diff)
download	spark-5d460253d6080d871cb71efb112ea17be0873771.tar.gz spark-5d460253d6080d871cb71efb112ea17be0873771.tar.bz2 spark-5d460253d6080d871cb71efb112ea17be0873771.zip