author    Jacek Laskowski <jacek@japila.pl>    2017-03-30 16:07:27 +0100
committer Sean Owen <sowen@cloudera.com>       2017-03-30 16:07:27 +0100
commit    0197262a358fd174a188f8246ae777e53157610e (patch)
tree      0d0b52965bc6ea18785e97ada5eaca4f29e90b68 /core
parent    b454d4402e5ee7d1a7385d1fe3737581f84d2c72 (diff)
[DOCS] Docs-only improvements
…adoc

## What changes were proposed in this pull request?

Use recommended values for row boundaries in Window's scaladoc, i.e. `Window.unboundedPreceding`, `Window.unboundedFollowing`, and `Window.currentRow` (that were introduced in 2.1.0).

## How was this patch tested?

Local build

Author: Jacek Laskowski <jacek@japila.pl>

Closes #17417 from jaceklaskowski/window-expression-scaladoc.
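For context, a minimal sketch of the recommended boundary values in use (the SparkSession setup and the toy `dept`/`salary` data are illustrative assumptions, not part of this patch):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

// Assumed local session and toy data, purely for illustration.
val spark = SparkSession.builder()
  .appName("window-boundaries-sketch")
  .master("local[*]")
  .getOrCreate()
import spark.implicits._

val df = Seq(("eng", 100L), ("eng", 200L), ("hr", 50L)).toDF("dept", "salary")

// A row frame from the start of each partition up to the current row,
// written with the named boundary values instead of raw Long constants.
val runningTotal = Window
  .partitionBy("dept")
  .orderBy("salary")
  .rowsBetween(Window.unboundedPreceding, Window.currentRow)

df.withColumn("running_total", sum("salary").over(runningTotal)).show()
```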
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/java/org/apache/spark/memory/MemoryConsumer.java                      2
-rw-r--r--  core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java  5
-rw-r--r--  core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala                 5
-rw-r--r--  core/src/main/scala/org/apache/spark/scheduler/Task.scala                           2
-rw-r--r--  core/src/main/scala/org/apache/spark/serializer/Serializer.scala                    2
-rw-r--r--  core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala          3
-rw-r--r--  core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala        4
-rw-r--r--  core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala          4
-rw-r--r--  core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala                       2
9 files changed, 12 insertions, 17 deletions
diff --git a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
index fc1f3a8023..48cf4b9455 100644
--- a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
+++ b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
@@ -60,8 +60,6 @@ public abstract class MemoryConsumer {
/**
* Force spill during building.
- *
- * For testing.
*/
public void spill() throws IOException {
spill(Long.MAX_VALUE, this);
diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
index 4a15559e55..323a5d3c52 100644
--- a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
+++ b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
@@ -52,8 +52,7 @@ import org.apache.spark.util.Utils;
* This class implements sort-based shuffle's hash-style shuffle fallback path. This write path
* writes incoming records to separate files, one file per reduce partition, then concatenates these
* per-partition files to form a single output file, regions of which are served to reducers.
- * Records are not buffered in memory. This is essentially identical to
- * {@link org.apache.spark.shuffle.hash.HashShuffleWriter}, except that it writes output in a format
+ * Records are not buffered in memory. It writes output in a format
* that can be served / consumed via {@link org.apache.spark.shuffle.IndexShuffleBlockResolver}.
* <p>
* This write path is inefficient for shuffles with large numbers of reduce partitions because it
@@ -61,7 +60,7 @@ import org.apache.spark.util.Utils;
* {@link SortShuffleManager} only selects this write path when
* <ul>
* <li>no Ordering is specified,</li>
- * <li>no Aggregator is specific, and</li>
+ * <li>no Aggregator is specified, and</li>
* <li>the number of partitions is less than
* <code>spark.shuffle.sort.bypassMergeThreshold</code>.</li>
* </ul>
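The selection criteria listed in the comment above hinge on `spark.shuffle.sort.bypassMergeThreshold`. A minimal sketch of setting that threshold (the app name, master, and value are illustrative assumptions; 200 is the documented default):

```scala
import org.apache.spark.SparkConf

// Partition-count cutoff below which SortShuffleManager may choose the
// bypass-merge-sort write path (given no ordering and no map-side aggregation).
val conf = new SparkConf()
  .setAppName("bypass-merge-sort-sketch") // assumed name
  .setMaster("local[*]")                  // assumed master
  .set("spark.shuffle.sort.bypassMergeThreshold", "200")
```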
diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
index e4b9f8111e..9112d93a86 100644
--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
+++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
@@ -71,13 +71,12 @@ private[spark] trait ExecutorAllocationClient {
/**
* Request that the cluster manager kill every executor on the specified host.
- * Results in a call to killExecutors for each executor on the host, with the replace
- * and force arguments set to true.
+ *
* @return whether the request is acknowledged by the cluster manager.
*/
def killExecutorsOnHost(host: String): Boolean
- /**
+ /**
* Request that the cluster manager kill the specified executor.
* @return whether the request is acknowledged by the cluster manager.
*/
diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
index 46ef23f316..7fd2918960 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
@@ -149,7 +149,7 @@ private[spark] abstract class Task[T](
def preferredLocations: Seq[TaskLocation] = Nil
- // Map output tracker epoch. Will be set by TaskScheduler.
+ // Map output tracker epoch. Will be set by TaskSetManager.
var epoch: Long = -1
// Task context, to be initialized in run().
diff --git a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
index 008b038789..01bbda0b5e 100644
--- a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
@@ -77,7 +77,7 @@ abstract class Serializer {
* position = 0
* serOut.write(obj1)
* serOut.flush()
- * position = # of bytes writen to stream so far
+ * position = # of bytes written to stream so far
* obj1Bytes = output[0:position-1]
* serOut.write(obj2)
* serOut.flush()
diff --git a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
index 8b2e26cdd9..ba3e0e395e 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
@@ -95,8 +95,7 @@ private[spark] class BlockStoreShuffleReader[K, C](
// Sort the output if there is a sort ordering defined.
dep.keyOrdering match {
case Some(keyOrd: Ordering[K]) =>
- // Create an ExternalSorter to sort the data. Note that if spark.shuffle.spill is disabled,
- // the ExternalSorter won't spill to disk.
+ // Create an ExternalSorter to sort the data.
val sorter =
new ExternalSorter[K, C, C](context, ordering = Some(keyOrd), serializer = dep.serializer)
sorter.insertAll(aggregatedIter)
diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
index 91858f0912..1554048517 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
@@ -61,7 +61,7 @@ private[spark] class IndexShuffleBlockResolver(
/**
* Remove data file and index file that contain the output data from one map.
- * */
+ */
def removeDataByMap(shuffleId: Int, mapId: Int): Unit = {
var file = getDataFile(shuffleId, mapId)
if (file.exists()) {
@@ -132,7 +132,7 @@ private[spark] class IndexShuffleBlockResolver(
* replace them with new ones.
*
* Note: the `lengths` will be updated to match the existing index file if use the existing ones.
- * */
+ */
def writeIndexFileAndCommit(
shuffleId: Int,
mapId: Int,
diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
index 5e977a16fe..bfb4dc698e 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
@@ -82,13 +82,13 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager
override val shuffleBlockResolver = new IndexShuffleBlockResolver(conf)
/**
- * Register a shuffle with the manager and obtain a handle for it to pass to tasks.
+ * Obtains a [[ShuffleHandle]] to pass to tasks.
*/
override def registerShuffle[K, V, C](
shuffleId: Int,
numMaps: Int,
dependency: ShuffleDependency[K, V, C]): ShuffleHandle = {
- if (SortShuffleWriter.shouldBypassMergeSort(SparkEnv.get.conf, dependency)) {
+ if (SortShuffleWriter.shouldBypassMergeSort(conf, dependency)) {
// If there are fewer than spark.shuffle.sort.bypassMergeThreshold partitions and we don't
// need map-side aggregation, then write numPartitions files directly and just concatenate
// them at the end. This avoids doing serialization and deserialization twice to merge
diff --git a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
index 00e0cf257c..7479de5514 100644
--- a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
+++ b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
@@ -279,7 +279,7 @@ private[spark] object AccumulatorContext {
/**
- * An [[AccumulatorV2 accumulator]] for computing sum, count, and averages for 64-bit integers.
+ * An [[AccumulatorV2 accumulator]] for computing sum, count, and average of 64-bit integers.
*
* @since 2.0.0
*/
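The scaladoc touched above documents the long accumulator exposed through `SparkContext`. A minimal sketch of its sum/count/average behaviour (the session setup and accumulator name are illustrative assumptions):

```scala
import org.apache.spark.sql.SparkSession

// Assumed local session, purely for illustration.
val spark = SparkSession.builder()
  .appName("long-accumulator-sketch")
  .master("local[*]")
  .getOrCreate()
val sc = spark.sparkContext

// A long accumulator tracks the sum, count, and average of the values added to it.
val acc = sc.longAccumulator("valuesSeen")
sc.parallelize(1L to 100L).foreach(v => acc.add(v))

println(s"sum=${acc.sum} count=${acc.count} avg=${acc.avg}")
spark.stop()
```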