aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/tests.py
diff options
context:
space:
mode:
author: Josh Rosen <joshrosen@databricks.com> 2015-09-19 21:40:21 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-09-19 21:40:21 -0700
commit: 2117eea71ece825fbc3797c8b38184ae221f5223 (patch)
tree: 06481ef1968367118e89779335e24245f57f2017 /python/pyspark/tests.py
parent: e789000b88a6bd840f821c53f42c08b97dc02496 (diff)
download: spark-2117eea71ece825fbc3797c8b38184ae221f5223.tar.gz
spark-2117eea71ece825fbc3797c8b38184ae221f5223.tar.bz2
spark-2117eea71ece825fbc3797c8b38184ae221f5223.zip
[SPARK-10710] Remove ability to disable spilling in core and SQL
It does not make much sense to set `spark.shuffle.spill` or `spark.sql.planner.externalSort` to false: I believe that these configurations were initially added as "escape hatches" to guard against bugs in the external operators, but these operators are now mature and well-tested. In addition, these configurations are not handled in a consistent way anymore: SQL's Tungsten codepath ignores these configurations and will continue to use spilling operators. Similarly, Spark Core's `tungsten-sort` shuffle manager does not respect `spark.shuffle.spill=false`.

This pull request removes these configurations, adds warnings at the appropriate places, and deletes a large amount of code which was only used in code paths that did not support spilling.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #8831 from JoshRosen/remove-ability-to-disable-spilling.
Diffstat (limited to 'python/pyspark/tests.py')
-rw-r--r-- python/pyspark/tests.py 13
1 file changed, 1 insertion, 12 deletions
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 647504c32f..f11aaf001c 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -62,7 +62,7 @@ from pyspark.serializers import read_int, BatchedSerializer, MarshalSerializer,
CloudPickleSerializer, CompressedSerializer, UTF8Deserializer, NoOpSerializer, \
PairDeserializer, CartesianDeserializer, AutoBatchedSerializer, AutoSerializer, \
FlattenedValuesSerializer
-from pyspark.shuffle import Aggregator, InMemoryMerger, ExternalMerger, ExternalSorter
+from pyspark.shuffle import Aggregator, ExternalMerger, ExternalSorter
from pyspark import shuffle
from pyspark.profiler import BasicProfiler
@@ -95,17 +95,6 @@ class MergerTests(unittest.TestCase):
lambda x, y: x.append(y) or x,
lambda x, y: x.extend(y) or x)
- def test_in_memory(self):
- m = InMemoryMerger(self.agg)
- m.mergeValues(self.data)
- self.assertEqual(sum(sum(v) for k, v in m.items()),
- sum(xrange(self.N)))
-
- m = InMemoryMerger(self.agg)
- m.mergeCombiners(map(lambda x_y: (x_y[0], [x_y[1]]), self.data))
- self.assertEqual(sum(sum(v) for k, v in m.items()),
- sum(xrange(self.N)))
-
def test_small_dataset(self):
m = ExternalMerger(self.agg, 1000)
m.mergeValues(self.data)