[SPARK-15391] [SQL] manage the temporary memory of timsort

## What changes were proposed in this pull request? Currently, the memory for temporary buffer used by TimSort is always allocated as on-heap without bookkeeping, it could cause OOM both in on-heap and off-heap mode. This PR will try to manage that by preallocate it together with the pointer array, same with RadixSort. It both works for on-heap and off-heap mode. This PR also change the loadFactor of BytesToBytesMap to 0.5 (it was 0.70), it enables use to radix sort also makes sure that we have enough memory for timsort. ## How was this patch tested? Existing tests. Author: Davies Liu <davies@databricks.com> Closes #13318 from davies/fix_timsort.
author: Davies Liu <davies@databricks.com> 2016-06-03 16:45:09 -0700
committer: Davies Liu <davies.liu@gmail.com> 2016-06-03 16:45:09 -0700
commit: 3074f575a3c84108fddab3f5f56eb1929a4b2cff (patch)
tree: 381fdb85dd2b52bc79b0e18c27eded1bd66d622d /core/src/test/java
parent: 67cc89ff028324ba4a7a7d9c19a268b9afea0031 (diff)
download: spark-3074f575a3c84108fddab3f5f56eb1929a4b2cff.tar.gz
spark-3074f575a3c84108fddab3f5f56eb1929a4b2cff.tar.bz2
spark-3074f575a3c84108fddab3f5f56eb1929a4b2cff.zip
2 files changed, 22 insertions, 15 deletions
diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java
index f9dc20d8b7..7dd61f85ab 100644
--- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java
+++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java
@@ -21,12 +21,15 @@ import java.io.*;
 import java.nio.ByteBuffer;
 import java.util.*;
 
-import scala.*;
+import scala.Option;
+import scala.Product2;
+import scala.Tuple2;
+import scala.Tuple2$;
 import scala.collection.Iterator;
 import scala.runtime.AbstractFunction1;
 
-import com.google.common.collect.Iterators;
 import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Iterators;
 import com.google.common.io.ByteStreams;
 import org.junit.After;
 import org.junit.Before;
@@ -35,29 +38,33 @@ import org.mockito.Mock;
 import org.mockito.MockitoAnnotations;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
-import static org.hamcrest.MatcherAssert.assertThat;
-import static org.hamcrest.Matchers.greaterThan;
-import static org.hamcrest.Matchers.lessThan;
-import static org.junit.Assert.*;
-import static org.mockito.Answers.RETURNS_SMART_NULLS;
-import static org.mockito.Mockito.*;
 
-import org.apache.spark.*;
+import org.apache.spark.HashPartitioner;
+import org.apache.spark.ShuffleDependency;
+import org.apache.spark.SparkConf;
+import org.apache.spark.TaskContext;
+import org.apache.spark.executor.ShuffleWriteMetrics;
+import org.apache.spark.executor.TaskMetrics;
 import org.apache.spark.io.CompressionCodec$;
 import org.apache.spark.io.LZ4CompressionCodec;
 import org.apache.spark.io.LZFCompressionCodec;
 import org.apache.spark.io.SnappyCompressionCodec;
-import org.apache.spark.executor.ShuffleWriteMetrics;
-import org.apache.spark.executor.TaskMetrics;
+import org.apache.spark.memory.TaskMemoryManager;
+import org.apache.spark.memory.TestMemoryManager;
 import org.apache.spark.network.util.LimitedInputStream;
-import org.apache.spark.serializer.*;
 import org.apache.spark.scheduler.MapStatus;
+import org.apache.spark.serializer.*;
 import org.apache.spark.shuffle.IndexShuffleBlockResolver;
 import org.apache.spark.storage.*;
-import org.apache.spark.memory.TestMemoryManager;
-import org.apache.spark.memory.TaskMemoryManager;
 import org.apache.spark.util.Utils;
 
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.lessThan;
+import static org.junit.Assert.*;
+import static org.mockito.Answers.RETURNS_SMART_NULLS;
+import static org.mockito.Mockito.*;
+
 public class UnsafeShuffleWriterSuite {
 
   static final int NUM_PARTITITONS = 4;
diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
index 84b82f5a47..fc127f07c8 100644
--- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
+++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
@@ -589,7 +589,7 @@ public abstract class AbstractBytesToBytesMapSuite {
   @Test
   public void multipleValuesForSameKey() {
     BytesToBytesMap map =
-      new BytesToBytesMap(taskMemoryManager, blockManager, serializerManager, 1, 0.75, 1024, false);
+      new BytesToBytesMap(taskMemoryManager, blockManager, serializerManager, 1, 0.5, 1024, false);
     try {
       int i;
       for (i = 0; i < 1024; i++) {
author	Davies Liu <davies@databricks.com>	2016-06-03 16:45:09 -0700
committer	Davies Liu <davies.liu@gmail.com>	2016-06-03 16:45:09 -0700
commit	3074f575a3c84108fddab3f5f56eb1929a4b2cff (patch)
tree	381fdb85dd2b52bc79b0e18c27eded1bd66d622d /core/src/test/java
parent	67cc89ff028324ba4a7a7d9c19a268b9afea0031 (diff)
download	spark-3074f575a3c84108fddab3f5f56eb1929a4b2cff.tar.gz spark-3074f575a3c84108fddab3f5f56eb1929a4b2cff.tar.bz2 spark-3074f575a3c84108fddab3f5f56eb1929a4b2cff.zip