author    Ye Xianjin <advancedxy@gmail.com>    2015-05-02 23:08:09 +0100
committer    Sean Owen <sowen@cloudera.com>    2015-05-02 23:08:09 +0100
commit    bfcd528d6f5a5ebe61e0fcca890143e9a3c7f7f9 (patch)
tree    f2e273d87bfab66702f02e0ea32b794fd743254c /core/src/test
parent    da303526e54e9a0adfedb49417f383cde7870a69 (diff)
download    spark-bfcd528d6f5a5ebe61e0fcca890143e9a3c7f7f9.tar.gz
    spark-bfcd528d6f5a5ebe61e0fcca890143e9a3c7f7f9.tar.bz2
    spark-bfcd528d6f5a5ebe61e0fcca890143e9a3c7f7f9.zip
[SPARK-6030] [CORE] Using simulated field layout method to compute class shellSize
SizeEstimator gives a wrong result for Integer on a 64-bit JVM with UseCompressedOops on; this PR fixes that. For more details, please refer to [SPARK-6030](https://issues.apache.org/jira/browse/SPARK-6030). sryza, I noticed there is a PR to expose SizeEstimator; maybe that one should wait until this PR gets merged, if we confirm this problem. And shivaram, would you mind reviewing this PR, since you contributed the related code? Also cc srowen and mateiz.

Author: Ye Xianjin <advancedxy@gmail.com>

Closes #4783 from advancedxy/SPARK-6030 and squashes the following commits:

c4dcb41 [Ye Xianjin] Add super.beforeEach in the beforeEach method to make the trait stackable. Remove useless leading whitespace.
3f80640 [Ye Xianjin] The size of the Integer class changes from 24 to 16 on a 64-bit JVM with the -UseCompressedOops flag on after the fix. I don't know how 100000 was originally calculated; it looks like 100000 is the magic number that makes sure spilling happens. Because of the size change, the test fails since there is no spilling at all. Changing the number to a slightly larger one fixes that.
e849d2d [Ye Xianjin] Merge two shellSize assignments into one. Add some explanation to the alignSizeUp method.
85a0b51 [Ye Xianjin] Fix typos and update wording in comments. Use alignSizeUp to compute alignSize.
d27eb77 [Ye Xianjin] Add some detailed comments in the code. Add some test cases. It's very difficult to design test cases, as the final object alignment will hide a lot of field layout details if we just consider the whole size.
842aed1 [Ye Xianjin] primitiveSize(cls) can just return Int. Use a simplified class field layout method to calculate class instance size. Will add more documentation and test cases. Add a new alignSizeUp function which uses bitwise operators for speed.
62e8ab4 [Ye Xianjin] Don't alignSize for objects' shellSize; alignSize when added to state.size. Add some primitive wrapper object size tests.
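For context on the approach: the alignSizeUp function introduced in commit 842aed1 rounds a running size up to the next multiple of a power-of-two alignment using bitwise operators instead of division. A minimal Scala sketch of that idea (the signature and the example alignment value are assumptions, not code copied from the patch):

    // Round `size` up to the next multiple of `alignSize`, which must be a
    // power of two (e.g. 8 bytes for object alignment on a 64-bit JVM).
    // Adding (alignSize - 1) then masking off the low bits avoids a division.
    def alignSizeUp(size: Long, alignSize: Int): Long = {
      val mask = alignSize - 1          // e.g. 8 -> binary 0111
      (size + mask) & ~mask.toLong      // carry into the next multiple, drop the remainder
    }

    // alignSizeUp(13, 8) == 16; alignSizeUp(16, 8) == 16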
Diffstat (limited to 'core/src/test')
-rw-r--r-- core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala            | 47
-rw-r--r-- core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala | 10
2 files changed, 49 insertions(+), 8 deletions(-)
diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala
index 28915bd533..133a76f28e 100644
--- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala
@@ -36,6 +36,15 @@ class DummyClass4(val d: DummyClass3) {
val x: Int = 0
}
+// dummy class to show class field blocks alignment.
+class DummyClass5 extends DummyClass1 {
+ val x: Boolean = true
+}
+
+class DummyClass6 extends DummyClass5 {
+ val y: Boolean = true
+}
+
object DummyString {
def apply(str: String) : DummyString = new DummyString(str.toArray)
}
@@ -50,6 +59,7 @@ class SizeEstimatorSuite
override def beforeEach() {
// Set the arch to 64-bit and compressedOops to true to get a deterministic test-case
+ super.beforeEach()
System.setProperty("os.arch", "amd64")
System.setProperty("spark.test.useCompressedOops", "true")
}
@@ -62,6 +72,22 @@ class SizeEstimatorSuite
assertResult(48)(SizeEstimator.estimate(new DummyClass4(new DummyClass3)))
}
+ test("primitive wrapper objects") {
+ assertResult(16)(SizeEstimator.estimate(new java.lang.Boolean(true)))
+ assertResult(16)(SizeEstimator.estimate(new java.lang.Byte("1")))
+ assertResult(16)(SizeEstimator.estimate(new java.lang.Character('1')))
+ assertResult(16)(SizeEstimator.estimate(new java.lang.Short("1")))
+ assertResult(16)(SizeEstimator.estimate(new java.lang.Integer(1)))
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Long(1)))
+ assertResult(16)(SizeEstimator.estimate(new java.lang.Float(1.0)))
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Double(1.0d)))
+ }
+
+ test("class field blocks rounding") {
+ assertResult(16)(SizeEstimator.estimate(new DummyClass5))
+ assertResult(24)(SizeEstimator.estimate(new DummyClass6))
+ }
+
// NOTE: The String class definition varies across JDK versions (1.6 vs. 1.7) and vendors
// (Sun vs IBM). Use a DummyString class to make tests deterministic.
test("strings") {
@@ -102,18 +128,18 @@ class SizeEstimatorSuite
val arr = new Array[Char](100000)
assertResult(200016)(SizeEstimator.estimate(arr))
assertResult(480032)(SizeEstimator.estimate(Array.fill(10000)(new DummyString(arr))))
-
+
val buf = new ArrayBuffer[DummyString]()
for (i <- 0 until 5000) {
buf.append(new DummyString(new Array[Char](10)))
}
assertResult(340016)(SizeEstimator.estimate(buf.toArray))
-
+
for (i <- 0 until 5000) {
buf.append(new DummyString(arr))
}
assertResult(683912)(SizeEstimator.estimate(buf.toArray))
-
+
// If an array contains the *same* element many times, we should only count it once.
val d1 = new DummyClass1
// 10 pointers plus 8-byte object
@@ -155,5 +181,20 @@ class SizeEstimatorSuite
assertResult(64)(SizeEstimator.estimate(DummyString("a")))
assertResult(64)(SizeEstimator.estimate(DummyString("ab")))
assertResult(72)(SizeEstimator.estimate(DummyString("abcdefgh")))
+
+ // primitive wrapper classes
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Boolean(true)))
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Byte("1")))
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Character('1')))
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Short("1")))
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Integer(1)))
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Long(1)))
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Float(1.0)))
+ assertResult(24)(SizeEstimator.estimate(new java.lang.Double(1.0d)))
+ }
+
+ test("class field blocks rounding on 64-bit VM without useCompressedOops") {
+ assertResult(24)(SizeEstimator.estimate(new DummyClass5))
+ assertResult(32)(SizeEstimator.estimate(new DummyClass6))
}
}
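The new DummyClass5/DummyClass6 assertions above are consistent with a simulated layout that gives each class in the hierarchy its own aligned field block: a 12-byte object header under compressed oops (16 bytes without), one byte per Boolean field, and each class's shell rounded up to the 8-byte boundary before subclass fields are placed. A hypothetical walk-through of that arithmetic (the header constants and the helper are assumptions based on typical 64-bit HotSpot defaults, not code from the patch):

    // Mirrors the arithmetic behind the "class field blocks rounding" tests.
    def alignUp(n: Long, a: Int): Long = (n + a - 1) & ~(a - 1).toLong

    val header = 12L                      // assumed: 64-bit HotSpot with compressed oops
    val dummy5 = alignUp(header + 1, 8)   // DummyClass5 adds one Boolean: 13 -> 16
    val dummy6 = alignUp(dummy5 + 1, 8)   // DummyClass6 adds one Boolean: 17 -> 24
    // Without compressed oops the header is 16 bytes, so the same walk gives
    // 17 -> 24 and 25 -> 32, matching the second rounding test above.

Real JVMs may pack fields differently across versions and vendors, which is why beforeEach pins os.arch and spark.test.useCompressedOops to keep these tests deterministic.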
diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala
index 20fd22b78e..7a98723bc6 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala
@@ -377,7 +377,7 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext with PrivateMe
val sorter = new ExternalSorter[Int, Int, Int](
None, Some(new HashPartitioner(3)), Some(ord), None)
assertDidNotBypassMergeSort(sorter)
- sorter.insertAll((0 until 100000).iterator.map(i => (i, i)))
+ sorter.insertAll((0 until 120000).iterator.map(i => (i, i)))
assert(diskBlockManager.getAllFiles().length > 0)
sorter.stop()
assert(diskBlockManager.getAllBlocks().length === 0)
@@ -385,9 +385,9 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext with PrivateMe
val sorter2 = new ExternalSorter[Int, Int, Int](
None, Some(new HashPartitioner(3)), Some(ord), None)
assertDidNotBypassMergeSort(sorter2)
- sorter2.insertAll((0 until 100000).iterator.map(i => (i, i)))
+ sorter2.insertAll((0 until 120000).iterator.map(i => (i, i)))
assert(diskBlockManager.getAllFiles().length > 0)
- assert(sorter2.iterator.toSet === (0 until 100000).map(i => (i, i)).toSet)
+ assert(sorter2.iterator.toSet === (0 until 120000).map(i => (i, i)).toSet)
sorter2.stop()
assert(diskBlockManager.getAllBlocks().length === 0)
}
@@ -428,8 +428,8 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext with PrivateMe
None, Some(new HashPartitioner(3)), Some(ord), None)
assertDidNotBypassMergeSort(sorter)
intercept[SparkException] {
- sorter.insertAll((0 until 100000).iterator.map(i => {
- if (i == 99990) {
+ sorter.insertAll((0 until 120000).iterator.map(i => {
+ if (i == 119990) {
throw new SparkException("Intentional failure")
}
(i, i)