diff options
author | Davies Liu <davies@databricks.com> | 2016-04-09 17:44:38 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-04-09 17:44:38 -0700 |
commit | 5cb5edaf9c5054e42d41f20b2dd92dafcccbf0d6 (patch) | |
tree | 0f499cb99a3942aced92affe65f50cf5d48b977c /core/src/main/java/org | |
parent | dfce9665c4b2b29a19e6302216dae2800da68ff9 (diff) | |
download | spark-5cb5edaf9c5054e42d41f20b2dd92dafcccbf0d6.tar.gz spark-5cb5edaf9c5054e42d41f20b2dd92dafcccbf0d6.tar.bz2 spark-5cb5edaf9c5054e42d41f20b2dd92dafcccbf0d6.zip |
[SPARK-14419] [SQL] Improve HashedRelation for key fit within Long
## What changes were proposed in this pull request?
Currently, we use a Java HashMap for HashedRelation if the key fits within a Long. The Java HashMap and CompactBuffer are not memory efficient, and the memory used by them is also not accounted for accurately.
This PR introduces a LongToUnsafeRowMap (similar to BytesToBytesMap) for better memory efficiency and performance.
This PR reopens #12190 with bug fixes.
## How was this patch tested?
Existing tests.
Author: Davies Liu <davies@databricks.com>
Closes #12278 from davies/long_map3.
Diffstat (limited to 'core/src/main/java/org')
-rw-r--r-- | core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java | 14 |
1 files changed, 5 insertions, 9 deletions
diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index 32958be7a7..6807710f9f 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -716,7 +716,8 @@ public final class BytesToBytesMap extends MemoryConsumer { offset += klen; Platform.copyMemory(vbase, voff, base, offset, vlen); offset += vlen; - Platform.putLong(base, offset, 0); + // put this value at the beginning of the list + Platform.putLong(base, offset, isDefined ? longArray.get(pos * 2) : 0); // --- Update bookkeeping data structures ---------------------------------------------------- offset = currentPage.getBaseOffset(); @@ -724,17 +725,12 @@ public final class BytesToBytesMap extends MemoryConsumer { pageCursor += recordLength; final long storedKeyAddress = taskMemoryManager.encodePageNumberAndOffset( currentPage, recordOffset); + longArray.set(pos * 2, storedKeyAddress); + updateAddressesAndSizes(storedKeyAddress); numValues++; - if (isDefined) { - // put this pair at the end of chain - while (nextValue()) { /* do nothing */ } - Platform.putLong(baseObject, valueOffset + valueLength, storedKeyAddress); - nextValue(); // point to new added value - } else { + if (!isDefined) { numKeys++; - longArray.set(pos * 2, storedKeyAddress); longArray.set(pos * 2 + 1, keyHashcode); - updateAddressesAndSizes(storedKeyAddress); isDefined = true; if (numKeys > growthThreshold && longArray.size() < MAX_CAPACITY) { |