diff options
author | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-05-23 00:04:01 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-05-23 00:04:32 -0700 |
commit | c8eb76ba673026f2fb2b22e8b3e8102a5940297c (patch) | |
tree | da4ee9d28a92f56f571288a265d247048f7a3888 | |
parent | c636b87dc287ce99a887bc59cad31aaf48477a56 (diff) | |
download | spark-c8eb76ba673026f2fb2b22e8b3e8102a5940297c.tar.gz spark-c8eb76ba673026f2fb2b22e8b3e8102a5940297c.tar.bz2 spark-c8eb76ba673026f2fb2b22e8b3e8102a5940297c.zip |
[SPARK-6811] Copy SparkR lib in make-distribution.sh
This change also removes native libraries from SparkR to make sure our distribution works across platforms.
Tested by building on Mac and running on Amazon Linux (CentOS) and a Windows VM, and vice versa (built on Linux, run on Mac).
I will also test this with YARN soon and update this PR.
Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
Closes #6373 from shivaram/sparkr-binary and squashes the following commits:
ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR. Also include the built SparkR package in make-distribution.sh
(cherry picked from commit a40bca0111de45763c3ef4270afb2185c16b8f95)
Signed-off-by: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
-rw-r--r-- | R/pkg/NAMESPACE | 5 | ||||
-rw-r--r-- | R/pkg/R/utils.R | 38 | ||||
-rw-r--r-- | R/pkg/src-native/Makefile (renamed from R/pkg/src/Makefile) | 0 | ||||
-rw-r--r-- | R/pkg/src-native/Makefile.win (renamed from R/pkg/src/Makefile.win) | 0 | ||||
-rw-r--r-- | R/pkg/src-native/string_hash_code.c (renamed from R/pkg/src/string_hash_code.c) | 0 | ||||
-rwxr-xr-x | make-distribution.sh | 2 |
6 files changed, 43 insertions, 2 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 64ffdcffc9..411126a377 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,6 +1,9 @@ # Imports from base R importFrom(methods, setGeneric, setMethod, setOldClass) -useDynLib(SparkR, stringHashCode) + +# Disable native libraries till we figure out how to package it +# See SPARKR-7839 +#useDynLib(SparkR, stringHashCode) # S3 methods exported export("sparkR.init") diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 0e7b7bd5a5..69b2700191 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -122,13 +122,49 @@ hashCode <- function(key) { intBits <- packBits(rawToBits(rawVec), "integer") as.integer(bitwXor(intBits[2], intBits[1])) } else if (class(key) == "character") { - .Call("stringHashCode", key) + # TODO: SPARK-7839 means we might not have the native library available + if (is.loaded("stringHashCode")) { + .Call("stringHashCode", key) + } else { + n <- nchar(key) + if (n == 0) { + 0L + } else { + asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) + hashC <- 0 + for (k in 1:length(asciiVals)) { + hashC <- mult31AndAdd(hashC, asciiVals[k]) + } + as.integer(hashC) + } + } } else { warning(paste("Could not hash object, returning 0", sep = "")) as.integer(0) } } +# Helper function used to wrap a 'numeric' value to integer bounds. +# Useful for implementing C-like integer arithmetic +wrapInt <- function(value) { + if (value > .Machine$integer.max) { + value <- value - 2 * .Machine$integer.max - 2 + } else if (value < -1 * .Machine$integer.max) { + value <- 2 * .Machine$integer.max + value + 2 + } + value +} + +# Multiply `val` by 31 and add `addVal` to the result. Ensures that +# integer-overflows are handled at every step. +mult31AndAdd <- function(val, addVal) { + vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal) + Reduce(function(a, b) { + wrapInt(as.numeric(a) + as.numeric(b)) + }, + vec) +} + # Create a new RDD with serializedMode == "byte". # Return itself if already in "byte" format. 
serializeToBytes <- function(rdd) { diff --git a/R/pkg/src/Makefile b/R/pkg/src-native/Makefile index a55a56fe80..a55a56fe80 100644 --- a/R/pkg/src/Makefile +++ b/R/pkg/src-native/Makefile diff --git a/R/pkg/src/Makefile.win b/R/pkg/src-native/Makefile.win index aa486d8228..aa486d8228 100644 --- a/R/pkg/src/Makefile.win +++ b/R/pkg/src-native/Makefile.win diff --git a/R/pkg/src/string_hash_code.c b/R/pkg/src-native/string_hash_code.c index e3274b9a0c..e3274b9a0c 100644 --- a/R/pkg/src/string_hash_code.c +++ b/R/pkg/src-native/string_hash_code.c diff --git a/make-distribution.sh b/make-distribution.sh index 8d6e91d675..78827341b9 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -229,6 +229,8 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" +mkdir -p "$DISTDIR"/R/lib +cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib cp -r "$SPARK_HOME/sbin" "$DISTDIR" cp -r "$SPARK_HOME/ec2" "$DISTDIR" |