aboutsummaryrefslogtreecommitdiff
path: root/R/pkg
diff options
context:
space:
mode:
authorShivaram Venkataraman <shivaram@cs.berkeley.edu>2015-05-23 00:04:01 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2015-05-23 00:04:01 -0700
commita40bca0111de45763c3ef4270afb2185c16b8f95 (patch)
treefd7d6b8d70c25cf2b82b7b8731ff43ac3e67fe7d /R/pkg
parent7af3818c6b2bf35bfa531ab7cc3a4a714385015e (diff)
downloadspark-a40bca0111de45763c3ef4270afb2185c16b8f95.tar.gz
spark-a40bca0111de45763c3ef4270afb2185c16b8f95.tar.bz2
spark-a40bca0111de45763c3ef4270afb2185c16b8f95.zip
[SPARK-6811] Copy SparkR lib in make-distribution.sh
This change also removes native libraries from SparkR to make sure our distribution works across platforms. Tested by building on Mac and running on Amazon Linux (CentOS) and a Windows VM, and vice-versa (built on Linux, run on Mac). I will also test this with YARN soon and update this PR. Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Closes #6373 from shivaram/sparkr-binary and squashes the following commits: ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR. Also include the built SparkR package in make-distribution.sh
Diffstat (limited to 'R/pkg')
-rw-r--r--R/pkg/NAMESPACE5
-rw-r--r--R/pkg/R/utils.R38
-rw-r--r--R/pkg/src-native/Makefile (renamed from R/pkg/src/Makefile)0
-rw-r--r--R/pkg/src-native/Makefile.win (renamed from R/pkg/src/Makefile.win)0
-rw-r--r--R/pkg/src-native/string_hash_code.c (renamed from R/pkg/src/string_hash_code.c)0
5 files changed, 41 insertions, 2 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 64ffdcffc9..411126a377 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -1,6 +1,9 @@
# Imports from base R
importFrom(methods, setGeneric, setMethod, setOldClass)
-useDynLib(SparkR, stringHashCode)
+
+# Disable native libraries till we figure out how to package it
+# See SPARK-7839
+#useDynLib(SparkR, stringHashCode)
# S3 methods exported
export("sparkR.init")
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 0e7b7bd5a5..69b2700191 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -122,13 +122,49 @@ hashCode <- function(key) {
intBits <- packBits(rawToBits(rawVec), "integer")
as.integer(bitwXor(intBits[2], intBits[1]))
} else if (class(key) == "character") {
- .Call("stringHashCode", key)
+ # TODO: SPARK-7839 means we might not have the native library available
+ if (is.loaded("stringHashCode")) {
+ .Call("stringHashCode", key)
+ } else {
+ n <- nchar(key)
+ if (n == 0) {
+ 0L
+ } else {
+ asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) })
+ hashC <- 0
+ for (k in 1:length(asciiVals)) {
+ hashC <- mult31AndAdd(hashC, asciiVals[k])
+ }
+ as.integer(hashC)
+ }
+ }
} else {
warning(paste("Could not hash object, returning 0", sep = ""))
as.integer(0)
}
}
+# Fold a 'numeric' `value` back toward the 32-bit integer range by shifting
+# it by 2^32 (written as 2 * .Machine$integer.max + 2) at most once.
+# Values above .Machine$integer.max are lowered, values below
+# -.Machine$integer.max are raised, and anything else is returned unchanged.
+# Used to emulate C-like integer overflow with doubles.
+wrapInt <- function(value) {
+  intMax <- .Machine$integer.max
+  if (value > intMax) {
+    return(value - 2 * intMax - 2)
+  }
+  if (value < -1 * intMax) {
+    return(2 * intMax + value + 2)
+  }
+  value
+}
+
+# Compute `val * 31 + addVal` with C-like integer wrap-around. The product
+# is built as the sum of `val` shifted left by 4, 3, 2, 1 and 0 bits
+# (16 + 8 + 4 + 2 + 1 = 31), and every partial sum is passed through
+# wrapInt() so that overflow is folded back as soon as it occurs.
+mult31AndAdd <- function(val, addVal) {
+  shiftAmounts <- c(4, 3, 2, 1, 0)
+  terms <- c(bitwShiftL(val, shiftAmounts), addVal)
+  addWrapped <- function(acc, term) {
+    wrapInt(as.numeric(acc) + as.numeric(term))
+  }
+  Reduce(addWrapped, terms)
+}
+
# Create a new RDD with serializedMode == "byte".
# Return itself if already in "byte" format.
serializeToBytes <- function(rdd) {
diff --git a/R/pkg/src/Makefile b/R/pkg/src-native/Makefile
index a55a56fe80..a55a56fe80 100644
--- a/R/pkg/src/Makefile
+++ b/R/pkg/src-native/Makefile
diff --git a/R/pkg/src/Makefile.win b/R/pkg/src-native/Makefile.win
index aa486d8228..aa486d8228 100644
--- a/R/pkg/src/Makefile.win
+++ b/R/pkg/src-native/Makefile.win
diff --git a/R/pkg/src/string_hash_code.c b/R/pkg/src-native/string_hash_code.c
index e3274b9a0c..e3274b9a0c 100644
--- a/R/pkg/src/string_hash_code.c
+++ b/R/pkg/src-native/string_hash_code.c