aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--R/pkg/NAMESPACE5
-rw-r--r--R/pkg/R/utils.R38
-rw-r--r--R/pkg/src-native/Makefile (renamed from R/pkg/src/Makefile)0
-rw-r--r--R/pkg/src-native/Makefile.win (renamed from R/pkg/src/Makefile.win)0
-rw-r--r--R/pkg/src-native/string_hash_code.c (renamed from R/pkg/src/string_hash_code.c)0
-rwxr-xr-xmake-distribution.sh2
6 files changed, 43 insertions, 2 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 64ffdcffc9..411126a377 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -1,6 +1,9 @@
# Imports from base R
importFrom(methods, setGeneric, setMethod, setOldClass)
-useDynLib(SparkR, stringHashCode)
+
+# Disable native libraries until we figure out how to package them
+# See SPARK-7839
+#useDynLib(SparkR, stringHashCode)
# S3 methods exported
export("sparkR.init")
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 0e7b7bd5a5..69b2700191 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -122,13 +122,49 @@ hashCode <- function(key) {
intBits <- packBits(rawToBits(rawVec), "integer")
as.integer(bitwXor(intBits[2], intBits[1]))
} else if (class(key) == "character") {
- .Call("stringHashCode", key)
+ # TODO: SPARK-7839 means we might not have the native library available
+ if (is.loaded("stringHashCode")) {
+ .Call("stringHashCode", key)
+ } else {
+ n <- nchar(key)
+ if (n == 0) {
+ 0L
+ } else {
+ asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) })
+ hashC <- 0
+ for (k in 1:length(asciiVals)) {
+ hashC <- mult31AndAdd(hashC, asciiVals[k])
+ }
+ as.integer(hashC)
+ }
+ }
} else {
warning(paste("Could not hash object, returning 0", sep = ""))
as.integer(0)
}
}
+# Wraps a scalar 'numeric' value toward the integer range to mimic C-like integer overflow.
+# Applies at most one shift of (2 * .Machine$integer.max + 2); other values are returned as-is.
+wrapInt <- function(value) {
+ if (value > .Machine$integer.max) {
+ value <- value - 2 * .Machine$integer.max - 2
+ } else if (value < -1 * .Machine$integer.max) {
+ value <- 2 * .Machine$integer.max + value + 2
+ }
+ value
+}
+
+# Computes 31 * `val` + `addVal`: `val` shifted left by 4, 3, 2, 1 and 0 bits gives 16+8+4+2+1 = 31
+# copies; Reduce() sums them and `addVal` left to right, calling wrapInt() after every addition.
+mult31AndAdd <- function(val, addVal) {
+ vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal)
+ Reduce(function(a, b) {
+ wrapInt(as.numeric(a) + as.numeric(b))
+ },
+ vec)
+}
+
# Create a new RDD with serializedMode == "byte".
# Return itself if already in "byte" format.
serializeToBytes <- function(rdd) {
diff --git a/R/pkg/src/Makefile b/R/pkg/src-native/Makefile
index a55a56fe80..a55a56fe80 100644
--- a/R/pkg/src/Makefile
+++ b/R/pkg/src-native/Makefile
diff --git a/R/pkg/src/Makefile.win b/R/pkg/src-native/Makefile.win
index aa486d8228..aa486d8228 100644
--- a/R/pkg/src/Makefile.win
+++ b/R/pkg/src-native/Makefile.win
diff --git a/R/pkg/src/string_hash_code.c b/R/pkg/src-native/string_hash_code.c
index e3274b9a0c..e3274b9a0c 100644
--- a/R/pkg/src/string_hash_code.c
+++ b/R/pkg/src-native/string_hash_code.c
diff --git a/make-distribution.sh b/make-distribution.sh
index 8d6e91d675..78827341b9 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -229,6 +229,8 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf
cp "$SPARK_HOME/README.md" "$DISTDIR"
cp -r "$SPARK_HOME/bin" "$DISTDIR"
cp -r "$SPARK_HOME/python" "$DISTDIR"
+mkdir -p "$DISTDIR"/R/lib
+cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib
cp -r "$SPARK_HOME/sbin" "$DISTDIR"
cp -r "$SPARK_HOME/ec2" "$DISTDIR"