From a40bca0111de45763c3ef4270afb2185c16b8f95 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Sat, 23 May 2015 00:04:01 -0700 Subject: [SPARK-6811] Copy SparkR lib in make-distribution.sh This change also remove native libraries from SparkR to make sure our distribution works across platforms Tested by building on Mac, running on Amazon Linux (CentOS), Windows VM and vice-versa (built on Linux run on Mac) I will also test this with YARN soon and update this PR. Author: Shivaram Venkataraman Closes #6373 from shivaram/sparkr-binary and squashes the following commits: ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR Also include the built SparkR package in make-distribution.sh --- R/pkg/NAMESPACE | 5 +++- R/pkg/R/utils.R | 38 +++++++++++++++++++++++++++- R/pkg/src-native/Makefile | 27 ++++++++++++++++++++ R/pkg/src-native/Makefile.win | 27 ++++++++++++++++++++ R/pkg/src-native/string_hash_code.c | 49 +++++++++++++++++++++++++++++++++++++ R/pkg/src/Makefile | 27 -------------------- R/pkg/src/Makefile.win | 27 -------------------- R/pkg/src/string_hash_code.c | 49 ------------------------------------- make-distribution.sh | 2 ++ 9 files changed, 146 insertions(+), 105 deletions(-) create mode 100644 R/pkg/src-native/Makefile create mode 100644 R/pkg/src-native/Makefile.win create mode 100644 R/pkg/src-native/string_hash_code.c delete mode 100644 R/pkg/src/Makefile delete mode 100644 R/pkg/src/Makefile.win delete mode 100644 R/pkg/src/string_hash_code.c diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 64ffdcffc9..411126a377 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,6 +1,9 @@ # Imports from base R importFrom(methods, setGeneric, setMethod, setOldClass) -useDynLib(SparkR, stringHashCode) + +# Disable native libraries till we figure out how to package it +# See SPARKR-7839 +#useDynLib(SparkR, stringHashCode) # S3 methods exported export("sparkR.init") diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 0e7b7bd5a5..69b2700191 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -122,13 +122,49 @@ hashCode <- function(key) { intBits <- packBits(rawToBits(rawVec), "integer") as.integer(bitwXor(intBits[2], intBits[1])) } else if (class(key) == "character") { - .Call("stringHashCode", key) + # TODO: SPARK-7839 means we might not have the native library available + if (is.loaded("stringHashCode")) { + .Call("stringHashCode", key) + } else { + n <- nchar(key) + if (n == 0) { + 0L + } else { + asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) + hashC <- 0 + for (k in 1:length(asciiVals)) { + hashC <- mult31AndAdd(hashC, asciiVals[k]) + } + as.integer(hashC) + } + } } else { warning(paste("Could not hash object, returning 0", sep = "")) as.integer(0) } } +# Helper function used to wrap a 'numeric' value to integer bounds. +# Useful for implementing C-like integer arithmetic +wrapInt <- function(value) { + if (value > .Machine$integer.max) { + value <- value - 2 * .Machine$integer.max - 2 + } else if (value < -1 * .Machine$integer.max) { + value <- 2 * .Machine$integer.max + value + 2 + } + value +} + +# Multiply `val` by 31 and add `addVal` to the result. Ensures that +# integer-overflows are handled at every step. +mult31AndAdd <- function(val, addVal) { + vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal) + Reduce(function(a, b) { + wrapInt(as.numeric(a) + as.numeric(b)) + }, + vec) +} + # Create a new RDD with serializedMode == "byte". # Return itself if already in "byte" format. serializeToBytes <- function(rdd) { diff --git a/R/pkg/src-native/Makefile b/R/pkg/src-native/Makefile new file mode 100644 index 0000000000..a55a56fe80 --- /dev/null +++ b/R/pkg/src-native/Makefile @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +all: sharelib + +sharelib: string_hash_code.c + R CMD SHLIB -o SparkR.so string_hash_code.c + +clean: + rm -f *.o + rm -f *.so + +.PHONY: all clean diff --git a/R/pkg/src-native/Makefile.win b/R/pkg/src-native/Makefile.win new file mode 100644 index 0000000000..aa486d8228 --- /dev/null +++ b/R/pkg/src-native/Makefile.win @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +all: sharelib + +sharelib: string_hash_code.c + R CMD SHLIB -o SparkR.dll string_hash_code.c + +clean: + rm -f *.o + rm -f *.dll + +.PHONY: all clean diff --git a/R/pkg/src-native/string_hash_code.c b/R/pkg/src-native/string_hash_code.c new file mode 100644 index 0000000000..e3274b9a0c --- /dev/null +++ b/R/pkg/src-native/string_hash_code.c @@ -0,0 +1,49 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + * A C function for R extension which implements the Java String hash algorithm. + * Refer to http://en.wikipedia.org/wiki/Java_hashCode%28%29#The_java.lang.String_hash_function + * + */ + +#include +#include + +/* for compatibility with R before 3.1 */ +#ifndef IS_SCALAR +#define IS_SCALAR(x, type) (TYPEOF(x) == (type) && XLENGTH(x) == 1) +#endif + +SEXP stringHashCode(SEXP string) { + const char* str; + R_xlen_t len, i; + int hashCode = 0; + + if (!IS_SCALAR(string, STRSXP)) { + error("invalid input"); + } + + str = CHAR(asChar(string)); + len = XLENGTH(asChar(string)); + + for (i = 0; i < len; i++) { + hashCode = (hashCode << 5) - hashCode + *str++; + } + + return ScalarInteger(hashCode); +} diff --git a/R/pkg/src/Makefile b/R/pkg/src/Makefile deleted file mode 100644 index a55a56fe80..0000000000 --- a/R/pkg/src/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -all: sharelib - -sharelib: string_hash_code.c - R CMD SHLIB -o SparkR.so string_hash_code.c - -clean: - rm -f *.o - rm -f *.so - -.PHONY: all clean diff --git a/R/pkg/src/Makefile.win b/R/pkg/src/Makefile.win deleted file mode 100644 index aa486d8228..0000000000 --- a/R/pkg/src/Makefile.win +++ /dev/null @@ -1,27 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -all: sharelib - -sharelib: string_hash_code.c - R CMD SHLIB -o SparkR.dll string_hash_code.c - -clean: - rm -f *.o - rm -f *.dll - -.PHONY: all clean diff --git a/R/pkg/src/string_hash_code.c b/R/pkg/src/string_hash_code.c deleted file mode 100644 index e3274b9a0c..0000000000 --- a/R/pkg/src/string_hash_code.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -/* - * A C function for R extension which implements the Java String hash algorithm. - * Refer to http://en.wikipedia.org/wiki/Java_hashCode%28%29#The_java.lang.String_hash_function - * - */ - -#include -#include - -/* for compatibility with R before 3.1 */ -#ifndef IS_SCALAR -#define IS_SCALAR(x, type) (TYPEOF(x) == (type) && XLENGTH(x) == 1) -#endif - -SEXP stringHashCode(SEXP string) { - const char* str; - R_xlen_t len, i; - int hashCode = 0; - - if (!IS_SCALAR(string, STRSXP)) { - error("invalid input"); - } - - str = CHAR(asChar(string)); - len = XLENGTH(asChar(string)); - - for (i = 0; i < len; i++) { - hashCode = (hashCode << 5) - hashCode + *str++; - } - - return ScalarInteger(hashCode); -} diff --git a/make-distribution.sh b/make-distribution.sh index 8d6e91d675..78827341b9 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -229,6 +229,8 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" +mkdir -p "$DISTDIR"/R/lib +cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib cp -r "$SPARK_HOME/sbin" "$DISTDIR" cp -r "$SPARK_HOME/ec2" "$DISTDIR" -- cgit v1.2.3