author     Felix Cheung <felixcheung_m@hotmail.com>    2017-01-16 13:49:12 -0800
committer  Felix Cheung <felixcheung@apache.org>       2017-01-16 13:49:12 -0800
commit     c84f7d3e1b845bc1e595ce9a6e2de663c2d218f4
tree       f5988031c266dfadaa4a791b07b0737cb39ab529
parent     a115a54399cd4bedb1a5086943a88af6339fbe85
[SPARK-18828][SPARKR] Refactor scripts for R
## What changes were proposed in this pull request?

Refactored the R scripts to remove duplication and give each script a clearer purpose.

## How was this patch tested?

Manually.

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #16249 from felixcheung/rscripts.
-rwxr-xr-x  R/check-cran.sh               32
-rwxr-xr-x  R/create-docs.sh              11
-rwxr-xr-x  R/create-rd.sh                37
-rwxr-xr-x  R/find-r.sh                   34
-rwxr-xr-x  R/install-dev.sh              20
-rwxr-xr-x  R/install-source-package.sh   57
-rwxr-xr-x  dev/make-distribution.sh       7
7 files changed, 146 insertions, 52 deletions
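For orientation, the scripts touched by this diff can be chained roughly as follows. This is a hedged sketch based only on the hunks below; the NO_TESTS flag and the SparkR_<version>.tar.gz name are taken from this diff, and exact behavior may differ in other versions.

    # from a Spark checkout, inside the R/ directory
    ./install-dev.sh              # sources find-r.sh and create-rd.sh, then R CMD INSTALLs SparkR into R/lib
    NO_TESTS=1 ./check-cran.sh    # sources install-dev.sh, builds the SparkR_<version>.tar.gz source package, runs R CMD check
    ./install-source-package.sh   # installs the built source package into R/lib and jars it as sparkr.zip for YARN workers

dev/make-distribution.sh drives the same sequence when MAKE_R is set, renaming the tarball to the Spark release version between the CRAN check and the source-package install.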
diff --git a/R/check-cran.sh b/R/check-cran.sh
index 1288e7fc9f..a188b1448a 100755
--- a/R/check-cran.sh
+++ b/R/check-cran.sh
@@ -20,25 +20,14 @@
set -o pipefail
set -e
-FWDIR="$(cd `dirname $0`; pwd)"
+FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
pushd $FWDIR > /dev/null
-if [ ! -z "$R_HOME" ]
- then
- R_SCRIPT_PATH="$R_HOME/bin"
- else
- # if system wide R_HOME is not found, then exit
- if [ ! `command -v R` ]; then
- echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
- exit 1
- fi
- R_SCRIPT_PATH="$(dirname $(which R))"
-fi
-echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}"
+. $FWDIR/find-r.sh
# Install the package (this is required for code in vignettes to run when building it later)
# Build the latest docs, but not vignettes, which is built with the package next
-$FWDIR/create-docs.sh
+. $FWDIR/install-dev.sh
# Build source package with vignettes
SPARK_HOME="$(cd "${FWDIR}"/..; pwd)"
@@ -84,19 +73,4 @@ else
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
fi
-# Install source package to get it to generate vignettes rds files, etc.
-if [ -n "$CLEAN_INSTALL" ]
-then
- echo "Removing lib path and installing from source package"
- LIB_DIR="$FWDIR/lib"
- rm -rf $LIB_DIR
- mkdir -p $LIB_DIR
- "$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library=$LIB_DIR
-
- # Zip the SparkR package so that it can be distributed to worker nodes on YARN
- pushd $LIB_DIR > /dev/null
- jar cfM "$LIB_DIR/sparkr.zip" SparkR
- popd > /dev/null
-fi
-
popd > /dev/null
diff --git a/R/create-docs.sh b/R/create-docs.sh
index 84e6aa928c..6bef7e75e3 100755
--- a/R/create-docs.sh
+++ b/R/create-docs.sh
@@ -29,18 +29,19 @@ set -o pipefail
set -e
# Figure out where the script is
-export FWDIR="$(cd "`dirname "$0"`"; pwd)"
-export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+export FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
+export SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)"
# Required for setting SPARK_SCALA_VERSION
. "${SPARK_HOME}"/bin/load-spark-env.sh
echo "Using Scala $SPARK_SCALA_VERSION"
-pushd $FWDIR
+pushd $FWDIR > /dev/null
+. $FWDIR/find-r.sh
# Install the package (this will also generate the Rd files)
-./install-dev.sh
+. $FWDIR/install-dev.sh
# Now create HTML files
@@ -48,7 +49,7 @@ pushd $FWDIR
mkdir -p pkg/html
pushd pkg/html
-Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
+"$R_SCRIPT_PATH/"Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
popd
diff --git a/R/create-rd.sh b/R/create-rd.sh
new file mode 100755
index 0000000000..d17e161739
--- /dev/null
+++ b/R/create-rd.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This scripts packages the SparkR source files (R and C files) and
+# creates a package that can be loaded in R. The package is by default installed to
+# $FWDIR/lib and the package can be loaded by using the following command in R:
+#
+# library(SparkR, lib.loc="$FWDIR/lib")
+#
+# NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory
+# to load the SparkR package on the worker nodes.
+
+set -o pipefail
+set -e
+
+FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
+pushd $FWDIR > /dev/null
+. $FWDIR/find-r.sh
+
+# Generate Rd files if devtools is installed
+"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
diff --git a/R/find-r.sh b/R/find-r.sh
new file mode 100755
index 0000000000..690acc083a
--- /dev/null
+++ b/R/find-r.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "$R_SCRIPT_PATH" ]
+then
+ if [ ! -z "$R_HOME" ]
+ then
+ R_SCRIPT_PATH="$R_HOME/bin"
+ else
+ # if system wide R_HOME is not found, then exit
+ if [ ! `command -v R` ]; then
+ echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
+ exit 1
+ fi
+ R_SCRIPT_PATH="$(dirname $(which R))"
+ fi
+ echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}"
+fi
diff --git a/R/install-dev.sh b/R/install-dev.sh
index 0f881208bc..45e6411705 100755
--- a/R/install-dev.sh
+++ b/R/install-dev.sh
@@ -29,27 +29,15 @@
set -o pipefail
set -e
-FWDIR="$(cd `dirname $0`; pwd)"
+FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
LIB_DIR="$FWDIR/lib"
mkdir -p $LIB_DIR
pushd $FWDIR > /dev/null
-if [ ! -z "$R_HOME" ]
- then
- R_SCRIPT_PATH="$R_HOME/bin"
- else
- # if system wide R_HOME is not found, then exit
- if [ ! `command -v R` ]; then
- echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
- exit 1
- fi
- R_SCRIPT_PATH="$(dirname $(which R))"
-fi
-echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}"
-
-# Generate Rd files if devtools is installed
-"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
+. $FWDIR/find-r.sh
+
+. $FWDIR/create-rd.sh
# Install SparkR to $LIB_DIR
"$R_SCRIPT_PATH/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
diff --git a/R/install-source-package.sh b/R/install-source-package.sh
new file mode 100755
index 0000000000..c6e443c04e
--- /dev/null
+++ b/R/install-source-package.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This scripts packages the SparkR source files (R and C files) and
+# creates a package that can be loaded in R. The package is by default installed to
+# $FWDIR/lib and the package can be loaded by using the following command in R:
+#
+# library(SparkR, lib.loc="$FWDIR/lib")
+#
+# NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory
+# to load the SparkR package on the worker nodes.
+
+set -o pipefail
+set -e
+
+FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
+pushd $FWDIR > /dev/null
+. $FWDIR/find-r.sh
+
+if [ -z "$VERSION" ]; then
+ VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
+fi
+
+if [ ! -f "$FWDIR"/SparkR_"$VERSION".tar.gz ]; then
+ echo -e "R source package file $FWDIR/SparkR_$VERSION.tar.gz is not found."
+ echo -e "Please build R source package with check-cran.sh"
+ exit -1;
+fi
+
+echo "Removing lib path and installing from source package"
+LIB_DIR="$FWDIR/lib"
+rm -rf $LIB_DIR
+mkdir -p $LIB_DIR
+"$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library=$LIB_DIR
+
+# Zip the SparkR package so that it can be distributed to worker nodes on YARN
+pushd $LIB_DIR > /dev/null
+jar cfM "$LIB_DIR/sparkr.zip" SparkR
+popd > /dev/null
+
+popd
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index 6c5ae0d629..47ff504316 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -232,14 +232,17 @@ if [ "$MAKE_R" == "true" ]; then
R_PACKAGE_VERSION=`grep Version $SPARK_HOME/R/pkg/DESCRIPTION | awk '{print $NF}'`
pushd "$SPARK_HOME/R" > /dev/null
# Build source package and run full checks
- # Install source package to get it to generate vignettes, etc.
# Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME
- NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh
+ NO_TESTS=1 "$SPARK_HOME/"R/check-cran.sh
+
# Move R source package to match the Spark release version if the versions are not the same.
# NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file
if [ "$R_PACKAGE_VERSION" != "$VERSION" ]; then
mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz
fi
+
+ # Install source package to get it to generate vignettes rds files, etc.
+ VERSION=$VERSION "$SPARK_HOME/"R/install-source-package.sh
popd > /dev/null
else
echo "Skipping building R source package"