From c84f7d3e1b845bc1e595ce9a6e2de663c2d218f4 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Mon, 16 Jan 2017 13:49:12 -0800 Subject: [SPARK-18828][SPARKR] Refactor scripts for R ## What changes were proposed in this pull request? Refactored the R scripts to remove duplication and give each script a clearer purpose ## How was this patch tested? manually Author: Felix Cheung Closes #16249 from felixcheung/rscripts. --- R/check-cran.sh | 32 +++---------------------- R/create-docs.sh | 11 +++++---- R/create-rd.sh | 37 +++++++++++++++++++++++++++++ R/find-r.sh | 34 +++++++++++++++++++++++++++ R/install-dev.sh | 20 ++++------------ R/install-source-package.sh | 57 +++++++++++++++++++++++++++++++++++++++++++++ dev/make-distribution.sh | 7 ++++-- 7 files changed, 146 insertions(+), 52 deletions(-) create mode 100755 R/create-rd.sh create mode 100755 R/find-r.sh create mode 100755 R/install-source-package.sh diff --git a/R/check-cran.sh b/R/check-cran.sh index 1288e7fc9f..a188b1448a 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -20,25 +20,14 @@ set -o pipefail set -e -FWDIR="$(cd `dirname $0`; pwd)" +FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)" pushd $FWDIR > /dev/null -if [ ! -z "$R_HOME" ] - then - R_SCRIPT_PATH="$R_HOME/bin" - else - # if system wide R_HOME is not found, then exit - if [ ! `command -v R` ]; then - echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed." - exit 1 - fi - R_SCRIPT_PATH="$(dirname $(which R))" -fi -echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}" +. $FWDIR/find-r.sh # Install the package (this is required for code in vignettes to run when building it later) # Build the latest docs, but not vignettes, which is built with the package next -$FWDIR/create-docs.sh +. 
$FWDIR/install-dev.sh # Build source package with vignettes SPARK_HOME="$(cd "${FWDIR}"/..; pwd)" @@ -84,19 +73,4 @@ else SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz fi -# Install source package to get it to generate vignettes rds files, etc. -if [ -n "$CLEAN_INSTALL" ] -then - echo "Removing lib path and installing from source package" - LIB_DIR="$FWDIR/lib" - rm -rf $LIB_DIR - mkdir -p $LIB_DIR - "$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library=$LIB_DIR - - # Zip the SparkR package so that it can be distributed to worker nodes on YARN - pushd $LIB_DIR > /dev/null - jar cfM "$LIB_DIR/sparkr.zip" SparkR - popd > /dev/null -fi - popd > /dev/null diff --git a/R/create-docs.sh b/R/create-docs.sh index 84e6aa928c..6bef7e75e3 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -29,18 +29,19 @@ set -o pipefail set -e # Figure out where the script is -export FWDIR="$(cd "`dirname "$0"`"; pwd)" -export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" +export FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)" +export SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)" # Required for setting SPARK_SCALA_VERSION . "${SPARK_HOME}"/bin/load-spark-env.sh echo "Using Scala $SPARK_SCALA_VERSION" -pushd $FWDIR +pushd $FWDIR > /dev/null +. $FWDIR/find-r.sh # Install the package (this will also generate the Rd files) -./install-dev.sh +. 
$FWDIR/install-dev.sh # Now create HTML files @@ -48,7 +49,7 @@ pushd $FWDIR mkdir -p pkg/html pushd pkg/html -Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))' +"$R_SCRIPT_PATH/"Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))' popd diff --git a/R/create-rd.sh b/R/create-rd.sh new file mode 100755 index 0000000000..d17e161739 --- /dev/null +++ b/R/create-rd.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This scripts packages the SparkR source files (R and C files) and +# creates a package that can be loaded in R. The package is by default installed to +# $FWDIR/lib and the package can be loaded by using the following command in R: +# +# library(SparkR, lib.loc="$FWDIR/lib") +# +# NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory +# to load the SparkR package on the worker nodes. + +set -o pipefail +set -e + +FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)" +pushd $FWDIR > /dev/null +. 
$FWDIR/find-r.sh + +# Generate Rd files if devtools is installed +"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }' diff --git a/R/find-r.sh b/R/find-r.sh new file mode 100755 index 0000000000..690acc083a --- /dev/null +++ b/R/find-r.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +if [ -z "$R_SCRIPT_PATH" ] +then + if [ ! -z "$R_HOME" ] + then + R_SCRIPT_PATH="$R_HOME/bin" + else + # if system wide R_HOME is not found, then exit + if [ ! `command -v R` ]; then + echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed." + exit 1 + fi + R_SCRIPT_PATH="$(dirname $(which R))" + fi + echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}" +fi diff --git a/R/install-dev.sh b/R/install-dev.sh index 0f881208bc..45e6411705 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -29,27 +29,15 @@ set -o pipefail set -e -FWDIR="$(cd `dirname $0`; pwd)" +FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)" LIB_DIR="$FWDIR/lib" mkdir -p $LIB_DIR pushd $FWDIR > /dev/null -if [ ! 
-z "$R_HOME" ] - then - R_SCRIPT_PATH="$R_HOME/bin" - else - # if system wide R_HOME is not found, then exit - if [ ! `command -v R` ]; then - echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed." - exit 1 - fi - R_SCRIPT_PATH="$(dirname $(which R))" -fi -echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}" - -# Generate Rd files if devtools is installed -"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }' +. $FWDIR/find-r.sh + +. $FWDIR/create-rd.sh # Install SparkR to $LIB_DIR "$R_SCRIPT_PATH/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ diff --git a/R/install-source-package.sh b/R/install-source-package.sh new file mode 100755 index 0000000000..c6e443c04e --- /dev/null +++ b/R/install-source-package.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This script packages the SparkR source files (R and C files) and +# creates a package that can be loaded in R. 
The package is by default installed to +# $FWDIR/lib and the package can be loaded by using the following command in R: +# +# library(SparkR, lib.loc="$FWDIR/lib") +# +# NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory +# to load the SparkR package on the worker nodes. + +set -o pipefail +set -e + +FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)" +pushd $FWDIR > /dev/null +. $FWDIR/find-r.sh + +if [ -z "$VERSION" ]; then + VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` +fi + +if [ ! -f "$FWDIR"/SparkR_"$VERSION".tar.gz ]; then + echo -e "R source package file $FWDIR/SparkR_$VERSION.tar.gz is not found." + echo -e "Please build R source package with check-cran.sh" + exit -1; +fi + +echo "Removing lib path and installing from source package" +LIB_DIR="$FWDIR/lib" +rm -rf $LIB_DIR +mkdir -p $LIB_DIR +"$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library=$LIB_DIR + +# Zip the SparkR package so that it can be distributed to worker nodes on YARN +pushd $LIB_DIR > /dev/null +jar cfM "$LIB_DIR/sparkr.zip" SparkR +popd > /dev/null + +popd diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 6c5ae0d629..47ff504316 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -232,14 +232,17 @@ if [ "$MAKE_R" == "true" ]; then R_PACKAGE_VERSION=`grep Version $SPARK_HOME/R/pkg/DESCRIPTION | awk '{print $NF}'` pushd "$SPARK_HOME/R" > /dev/null # Build source package and run full checks - # Install source package to get it to generate vignettes, etc. # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME - NO_TESTS=1 CLEAN_INSTALL=1 "$SPARK_HOME/"R/check-cran.sh + NO_TESTS=1 "$SPARK_HOME/"R/check-cran.sh + # Move R source package to match the Spark release version if the versions are not the same. 
# NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file if [ "$R_PACKAGE_VERSION" != "$VERSION" ]; then mv $SPARK_HOME/R/SparkR_"$R_PACKAGE_VERSION".tar.gz $SPARK_HOME/R/SparkR_"$VERSION".tar.gz fi + + # Install source package to get it to generate vignettes rds files, etc. + VERSION=$VERSION "$SPARK_HOME/"R/install-source-package.sh popd > /dev/null else echo "Skipping building R source package" -- cgit v1.2.3