From 0eea12a3d956b54bbbd73d21b296868852a04494 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 7 Mar 2016 14:48:02 -0800 Subject: [SPARK-13596][BUILD] Move misc top-level build files into appropriate subdirs ## What changes were proposed in this pull request? Move many top-level files into dev/ or another appropriate directory. In particular, put `make-distribution.sh` in `dev` and update docs accordingly. Remove deprecated `sbt/sbt`. I was (so far) unable to figure out how to move `tox.ini`. `scalastyle-config.xml` should be movable, but edits to the project `.sbt` files didn't work; the config file location is updatable for the compile scope but not the test scope. ## How was this patch tested? Ran `./dev/run-tests` to verify that RAT and checkstyle work. Jenkins tests for the rest. Author: Sean Owen Closes #11522 from srowen/SPARK-13596. --- dev/.rat-excludes | 100 +++++++++++++++++ dev/check-license | 7 +- dev/checkstyle-suppressions.xml | 33 ++++++ dev/checkstyle.xml | 171 ++++++++++++++++++++++++++++ dev/create-release/release-build.sh | 2 +- dev/lint-python | 6 +- dev/make-distribution.sh | 215 ++++++++++++++++++++++++++++++++++++ dev/tox.ini | 18 +++ 8 files changed, 545 insertions(+), 7 deletions(-) create mode 100644 dev/.rat-excludes create mode 100644 dev/checkstyle-suppressions.xml create mode 100644 dev/checkstyle.xml create mode 100755 dev/make-distribution.sh create mode 100644 dev/tox.ini (limited to 'dev') diff --git a/dev/.rat-excludes b/dev/.rat-excludes new file mode 100644 index 0000000000..8b5061415f --- /dev/null +++ b/dev/.rat-excludes @@ -0,0 +1,100 @@ +target +cache +.gitignore +.gitattributes +.project +.classpath +.mima-excludes +.generated-mima-excludes +.generated-mima-class-excludes +.generated-mima-member-excludes +.rat-excludes +.*md +derby.log +TAGS +RELEASE +control +docs +slaves +spark-env.cmd +bootstrap-tooltip.js +jquery-1.11.1.min.js +d3.min.js +dagre-d3.min.js +graphlib-dot.min.js +sorttable.js +vis.min.js +vis.min.css +dataTables.bootstrap.css 
+dataTables.bootstrap.min.js +dataTables.rowsGroup.js +jquery.blockUI.min.js +jquery.cookies.2.2.0.min.js +jquery.dataTables.1.10.4.min.css +jquery.dataTables.1.10.4.min.js +jquery.mustache.js +jsonFormatter.min.css +jsonFormatter.min.js +.*avsc +.*txt +.*json +.*data +.*log +cloudpickle.py +heapq3.py +join.py +SparkExprTyper.scala +SparkILoop.scala +SparkILoopInit.scala +SparkIMain.scala +SparkImports.scala +SparkJLineCompletion.scala +SparkJLineReader.scala +SparkMemberHandlers.scala +SparkReplReporter.scala +sbt +sbt-launch-lib.bash +plugins.sbt +work +.*\.q +.*\.qv +golden +test.out/* +.*iml +service.properties +db.lck +build/* +dist/* +.*out +.*ipr +.*iws +logs +.*scalastyle-output.xml +.*dependency-reduced-pom.xml +known_translations +json_expectation +local-1422981759269 +local-1422981780767 +local-1425081759269 +local-1426533911241 +local-1426633911242 +local-1430917381534 +local-1430917381535_1 +local-1430917381535_2 +DESCRIPTION +NAMESPACE +test_support/* +.*Rd +help/* +html/* +INDEX +.lintr +gen-java.* +.*avpr +org.apache.spark.sql.sources.DataSourceRegister +org.apache.spark.scheduler.SparkHistoryListenerFactory +.*parquet +LZ4BlockInputStream.java +spark-deps-.* +.*csv +.*tsv diff --git a/dev/check-license b/dev/check-license index 10740cfdc5..678e73fd60 100755 --- a/dev/check-license +++ b/dev/check-license @@ -58,7 +58,7 @@ else declare java_cmd=java fi -export RAT_VERSION=0.10 +export RAT_VERSION=0.11 export rat_jar="$FWDIR"/lib/apache-rat-${RAT_VERSION}.jar mkdir -p "$FWDIR"/lib @@ -67,14 +67,15 @@ mkdir -p "$FWDIR"/lib exit 1 } -$java_cmd -jar "$rat_jar" -E "$FWDIR"/.rat-excludes -d "$FWDIR" > rat-results.txt +mkdir target +$java_cmd -jar "$rat_jar" -E "$FWDIR"/dev/.rat-excludes -d "$FWDIR" > target/rat-results.txt if [ $? -ne 0 ]; then echo "RAT exited abnormally" exit 1 fi -ERRORS="$(cat rat-results.txt | grep -e "??")" +ERRORS="$(cat target/rat-results.txt | grep -e "??")" if test ! 
-z "$ERRORS"; then echo "Could not find Apache license headers in the following files:" diff --git a/dev/checkstyle-suppressions.xml b/dev/checkstyle-suppressions.xml new file mode 100644 index 0000000000..9242be3d03 --- /dev/null +++ b/dev/checkstyle-suppressions.xml @@ -0,0 +1,33 @@ + + + + + + + + + diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml new file mode 100644 index 0000000000..a165fee2a4 --- /dev/null +++ b/dev/checkstyle.xml @@ -0,0 +1,171 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index c08b6d7de6..65e80fc760 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -165,7 +165,7 @@ if [[ "$1" == "package" ]]; then # Get maven home set by MVN MVN_HOME=`$MVN -version 2>&1 | grep 'Maven home' | awk '{print $NF}'` - ./make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz $FLAGS \ + ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz $FLAGS \ -DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log cd .. cp spark-$SPARK_VERSION-bin-$NAME/spark-$SPARK_VERSION-bin-$NAME.tgz . diff --git a/dev/lint-python b/dev/lint-python index 068337d273..477ac0ef6d 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -37,7 +37,7 @@ compile_status="${PIPESTATUS[0]}" #+ See: https://github.com/apache/spark/pull/1744#issuecomment-50982162 #+ TODOs: #+ - Download pep8 from PyPI. It's more "official". 
-PEP8_VERSION="1.6.2" +PEP8_VERSION="1.7.0" PEP8_SCRIPT_PATH="$SPARK_ROOT_DIR/dev/pep8-$PEP8_VERSION.py" PEP8_SCRIPT_REMOTE_PATH="https://raw.githubusercontent.com/jcrocholl/pep8/$PEP8_VERSION/pep8.py" @@ -80,7 +80,7 @@ export "PATH=$PYTHONPATH:$PATH" #+ first, but we do so so that the check status can #+ be output before the report, like with the #+ scalastyle and RAT checks. -python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH" +python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 --config=dev/tox.ini $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH" pep8_status="${PIPESTATUS[0]}" if [ "$compile_status" -eq 0 -a "$pep8_status" -eq 0 ]; then @@ -122,7 +122,7 @@ fi # for to_be_checked in "$PATHS_TO_CHECK" # do -# pylint --rcfile="$SPARK_ROOT_DIR/pylintrc" $to_be_checked >> "$PYLINT_REPORT_PATH" +# pylint --rcfile="$SPARK_ROOT_DIR/python/pylintrc" $to_be_checked >> "$PYLINT_REPORT_PATH" # done # if [ "${PIPESTATUS[0]}" -ne 0 ]; then diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh new file mode 100755 index 0000000000..ac4e9b90f0 --- /dev/null +++ b/dev/make-distribution.sh @@ -0,0 +1,215 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# +# Script to create a binary distribution for easy deploys of Spark. +# The distribution directory defaults to dist/ but can be overridden below. +# The distribution contains fat (assembly) jars that include the Scala library, +# so it is completely self contained. +# It does not contain source or *.class files. + +set -o pipefail +set -e +set -x + +# Figure out where the Spark framework is installed +SPARK_HOME="$(cd "`dirname "$0"`/.."; pwd)" +DISTDIR="$SPARK_HOME/dist" + +MAKE_TGZ=false +NAME=none +MVN="$SPARK_HOME/build/mvn" + +function exit_with_usage { + echo "make-distribution.sh - tool for making binary distributions of Spark" + echo "" + echo "usage:" + cl_options="[--name] [--tgz] [--mvn ]" + echo "make-distribution.sh $cl_options " + echo "See Spark's \"Building Spark\" doc for correct Maven options." + echo "" + exit 1 +} + +# Parse arguments +while (( "$#" )); do + case $1 in + --hadoop) + echo "Error: '--hadoop' is no longer supported:" + echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead." + echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4." 
+ exit_with_usage + ;; + --with-yarn) + echo "Error: '--with-yarn' is no longer supported, use Maven option -Pyarn" + exit_with_usage + ;; + --with-hive) + echo "Error: '--with-hive' is no longer supported, use Maven options -Phive and -Phive-thriftserver" + exit_with_usage + ;; + --tgz) + MAKE_TGZ=true + ;; + --mvn) + MVN="$2" + shift + ;; + --name) + NAME="$2" + shift + ;; + --help) + exit_with_usage + ;; + *) + break + ;; + esac + shift +done + +if [ -z "$JAVA_HOME" ]; then + # Fall back on JAVA_HOME from rpm, if found + if [ $(command -v rpm) ]; then + RPM_JAVA_HOME="$(rpm -E %java_home 2>/dev/null)" + if [ "$RPM_JAVA_HOME" != "%java_home" ]; then + JAVA_HOME="$RPM_JAVA_HOME" + echo "No JAVA_HOME set, proceeding with '$JAVA_HOME' learned from rpm" + fi + fi +fi + +if [ -z "$JAVA_HOME" ]; then + echo "Error: JAVA_HOME is not set, cannot proceed." + exit -1 +fi + +if [ $(command -v git) ]; then + GITREV=$(git rev-parse --short HEAD 2>/dev/null || :) + if [ ! -z "$GITREV" ]; then + GITREVSTRING=" (git revision $GITREV)" + fi + unset GITREV +fi + + +if [ ! "$(command -v "$MVN")" ] ; then + echo -e "Could not locate Maven command: '$MVN'." 
+ echo -e "Specify the Maven command with the --mvn flag" + exit -1; +fi + +VERSION=$("$MVN" help:evaluate -Dexpression=project.version $@ 2>/dev/null | grep -v "INFO" | tail -n 1) +SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version $@ 2>/dev/null\ + | grep -v "INFO"\ + | tail -n 1) +SPARK_HADOOP_VERSION=$("$MVN" help:evaluate -Dexpression=hadoop.version $@ 2>/dev/null\ + | grep -v "INFO"\ + | tail -n 1) +SPARK_HIVE=$("$MVN" help:evaluate -Dexpression=project.activeProfiles -pl sql/hive $@ 2>/dev/null\ + | grep -v "INFO"\ + | fgrep --count "hive";\ + # Reset exit status to 0, otherwise the script stops here if the last grep finds nothing\ + # because we use "set -o pipefail" + echo -n) + +if [ "$NAME" == "none" ]; then + NAME=$SPARK_HADOOP_VERSION +fi + +echo "Spark version is $VERSION" + +if [ "$MAKE_TGZ" == "true" ]; then + echo "Making spark-$VERSION-bin-$NAME.tgz" +else + echo "Making distribution for Spark $VERSION in $DISTDIR..." +fi + +# Build uber fat JAR +cd "$SPARK_HOME" + +export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m}" + +# Store the command as an array because $MVN variable might have spaces in it. +# Normal quoting tricks don't work. +# See: http://mywiki.wooledge.org/BashFAQ/050 +BUILD_COMMAND=("$MVN" clean package -DskipTests $@) + +# Actually build the jar +echo -e "\nBuilding with..." 
+echo -e "\$ ${BUILD_COMMAND[@]}\n" + +"${BUILD_COMMAND[@]}" + +# Make directories +rm -rf "$DISTDIR" +mkdir -p "$DISTDIR/lib" +echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE" +echo "Build flags: $@" >> "$DISTDIR/RELEASE" + +# Copy jars +cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/" +cp "$SPARK_HOME"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/" +# This will fail if the -Pyarn profile is not provided +# In this case, silence the error and ignore the return code of this command +cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || : + +# Copy example sources (needed for python and SQL) +mkdir -p "$DISTDIR/examples/src/main" +cp -r "$SPARK_HOME"/examples/src/main "$DISTDIR/examples/src/" + +if [ "$SPARK_HIVE" == "1" ]; then + cp "$SPARK_HOME"/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/" +fi + +# Copy license and ASF files +cp "$SPARK_HOME/LICENSE" "$DISTDIR" +cp -r "$SPARK_HOME/licenses" "$DISTDIR" +cp "$SPARK_HOME/NOTICE" "$DISTDIR" + +if [ -e "$SPARK_HOME"/CHANGES.txt ]; then + cp "$SPARK_HOME/CHANGES.txt" "$DISTDIR" +fi + +# Copy data files +cp -r "$SPARK_HOME/data" "$DISTDIR" + +# Copy other things +mkdir "$DISTDIR"/conf +cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf +cp "$SPARK_HOME/README.md" "$DISTDIR" +cp -r "$SPARK_HOME/bin" "$DISTDIR" +cp -r "$SPARK_HOME/python" "$DISTDIR" +cp -r "$SPARK_HOME/sbin" "$DISTDIR" +# Copy SparkR if it exists +if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then + mkdir -p "$DISTDIR"/R/lib + cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib + cp "$SPARK_HOME/R/lib/sparkr.zip" "$DISTDIR"/R/lib +fi + +if [ "$MAKE_TGZ" == "true" ]; then + TARDIR_NAME=spark-$VERSION-bin-$NAME + TARDIR="$SPARK_HOME/$TARDIR_NAME" + rm -rf "$TARDIR" + cp -r "$DISTDIR" "$TARDIR" + tar czf "spark-$VERSION-bin-$NAME.tgz" -C "$SPARK_HOME" "$TARDIR_NAME" + rm -rf "$TARDIR" +fi diff --git a/dev/tox.ini b/dev/tox.ini 
new file mode 100644 index 0000000000..76e3f42cde --- /dev/null +++ b/dev/tox.ini @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[pep8] +max-line-length=100 +exclude=cloudpickle.py,heapq3.py,shared.py -- cgit v1.2.3