aboutsummaryrefslogtreecommitdiff
path: root/dev
diff options
context:
space:
mode:
authorSean Owen <sowen@cloudera.com>2016-03-07 14:48:02 -0800
committerReynold Xin <rxin@databricks.com>2016-03-07 14:48:02 -0800
commit0eea12a3d956b54bbbd73d21b296868852a04494 (patch)
tree8c323563016c2865db40a1c69424316a978e3807 /dev
parent8577260abdc908ac08d28ddd3f07a2411fdc82b7 (diff)
downloadspark-0eea12a3d956b54bbbd73d21b296868852a04494.tar.gz
spark-0eea12a3d956b54bbbd73d21b296868852a04494.tar.bz2
spark-0eea12a3d956b54bbbd73d21b296868852a04494.zip
[SPARK-13596][BUILD] Move misc top-level build files into appropriate subdirs
## What changes were proposed in this pull request? Move many top-level files into `dev/` or another appropriate directory. In particular, put `make-distribution.sh` in `dev` and update docs accordingly. Remove deprecated `sbt/sbt`. I was (so far) unable to figure out how to move `tox.ini`. `scalastyle-config.xml` should be movable, but edits to the project `.sbt` files didn't work; the config file location can be changed for the compile scope but not for the test scope. ## How was this patch tested? `./dev/run-tests` to verify RAT and checkstyle work. Jenkins tests for the rest. Author: Sean Owen <sowen@cloudera.com> Closes #11522 from srowen/SPARK-13596.
Diffstat (limited to 'dev')
-rw-r--r--dev/.rat-excludes100
-rwxr-xr-xdev/check-license7
-rw-r--r--dev/checkstyle-suppressions.xml33
-rw-r--r--dev/checkstyle.xml171
-rwxr-xr-xdev/create-release/release-build.sh2
-rwxr-xr-xdev/lint-python6
-rwxr-xr-xdev/make-distribution.sh215
-rw-r--r--dev/tox.ini18
8 files changed, 545 insertions, 7 deletions
diff --git a/dev/.rat-excludes b/dev/.rat-excludes
new file mode 100644
index 0000000000..8b5061415f
--- /dev/null
+++ b/dev/.rat-excludes
@@ -0,0 +1,100 @@
+target
+cache
+.gitignore
+.gitattributes
+.project
+.classpath
+.mima-excludes
+.generated-mima-excludes
+.generated-mima-class-excludes
+.generated-mima-member-excludes
+.rat-excludes
+.*md
+derby.log
+TAGS
+RELEASE
+control
+docs
+slaves
+spark-env.cmd
+bootstrap-tooltip.js
+jquery-1.11.1.min.js
+d3.min.js
+dagre-d3.min.js
+graphlib-dot.min.js
+sorttable.js
+vis.min.js
+vis.min.css
+dataTables.bootstrap.css
+dataTables.bootstrap.min.js
+dataTables.rowsGroup.js
+jquery.blockUI.min.js
+jquery.cookies.2.2.0.min.js
+jquery.dataTables.1.10.4.min.css
+jquery.dataTables.1.10.4.min.js
+jquery.mustache.js
+jsonFormatter.min.css
+jsonFormatter.min.js
+.*avsc
+.*txt
+.*json
+.*data
+.*log
+cloudpickle.py
+heapq3.py
+join.py
+SparkExprTyper.scala
+SparkILoop.scala
+SparkILoopInit.scala
+SparkIMain.scala
+SparkImports.scala
+SparkJLineCompletion.scala
+SparkJLineReader.scala
+SparkMemberHandlers.scala
+SparkReplReporter.scala
+sbt
+sbt-launch-lib.bash
+plugins.sbt
+work
+.*\.q
+.*\.qv
+golden
+test.out/*
+.*iml
+service.properties
+db.lck
+build/*
+dist/*
+.*out
+.*ipr
+.*iws
+logs
+.*scalastyle-output.xml
+.*dependency-reduced-pom.xml
+known_translations
+json_expectation
+local-1422981759269
+local-1422981780767
+local-1425081759269
+local-1426533911241
+local-1426633911242
+local-1430917381534
+local-1430917381535_1
+local-1430917381535_2
+DESCRIPTION
+NAMESPACE
+test_support/*
+.*Rd
+help/*
+html/*
+INDEX
+.lintr
+gen-java.*
+.*avpr
+org.apache.spark.sql.sources.DataSourceRegister
+org.apache.spark.scheduler.SparkHistoryListenerFactory
+.*parquet
+LZ4BlockInputStream.java
+spark-deps-.*
+.*csv
+.*tsv
diff --git a/dev/check-license b/dev/check-license
index 10740cfdc5..678e73fd60 100755
--- a/dev/check-license
+++ b/dev/check-license
@@ -58,7 +58,7 @@ else
declare java_cmd=java
fi
-export RAT_VERSION=0.10
+export RAT_VERSION=0.11
export rat_jar="$FWDIR"/lib/apache-rat-${RAT_VERSION}.jar
mkdir -p "$FWDIR"/lib
@@ -67,14 +67,15 @@ mkdir -p "$FWDIR"/lib
exit 1
}
-$java_cmd -jar "$rat_jar" -E "$FWDIR"/.rat-excludes -d "$FWDIR" > rat-results.txt
+# Create the report directory; -p keeps reruns from failing when target/ already exists.
+mkdir -p target
+$java_cmd -jar "$rat_jar" -E "$FWDIR"/dev/.rat-excludes -d "$FWDIR" > target/rat-results.txt
if [ $? -ne 0 ]; then
echo "RAT exited abnormally"
exit 1
fi
-ERRORS="$(cat rat-results.txt | grep -e "??")"
+ERRORS="$(cat target/rat-results.txt | grep -e "??")"
if test ! -z "$ERRORS"; then
echo "Could not find Apache license headers in the following files:"
diff --git a/dev/checkstyle-suppressions.xml b/dev/checkstyle-suppressions.xml
new file mode 100644
index 0000000000..9242be3d03
--- /dev/null
+++ b/dev/checkstyle-suppressions.xml
@@ -0,0 +1,33 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<!DOCTYPE suppressions PUBLIC
+"-//Puppy Crawl//DTD Suppressions 1.1//EN"
+"http://www.puppycrawl.com/dtds/suppressions_1_1.dtd">
+
+<!--
+
+ This file contains suppression rules for Checkstyle checks.
+ Ideally only files that cannot be modified (e.g. third-party code)
+ should be added here. All other violations should be fixed.
+
+-->
+
+<suppressions>
+<suppress checks=".*"
+ files="core/src/main/java/org/apache/spark/util/collection/TimSort.java"/>
+</suppressions>
diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml
new file mode 100644
index 0000000000..a165fee2a4
--- /dev/null
+++ b/dev/checkstyle.xml
@@ -0,0 +1,171 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<!DOCTYPE module PUBLIC
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
+
+<!--
+
+ Checkstyle configuration based on the Google coding conventions from:
+
+ - Google Java Style
+ https://google-styleguide.googlecode.com/svn-history/r130/trunk/javaguide.html
+
+ with Spark-specific changes from:
+
+ https://cwiki.apache.org/confluence/display/SPARK/Spark+Code+Style+Guide
+
+ Checkstyle is very configurable. Be sure to read the documentation at
+ http://checkstyle.sf.net (or in your downloaded distribution).
+
+ Most Checks are configurable, be sure to consult the documentation.
+
+ To completely disable a check, just comment it out or delete it from the file.
+
+ Authors: Max Vetrenko, Ruslan Diachenko, Roman Ivanov.
+
+ -->
+
+<module name = "Checker">
+ <property name="charset" value="UTF-8"/>
+
+ <property name="severity" value="error"/>
+
+ <property name="fileExtensions" value="java, properties, xml"/>
+
+ <module name="SuppressionFilter">
+ <property name="file" value="checkstyle-suppressions.xml"/>
+ </module>
+
+ <!-- Checks for whitespace -->
+ <!-- See http://checkstyle.sf.net/config_whitespace.html -->
+ <module name="FileTabCharacter">
+ <property name="eachLine" value="true"/>
+ </module>
+
+ <module name="RegexpSingleline">
+ <!-- \s matches whitespace character, $ matches end of line. -->
+ <property name="format" value="\s+$"/>
+ <property name="message" value="No trailing whitespace allowed."/>
+ </module>
+
+ <module name="TreeWalker">
+ <module name="OuterTypeFilename"/>
+ <module name="IllegalTokenText">
+ <property name="tokens" value="STRING_LITERAL, CHAR_LITERAL"/>
+ <property name="format" value="\\u00(08|09|0(a|A)|0(c|C)|0(d|D)|22|27|5(C|c))|\\(0(10|11|12|14|15|42|47)|134)"/>
+ <property name="message" value="Avoid using corresponding octal or Unicode escape."/>
+ </module>
+ <module name="AvoidEscapedUnicodeCharacters">
+ <property name="allowEscapesForControlCharacters" value="true"/>
+ <property name="allowByTailComment" value="true"/>
+ <property name="allowNonPrintableEscapes" value="true"/>
+ </module>
+ <!-- TODO: 11/09/15 disabled - the lengths are currently > 100 in many places -->
+ <!--
+ <module name="LineLength">
+ <property name="max" value="100"/>
+ <property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/>
+ </module>
+ -->
+ <module name="NoLineWrap"/>
+ <module name="EmptyBlock">
+ <property name="option" value="TEXT"/>
+ <property name="tokens" value="LITERAL_TRY, LITERAL_FINALLY, LITERAL_IF, LITERAL_ELSE, LITERAL_SWITCH"/>
+ </module>
+ <module name="NeedBraces">
+ <property name="allowSingleLineStatement" value="true"/>
+ </module>
+ <module name="OneStatementPerLine"/>
+ <module name="ArrayTypeStyle"/>
+ <module name="FallThrough"/>
+ <module name="UpperEll"/>
+ <module name="ModifierOrder"/>
+ <module name="SeparatorWrap">
+ <property name="tokens" value="DOT"/>
+ <property name="option" value="nl"/>
+ </module>
+ <module name="SeparatorWrap">
+ <property name="tokens" value="COMMA"/>
+ <property name="option" value="EOL"/>
+ </module>
+ <module name="PackageName">
+ <property name="format" value="^[a-z]+(\.[a-z][a-z0-9]*)*$"/>
+ <message key="name.invalidPattern"
+ value="Package name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="ClassTypeParameterName">
+ <property name="format" value="([A-Z][a-zA-Z0-9]*$)"/>
+ <message key="name.invalidPattern"
+ value="Class type name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="MethodTypeParameterName">
+ <property name="format" value="([A-Z][a-zA-Z0-9]*)"/>
+ <message key="name.invalidPattern"
+ value="Method type name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="NoFinalizer"/>
+    <!-- Custom violation messages for whitespace around generic angle brackets. -->
+    <module name="GenericWhitespace">
+      <message key="ws.followed"
+        value="GenericWhitespace ''{0}'' is followed by whitespace."/>
+      <message key="ws.preceded"
+        value="GenericWhitespace ''{0}'' is preceded with whitespace."/>
+      <message key="ws.illegalFollow"
+        value="GenericWhitespace ''{0}'' should be followed by whitespace."/>
+      <message key="ws.notPreceded"
+        value="GenericWhitespace ''{0}'' is not preceded with whitespace."/>
+    </module>
+ <!-- TODO: 11/09/15 disabled - indentation is currently inconsistent -->
+ <!--
+ <module name="Indentation">
+ <property name="basicOffset" value="4"/>
+ <property name="braceAdjustment" value="0"/>
+ <property name="caseIndent" value="4"/>
+ <property name="throwsIndent" value="4"/>
+ <property name="lineWrappingIndentation" value="4"/>
+ <property name="arrayInitIndent" value="4"/>
+ </module>
+ -->
+ <!-- TODO: 11/09/15 disabled - order is currently wrong in many places -->
+ <!--
+ <module name="ImportOrder">
+ <property name="separated" value="true"/>
+ <property name="ordered" value="true"/>
+ <property name="groups" value="/^javax?\./,scala,*,org.apache.spark"/>
+ </module>
+ -->
+ <module name="MethodParamPad"/>
+ <module name="AnnotationLocation">
+ <property name="tokens" value="CLASS_DEF, INTERFACE_DEF, ENUM_DEF, METHOD_DEF, CTOR_DEF"/>
+ </module>
+ <module name="AnnotationLocation">
+ <property name="tokens" value="VARIABLE_DEF"/>
+ <property name="allowSamelineMultipleAnnotations" value="true"/>
+ </module>
+ <module name="MethodName">
+ <property name="format" value="^[a-z][a-z0-9][a-zA-Z0-9_]*$"/>
+ <message key="name.invalidPattern"
+ value="Method name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="EmptyCatchBlock">
+ <property name="exceptionVariableName" value="expected"/>
+ </module>
+ <module name="CommentsIndentation"/>
+ <module name="UnusedImports"/>
+ </module>
+</module>
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index c08b6d7de6..65e80fc760 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -165,7 +165,7 @@ if [[ "$1" == "package" ]]; then
# Get maven home set by MVN
MVN_HOME=`$MVN -version 2>&1 | grep 'Maven home' | awk '{print $NF}'`
- ./make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz $FLAGS \
+ ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz $FLAGS \
-DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log
cd ..
cp spark-$SPARK_VERSION-bin-$NAME/spark-$SPARK_VERSION-bin-$NAME.tgz .
diff --git a/dev/lint-python b/dev/lint-python
index 068337d273..477ac0ef6d 100755
--- a/dev/lint-python
+++ b/dev/lint-python
@@ -37,7 +37,7 @@ compile_status="${PIPESTATUS[0]}"
#+ See: https://github.com/apache/spark/pull/1744#issuecomment-50982162
#+ TODOs:
#+ - Download pep8 from PyPI. It's more "official".
-PEP8_VERSION="1.6.2"
+PEP8_VERSION="1.7.0"
PEP8_SCRIPT_PATH="$SPARK_ROOT_DIR/dev/pep8-$PEP8_VERSION.py"
PEP8_SCRIPT_REMOTE_PATH="https://raw.githubusercontent.com/jcrocholl/pep8/$PEP8_VERSION/pep8.py"
@@ -80,7 +80,7 @@ export "PATH=$PYTHONPATH:$PATH"
#+ first, but we do so so that the check status can
#+ be output before the report, like with the
#+ scalastyle and RAT checks.
-python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH"
+# Point pep8 at the config through $SPARK_ROOT_DIR so the lookup does not depend on the caller's working directory.
+python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 --config="$SPARK_ROOT_DIR/dev/tox.ini" $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH"
pep8_status="${PIPESTATUS[0]}"
if [ "$compile_status" -eq 0 -a "$pep8_status" -eq 0 ]; then
@@ -122,7 +122,7 @@ fi
# for to_be_checked in "$PATHS_TO_CHECK"
# do
-# pylint --rcfile="$SPARK_ROOT_DIR/pylintrc" $to_be_checked >> "$PYLINT_REPORT_PATH"
+# pylint --rcfile="$SPARK_ROOT_DIR/python/pylintrc" $to_be_checked >> "$PYLINT_REPORT_PATH"
# done
# if [ "${PIPESTATUS[0]}" -ne 0 ]; then
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
new file mode 100755
index 0000000000..ac4e9b90f0
--- /dev/null
+++ b/dev/make-distribution.sh
@@ -0,0 +1,215 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Script to create a binary distribution for easy deploys of Spark.
+# The distribution directory defaults to dist/ but can be overridden below.
+# The distribution contains fat (assembly) jars that include the Scala library,
+# so it is completely self-contained.
+# It does not contain source or *.class files.
+
+set -o pipefail
+set -e
+set -x
+
+# Figure out where the Spark framework is installed: the parent of the directory
+# that holds this script. '&&' (rather than ';') makes a failed cd produce a
+# non-zero status instead of silently reporting the caller's working directory.
+SPARK_HOME="$(cd "$(dirname "$0")/.." && pwd)"
+# Staging directory the distribution is assembled in.
+DISTDIR="$SPARK_HOME/dist"
+
+# Defaults; overridden by the --tgz, --name and --mvn options respectively.
+MAKE_TGZ=false
+NAME=none
+MVN="$SPARK_HOME/build/mvn"
+
+# Print the command-line synopsis to stdout and terminate the script with status 1.
+# Takes no arguments.
+function exit_with_usage {
+  # --name consumes the argument that follows it, so show a value placeholder.
+  local cl_options="[--name <custom_name>] [--tgz] [--mvn <mvn-command>]"
+  echo "make-distribution.sh - tool for making binary distributions of Spark"
+  echo ""
+  echo "usage:"
+  echo "make-distribution.sh $cl_options <maven build options>"
+  echo "See Spark's \"Building Spark\" doc for correct Maven options."
+  echo ""
+  exit 1
+}
+
+# Parse arguments. Options handled here must come first; the first unrecognized
+# argument ends parsing, and it and everything after it stay in "$@" as Maven
+# build options.
+while (( "$#" )); do
+  case "$1" in
+    --hadoop)
+      echo "Error: '--hadoop' is no longer supported:"
+      echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
+      echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
+      exit_with_usage
+      ;;
+    --with-yarn)
+      echo "Error: '--with-yarn' is no longer supported, use Maven option -Pyarn"
+      exit_with_usage
+      ;;
+    --with-hive)
+      echo "Error: '--with-hive' is no longer supported, use Maven options -Phive and -Phive-thriftserver"
+      exit_with_usage
+      ;;
+    --tgz)
+      MAKE_TGZ=true
+      ;;
+    --mvn)
+      # Without this check a trailing '--mvn' makes the final shift below fail,
+      # and 'set -e' then ends the script without any error message.
+      if (( $# < 2 )); then
+        echo "Error: '--mvn' requires an argument"
+        exit_with_usage
+      fi
+      MVN="$2"
+      shift
+      ;;
+    --name)
+      # Same guard as for --mvn: the option needs a value after it.
+      if (( $# < 2 )); then
+        echo "Error: '--name' requires an argument"
+        exit_with_usage
+      fi
+      NAME="$2"
+      shift
+      ;;
+    --help)
+      exit_with_usage
+      ;;
+    *)
+      break
+      ;;
+  esac
+  shift
+done
+
+# Make sure JAVA_HOME has a value: keep an existing one, otherwise ask rpm when it is installed.
+if [ -z "$JAVA_HOME" ]; then
+  # Fall back on JAVA_HOME from rpm, if found.
+  # Use the exit status of 'command -v' rather than word-splitting its output inside [ ].
+  if command -v rpm > /dev/null 2>&1; then
+    RPM_JAVA_HOME="$(rpm -E %java_home 2>/dev/null)"
+    # Skip an empty answer as well as the unexpanded literal '%java_home'.
+    if [ -n "$RPM_JAVA_HOME" ] && [ "$RPM_JAVA_HOME" != "%java_home" ]; then
+      JAVA_HOME="$RPM_JAVA_HOME"
+      echo "No JAVA_HOME set, proceeding with '$JAVA_HOME' learned from rpm"
+    fi
+  fi
+fi
+
+# Still nothing: give up.
+if [ -z "$JAVA_HOME" ]; then
+  echo "Error: JAVA_HOME is not set, cannot proceed."
+  exit -1
+fi
+
+# When git is available, capture the short revision of HEAD in GITREVSTRING,
+# which is later written into the RELEASE file.
+# Use the exit status of 'command -v' rather than word-splitting its output inside [ ].
+if command -v git > /dev/null 2>&1; then
+  # '|| :' keeps a failing 'git rev-parse' from aborting the script under 'set -e'.
+  GITREV=$(git rev-parse --short HEAD 2>/dev/null || :)
+  if [ -n "$GITREV" ]; then
+    GITREVSTRING=" (git revision $GITREV)"
+  fi
+  unset GITREV
+fi
+
+
+# Stop early when the configured Maven command cannot be resolved.
+if ! command -v "$MVN" > /dev/null; then
+  echo -e "Could not locate Maven command: '$MVN'."
+  echo -e "Specify the Maven command with the --mvn flag"
+  exit -1
+fi
+
+# Ask Maven to evaluate several project properties under the caller's build options.
+# Each query keeps the last output line that does not contain "INFO".
+# "$@" is quoted so that an option containing whitespace reaches Maven as one argument.
+VERSION=$("$MVN" help:evaluate -Dexpression=project.version "$@" 2>/dev/null \
+    | grep -v "INFO" \
+    | tail -n 1)
+SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version "$@" 2>/dev/null \
+    | grep -v "INFO" \
+    | tail -n 1)
+SPARK_HADOOP_VERSION=$("$MVN" help:evaluate -Dexpression=hadoop.version "$@" 2>/dev/null \
+    | grep -v "INFO" \
+    | tail -n 1)
+# Count of output lines containing the "hive" profile id.
+# The trailing "echo -n" resets the exit status to 0; otherwise the script stops here
+# if the last grep finds nothing, because we use "set -o pipefail".
+SPARK_HIVE=$("$MVN" help:evaluate -Dexpression=project.activeProfiles -pl sql/hive "$@" 2>/dev/null \
+    | grep -v "INFO" \
+    | fgrep --count "<id>hive</id>"; \
+    echo -n)
+
+# Without an explicit --name, label the build with the Hadoop version.
+if [[ "$NAME" == "none" ]]; then
+  NAME="$SPARK_HADOOP_VERSION"
+fi
+
+echo "Spark version is $VERSION"
+
+# Announce which artifact is about to be produced.
+if [[ "$MAKE_TGZ" != "true" ]]; then
+  echo "Making distribution for Spark $VERSION in $DISTDIR..."
+else
+  echo "Making spark-$VERSION-bin-$NAME.tgz"
+fi
+
+# Build uber fat JAR
+cd "$SPARK_HOME"
+
+# Keep a caller-supplied MAVEN_OPTS; otherwise fall back to these JVM settings.
+export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m}"
+
+# Store the command as an array because $MVN variable might have spaces in it.
+# Normal quoting tricks don't work.
+# See: http://mywiki.wooledge.org/BashFAQ/050
+# "$@" is quoted for the same reason: each Maven option stays a single array element.
+BUILD_COMMAND=("$MVN" clean package -DskipTests "$@")
+
+# Actually build the jar
+echo -e "\nBuilding with..."
+# [*] joins the elements with spaces for display only; execution below uses [@].
+echo -e "\$ ${BUILD_COMMAND[*]}\n"
+
+"${BUILD_COMMAND[@]}"
+
+# Recreate the distribution directory from scratch
+rm -rf "$DISTDIR"
+mkdir -p "$DISTDIR/lib"
+
+# RELEASE file: a version line followed by the Maven options that were passed in
+{
+  echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION"
+  echo "Build flags: $*"
+} > "$DISTDIR/RELEASE"
+
+# Assembly and examples jars
+cp "$SPARK_HOME/assembly/target/"scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
+cp "$SPARK_HOME/examples/target/"scala*/spark-examples*.jar "$DISTDIR/lib/"
+# The yarn shuffle jar is absent unless the -Pyarn profile was provided;
+# discard the error output and ignore the exit status of this copy.
+cp "$SPARK_HOME/common/network-yarn/target/"scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" > /dev/null 2>&1 || true
+
+# Example sources (needed for python and SQL)
+mkdir -p "$DISTDIR/examples/src/main"
+cp -r "$SPARK_HOME/examples/src/main" "$DISTDIR/examples/src/"
+
+# Datanucleus jars are copied only when the hive profile count is exactly 1
+if [[ "$SPARK_HIVE" == "1" ]]; then
+  cp "$SPARK_HOME/lib_managed/jars/"datanucleus*.jar "$DISTDIR/lib/"
+fi
+
+# License and ASF files
+cp "$SPARK_HOME"/LICENSE "$DISTDIR"
+cp -r "$SPARK_HOME"/licenses "$DISTDIR"
+cp "$SPARK_HOME"/NOTICE "$DISTDIR"
+
+# CHANGES.txt is copied only when it is present
+if [[ -e "$SPARK_HOME/CHANGES.txt" ]]; then
+  cp "$SPARK_HOME"/CHANGES.txt "$DISTDIR"
+fi
+
+# Data files
+cp -r "$SPARK_HOME"/data "$DISTDIR"
+
+# Configuration templates, README and the bin/python/sbin trees
+mkdir "$DISTDIR/conf"
+cp "$SPARK_HOME"/conf/*.template "$DISTDIR/conf"
+cp "$SPARK_HOME"/README.md "$DISTDIR"
+cp -r "$SPARK_HOME"/bin "$DISTDIR"
+cp -r "$SPARK_HOME"/python "$DISTDIR"
+cp -r "$SPARK_HOME"/sbin "$DISTDIR"
+# SparkR is included only when R/lib/SparkR exists
+if [[ -d "$SPARK_HOME/R/lib/SparkR" ]]; then
+  mkdir -p "$DISTDIR/R/lib"
+  cp -r "$SPARK_HOME"/R/lib/SparkR "$DISTDIR/R/lib"
+  cp "$SPARK_HOME"/R/lib/sparkr.zip "$DISTDIR/R/lib"
+fi
+
+# With --tgz, pack a copy of the distribution directory into a tarball.
+# The copy gives the archive a spark-<version>-bin-<name> top-level directory;
+# the tarball itself is written to the current working directory.
+if [[ "$MAKE_TGZ" == "true" ]]; then
+  TARDIR_NAME="spark-$VERSION-bin-$NAME"
+  TARDIR="$SPARK_HOME/$TARDIR_NAME"
+  rm -rf "$TARDIR"
+  cp -r "$DISTDIR" "$TARDIR"
+  tar czf "$TARDIR_NAME.tgz" -C "$SPARK_HOME" "$TARDIR_NAME"
+  rm -rf "$TARDIR"
+fi
diff --git a/dev/tox.ini b/dev/tox.ini
new file mode 100644
index 0000000000..76e3f42cde
--- /dev/null
+++ b/dev/tox.ini
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[pep8]
+max-line-length=100
+exclude=cloudpickle.py,heapq3.py,shared.py