From b2bdd0e505f1ae3d39c46139f17bd43779ece635 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Thu, 22 May 2014 20:48:55 -0700 Subject: Updated scripts for auditing releases - Added script to automatically generate change list CHANGES.txt - Added test for verifying linking against maven distributions of `spark-sql` and `spark-hive` - Added SBT projects for testing functionality of `spark-sql` and `spark-hive` - Fixed issues in existing tests that might have come up because of changes in Spark 1.0 Author: Tathagata Das Closes #844 from tdas/update-dev-scripts and squashes the following commits: 25090ba [Tathagata Das] Added missing license e2e20b3 [Tathagata Das] Updated tests for auditing releases. --- dev/audit-release/audit_release.py | 11 +- .../maven_app_core/src/main/java/SimpleApp.java | 1 + .../sbt_app_core/src/main/scala/SparkApp.scala | 2 +- dev/audit-release/sbt_app_hive/build.sbt | 29 +++ dev/audit-release/sbt_app_hive/data.txt | 9 + .../sbt_app_hive/src/main/resources/hive-site.xml | 213 +++++++++++++++++++++ .../sbt_app_hive/src/main/scala/HiveApp.scala | 57 ++++++ dev/audit-release/sbt_app_sql/build.sbt | 29 +++ .../sbt_app_sql/src/main/scala/SqlApp.scala | 57 ++++++ .../src/main/scala/StreamingApp.scala | 1 - 10 files changed, 403 insertions(+), 6 deletions(-) create mode 100644 dev/audit-release/sbt_app_hive/build.sbt create mode 100644 dev/audit-release/sbt_app_hive/data.txt create mode 100644 dev/audit-release/sbt_app_hive/src/main/resources/hive-site.xml create mode 100644 dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala create mode 100644 dev/audit-release/sbt_app_sql/build.sbt create mode 100644 dev/audit-release/sbt_app_sql/src/main/scala/SqlApp.scala (limited to 'dev/audit-release') diff --git a/dev/audit-release/audit_release.py b/dev/audit-release/audit_release.py index 4a816d4101..8c7573b91f 100755 --- a/dev/audit-release/audit_release.py +++ b/dev/audit-release/audit_release.py @@ -93,9 +93,12 @@ original_dir = os.getcwd() # 
For each of these modules, we'll test an 'empty' application in sbt and # maven that links against them. This will catch issues with messed up # dependencies within those projects. -modules = ["spark-core", "spark-bagel", "spark-mllib", "spark-streaming", "spark-repl", - "spark-graphx", "spark-streaming-flume", "spark-streaming-kafka", - "spark-streaming-mqtt", "spark-streaming-twitter", "spark-streaming-zeromq"] +modules = [ + "spark-core", "spark-bagel", "spark-mllib", "spark-streaming", "spark-repl", + "spark-graphx", "spark-streaming-flume", "spark-streaming-kafka", + "spark-streaming-mqtt", "spark-streaming-twitter", "spark-streaming-zeromq", + "spark-catalyst", "spark-sql", "spark-hive" +] modules = map(lambda m: "%s_%s" % (m, SCALA_BINARY_VERSION), modules) # Check for directories that might interfere with tests @@ -122,7 +125,7 @@ for module in modules: os.chdir(original_dir) # SBT application tests -for app in ["sbt_app_core", "sbt_app_graphx", "sbt_app_streaming"]: +for app in ["sbt_app_core", "sbt_app_graphx", "sbt_app_streaming", "sbt_app_sql", "sbt_app_hive"]: os.chdir(app) ret = run_cmd("sbt clean run", exit_on_failure=False) test(ret == 0, "sbt application (%s)" % app) diff --git a/dev/audit-release/maven_app_core/src/main/java/SimpleApp.java b/dev/audit-release/maven_app_core/src/main/java/SimpleApp.java index 6b65dda39b..5217689e7c 100644 --- a/dev/audit-release/maven_app_core/src/main/java/SimpleApp.java +++ b/dev/audit-release/maven_app_core/src/main/java/SimpleApp.java @@ -37,5 +37,6 @@ public class SimpleApp { System.exit(-1); } System.out.println("Test succeeded"); + sc.stop(); } } diff --git a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala index a89b0d7d38..77bbd167b1 100644 --- a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala +++ b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala @@ -19,6 +19,7 @@ package main.scala import scala.util.Try 
+import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ @@ -30,7 +31,6 @@ object SimpleApp { } val logFile = "input.txt" val sc = new SparkContext(conf) - SparkContext.jarOfClass(this.getClass).foreach(sc.addJar) val logData = sc.textFile(logFile, 2).cache() val numAs = logData.filter(line => line.contains("a")).count() val numBs = logData.filter(line => line.contains("b")).count() diff --git a/dev/audit-release/sbt_app_hive/build.sbt b/dev/audit-release/sbt_app_hive/build.sbt new file mode 100644 index 0000000000..7ac1be729c --- /dev/null +++ b/dev/audit-release/sbt_app_hive/build.sbt @@ -0,0 +1,29 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +name := "Simple Project" + +version := "1.0" + +scalaVersion := System.getenv.get("SCALA_VERSION") + +libraryDependencies += "org.apache.spark" %% "spark-hive" % System.getenv.get("SPARK_VERSION") + +resolvers ++= Seq( + "Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"), + "Akka Repository" at "http://repo.akka.io/releases/", + "Spray Repository" at "http://repo.spray.cc/") diff --git a/dev/audit-release/sbt_app_hive/data.txt b/dev/audit-release/sbt_app_hive/data.txt new file mode 100644 index 0000000000..0229e67f51 --- /dev/null +++ b/dev/audit-release/sbt_app_hive/data.txt @@ -0,0 +1,9 @@ +0val_0 +1val_1 +2val_2 +3val_3 +4val_4 +5val_5 +6val_6 +7val_7 +9val_9 diff --git a/dev/audit-release/sbt_app_hive/src/main/resources/hive-site.xml b/dev/audit-release/sbt_app_hive/src/main/resources/hive-site.xml new file mode 100644 index 0000000000..93b835813d --- /dev/null +++ b/dev/audit-release/sbt_app_hive/src/main/resources/hive-site.xml @@ -0,0 +1,213 @@ + + + + + + + + + + + + + + + + + + build.dir + ${user.dir}/build + + + + build.dir.hive + ${build.dir}/hive + + + + hadoop.tmp.dir + ${build.dir.hive}/test/hadoop-${user.name} + A base for other temporary directories. 
+ + + + + + hive.exec.scratchdir + ${build.dir}/scratchdir + Scratch space for Hive jobs + + + + hive.exec.local.scratchdir + ${build.dir}/localscratchdir/ + Local scratch space for Hive jobs + + + + javax.jdo.option.ConnectionURL + + jdbc:derby:;databaseName=../build/test/junit_metastore_db;create=true + + + + javax.jdo.option.ConnectionDriverName + org.apache.derby.jdbc.EmbeddedDriver + + + + javax.jdo.option.ConnectionUserName + APP + + + + javax.jdo.option.ConnectionPassword + mine + + + + + hive.metastore.warehouse.dir + ${test.warehouse.dir} + + + + + hive.metastore.metadb.dir + ${build.dir}/test/data/metadb/ + + Required by metastore server or if the uris argument below is not supplied + + + + + test.log.dir + ${build.dir}/test/logs + + + + + test.src.dir + ${build.dir}/src/test + + + + + + + hive.jar.path + ${build.dir.hive}/ql/hive-exec-${version}.jar + + + + + hive.metastore.rawstore.impl + org.apache.hadoop.hive.metastore.ObjectStore + Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. This class is used to store and retrieval of raw metadata objects such as table, database + + + + hive.querylog.location + ${build.dir}/tmp + Location of the structured hive logs + + + + + + hive.task.progress + false + Track progress of a task + + + + hive.support.concurrency + false + Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks. 
+ + + + fs.pfile.impl + org.apache.hadoop.fs.ProxyLocalFileSystem + A proxy for local file system used for cross file system testing + + + + hive.exec.mode.local.auto + false + + Let hive determine whether to run in local mode automatically + Disabling this for tests so that minimr is not affected + + + + + hive.auto.convert.join + false + Whether Hive enable the optimization about converting common join into mapjoin based on the input file size + + + + hive.ignore.mapjoin.hint + false + Whether Hive ignores the mapjoin hint + + + + hive.input.format + org.apache.hadoop.hive.ql.io.CombineHiveInputFormat + The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat. + + + + hive.default.rcfile.serde + org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + The default SerDe hive will use for the rcfile format + + +
diff --git a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
new file mode 100644
index 0000000000..7257d17d10
--- /dev/null
+++ b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main.scala
+
+import scala.collection.mutable.{ListBuffer, Queue}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.hive.LocalHiveContext
+
+case class Person(name: String, age: Int)
+
+object SparkSqlExample {
+  /** Audit smoke test: loads data.txt into a Hive table and checks a query's row count. */
+  def main(args: Array[String]): Unit = {
+    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
+      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
+      case None => new SparkConf().setAppName("Simple Sql App")
+    }
+    val sc = new SparkContext(conf)
+    val hiveContext = new LocalHiveContext(sc)
+
+    import hiveContext._
+    hql("DROP TABLE IF EXISTS src")
+    hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+    hql("LOAD DATA LOCAL INPATH 'data.txt' INTO TABLE src")
+    val results = hql("FROM src SELECT key, value WHERE key >= 0 AND KEY < 5").collect()
+    results.foreach(println)
+    // Prints failureMsg and terminates the JVM with status -1 when the condition is false.
+    def test(f: => Boolean, failureMsg: String): Unit = {
+      if (!f) {
+        println(failureMsg)
+        System.exit(-1)
+      }
+    }
+    // Report the size; concatenating the Array itself would only print its JVM reference.
+    test(results.size == 5, "Unexpected number of selected elements: " + results.size)
+    println("Test succeeded")
+    sc.stop()
+  }
+}
diff --git a/dev/audit-release/sbt_app_sql/build.sbt b/dev/audit-release/sbt_app_sql/build.sbt
new file mode 100644
index 0000000000..6e0ad3b4b2
--- /dev/null
+++ b/dev/audit-release/sbt_app_sql/build.sbt
@@ -0,0 +1,29 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. 
See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+name := "Simple Project"
+
+version := "1.0"
+
+scalaVersion := System.getenv.get("SCALA_VERSION")
+
+libraryDependencies += "org.apache.spark" %% "spark-sql" % System.getenv.get("SPARK_VERSION")
+
+resolvers ++= Seq(
+  "Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
+  "Akka Repository" at "http://repo.akka.io/releases/",
+  "Spray Repository" at "http://repo.spray.cc/")
diff --git a/dev/audit-release/sbt_app_sql/src/main/scala/SqlApp.scala b/dev/audit-release/sbt_app_sql/src/main/scala/SqlApp.scala
new file mode 100644
index 0000000000..50af90c213
--- /dev/null
+++ b/dev/audit-release/sbt_app_sql/src/main/scala/SqlApp.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main.scala
+
+import scala.collection.mutable.{ListBuffer, Queue}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.SQLContext
+
+case class Person(name: String, age: Int)
+
+object SparkSqlExample {
+  /** Audit smoke test: registers 100 Person rows as a table and checks a query's row count. */
+  def main(args: Array[String]): Unit = {
+    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
+      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
+      case None => new SparkConf().setAppName("Simple Sql App")
+    }
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    import sqlContext._
+    val people = sc.makeRDD(1 to 100, 10).map(x => Person(s"Name$x", x))
+    people.registerAsTable("people")
+    val teenagers = sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+    val teenagerNames = teenagers.map(t => "Name: " + t(0)).collect()
+    teenagerNames.foreach(println)
+    // Prints failureMsg and terminates the JVM with status -1 when the condition is false.
+    def test(f: => Boolean, failureMsg: String): Unit = {
+      if (!f) {
+        println(failureMsg)
+        System.exit(-1)
+      }
+    }
+    // Report the size; concatenating the Array itself would only print its JVM reference.
+    test(teenagerNames.size == 7, "Unexpected number of selected elements: " + teenagerNames.size)
+    println("Test succeeded")
+    sc.stop()
+  }
+}
diff --git a/dev/audit-release/sbt_app_streaming/src/main/scala/StreamingApp.scala b/dev/audit-release/sbt_app_streaming/src/main/scala/StreamingApp.scala
index a1d8971abe..58a662bd9b 100644
--- a/dev/audit-release/sbt_app_streaming/src/main/scala/StreamingApp.scala
+++ b/dev/audit-release/sbt_app_streaming/src/main/scala/StreamingApp.scala
@@ -32,7 +32,6 @@ object SparkStreamingExample {
       case 
None => new SparkConf().setAppName("Simple Streaming App") } val ssc = new StreamingContext(conf, Seconds(1)) - SparkContext.jarOfClass(this.getClass).foreach(ssc.sparkContext.addJar) val seen = ListBuffer[RDD[Int]]() val rdd1 = ssc.sparkContext.makeRDD(1 to 100, 10) -- cgit v1.2.3