aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--README.md77
-rw-r--r--assembly/README13
-rw-r--r--assembly/lib/PY4J_LICENSE.txt27
-rw-r--r--assembly/lib/PY4J_VERSION.txt1
-rw-r--r--assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar (renamed from python/lib/py4j0.7.jar)bin103286 -> 103286 bytes
-rw-r--r--assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom9
-rw-r--r--assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml12
-rw-r--r--assembly/pom.xml159
-rw-r--r--assembly/src/main/assembly/assembly.xml85
-rw-r--r--bagel/pom.xml111
-rw-r--r--bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala (renamed from bagel/src/main/scala/spark/bagel/Bagel.scala)37
-rw-r--r--bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala (renamed from bagel/src/test/scala/bagel/BagelSuite.scala)10
-rw-r--r--bin/compute-classpath.cmd4
-rwxr-xr-xbin/compute-classpath.sh90
-rwxr-xr-xbin/slaves.sh4
-rwxr-xr-xbin/spark-daemon.sh24
-rwxr-xr-xbin/start-master.sh2
-rwxr-xr-xbin/start-slave.sh2
-rwxr-xr-xbin/start-slaves.sh2
-rwxr-xr-xbin/stop-all.sh1
-rwxr-xr-xbin/stop-master.sh2
-rwxr-xr-xbin/stop-slaves.sh4
-rw-r--r--conf/metrics.properties.template90
-rw-r--r--conf/slaves2
-rwxr-xr-xconf/spark-env.sh.template20
-rw-r--r--core/pom.xml228
-rw-r--r--core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala30
-rw-r--r--core/src/main/java/org/apache/spark/network/netty/FileClient.java (renamed from core/src/main/java/spark/network/netty/FileClient.java)2
-rw-r--r--core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java (renamed from core/src/main/java/spark/network/netty/FileClientChannelInitializer.java)2
-rw-r--r--core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java (renamed from core/src/main/java/spark/network/netty/FileClientHandler.java)2
-rw-r--r--core/src/main/java/org/apache/spark/network/netty/FileServer.java (renamed from core/src/main/java/spark/network/netty/FileServer.java)2
-rw-r--r--core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java (renamed from core/src/main/java/spark/network/netty/FileServerChannelInitializer.java)2
-rw-r--r--core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java (renamed from core/src/main/java/spark/network/netty/FileServerHandler.java)2
-rwxr-xr-xcore/src/main/java/org/apache/spark/network/netty/PathResolver.java (renamed from core/src/main/java/spark/network/netty/PathResolver.java)2
-rwxr-xr-xcore/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css874
-rw-r--r--core/src/main/resources/org/apache/spark/ui/static/sorttable.js (renamed from core/src/main/resources/spark/ui/static/sorttable.js)0
-rw-r--r--core/src/main/resources/org/apache/spark/ui/static/spark-logo-77x50px-hd.png (renamed from core/src/main/resources/spark/ui/static/spark-logo-77x50px-hd.png)bin3536 -> 3536 bytes
-rw-r--r--core/src/main/resources/org/apache/spark/ui/static/spark_logo.png (renamed from core/src/main/resources/spark/ui/static/spark_logo.png)bin14233 -> 14233 bytes
-rw-r--r--core/src/main/resources/org/apache/spark/ui/static/webui.css63
-rw-r--r--core/src/main/resources/spark/ui/static/bootstrap-responsive.min.css9
-rw-r--r--core/src/main/resources/spark/ui/static/bootstrap.min.css9
-rw-r--r--core/src/main/resources/spark/ui/static/webui.css49
-rw-r--r--core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala45
-rw-r--r--core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala69
-rw-r--r--core/src/main/scala/org/apache/spark/Accumulators.scala (renamed from core/src/main/scala/spark/Accumulators.scala)11
-rw-r--r--core/src/main/scala/org/apache/spark/Aggregator.scala (renamed from core/src/main/scala/spark/Aggregator.scala)20
-rw-r--r--core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala (renamed from core/src/main/scala/spark/BlockStoreShuffleFetcher.scala)21
-rw-r--r--core/src/main/scala/org/apache/spark/CacheManager.scala (renamed from core/src/main/scala/spark/CacheManager.scala)5
-rw-r--r--core/src/main/scala/org/apache/spark/Dependency.scala (renamed from core/src/main/scala/spark/Dependency.scala)9
-rw-r--r--core/src/main/scala/org/apache/spark/FetchFailedException.scala (renamed from core/src/main/scala/spark/FetchFailedException.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/HttpFileServer.scala (renamed from core/src/main/scala/spark/HttpFileServer.scala)3
-rw-r--r--core/src/main/scala/org/apache/spark/HttpServer.scala (renamed from core/src/main/scala/spark/HttpServer.scala)3
-rw-r--r--core/src/main/scala/org/apache/spark/Logging.scala (renamed from core/src/main/scala/spark/Logging.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/MapOutputTracker.scala (renamed from core/src/main/scala/spark/MapOutputTracker.scala)70
-rw-r--r--core/src/main/scala/org/apache/spark/Partition.scala (renamed from core/src/main/scala/spark/Partition.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/Partitioner.scala (renamed from core/src/main/scala/spark/Partitioner.scala)25
-rw-r--r--core/src/main/scala/org/apache/spark/SerializableWritable.scala (renamed from core/src/main/scala/spark/SerializableWritable.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/ShuffleFetcher.scala (renamed from core/src/main/scala/spark/ShuffleFetcher.scala)11
-rw-r--r--core/src/main/scala/org/apache/spark/SparkContext.scala (renamed from core/src/main/scala/spark/SparkContext.scala)134
-rw-r--r--core/src/main/scala/org/apache/spark/SparkEnv.scala (renamed from core/src/main/scala/spark/SparkEnv.scala)78
-rw-r--r--core/src/main/scala/org/apache/spark/SparkException.scala (renamed from core/src/main/scala/spark/SparkException.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/SparkFiles.java (renamed from core/src/main/scala/spark/SparkFiles.java)2
-rw-r--r--core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala (renamed from core/src/main/scala/spark/HadoopWriter.scala)10
-rw-r--r--core/src/main/scala/org/apache/spark/TaskContext.scala (renamed from core/src/main/scala/spark/TaskContext.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/TaskEndReason.scala (renamed from core/src/main/scala/spark/TaskEndReason.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/TaskState.scala (renamed from core/src/main/scala/spark/TaskState.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala (renamed from core/src/main/scala/spark/api/java/JavaDoubleRDD.scala)51
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala (renamed from core/src/main/scala/spark/api/java/JavaPairRDD.scala)70
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala (renamed from core/src/main/scala/spark/api/java/JavaRDD.scala)9
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala (renamed from core/src/main/scala/spark/api/java/JavaRDDLike.scala)29
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala (renamed from core/src/main/scala/spark/api/java/JavaSparkContext.scala)30
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java (renamed from core/src/main/scala/spark/api/java/JavaSparkContextVarargsWorkaround.java)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala28
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/StorageLevels.java (renamed from core/src/main/scala/spark/api/java/StorageLevels.java)4
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.java (renamed from core/src/main/scala/spark/api/java/function/DoubleFlatMapFunction.java)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.java (renamed from core/src/main/scala/spark/api/java/function/DoubleFunction.java)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala (renamed from core/src/main/scala/spark/api/java/function/FlatMapFunction.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala (renamed from core/src/main/scala/spark/api/java/function/FlatMapFunction2.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/Function.java (renamed from core/src/main/scala/spark/api/java/function/Function.java)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/Function2.java (renamed from core/src/main/scala/spark/api/java/function/Function2.java)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java (renamed from core/src/main/scala/spark/api/java/function/PairFlatMapFunction.java)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java (renamed from core/src/main/scala/spark/api/java/function/PairFunction.java)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala (renamed from core/src/main/scala/spark/api/java/function/VoidFunction.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala (renamed from core/src/main/scala/spark/api/java/function/WrappedFunction1.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala (renamed from core/src/main/scala/spark/api/java/function/WrappedFunction2.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/api/python/PythonPartitioner.scala (renamed from core/src/main/scala/spark/api/python/PythonPartitioner.scala)31
-rw-r--r--core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala (renamed from core/src/main/scala/spark/api/python/PythonRDD.scala)87
-rw-r--r--core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala (renamed from core/src/main/scala/spark/api/python/PythonWorkerFactory.scala)8
-rw-r--r--core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala (renamed from core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala)7
-rw-r--r--core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala (renamed from core/src/main/scala/spark/broadcast/Broadcast.scala)8
-rw-r--r--core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala (renamed from core/src/main/scala/spark/broadcast/BroadcastFactory.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala (renamed from core/src/main/scala/spark/broadcast/HttpBroadcast.scala)40
-rw-r--r--core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala (renamed from core/src/main/scala/spark/broadcast/MultiTracker.scala)5
-rw-r--r--core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala (renamed from core/src/main/scala/spark/broadcast/SourceInfo.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala (renamed from core/src/main/scala/spark/broadcast/TreeBroadcast.scala)7
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala (renamed from core/src/main/scala/spark/deploy/ApplicationDescription.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/Command.scala (renamed from core/src/main/scala/spark/deploy/Command.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala130
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala (renamed from core/src/main/scala/spark/deploy/ExecutorState.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala (renamed from core/src/main/scala/spark/deploy/JsonProtocol.scala)16
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala (renamed from core/src/main/scala/spark/deploy/LocalSparkCluster.scala)10
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala (renamed from core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala)18
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/WebUI.scala (renamed from core/src/main/scala/spark/deploy/WebUI.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/client/Client.scala (renamed from core/src/main/scala/spark/deploy/client/Client.scala)23
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala (renamed from core/src/main/scala/spark/deploy/client/ClientListener.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala (renamed from core/src/main/scala/spark/deploy/client/TestClient.scala)8
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/client/TestExecutor.scala (renamed from core/src/main/scala/spark/deploy/client/TestExecutor.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala (renamed from core/src/main/scala/spark/deploy/master/ApplicationInfo.scala)11
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala24
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala (renamed from core/src/main/scala/spark/deploy/master/ApplicationState.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala (renamed from core/src/main/scala/spark/deploy/master/ExecutorInfo.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/Master.scala (renamed from core/src/main/scala/spark/deploy/master/Master.scala)97
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala (renamed from core/src/main/scala/spark/deploy/master/MasterArguments.scala)10
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala25
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala (renamed from core/src/main/scala/spark/deploy/master/WorkerInfo.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala (renamed from core/src/main/scala/spark/deploy/master/WorkerState.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala (renamed from core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala)41
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala (renamed from core/src/main/scala/spark/deploy/master/ui/IndexPage.scala)67
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala (renamed from core/src/main/scala/spark/deploy/master/ui/MasterWebUI.scala)27
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala (renamed from core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala)50
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala (renamed from core/src/main/scala/spark/deploy/worker/Worker.scala)47
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala (renamed from core/src/main/scala/spark/deploy/worker/WorkerArguments.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala34
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala (renamed from core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala)46
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala (renamed from core/src/main/scala/spark/deploy/worker/ui/WorkerWebUI.scala)41
-rw-r--r--core/src/main/scala/org/apache/spark/executor/Executor.scala (renamed from core/src/main/scala/spark/executor/Executor.scala)73
-rw-r--r--core/src/main/scala/org/apache/spark/executor/ExecutorBackend.scala (renamed from core/src/main/scala/spark/executor/ExecutorBackend.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala (renamed from core/src/main/scala/spark/executor/ExecutorExitCode.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala55
-rw-r--r--core/src/main/scala/org/apache/spark/executor/ExecutorURLClassLoader.scala (renamed from core/src/main/scala/spark/executor/ExecutorURLClassLoader.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala (renamed from core/src/main/scala/spark/executor/MesosExecutorBackend.scala)9
-rw-r--r--core/src/main/scala/org/apache/spark/executor/StandaloneExecutorBackend.scala (renamed from core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala)33
-rw-r--r--core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala (renamed from core/src/main/scala/spark/executor/TaskMetrics.scala)9
-rw-r--r--core/src/main/scala/org/apache/spark/io/CompressionCodec.scala82
-rw-r--r--core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala100
-rw-r--r--core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala163
-rw-r--r--core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala59
-rw-r--r--core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala68
-rw-r--r--core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala (renamed from core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala)20
-rw-r--r--core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala55
-rw-r--r--core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulingMode.scala)9
-rw-r--r--core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala (renamed from core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala)17
-rw-r--r--core/src/main/scala/org/apache/spark/metrics/source/Source.scala25
-rw-r--r--core/src/main/scala/org/apache/spark/network/BufferMessage.scala (renamed from core/src/main/scala/spark/network/BufferMessage.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/network/Connection.scala (renamed from core/src/main/scala/spark/network/Connection.scala)50
-rw-r--r--core/src/main/scala/org/apache/spark/network/ConnectionManager.scala (renamed from core/src/main/scala/spark/network/ConnectionManager.scala)8
-rw-r--r--core/src/main/scala/org/apache/spark/network/ConnectionManagerId.scala (renamed from core/src/main/scala/spark/network/ConnectionManagerId.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala (renamed from core/src/main/scala/spark/network/ConnectionManagerTest.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/network/Message.scala (renamed from core/src/main/scala/spark/network/Message.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/network/MessageChunk.scala (renamed from core/src/main/scala/spark/network/MessageChunk.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/network/MessageChunkHeader.scala (renamed from core/src/main/scala/spark/network/MessageChunkHeader.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/network/ReceiverTest.scala (renamed from core/src/main/scala/spark/network/ReceiverTest.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/network/SenderTest.scala (renamed from core/src/main/scala/spark/network/SenderTest.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala (renamed from core/src/main/scala/spark/network/netty/FileHeader.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala (renamed from core/src/main/scala/spark/network/netty/ShuffleCopier.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala (renamed from core/src/main/scala/spark/network/netty/ShuffleSender.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/package.scala35
-rw-r--r--core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala (renamed from core/src/main/scala/spark/partial/ApproximateActionListener.scala)7
-rw-r--r--core/src/main/scala/org/apache/spark/partial/ApproximateEvaluator.scala (renamed from core/src/main/scala/spark/partial/ApproximateEvaluator.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala (renamed from core/src/main/scala/spark/partial/BoundedDouble.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala (renamed from core/src/main/scala/spark/partial/CountEvaluator.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala (renamed from core/src/main/scala/spark/partial/GroupedCountEvaluator.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/partial/GroupedMeanEvaluator.scala (renamed from core/src/main/scala/spark/partial/GroupedMeanEvaluator.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/partial/GroupedSumEvaluator.scala (renamed from core/src/main/scala/spark/partial/GroupedSumEvaluator.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala (renamed from core/src/main/scala/spark/partial/MeanEvaluator.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/partial/PartialResult.scala (renamed from core/src/main/scala/spark/partial/PartialResult.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala (renamed from core/src/main/scala/spark/partial/StudentTCacher.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala (renamed from core/src/main/scala/spark/partial/SumEvaluator.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala (renamed from core/src/main/scala/spark/rdd/BlockRDD.scala)13
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala (renamed from core/src/main/scala/spark/rdd/CartesianRDD.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala (renamed from core/src/main/scala/spark/rdd/CheckpointRDD.scala)20
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala (renamed from core/src/main/scala/spark/rdd/CoGroupedRDD.scala)60
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala342
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala (renamed from core/src/main/scala/spark/DoubleRDDFunctions.scala)25
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala (renamed from core/src/main/scala/spark/rdd/EmptyRDD.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala (renamed from core/src/main/scala/spark/rdd/FilteredRDD.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala (renamed from core/src/main/scala/spark/rdd/FlatMappedRDD.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala (renamed from core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala)22
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala (renamed from core/src/main/scala/spark/rdd/GlommedRDD.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala (renamed from core/src/main/scala/spark/rdd/HadoopRDD.scala)21
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala (renamed from core/src/main/scala/spark/rdd/JdbcRDD.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala (renamed from core/src/main/scala/spark/rdd/MapPartitionsRDD.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithIndexRDD.scala (renamed from core/src/main/scala/spark/rdd/MapPartitionsWithIndexRDD.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala (renamed from core/src/main/scala/spark/rdd/MappedRDD.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala (renamed from core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala)19
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala (renamed from core/src/main/scala/spark/rdd/NewHadoopRDD.scala)10
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala52
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala (renamed from core/src/main/scala/spark/PairRDDFunctions.scala)204
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala (renamed from core/src/main/scala/spark/rdd/ParallelCollectionRDD.scala)67
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala (renamed from core/src/main/scala/spark/rdd/PartitionPruningRDD.scala)9
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala (renamed from core/src/main/scala/spark/rdd/PipedRDD.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/RDD.scala (renamed from core/src/main/scala/spark/RDD.scala)93
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala (renamed from core/src/main/scala/spark/RDDCheckpointData.scala)7
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala (renamed from core/src/main/scala/spark/rdd/SampledRDD.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala (renamed from core/src/main/scala/spark/SequenceFileRDDFunctions.scala)26
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala (renamed from core/src/main/scala/spark/rdd/ShuffledRDD.scala)35
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala (renamed from core/src/main/scala/spark/rdd/SubtractedRDD.scala)47
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala (renamed from core/src/main/scala/spark/rdd/UnionRDD.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala (renamed from core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala)34
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala (renamed from core/src/main/scala/spark/rdd/ZippedRDD.scala)27
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala (renamed from core/src/main/scala/spark/scheduler/ActiveJob.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala (renamed from core/src/main/scala/spark/scheduler/DAGScheduler.scala)294
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala (renamed from core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala)15
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala30
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala (renamed from core/src/main/scala/spark/scheduler/InputFormatInfo.scala)11
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/JobListener.scala (renamed from core/src/main/scala/spark/scheduler/JobListener.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala (renamed from core/src/main/scala/spark/scheduler/JobLogger.scala)108
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/JobResult.scala (renamed from core/src/main/scala/spark/scheduler/JobResult.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala (renamed from core/src/main/scala/spark/scheduler/JobWaiter.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala (renamed from core/src/main/scala/spark/scheduler/MapStatus.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala (renamed from core/src/main/scala/spark/scheduler/ResultTask.scala)38
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala (renamed from core/src/main/scala/spark/scheduler/ShuffleMapTask.scala)44
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala (renamed from core/src/main/scala/spark/scheduler/SparkListener.scala)36
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala74
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala (renamed from core/src/main/scala/spark/scheduler/SplitInfo.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/Stage.scala (renamed from core/src/main/scala/spark/scheduler/Stage.scala)16
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala (renamed from core/src/main/scala/spark/scheduler/StageInfo.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/Task.scala (renamed from core/src/main/scala/spark/scheduler/Task.scala)12
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala34
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala (renamed from core/src/main/scala/spark/scheduler/TaskResult.scala)28
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala (renamed from core/src/main/scala/spark/scheduler/TaskScheduler.scala)9
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerListener.scala (renamed from core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala)13
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala (renamed from core/src/main/scala/spark/scheduler/TaskSet.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala440
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala712
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala (renamed from core/src/main/scala/spark/scheduler/cluster/ExecutorLossReason.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/Pool.scala (renamed from core/src/main/scala/spark/scheduler/cluster/Pool.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/Schedulable.scala (renamed from core/src/main/scala/spark/scheduler/cluster/Schedulable.scala)8
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulableBuilder.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulableBuilder.scala)103
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingAlgorithm.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulingAlgorithm.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingMode.scala (renamed from core/src/main/scala/spark/SoftReferenceCache.scala)20
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala)17
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala62
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala)34
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/TaskDescription.scala (renamed from core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala)7
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/TaskInfo.scala (renamed from core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala)19
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala32
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala (renamed from core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala)35
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala (renamed from core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala)5
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala (renamed from core/src/main/scala/spark/scheduler/local/LocalScheduler.scala)67
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala (renamed from core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala)112
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala)37
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala)26
-rw-r--r--core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala (renamed from core/src/main/scala/spark/JavaSerializer.scala)5
-rw-r--r--core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala159
-rw-r--r--core/src/main/scala/org/apache/spark/serializer/Serializer.scala (renamed from core/src/main/scala/spark/serializer/Serializer.scala)8
-rw-r--r--core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala (renamed from core/src/main/scala/spark/serializer/SerializerManager.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockException.scala (renamed from core/src/main/scala/spark/storage/BlockException.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockFetchTracker.scala (renamed from core/src/main/scala/spark/storage/BlockFetchTracker.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala (renamed from core/src/main/scala/spark/storage/BlockFetcherIterator.scala)23
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockManager.scala (renamed from core/src/main/scala/spark/storage/BlockManager.scala)88
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala (renamed from core/src/main/scala/spark/storage/BlockManagerId.scala)8
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala (renamed from core/src/main/scala/spark/storage/BlockManagerMaster.scala)5
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala (renamed from core/src/main/scala/spark/storage/BlockManagerMasterActor.scala)21
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala110
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveActor.scala (renamed from core/src/main/scala/spark/storage/BlockManagerSlaveActor.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala48
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala (renamed from core/src/main/scala/spark/storage/BlockManagerWorker.scala)7
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockMessage.scala (renamed from core/src/main/scala/spark/storage/BlockMessage.scala)5
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala (renamed from core/src/main/scala/spark/storage/BlockMessageArray.scala)11
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala (renamed from core/src/main/scala/spark/storage/BlockObjectWriter.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/storage/BlockStore.scala (renamed from core/src/main/scala/spark/storage/BlockStore.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/storage/DiskStore.scala (renamed from core/src/main/scala/spark/storage/DiskStore.scala)19
-rw-r--r--core/src/main/scala/org/apache/spark/storage/MemoryStore.scala (renamed from core/src/main/scala/spark/storage/MemoryStore.scala)10
-rw-r--r--core/src/main/scala/org/apache/spark/storage/PutResult.scala (renamed from core/src/main/scala/spark/storage/PutResult.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala (renamed from core/src/main/scala/spark/storage/ShuffleBlockManager.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/storage/StorageLevel.scala (renamed from core/src/main/scala/spark/storage/StorageLevel.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/storage/StorageUtils.scala (renamed from core/src/main/scala/spark/storage/StorageUtils.scala)9
-rw-r--r--core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala (renamed from core/src/main/scala/spark/storage/ThreadingTest.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/ui/JettyUtils.scala (renamed from core/src/main/scala/spark/ui/JettyUtils.scala)15
-rw-r--r--core/src/main/scala/org/apache/spark/ui/Page.scala (renamed from core/src/main/scala/spark/ui/Page.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/ui/SparkUI.scala (renamed from core/src/main/scala/spark/ui/SparkUI.scala)35
-rw-r--r--core/src/main/scala/org/apache/spark/ui/UIUtils.scala (renamed from core/src/main/scala/spark/ui/UIUtils.scala)98
-rw-r--r--core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala (renamed from core/src/main/scala/spark/ui/UIWorkloadGenerator.scala)49
-rw-r--r--core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala (renamed from core/src/main/scala/spark/ui/env/EnvironmentUI.scala)57
-rw-r--r--core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala137
-rw-r--r--core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala90
-rw-r--r--core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala156
-rw-r--r--core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala61
-rw-r--r--core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala32
-rw-r--r--core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala55
-rw-r--r--core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala183
-rw-r--r--core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala107
-rw-r--r--core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala (renamed from core/src/main/scala/spark/ui/storage/BlockManagerUI.scala)6
-rw-r--r--core/src/main/scala/org/apache/spark/ui/storage/IndexPage.scala (renamed from core/src/main/scala/spark/ui/storage/IndexPage.scala)38
-rw-r--r--core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala (renamed from core/src/main/scala/spark/ui/storage/RDDPage.scala)44
-rw-r--r--core/src/main/scala/org/apache/spark/util/AkkaUtils.scala (renamed from core/src/main/scala/spark/util/AkkaUtils.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala (renamed from core/src/main/scala/spark/util/BoundedPriorityQueue.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala (renamed from core/src/main/scala/spark/util/ByteBufferInputStream.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/util/Clock.scala29
-rw-r--r--core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala (renamed from core/src/main/scala/spark/ClosureCleaner.scala)3
-rw-r--r--core/src/main/scala/org/apache/spark/util/CompletionIterator.scala (renamed from core/src/main/scala/spark/util/CompletionIterator.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/Distribution.scala (renamed from core/src/main/scala/spark/util/Distribution.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/IdGenerator.scala (renamed from core/src/main/scala/spark/util/IdGenerator.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/IntParam.scala (renamed from core/src/main/scala/spark/util/IntParam.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/MemoryParam.scala (renamed from core/src/main/scala/spark/util/MemoryParam.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala (renamed from core/src/main/scala/spark/util/MetadataCleaner.scala)4
-rw-r--r--core/src/main/scala/org/apache/spark/util/MutablePair.scala (renamed from core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala)24
-rw-r--r--core/src/main/scala/org/apache/spark/util/NextIterator.scala (renamed from core/src/main/scala/spark/util/NextIterator.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/RateLimitedOutputStream.scala (renamed from core/src/main/scala/spark/util/RateLimitedOutputStream.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/SerializableBuffer.scala (renamed from core/src/main/scala/spark/util/SerializableBuffer.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/SizeEstimator.scala (renamed from core/src/main/scala/spark/SizeEstimator.scala)3
-rw-r--r--core/src/main/scala/org/apache/spark/util/StatCounter.scala (renamed from core/src/main/scala/spark/util/StatCounter.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala (renamed from core/src/main/scala/spark/util/TimeStampedHashMap.scala)10
-rw-r--r--core/src/main/scala/org/apache/spark/util/TimeStampedHashSet.scala (renamed from core/src/main/scala/spark/util/TimeStampedHashSet.scala)2
-rw-r--r--core/src/main/scala/org/apache/spark/util/Utils.scala (renamed from core/src/main/scala/spark/Utils.scala)110
-rw-r--r--core/src/main/scala/org/apache/spark/util/Vector.scala (renamed from core/src/main/scala/spark/util/Vector.scala)9
-rw-r--r--core/src/main/scala/spark/Cache.scala80
-rw-r--r--core/src/main/scala/spark/KryoSerializer.scala241
-rw-r--r--core/src/main/scala/spark/deploy/DeployMessage.scala125
-rw-r--r--core/src/main/scala/spark/package.scala32
-rw-r--r--core/src/main/scala/spark/rdd/CoalescedRDD.scala81
-rw-r--r--core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala631
-rw-r--r--core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala765
-rw-r--r--core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala62
-rw-r--r--core/src/main/scala/spark/storage/BlockManagerMessages.scala123
-rw-r--r--core/src/main/scala/spark/ui/jobs/IndexPage.scala129
-rw-r--r--core/src/main/scala/spark/ui/jobs/JobProgressUI.scala144
-rw-r--r--core/src/main/scala/spark/ui/jobs/StagePage.scala131
-rw-r--r--core/src/test/resources/test_metrics_config.properties6
-rw-r--r--core/src/test/resources/test_metrics_system.properties7
-rw-r--r--core/src/test/scala/org/apache/spark/AccumulatorSuite.scala (renamed from core/src/test/scala/spark/AccumulatorSuite.scala)4
-rw-r--r--core/src/test/scala/org/apache/spark/BroadcastSuite.scala (renamed from core/src/test/scala/spark/BroadcastSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/CheckpointSuite.scala (renamed from core/src/test/scala/spark/CheckpointSuite.scala)9
-rw-r--r--core/src/test/scala/org/apache/spark/DistributedSuite.scala (renamed from core/src/test/scala/spark/DistributedSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/DriverSuite.scala (renamed from core/src/test/scala/spark/DriverSuite.scala)5
-rw-r--r--core/src/test/scala/org/apache/spark/FailureSuite.scala (renamed from core/src/test/scala/spark/FailureSuite.scala)42
-rw-r--r--core/src/test/scala/org/apache/spark/FileServerSuite.scala (renamed from core/src/test/scala/spark/FileServerSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/FileSuite.scala (renamed from core/src/test/scala/spark/FileSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/JavaAPISuite.java (renamed from core/src/test/scala/spark/JavaAPISuite.java)54
-rw-r--r--core/src/test/scala/org/apache/spark/LocalSparkContext.scala (renamed from core/src/test/scala/spark/LocalSparkContext.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala (renamed from core/src/test/scala/spark/MapOutputTrackerSuite.scala)20
-rw-r--r--core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala28
-rw-r--r--core/src/test/scala/org/apache/spark/PartitioningSuite.scala (renamed from core/src/test/scala/spark/PartitioningSuite.scala)13
-rw-r--r--core/src/test/scala/org/apache/spark/PipedRDDSuite.scala (renamed from core/src/test/scala/spark/PipedRDDSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/SharedSparkContext.scala (renamed from core/src/test/scala/spark/SharedSparkContext.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala (renamed from core/src/test/scala/spark/ShuffleNettySuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/ShuffleSuite.scala (renamed from core/src/test/scala/spark/ShuffleSuite.scala)105
-rw-r--r--core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala60
-rw-r--r--core/src/test/scala/org/apache/spark/ThreadingSuite.scala (renamed from core/src/test/scala/spark/ThreadingSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/UnpersistSuite.scala (renamed from core/src/test/scala/spark/UnpersistSuite.scala)4
-rw-r--r--core/src/test/scala/org/apache/spark/ZippedPartitionsSuite.scala (renamed from core/src/test/scala/spark/ZippedPartitionsSuite.scala)4
-rw-r--r--core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala62
-rw-r--r--core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala89
-rw-r--r--core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala54
-rw-r--r--core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala (renamed from core/src/test/scala/spark/rdd/JdbcRDDSuite.scala)6
-rw-r--r--core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala (renamed from core/src/test/scala/spark/PairRDDFunctionsSuite.scala)10
-rw-r--r--core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala (renamed from core/src/test/scala/spark/rdd/ParallelCollectionSplitSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala (renamed from core/src/test/scala/spark/RDDSuite.scala)71
-rw-r--r--core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala (renamed from core/src/test/scala/spark/SortingSuite.scala)6
-rw-r--r--core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala (renamed from core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala)57
-rw-r--r--core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala (renamed from core/src/test/scala/spark/scheduler/JobLoggerSuite.scala)14
-rw-r--r--core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala (renamed from core/src/test/scala/spark/scheduler/SparkListenerSuite.scala)6
-rw-r--r--core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala (renamed from core/src/test/scala/spark/scheduler/TaskContextSuite.scala)12
-rw-r--r--core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala (renamed from core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala)41
-rw-r--r--core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala273
-rw-r--r--core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala26
-rw-r--r--core/src/test/scala/org/apache/spark/scheduler/local/LocalSchedulerSuite.scala (renamed from core/src/test/scala/spark/scheduler/LocalSchedulerSuite.scala)56
-rw-r--r--core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala (renamed from core/src/test/scala/spark/KryoSerializerSuite.scala)71
-rw-r--r--core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala (renamed from core/src/test/scala/spark/storage/BlockManagerSuite.scala)29
-rw-r--r--core/src/test/scala/org/apache/spark/ui/UISuite.scala (renamed from core/src/test/scala/spark/ui/UISuite.scala)9
-rw-r--r--core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala (renamed from core/src/test/scala/spark/ClosureCleanerSuite.scala)7
-rw-r--r--core/src/test/scala/org/apache/spark/util/DistributionSuite.scala (renamed from core/src/test/scala/spark/util/DistributionSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/util/FakeClock.scala26
-rw-r--r--core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala (renamed from core/src/test/scala/spark/util/NextIteratorSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/util/RateLimitedOutputStreamSuite.scala (renamed from core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala (renamed from core/src/test/scala/spark/SizeEstimatorSuite.scala)2
-rw-r--r--core/src/test/scala/org/apache/spark/util/UtilsSuite.scala (renamed from core/src/test/scala/spark/UtilsSuite.scala)18
-rw-r--r--docs/README.md2
-rwxr-xr-xdocs/_layouts/global.html34
-rw-r--r--docs/_plugins/copy_api_dirs.rb4
-rw-r--r--docs/bagel-programming-guide.md37
-rw-r--r--docs/building-with-maven.md58
-rw-r--r--docs/configuration.md152
-rw-r--r--docs/css/bootstrap.min.css2
-rw-r--r--docs/ec2-scripts.md32
-rw-r--r--docs/hardware-provisioning.md69
-rw-r--r--docs/img/incubator-logo.pngbin0 -> 11651 bytes
-rw-r--r--docs/index.md77
-rw-r--r--docs/java-programming-guide.md33
-rw-r--r--docs/mllib-guide.md6
-rw-r--r--docs/python-programming-guide.md47
-rw-r--r--docs/quick-start.md79
-rw-r--r--docs/running-on-mesos.md3
-rw-r--r--docs/running-on-yarn.md55
-rw-r--r--docs/scala-programming-guide.md36
-rw-r--r--docs/spark-debugger.md4
-rw-r--r--docs/spark-simple-tutorial.md41
-rw-r--r--docs/spark-standalone.md58
-rw-r--r--docs/streaming-custom-receivers.md61
-rw-r--r--docs/streaming-programming-guide.md45
-rw-r--r--docs/tuning.md13
-rw-r--r--ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh9
-rw-r--r--ec2/deploy.generic/root/spark-ec2/ec2-variables.sh18
-rwxr-xr-xec2/spark_ec2.py277
-rw-r--r--examples/pom.xml293
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java (renamed from examples/src/main/java/spark/examples/JavaHdfsLR.java)10
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaKMeans.java (renamed from examples/src/main/java/spark/examples/JavaKMeans.java)14
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java (renamed from examples/src/main/java/spark/examples/JavaLogQuery.java)12
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaPageRank.java115
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java (renamed from examples/src/main/java/spark/examples/JavaSparkPi.java)10
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaTC.java (renamed from examples/src/main/java/spark/examples/JavaTC.java)8
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaWordCount.java (renamed from examples/src/main/java/spark/examples/JavaWordCount.java)14
-rw-r--r--examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java87
-rw-r--r--examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java81
-rw-r--r--examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java85
-rw-r--r--examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java (renamed from examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java)10
-rw-r--r--examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java (renamed from examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java)16
-rw-r--r--examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java (renamed from examples/src/main/java/spark/streaming/examples/JavaQueueStream.java)16
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala (renamed from examples/src/main/scala/spark/examples/BroadcastTest.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala (renamed from examples/src/main/scala/spark/examples/CassandraTest.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala (renamed from examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala (renamed from examples/src/main/scala/spark/examples/GroupByTest.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/HBaseTest.scala (renamed from examples/src/main/scala/spark/examples/HBaseTest.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala (renamed from examples/src/main/scala/spark/examples/HdfsTest.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalALS.scala (renamed from examples/src/main/scala/spark/examples/LocalALS.scala)2
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala (renamed from examples/src/main/scala/spark/examples/LocalFileLR.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala (renamed from examples/src/main/scala/spark/examples/LocalKMeans.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalLR.scala (renamed from examples/src/main/scala/spark/examples/LocalLR.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalPi.scala (renamed from examples/src/main/scala/spark/examples/LocalPi.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LogQuery.scala (renamed from examples/src/main/scala/spark/examples/LogQuery.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala (renamed from examples/src/main/scala/spark/examples/MultiBroadcastTest.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala (renamed from examples/src/main/scala/spark/examples/SimpleSkewedGroupByTest.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala (renamed from examples/src/main/scala/spark/examples/SkewedGroupByTest.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkALS.scala (renamed from examples/src/main/scala/spark/examples/SparkALS.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala (renamed from examples/src/main/scala/spark/examples/SparkHdfsLR.scala)11
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala (renamed from examples/src/main/scala/spark/examples/SparkKMeans.scala)8
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkLR.scala (renamed from examples/src/main/scala/spark/examples/SparkLR.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala46
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkPi.scala (renamed from examples/src/main/scala/spark/examples/SparkPi.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkTC.scala (renamed from examples/src/main/scala/spark/examples/SparkTC.scala)4
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala (renamed from bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala)11
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala (renamed from bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala)12
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala (renamed from bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala)21
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/ActorWordCount.scala)18
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/FlumeEventCount.scala)8
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/HdfsWordCount.scala)8
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala)14
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala)8
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/QueueStream.scala (renamed from examples/src/main/scala/spark/streaming/examples/QueueStream.scala)8
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala (renamed from examples/src/main/scala/spark/streaming/examples/RawNetworkGrep.scala)12
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/StatefulNetworkWordCount.scala)8
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala (renamed from examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdCMS.scala)10
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala (renamed from examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdHLL.scala)8
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala (renamed from examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/ZeroMQWordCount.scala)10
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewGenerator.scala (renamed from examples/src/main/scala/spark/streaming/examples/clickstream/PageViewGenerator.scala)6
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewStream.scala (renamed from examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala)14
-rwxr-xr-xmake-distribution.sh70
-rw-r--r--mllib/pom.xml75
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala21
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala189
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala188
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala (renamed from mllib/src/main/scala/spark/mllib/clustering/KMeans.scala)30
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala (renamed from mllib/src/main/scala/spark/mllib/clustering/KMeansModel.scala)8
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala (renamed from mllib/src/main/scala/spark/mllib/clustering/LocalKMeans.scala)2
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala98
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala168
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala29
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala99
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala (renamed from mllib/src/main/scala/spark/mllib/recommendation/ALS.scala)72
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala (renamed from mllib/src/main/scala/spark/mllib/recommendation/MatrixFactorizationModel.scala)15
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala160
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala26
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala211
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala168
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala (renamed from mllib/src/main/scala/spark/mllib/regression/Regression.scala)6
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala214
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala (renamed from core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala)41
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala85
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala133
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala (renamed from mllib/src/main/scala/spark/mllib/regression/LogisticRegressionGenerator.scala)56
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala114
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala (renamed from mllib/src/main/scala/spark/mllib/util/MLUtils.scala)51
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala51
-rw-r--r--mllib/src/main/scala/spark/mllib/optimization/Gradient.scala50
-rw-r--r--mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala79
-rw-r--r--mllib/src/main/scala/spark/mllib/optimization/Updater.scala44
-rw-r--r--mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala175
-rw-r--r--mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala211
-rw-r--r--mllib/src/main/scala/spark/mllib/regression/RidgeRegressionGenerator.scala72
-rw-r--r--mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java98
-rw-r--r--mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java98
-rw-r--r--mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java115
-rw-r--r--mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java110
-rw-r--r--mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java97
-rw-r--r--mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java94
-rw-r--r--mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java110
-rw-r--r--mllib/src/test/resources/log4j.properties2
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala150
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala169
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala (renamed from mllib/src/test/scala/spark/mllib/clustering/KMeansSuite.scala)13
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala (renamed from mllib/src/test/scala/spark/mllib/recommendation/ALSSuite.scala)66
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala121
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala72
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala90
-rw-r--r--mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala74
-rw-r--r--mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala64
-rw-r--r--pagerank_data.txt6
-rw-r--r--pom.xml316
-rw-r--r--project/SparkBuild.scala176
-rw-r--r--project/build.properties2
-rw-r--r--project/plugins.sbt6
-rwxr-xr-xpyspark25
-rwxr-xr-xpython/examples/als.py5
-rwxr-xr-x[-rw-r--r--]python/examples/kmeans.py3
-rwxr-xr-xpython/examples/logistic_regression.py54
-rwxr-xr-xpython/examples/pagerank.py70
-rwxr-xr-x[-rw-r--r--]python/examples/pi.py3
-rwxr-xr-x[-rw-r--r--]python/examples/transitive_closure.py5
-rwxr-xr-x[-rw-r--r--]python/examples/wordcount.py5
-rw-r--r--python/pyspark/context.py29
-rw-r--r--python/pyspark/files.py2
-rw-r--r--python/pyspark/java_gateway.py12
-rw-r--r--python/pyspark/rdd.py191
-rw-r--r--python/pyspark/rddsampler.py112
-rw-r--r--python/pyspark/shell.py20
-rw-r--r--python/pyspark/statcounter.py109
-rw-r--r--python/pyspark/tests.py24
-rw-r--r--python/pyspark/worker.py13
-rwxr-xr-xpython/run-tests25
-rw-r--r--python/test_support/userlib-0.1-py2.7.eggbin0 -> 1945 bytes
-rw-r--r--repl-bin/pom.xml168
-rwxr-xr-xrepl-bin/src/deb/bin/run2
-rwxr-xr-xrepl-bin/src/deb/bin/spark-executor2
-rwxr-xr-xrepl-bin/src/deb/bin/spark-shell2
-rw-r--r--repl/pom.xml223
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala (renamed from repl/src/main/scala/spark/repl/ExecutorClassLoader.scala)2
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/Main.scala (renamed from repl/src/main/scala/spark/repl/Main.scala)2
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/SparkHelper.scala (renamed from repl/src/main/scala/spark/repl/SparkHelper.scala)0
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala (renamed from repl/src/main/scala/spark/repl/SparkILoop.scala)22
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala (renamed from repl/src/main/scala/spark/repl/SparkIMain.scala)10
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/SparkISettings.scala (renamed from repl/src/main/scala/spark/repl/SparkISettings.scala)2
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/SparkImports.scala (renamed from repl/src/main/scala/spark/repl/SparkImports.scala)2
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/SparkJLineCompletion.scala (renamed from repl/src/main/scala/spark/repl/SparkJLineCompletion.scala)2
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/SparkJLineReader.scala (renamed from repl/src/main/scala/spark/repl/SparkJLineReader.scala)2
-rw-r--r--repl/src/main/scala/org/apache/spark/repl/SparkMemberHandlers.scala (renamed from repl/src/main/scala/spark/repl/SparkMemberHandlers.scala)2
-rw-r--r--repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala (renamed from repl/src/test/scala/spark/repl/ReplSuite.scala)6
-rwxr-xr-xrun-example81
-rwxr-xr-xsbt/sbt6
-rwxr-xr-xspark-class (renamed from run)80
-rwxr-xr-xspark-executor2
-rwxr-xr-xspark-shell7
-rw-r--r--spark-shell.cmd2
-rw-r--r--streaming/pom.xml110
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala (renamed from streaming/src/main/scala/spark/streaming/Checkpoint.scala)28
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/DStream.scala (renamed from streaming/src/main/scala/spark/streaming/DStream.scala)17
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/DStreamCheckpointData.scala (renamed from streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala)4
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala (renamed from streaming/src/main/scala/spark/streaming/DStreamGraph.scala)4
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/Duration.scala (renamed from streaming/src/main/scala/spark/streaming/Duration.scala)10
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/Interval.scala (renamed from streaming/src/main/scala/spark/streaming/Interval.scala)2
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/Job.scala (renamed from streaming/src/main/scala/spark/streaming/Job.scala)2
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/JobManager.scala (renamed from streaming/src/main/scala/spark/streaming/JobManager.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/NetworkInputTracker.scala (renamed from streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala)12
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/PairDStreamFunctions.scala (renamed from streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala)29
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/Scheduler.scala (renamed from streaming/src/main/scala/spark/streaming/Scheduler.scala)9
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala (renamed from streaming/src/main/scala/spark/streaming/StreamingContext.scala)21
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/Time.scala (renamed from streaming/src/main/scala/spark/streaming/Time.scala)2
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala (renamed from streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala)16
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala (renamed from streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala)10
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala (renamed from streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala)30
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala (renamed from streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala)34
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/CoGroupedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/CoGroupedDStream.scala)9
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/ConstantInputDStream.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala)8
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FilteredDStream.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FlatMapValuedDStream.scala)8
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FlatMappedDStream.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/FlumeInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FlumeInputDStream.scala)17
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/ForEachDStream.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/GlommedDStream.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala)4
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/KafkaInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala)8
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/MapPartitionedDStream.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/MapValuedDStream.scala)8
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/MappedDStream.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala)29
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/PluggableInputDStream.scala)4
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/QueueInputDStream.scala)8
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala)8
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala)16
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/ShuffledDStream.scala)9
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/SocketInputDStream.scala)8
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala)12
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/TransformedDStream.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/TwitterInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/UnionDStream.scala)8
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/WindowedDStream.scala)10
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala (renamed from streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala)8
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala (renamed from streaming/src/main/scala/spark/streaming/receivers/ZeroMQReceiver.scala)4
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/util/Clock.scala (renamed from streaming/src/main/scala/spark/streaming/util/Clock.scala)2
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/util/MasterFailureTest.scala (renamed from streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala)9
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala (renamed from streaming/src/main/scala/spark/streaming/util/RawTextHelper.scala)6
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala (renamed from streaming/src/main/scala/spark/streaming/util/RawTextSender.scala)7
-rw-r--r--streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala (renamed from streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala)2
-rw-r--r--streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java (renamed from streaming/src/test/java/spark/streaming/JavaAPISuite.java)32
-rw-r--r--streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala (renamed from streaming/src/test/java/spark/streaming/JavaTestUtils.scala)17
-rw-r--r--streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala (renamed from streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala)6
-rw-r--r--streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala (renamed from streaming/src/test/scala/spark/streaming/CheckpointSuite.scala)8
-rw-r--r--streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala (renamed from streaming/src/test/scala/spark/streaming/FailureSuite.scala)6
-rw-r--r--streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala (renamed from streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala)12
-rw-r--r--streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala (renamed from streaming/src/test/scala/spark/streaming/TestSuiteBase.scala)11
-rw-r--r--streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala (renamed from streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala)6
-rw-r--r--tools/pom.xml69
-rw-r--r--tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala360
-rw-r--r--yarn/pom.xml111
-rw-r--r--yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMaster.scala)32
-rw-r--r--yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMasterArguments.scala)4
-rw-r--r--yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala)21
-rw-r--r--yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/ClientArguments.scala)8
-rw-r--r--yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/WorkerRunnable.scala)15
-rw-r--r--yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/YarnAllocationHandler.scala)8
-rw-r--r--yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala)42
-rw-r--r--yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala (renamed from core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala)13
616 files changed, 16182 insertions, 8290 deletions
diff --git a/.gitignore b/.gitignore
index ae39c52b11..e1f64a1133 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,3 +38,6 @@ dependency-reduced-pom.xml
.ensime_lucene
checkpoint
derby.log
+dist/
+spark-*-bin.tar.gz
+unit-tests.log
diff --git a/README.md b/README.md
index 4c91013f7e..11bf8edb3b 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
-# Spark
+# Apache Spark
-Lightning-Fast Cluster Computing - <http://www.spark-project.org/>
+Lightning-Fast Cluster Computing - <http://spark.incubator.apache.org/>
## Online Documentation
You can find the latest Spark documentation, including a programming
-guide, on the project webpage at <http://spark-project.org/documentation.html>.
+guide, on the project webpage at <http://spark.incubator.apache.org/documentation.html>.
This README file only contains basic setup instructions.
@@ -16,26 +16,24 @@ Spark requires Scala 2.9.3 (Scala 2.10 is not yet supported). The project is
built using Simple Build Tool (SBT), which is packaged with it. To build
Spark and its example programs, run:
- sbt/sbt package
+ sbt/sbt assembly
-Spark also supports building using Maven. If you would like to build using Maven,
-see the [instructions for building Spark with Maven](http://spark-project.org/docs/latest/building-with-maven.html)
-in the spark documentation..
+Once you've built Spark, the easiest way to start using it is the shell:
-To run Spark, you will need to have Scala's bin directory in your `PATH`, or
-you will need to set the `SCALA_HOME` environment variable to point to where
-you've installed Scala. Scala must be accessible through one of these
-methods on your cluster's worker nodes as well as its master.
+ ./spark-shell
-To run one of the examples, use `./run <class> <params>`. For example:
+Or, for the Python API, the Python shell (`./pyspark`).
- ./run spark.examples.SparkLR local[2]
+Spark also comes with several sample programs in the `examples` directory.
+To run one of them, use `./run-example <class> <params>`. For example:
+
+ ./run-example org.apache.spark.examples.SparkLR local[2]
will run the Logistic Regression example locally on 2 CPUs.
Each of the example programs prints usage help if no params are given.
-All of the Spark samples take a `<host>` parameter that is the cluster URL
+All of the Spark samples take a `<master>` parameter that is the cluster URL
to connect to. This can be a mesos:// or spark:// URL, or "local" to run
locally with one thread, or "local[N]" to run locally with N threads.
@@ -43,23 +41,52 @@ locally with one thread, or "local[N]" to run locally with N threads.
## A Note About Hadoop Versions
Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
-storage systems. Because the HDFS API has changed in different versions of
+storage systems. Because the protocols have changed in different versions of
Hadoop, you must build Spark against the same version that your cluster runs.
-You can change the version by setting the `HADOOP_VERSION` variable at the top
-of `project/SparkBuild.scala`, then rebuilding Spark.
+You can change the version by setting the `SPARK_HADOOP_VERSION` environment
+when building Spark.
+For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop
+versions without YARN, use:
-## Configuration
+ # Apache Hadoop 1.2.1
+ $ SPARK_HADOOP_VERSION=1.2.1 sbt/sbt assembly
+
+ # Cloudera CDH 4.2.0 with MapReduce v1
+ $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly
+
+For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions
+with YARN, also set `SPARK_YARN=true`:
+
+ # Apache Hadoop 2.0.5-alpha
+ $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly
+
+ # Cloudera CDH 4.2.0 with MapReduce v2
+ $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly
-Please refer to the "Configuration" guide in the online documentation for a
-full overview on how to configure Spark. At the minimum, you will need to
-create a `conf/spark-env.sh` script (copy `conf/spark-env.sh.template`) and
-set the following two variables:
+For convenience, these variables may also be set through the `conf/spark-env.sh` file
+described below.
-- `SCALA_HOME`: Location where Scala is installed.
+When developing a Spark application, specify the Hadoop version by adding the
+"hadoop-client" artifact to your project's dependencies. For example, if you're
+using Hadoop 1.0.1 and build your application using SBT, add this entry to
+`libraryDependencies`:
+
+ "org.apache.hadoop" % "hadoop-client" % "1.2.1"
+
+If your project is built with Maven, add this to your POM file's `<dependencies>` section:
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>1.2.1</version>
+ </dependency>
+
+
+## Configuration
-- `MESOS_NATIVE_LIBRARY`: Your Mesos library (only needed if you want to run
- on Mesos). For example, this might be `/usr/local/lib/libmesos.so` on Linux.
+Please refer to the [Configuration guide](http://spark.incubator.apache.org/docs/latest/configuration.html)
+in the online documentation for an overview on how to configure Spark.
## Contributing to Spark
diff --git a/assembly/README b/assembly/README
new file mode 100644
index 0000000000..6ee2a536d7
--- /dev/null
+++ b/assembly/README
@@ -0,0 +1,13 @@
+This is an assembly module for Spark project.
+
+It creates a single tar.gz file that includes all needed dependency of the project
+except for org.apache.hadoop.* jars that are supposed to be available from the
+deployed Hadoop cluster.
+
+This module is off by default to avoid spending extra time on top of repl-bin
+module. To activate it specify the profile in the command line
+ -Passembly
+
+In case you want to avoid building time-expensive repl-bin module, that shaders
+all the dependency into a big flat jar supplement maven command with
+ -DnoExpensive
diff --git a/assembly/lib/PY4J_LICENSE.txt b/assembly/lib/PY4J_LICENSE.txt
new file mode 100644
index 0000000000..a70279ca14
--- /dev/null
+++ b/assembly/lib/PY4J_LICENSE.txt
@@ -0,0 +1,27 @@
+
+Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+- The name of the author may not be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/assembly/lib/PY4J_VERSION.txt b/assembly/lib/PY4J_VERSION.txt
new file mode 100644
index 0000000000..04a0cd52a8
--- /dev/null
+++ b/assembly/lib/PY4J_VERSION.txt
@@ -0,0 +1 @@
+b7924aabe9c5e63f0a4d8bbd17019534c7ec014e
diff --git a/python/lib/py4j0.7.jar b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar
index 73b7ddb7d1..73b7ddb7d1 100644
--- a/python/lib/py4j0.7.jar
+++ b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar
Binary files differ
diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom
new file mode 100644
index 0000000000..1c730e19b4
--- /dev/null
+++ b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>net.sf.py4j</groupId>
+ <artifactId>py4j</artifactId>
+ <version>0.7</version>
+ <description>POM was created from install:install-file</description>
+</project>
diff --git a/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml
new file mode 100644
index 0000000000..6942ff45e7
--- /dev/null
+++ b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<metadata>
+ <groupId>net.sf.py4j</groupId>
+ <artifactId>py4j</artifactId>
+ <versioning>
+ <release>0.7</release>
+ <versions>
+ <version>0.7</version>
+ </versions>
+ <lastUpdated>20130828020333</lastUpdated>
+ </versioning>
+</metadata>
diff --git a/assembly/pom.xml b/assembly/pom.xml
new file mode 100644
index 0000000000..d19f44d292
--- /dev/null
+++ b/assembly/pom.xml
@@ -0,0 +1,159 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-parent</artifactId>
+ <version>0.8.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-assembly</artifactId>
+ <name>Spark Project Assembly</name>
+ <url>http://spark.incubator.apache.org/</url>
+
+ <repositories>
+ <!-- A repository in the local filesystem for the Py4J JAR, which is not in Maven central -->
+ <repository>
+ <id>lib</id>
+ <url>file://${project.basedir}/lib</url>
+ </repository>
+ </repositories>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-bagel</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-mllib</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-repl</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-streaming</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>net.sf.py4j</groupId>
+ <artifactId>py4j</artifactId>
+ <version>0.7</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <!-- Use the shade plugin to create a big JAR with all the dependencies -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <configuration>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <outputFile>${project.build.directory}/scala-${scala.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</outputFile>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>reference.conf</resource>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <profiles>
+ <profile>
+ <id>hadoop2-yarn</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-yarn</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <id>bigtop-dist</id>
+ <!-- This profile uses the assembly plugin to create a special "dist" package for BigTop
+ that contains Spark but not the Hadoop JARs it depends on. -->
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.4</version>
+ <executions>
+ <execution>
+ <id>dist</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+</project>
diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml
new file mode 100644
index 0000000000..47d3fa93d0
--- /dev/null
+++ b/assembly/src/main/assembly/assembly.xml
@@ -0,0 +1,85 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<assembly>
+ <id>dist</id>
+ <formats>
+ <format>tar.gz</format>
+ <format>dir</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+
+ <fileSets>
+ <fileSet>
+ <includes>
+ <include>README</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <directory>
+ ${project.parent.basedir}/core/src/main/resources/org/apache/spark/ui/static/
+ </directory>
+ <outputDirectory>/ui-resources/org/apache/spark/ui/static</outputDirectory>
+ <includes>
+ <include>**/*</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <directory>
+ ${project.parent.basedir}/bin/
+ </directory>
+ <outputDirectory>/bin</outputDirectory>
+ <includes>
+ <include>**/*</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <directory>
+ ${project.parent.basedir}
+ </directory>
+ <outputDirectory>/bin</outputDirectory>
+ <includes>
+ <include>run-example*</include>
+ <include>spark-class*</include>
+ <include>spark-shell*</include>
+ <include>spark-executor*</include>
+ </includes>
+ </fileSet>
+ </fileSets>
+
+ <dependencySets>
+ <dependencySet>
+ <includes>
+ <include>org.apache.spark:*:jar</include>
+ </includes>
+ <excludes>
+ <exclude>org.apache.spark:spark-assembly:jar</exclude>
+ </excludes>
+ </dependencySet>
+ <dependencySet>
+ <outputDirectory>lib</outputDirectory>
+ <useTransitiveDependencies>true</useTransitiveDependencies>
+ <unpack>false</unpack>
+ <scope>runtime</scope>
+ <useProjectArtifact>false</useProjectArtifact>
+ <excludes>
+ <exclude>org.apache.hadoop:*:jar</exclude>
+ <exclude>org.apache.spark:*:jar</exclude>
+ </excludes>
+ </dependencySet>
+ </dependencySets>
+
+</assembly>
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 60bbc49e6c..51173c32b2 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -19,24 +19,28 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-bagel</artifactId>
<packaging>jar</packaging>
<name>Spark Project Bagel</name>
- <url>http://spark-project.org/</url>
+ <url>http://spark.incubator.apache.org/</url>
<dependencies>
<dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
</dependency>
-
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.version}</artifactId>
@@ -58,103 +62,4 @@
</plugin>
</plugins>
</build>
-
- <profiles>
- <profile>
- <id>hadoop1</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop1</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>hadoop2</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>hadoop2-yarn</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2-yarn</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- </profiles>
</project>
diff --git a/bagel/src/main/scala/spark/bagel/Bagel.scala b/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala
index 80c8d53d2b..44e26bbb9e 100644
--- a/bagel/src/main/scala/spark/bagel/Bagel.scala
+++ b/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala
@@ -15,32 +15,31 @@
* limitations under the License.
*/
-package spark.bagel
+package org.apache.spark.bagel
-import spark._
-import spark.SparkContext._
-
-import scala.collection.mutable.ArrayBuffer
-import storage.StorageLevel
+import org.apache.spark._
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.storage.StorageLevel
object Bagel extends Logging {
val DEFAULT_STORAGE_LEVEL = StorageLevel.MEMORY_AND_DISK
/**
* Runs a Bagel program.
- * @param sc [[spark.SparkContext]] to use for the program.
+ * @param sc [[org.apache.spark.SparkContext]] to use for the program.
* @param vertices vertices of the graph represented as an RDD of (Key, Vertex) pairs. Often the Key will be
* the vertex id.
* @param messages initial set of messages represented as an RDD of (Key, Message) pairs. Often this will be an
* empty array, i.e. sc.parallelize(Array[K, Message]()).
- * @param combiner [[spark.bagel.Combiner]] combines multiple individual messages to a given vertex into one
+ * @param combiner [[org.apache.spark.bagel.Combiner]] combines multiple individual messages to a given vertex into one
* message before sending (which often involves network I/O).
- * @param aggregator [[spark.bagel.Aggregator]] performs a reduce across all vertices after each superstep,
+ * @param aggregator [[org.apache.spark.bagel.Aggregator]] performs a reduce across all vertices after each superstep,
* and provides the result to each vertex in the next superstep.
- * @param partitioner [[spark.Partitioner]] partitions values by key
+ * @param partitioner [[org.apache.spark.Partitioner]] partitions values by key
* @param numPartitions number of partitions across which to split the graph.
* Default is the default parallelism of the SparkContext
- * @param storageLevel [[spark.storage.StorageLevel]] to use for caching of intermediate RDDs in each superstep.
+ * @param storageLevel [[org.apache.spark.storage.StorageLevel]] to use for caching of intermediate RDDs in each superstep.
* Defaults to caching in memory.
* @param compute function that takes a Vertex, optional set of (possibly combined) messages to the Vertex,
* optional Aggregator and the current superstep,
@@ -98,7 +97,7 @@ object Bagel extends Logging {
verts
}
- /** Runs a Bagel program with no [[spark.bagel.Aggregator]] and the default storage level */
+ /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] and the default storage level */
def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
sc: SparkContext,
vertices: RDD[(K, V)],
@@ -110,7 +109,7 @@ object Bagel extends Logging {
compute: (V, Option[C], Int) => (V, Array[M])
): RDD[(K, V)] = run(sc, vertices, messages, combiner, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)
- /** Runs a Bagel program with no [[spark.bagel.Aggregator]] */
+ /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] */
def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
sc: SparkContext,
vertices: RDD[(K, V)],
@@ -128,7 +127,7 @@ object Bagel extends Logging {
}
/**
- * Runs a Bagel program with no [[spark.bagel.Aggregator]], default [[spark.HashPartitioner]]
+ * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], default [[org.apache.spark.HashPartitioner]]
* and default storage level
*/
def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
@@ -141,7 +140,7 @@ object Bagel extends Logging {
compute: (V, Option[C], Int) => (V, Array[M])
): RDD[(K, V)] = run(sc, vertices, messages, combiner, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)
- /** Runs a Bagel program with no [[spark.bagel.Aggregator]] and the default [[spark.HashPartitioner]]*/
+ /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] and the default [[org.apache.spark.HashPartitioner]]*/
def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
sc: SparkContext,
vertices: RDD[(K, V)],
@@ -159,8 +158,8 @@ object Bagel extends Logging {
}
/**
- * Runs a Bagel program with no [[spark.bagel.Aggregator]], default [[spark.HashPartitioner]],
- * [[spark.bagel.DefaultCombiner]] and the default storage level
+ * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], default [[org.apache.spark.HashPartitioner]],
+ * [[org.apache.spark.bagel.DefaultCombiner]] and the default storage level
*/
def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest](
sc: SparkContext,
@@ -172,8 +171,8 @@ object Bagel extends Logging {
): RDD[(K, V)] = run(sc, vertices, messages, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)
/**
- * Runs a Bagel program with no [[spark.bagel.Aggregator]], the default [[spark.HashPartitioner]]
- * and [[spark.bagel.DefaultCombiner]]
+ * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], the default [[org.apache.spark.HashPartitioner]]
+ * and [[org.apache.spark.bagel.DefaultCombiner]]
*/
def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest](
sc: SparkContext,
diff --git a/bagel/src/test/scala/bagel/BagelSuite.scala b/bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala
index ef2d57fbd0..7b954a4775 100644
--- a/bagel/src/test/scala/bagel/BagelSuite.scala
+++ b/bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala
@@ -15,16 +15,14 @@
* limitations under the License.
*/
-package spark.bagel
+package org.apache.spark.bagel
-import org.scalatest.{FunSuite, Assertions, BeforeAndAfter}
+import org.scalatest.{BeforeAndAfter, FunSuite, Assertions}
import org.scalatest.concurrent.Timeouts
import org.scalatest.time.SpanSugar._
-import scala.collection.mutable.ArrayBuffer
-
-import spark._
-import storage.StorageLevel
+import org.apache.spark._
+import org.apache.spark.storage.StorageLevel
class TestVertex(val active: Boolean, val age: Int) extends Vertex with Serializable
class TestMessage(val targetId: String) extends Message[String] with Serializable
diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 835d1af794..9178b852e6 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -33,6 +33,8 @@ set REPL_DIR=%FWDIR%repl
set EXAMPLES_DIR=%FWDIR%examples
set BAGEL_DIR=%FWDIR%bagel
set MLLIB_DIR=%FWDIR%mllib
+set TOOLS_DIR=%FWDIR%tools
+set YARN_DIR=%FWDIR%yarn
set STREAMING_DIR=%FWDIR%streaming
set PYSPARK_DIR=%FWDIR%python
@@ -48,6 +50,8 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes
+set CLASSPATH=%CLASSPATH%;%TOOLS_DIR%\target\scala-%SCALA_VERSION%\classes
+set CLASSPATH=%CLASSPATH%;%YARN_DIR%\target\scala-%SCALA_VERSION%\classes
rem Add hadoop conf dir - else FileSystem.*, etc fail
rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 4c539649a2..c7819d4932 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
@@ -30,85 +30,25 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then
. $FWDIR/conf/spark-env.sh
fi
-CORE_DIR="$FWDIR/core"
-REPL_DIR="$FWDIR/repl"
-REPL_BIN_DIR="$FWDIR/repl-bin"
-EXAMPLES_DIR="$FWDIR/examples"
-BAGEL_DIR="$FWDIR/bagel"
-MLLIB_DIR="$FWDIR/mllib"
-STREAMING_DIR="$FWDIR/streaming"
-PYSPARK_DIR="$FWDIR/python"
-
# Build up classpath
-CLASSPATH="$SPARK_CLASSPATH"
-
-function dev_classpath {
- CLASSPATH="$CLASSPATH:$FWDIR/conf"
- CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes"
- if [ -n "$SPARK_TESTING" ] ; then
- CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
- CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes"
- fi
- CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources"
- CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes"
- CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
- CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes"
- CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar
- if [ -e "$FWDIR/lib_managed" ]; then
- CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*"
- CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*"
- fi
- CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*"
- # Add the shaded JAR for Maven builds
- if [ -e $REPL_BIN_DIR/target ]; then
- for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do
- CLASSPATH="$CLASSPATH:$jar"
- done
- # The shaded JAR doesn't contain examples, so include those separately
- EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
- CLASSPATH+=":$EXAMPLES_JAR"
- fi
- CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
- CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
- for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
- CLASSPATH="$CLASSPATH:$jar"
- done
-
- # Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
- # to avoid the -sources and -doc packages that are built by publish-local.
- if [ -e "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar ]; then
- # Use the JAR from the SBT build
- export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar`
- fi
- if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then
- # Use the JAR from the Maven build
- export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
- fi
-
- # Add Scala standard library
- if [ -z "$SCALA_LIBRARY_PATH" ]; then
- if [ -z "$SCALA_HOME" ]; then
- echo "SCALA_HOME is not set" >&2
- exit 1
- fi
- SCALA_LIBRARY_PATH="$SCALA_HOME/lib"
- fi
- CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar"
- CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar"
- CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar"
-}
-
-function release_classpath {
- CLASSPATH="$CLASSPATH:$FWDIR/jars/*"
-}
-
+CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
if [ -f "$FWDIR/RELEASE" ]; then
- release_classpath
+ ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark-assembly*.jar`
else
- dev_classpath
+ ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar`
+fi
+CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
+
+# Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
+if [[ $SPARK_TESTING == 1 ]]; then
+ CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes"
+ CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/test-classes"
+ CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/test-classes"
+ CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/test-classes"
+ CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/test-classes"
fi
-# Add hadoop conf dir - else FileSystem.*, etc fail !
+# Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail !
# Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
# the configurtion files.
if [ "x" != "x$HADOOP_CONF_DIR" ]; then
diff --git a/bin/slaves.sh b/bin/slaves.sh
index c8fb5ca473..752565b759 100755
--- a/bin/slaves.sh
+++ b/bin/slaves.sh
@@ -42,7 +42,7 @@ bin=`cd "$bin"; pwd`
. "$bin/spark-config.sh"
# If the slaves file is specified in the command line,
-# then it takes precedence over the definition in
+# then it takes precedence over the definition in
# spark-env.sh. Save it here.
HOSTLIST=$SPARK_SLAVES
@@ -58,8 +58,6 @@ if [ "$HOSTLIST" = "" ]; then
fi
fi
-echo $"${@// /\\ }"
-
# By default disable strict host key checking
if [ "$SPARK_SSH_OPTS" = "" ]; then
SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
diff --git a/bin/spark-daemon.sh b/bin/spark-daemon.sh
index a5b88ca785..5bfe967fbf 100755
--- a/bin/spark-daemon.sh
+++ b/bin/spark-daemon.sh
@@ -75,6 +75,9 @@ if [ "$SPARK_IDENT_STRING" = "" ]; then
export SPARK_IDENT_STRING="$USER"
fi
+
+export SPARK_PRINT_LAUNCH_COMMAND="1"
+
# get log directory
if [ "$SPARK_LOG_DIR" = "" ]; then
export SPARK_LOG_DIR="$SPARK_HOME/logs"
@@ -85,7 +88,7 @@ TEST_LOG_DIR=$?
if [ "${TEST_LOG_DIR}" = "0" ]; then
rm -f $SPARK_LOG_DIR/.spark_test
else
- chown $SPARK_IDENT_STRING $SPARK_LOG_DIR
+ chown $SPARK_IDENT_STRING $SPARK_LOG_DIR
fi
if [ "$SPARK_PID_DIR" = "" ]; then
@@ -107,7 +110,7 @@ fi
case $startStop in
(start)
-
+
mkdir -p "$SPARK_PID_DIR"
if [ -f $pid ]; then
@@ -122,14 +125,21 @@ case $startStop in
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $SPARK_MASTER/ "$SPARK_HOME"
fi
- spark_rotate_log $log
+ spark_rotate_log "$log"
echo starting $command, logging to $log
cd "$SPARK_PREFIX"
- nohup nice -n $SPARK_NICENESS "$SPARK_PREFIX"/run $command "$@" > "$log" 2>&1 < /dev/null &
- echo $! > $pid
- sleep 1; head "$log"
+ nohup nice -n $SPARK_NICENESS "$SPARK_PREFIX"/spark-class $command "$@" >> "$log" 2>&1 < /dev/null &
+ newpid=$!
+ echo $newpid > $pid
+ sleep 2
+ # Check if the process has died; in that case we'll tail the log so the user can see
+ if ! kill -0 $newpid >/dev/null 2>&1; then
+ echo "failed to launch $command:"
+ tail -2 "$log" | sed 's/^/ /'
+ echo "full log in $log"
+ fi
;;
-
+
(stop)
if [ -f $pid ]; then
diff --git a/bin/start-master.sh b/bin/start-master.sh
index 2288fb19d7..648c7ae75f 100755
--- a/bin/start-master.sh
+++ b/bin/start-master.sh
@@ -49,4 +49,4 @@ if [ "$SPARK_PUBLIC_DNS" = "" ]; then
fi
fi
-"$bin"/spark-daemon.sh start spark.deploy.master.Master 1 --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT
+"$bin"/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT
diff --git a/bin/start-slave.sh b/bin/start-slave.sh
index d6db16882d..4eefa20944 100755
--- a/bin/start-slave.sh
+++ b/bin/start-slave.sh
@@ -32,4 +32,4 @@ if [ "$SPARK_PUBLIC_DNS" = "" ]; then
fi
fi
-"$bin"/spark-daemon.sh start spark.deploy.worker.Worker "$@"
+"$bin"/spark-daemon.sh start org.apache.spark.deploy.worker.Worker "$@"
diff --git a/bin/start-slaves.sh b/bin/start-slaves.sh
index dad7c3df76..00dc4888b2 100755
--- a/bin/start-slaves.sh
+++ b/bin/start-slaves.sh
@@ -35,8 +35,6 @@ if [ "$SPARK_MASTER_IP" = "" ]; then
SPARK_MASTER_IP=`hostname`
fi
-echo "Master IP: $SPARK_MASTER_IP"
-
# Launch the slaves
if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
exec "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/start-slave.sh" 1 spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT
diff --git a/bin/stop-all.sh b/bin/stop-all.sh
index a043ac0095..b6c83a7ba4 100755
--- a/bin/stop-all.sh
+++ b/bin/stop-all.sh
@@ -20,6 +20,7 @@
# Start all spark daemons.
# Run this on the master nde
+
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
diff --git a/bin/stop-master.sh b/bin/stop-master.sh
index 31a610bf9d..310e33bedc 100755
--- a/bin/stop-master.sh
+++ b/bin/stop-master.sh
@@ -24,4 +24,4 @@ bin=`cd "$bin"; pwd`
. "$bin/spark-config.sh"
-"$bin"/spark-daemon.sh stop spark.deploy.master.Master 1
+"$bin"/spark-daemon.sh stop org.apache.spark.deploy.master.Master 1
diff --git a/bin/stop-slaves.sh b/bin/stop-slaves.sh
index 8e056f23d4..03e416a132 100755
--- a/bin/stop-slaves.sh
+++ b/bin/stop-slaves.sh
@@ -29,9 +29,9 @@ if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
fi
if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
- "$bin"/spark-daemons.sh stop spark.deploy.worker.Worker 1
+ "$bin"/spark-daemons.sh stop org.apache.spark.deploy.worker.Worker 1
else
for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
- "$bin"/spark-daemons.sh stop spark.deploy.worker.Worker $(( $i + 1 ))
+ "$bin"/spark-daemons.sh stop org.apache.spark.deploy.worker.Worker $(( $i + 1 ))
done
fi
diff --git a/conf/metrics.properties.template b/conf/metrics.properties.template
new file mode 100644
index 0000000000..6c36f3cca4
--- /dev/null
+++ b/conf/metrics.properties.template
@@ -0,0 +1,90 @@
+# syntax: [instance].sink|source.[name].[options]=[value]
+
+# This file configures Spark's internal metrics system. The metrics system is
+# divided into instances which correspond to internal components.
+# Each instance can be configured to report its metrics to one or more sinks.
+# Accepted values for [instance] are "master", "worker", "executor", "driver",
+# and "applications". A wild card "*" can be used as an instance name, in
+# which case all instances will inherit the supplied property.
+#
+# Within an instance, a "source" specifies a particular set of grouped metrics.
+# there are two kinds of sources:
+# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
+# collect a Spark component's internal state. Each instance is paired with a
+# Spark source that is added automatically.
+# 2. Common sources, like JvmSource, which will collect low level state.
+# These can be added through configuration options and are then loaded
+# using reflection.
+#
+# A "sink" specifies where metrics are delivered to. Each instance can be
+# assigned one or more sinks.
+#
+# The sink|source field specifies whether the property relates to a sink or
+# source.
+#
+# The [name] field specifies the name of source or sink.
+#
+# The [options] field is the specific property of this source or sink. The
+# source or sink is responsible for parsing this property.
+#
+# Notes:
+# 1. To add a new sink, set the "class" option to a fully qualified class
+# name (see examples below).
+# 2. Some sinks involve a polling period. The minimum allowed polling period
+# is 1 second.
+# 3. Wild card properties can be overridden by more specific properties.
+# For example, master.sink.console.period takes precedence over
+# *.sink.console.period.
+# 4. A metrics specific configuration
+# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
+# added to Java properties using -Dspark.metrics.conf=xxx if you want to
+# customize metrics system. You can also put the file in ${SPARK_HOME}/conf
+# and it will be loaded automatically.
+# 5. MetricsServlet is added by default as a sink in master, worker and client
+# driver, you can send http request "/metrics/json" to get a snapshot of all the
+# registered metrics in json format. For master, requests "/metrics/master/json" and
+# "/metrics/applications/json" can be sent seperately to get metrics snapshot of
+# instance master and applications. MetricsServlet may not be configured by self.
+#
+
+# Enable JmxSink for all instances by class name
+#*.sink.jmx.class=spark.metrics.sink.JmxSink
+
+# Enable ConsoleSink for all instances by class name
+#*.sink.console.class=spark.metrics.sink.ConsoleSink
+
+# Polling period for ConsoleSink
+#*.sink.console.period=10
+
+#*.sink.console.unit=seconds
+
+# Master instance overlap polling period
+#master.sink.console.period=15
+
+#master.sink.console.unit=seconds
+
+# Enable CsvSink for all instances
+#*.sink.csv.class=spark.metrics.sink.CsvSink
+
+# Polling period for CsvSink
+#*.sink.csv.period=1
+
+#*.sink.csv.unit=minutes
+
+# Polling directory for CsvSink
+#*.sink.csv.directory=/tmp/
+
+# Worker instance overlap polling period
+#worker.sink.csv.period=10
+
+#worker.sink.csv.unit=minutes
+
+# Enable jvm source for instance master, worker, driver and executor
+#master.source.jvm.class=spark.metrics.source.JvmSource
+
+#worker.source.jvm.class=spark.metrics.source.JvmSource
+
+#driver.source.jvm.class=spark.metrics.source.JvmSource
+
+#executor.source.jvm.class=spark.metrics.source.JvmSource
+
diff --git a/conf/slaves b/conf/slaves
index 6e315a8540..da0a01343d 100644
--- a/conf/slaves
+++ b/conf/slaves
@@ -1,2 +1,2 @@
-# A Spark Worker will be started on each of the machines listes below.
+# A Spark Worker will be started on each of the machines listed below.
localhost \ No newline at end of file
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index b8936314ec..0a35ee7c79 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -1,19 +1,21 @@
#!/usr/bin/env bash
# This file contains environment variables required to run Spark. Copy it as
-# spark-env.sh and edit that to configure Spark for your site. At a minimum,
-# the following two variables should be set:
-# - SCALA_HOME, to point to your Scala installation, or SCALA_LIBRARY_PATH to
-# point to the directory for Scala library JARs (if you install Scala as a
-# Debian or RPM package, these are in a separate path, often /usr/share/java)
+# spark-env.sh and edit that to configure Spark for your site.
+#
+# The following variables can be set in this file:
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos
+# - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that
+# we recommend setting app-wide options in the application's driver program.
+# Examples of node-specific options : -Dspark.local.dir, GC options
+# Examples of app-wide options : -Dspark.serializer
#
-# If using the standalone deploy mode, you can also set variables for it:
-# - SPARK_MASTER_IP, to bind the master to a different IP address
+# If using the standalone deploy mode, you can also set variables for it here:
+# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
-# - SPARK_WORKER_INSTANCES, to set the number of worker instances/processes
-# to be spawned on every slave machine
+# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
diff --git a/core/pom.xml b/core/pom.xml
index 6329b2fbd8..5738b7406f 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -19,20 +19,32 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-core</artifactId>
<packaging>jar</packaging>
<name>Spark Project Core</name>
- <url>http://spark-project.org/</url>
+ <url>http://spark.incubator.apache.org/</url>
<dependencies>
<dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro-ipc</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
</dependency>
@@ -41,6 +53,10 @@
<artifactId>guava</artifactId>
</dependency>
<dependency>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
@@ -49,6 +65,10 @@
<artifactId>compress-lzf</artifactId>
</dependency>
<dependency>
+ <groupId>org.xerial.snappy</groupId>
+ <artifactId>snappy-java</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</dependency>
@@ -57,8 +77,14 @@
<artifactId>protobuf-java</artifactId>
</dependency>
<dependency>
- <groupId>de.javakaffee</groupId>
- <artifactId>kryo-serializers</artifactId>
+ <groupId>com.twitter</groupId>
+ <artifactId>chill_2.9.3</artifactId>
+ <version>0.3.1</version>
+ </dependency>
+ <dependency>
+ <groupId>com.twitter</groupId>
+ <artifactId>chill-java</artifactId>
+ <version>0.3.1</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
@@ -108,7 +134,18 @@
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</dependency>
-
+ <dependency>
+ <groupId>com.codahale.metrics</groupId>
+ <artifactId>metrics-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.codahale.metrics</groupId>
+ <artifactId>metrics-jvm</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.codahale.metrics</groupId>
+ <artifactId>metrics-json</artifactId>
+ </dependency>
<dependency>
<groupId>org.apache.derby</groupId>
<artifactId>derby</artifactId>
@@ -186,183 +223,4 @@
</plugin>
</plugins>
</build>
-
- <profiles>
- <profile>
- <id>hadoop1</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>build-helper-maven-plugin</artifactId>
- <executions>
- <execution>
- <id>add-source</id>
- <phase>generate-sources</phase>
- <goals>
- <goal>add-source</goal>
- </goals>
- <configuration>
- <sources>
- <source>src/main/scala</source>
- <source>src/hadoop1/scala</source>
- </sources>
- </configuration>
- </execution>
- <execution>
- <id>add-scala-test-sources</id>
- <phase>generate-test-sources</phase>
- <goals>
- <goal>add-test-source</goal>
- </goals>
- <configuration>
- <sources>
- <source>src/test/scala</source>
- </sources>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop1</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>hadoop2</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>build-helper-maven-plugin</artifactId>
- <executions>
- <execution>
- <id>add-source</id>
- <phase>generate-sources</phase>
- <goals>
- <goal>add-source</goal>
- </goals>
- <configuration>
- <sources>
- <source>src/main/scala</source>
- <source>src/hadoop2/scala</source>
- </sources>
- </configuration>
- </execution>
- <execution>
- <id>add-scala-test-sources</id>
- <phase>generate-test-sources</phase>
- <goals>
- <goal>add-test-source</goal>
- </goals>
- <configuration>
- <sources>
- <source>src/test/scala</source>
- </sources>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>hadoop2-yarn</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-client</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>build-helper-maven-plugin</artifactId>
- <executions>
- <execution>
- <id>add-source</id>
- <phase>generate-sources</phase>
- <goals>
- <goal>add-source</goal>
- </goals>
- <configuration>
- <sources>
- <source>src/main/scala</source>
- <source>src/hadoop2-yarn/scala</source>
- </sources>
- </configuration>
- </execution>
- <execution>
- <id>add-scala-test-sources</id>
- <phase>generate-test-sources</phase>
- <goals>
- <goal>add-test-source</goal>
- </goals>
- <configuration>
- <sources>
- <source>src/test/scala</source>
- </sources>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2-yarn</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- </profiles>
</project>
diff --git a/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala
deleted file mode 100644
index aa3b1ed3a5..0000000000
--- a/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.mapreduce
-
-import org.apache.hadoop.conf.Configuration
-import task.{TaskAttemptContextImpl, JobContextImpl}
-
-trait HadoopMapReduceUtil {
- def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContextImpl(conf, jobId)
-
- def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId)
-
- def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier,
- jobId, isMap, taskId, attemptId)
-}
diff --git a/core/src/main/java/spark/network/netty/FileClient.java b/core/src/main/java/org/apache/spark/network/netty/FileClient.java
index 0625a6d502..20a7a3aa8c 100644
--- a/core/src/main/java/spark/network/netty/FileClient.java
+++ b/core/src/main/java/org/apache/spark/network/netty/FileClient.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network.netty;
+package org.apache.spark.network.netty;
import io.netty.bootstrap.Bootstrap;
import io.netty.channel.Channel;
diff --git a/core/src/main/java/spark/network/netty/FileClientChannelInitializer.java b/core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java
index 05ad4b61d7..65ee15d63b 100644
--- a/core/src/main/java/spark/network/netty/FileClientChannelInitializer.java
+++ b/core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network.netty;
+package org.apache.spark.network.netty;
import io.netty.buffer.BufType;
import io.netty.channel.ChannelInitializer;
diff --git a/core/src/main/java/spark/network/netty/FileClientHandler.java b/core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java
index e8cd9801f6..c4aa2669e0 100644
--- a/core/src/main/java/spark/network/netty/FileClientHandler.java
+++ b/core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network.netty;
+package org.apache.spark.network.netty;
import io.netty.buffer.ByteBuf;
import io.netty.channel.ChannelHandlerContext;
diff --git a/core/src/main/java/spark/network/netty/FileServer.java b/core/src/main/java/org/apache/spark/network/netty/FileServer.java
index 9f009a61d5..666432474d 100644
--- a/core/src/main/java/spark/network/netty/FileServer.java
+++ b/core/src/main/java/org/apache/spark/network/netty/FileServer.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network.netty;
+package org.apache.spark.network.netty;
import java.net.InetSocketAddress;
diff --git a/core/src/main/java/spark/network/netty/FileServerChannelInitializer.java b/core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java
index 50c57a81a3..833af1632d 100644
--- a/core/src/main/java/spark/network/netty/FileServerChannelInitializer.java
+++ b/core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network.netty;
+package org.apache.spark.network.netty;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.socket.SocketChannel;
diff --git a/core/src/main/java/spark/network/netty/FileServerHandler.java b/core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java
index 176ba8da49..d3d57a0255 100644
--- a/core/src/main/java/spark/network/netty/FileServerHandler.java
+++ b/core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network.netty;
+package org.apache.spark.network.netty;
import java.io.File;
import java.io.FileInputStream;
diff --git a/core/src/main/java/spark/network/netty/PathResolver.java b/core/src/main/java/org/apache/spark/network/netty/PathResolver.java
index f446c55b19..94c034cad0 100755
--- a/core/src/main/java/spark/network/netty/PathResolver.java
+++ b/core/src/main/java/org/apache/spark/network/netty/PathResolver.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network.netty;
+package org.apache.spark.network.netty;
public interface PathResolver {
diff --git a/core/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css b/core/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css
new file mode 100755
index 0000000000..13cef3d6f1
--- /dev/null
+++ b/core/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css
@@ -0,0 +1,874 @@
+/*!
+ * Bootstrap v2.3.2
+ *
+ * Copyright 2013 Twitter, Inc
+ * Licensed under the Apache License v2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Designed and built with all the love in the world @twitter by @mdo and @fat.
+ */
+.clearfix{*zoom:1;}.clearfix:before,.clearfix:after{display:table;content:"";line-height:0;}
+.clearfix:after{clear:both;}
+.hide-text{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0;}
+.input-block-level{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;}
+article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block;}
+audio,canvas,video{display:inline-block;*display:inline;*zoom:1;}
+audio:not([controls]){display:none;}
+html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;}
+a:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px;}
+a:hover,a:active{outline:0;}
+sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline;}
+sup{top:-0.5em;}
+sub{bottom:-0.25em;}
+img{max-width:100%;width:auto\9;height:auto;vertical-align:middle;border:0;-ms-interpolation-mode:bicubic;}
+#map_canvas img,.google-maps img{max-width:none;}
+button,input,select,textarea{margin:0;font-size:100%;vertical-align:middle;}
+button,input{*overflow:visible;line-height:normal;}
+button::-moz-focus-inner,input::-moz-focus-inner{padding:0;border:0;}
+button,html input[type="button"],input[type="reset"],input[type="submit"]{-webkit-appearance:button;cursor:pointer;}
+label,select,button,input[type="button"],input[type="reset"],input[type="submit"],input[type="radio"],input[type="checkbox"]{cursor:pointer;}
+input[type="search"]{-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;-webkit-appearance:textfield;}
+input[type="search"]::-webkit-search-decoration,input[type="search"]::-webkit-search-cancel-button{-webkit-appearance:none;}
+textarea{overflow:auto;vertical-align:top;}
+@media print{*{text-shadow:none !important;color:#000 !important;background:transparent !important;box-shadow:none !important;} a,a:visited{text-decoration:underline;} a[href]:after{content:" (" attr(href) ")";} abbr[title]:after{content:" (" attr(title) ")";} .ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:"";} pre,blockquote{border:1px solid #999;page-break-inside:avoid;} thead{display:table-header-group;} tr,img{page-break-inside:avoid;} img{max-width:100% !important;} @page {margin:0.5cm;}p,h2,h3{orphans:3;widows:3;} h2,h3{page-break-after:avoid;}}body{margin:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;line-height:20px;color:#333333;background-color:#ffffff;}
+a{color:#0088cc;text-decoration:none;}
+a:hover,a:focus{color:#005580;text-decoration:underline;}
+.img-rounded{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;}
+.img-polaroid{padding:4px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0, 0, 0, 0.2);-webkit-box-shadow:0 1px 3px rgba(0, 0, 0, 0.1);-moz-box-shadow:0 1px 3px rgba(0, 0, 0, 0.1);box-shadow:0 1px 3px rgba(0, 0, 0, 0.1);}
+.img-circle{-webkit-border-radius:500px;-moz-border-radius:500px;border-radius:500px;}
+.row{margin-left:-20px;*zoom:1;}.row:before,.row:after{display:table;content:"";line-height:0;}
+.row:after{clear:both;}
+[class*="span"]{float:left;min-height:1px;margin-left:20px;}
+.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px;}
+.span12{width:940px;}
+.span11{width:860px;}
+.span10{width:780px;}
+.span9{width:700px;}
+.span8{width:620px;}
+.span7{width:540px;}
+.span6{width:460px;}
+.span5{width:380px;}
+.span4{width:300px;}
+.span3{width:220px;}
+.span2{width:140px;}
+.span1{width:60px;}
+.offset12{margin-left:980px;}
+.offset11{margin-left:900px;}
+.offset10{margin-left:820px;}
+.offset9{margin-left:740px;}
+.offset8{margin-left:660px;}
+.offset7{margin-left:580px;}
+.offset6{margin-left:500px;}
+.offset5{margin-left:420px;}
+.offset4{margin-left:340px;}
+.offset3{margin-left:260px;}
+.offset2{margin-left:180px;}
+.offset1{margin-left:100px;}
+.row-fluid{width:100%;*zoom:1;}.row-fluid:before,.row-fluid:after{display:table;content:"";line-height:0;}
+.row-fluid:after{clear:both;}
+.row-fluid [class*="span"]{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;float:left;margin-left:2.127659574468085%;*margin-left:2.074468085106383%;}
+.row-fluid [class*="span"]:first-child{margin-left:0;}
+.row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.127659574468085%;}
+.row-fluid .span12{width:100%;*width:99.94680851063829%;}
+.row-fluid .span11{width:91.48936170212765%;*width:91.43617021276594%;}
+.row-fluid .span10{width:82.97872340425532%;*width:82.92553191489361%;}
+.row-fluid .span9{width:74.46808510638297%;*width:74.41489361702126%;}
+.row-fluid .span8{width:65.95744680851064%;*width:65.90425531914893%;}
+.row-fluid .span7{width:57.44680851063829%;*width:57.39361702127659%;}
+.row-fluid .span6{width:48.93617021276595%;*width:48.88297872340425%;}
+.row-fluid .span5{width:40.42553191489362%;*width:40.37234042553192%;}
+.row-fluid .span4{width:31.914893617021278%;*width:31.861702127659576%;}
+.row-fluid .span3{width:23.404255319148934%;*width:23.351063829787233%;}
+.row-fluid .span2{width:14.893617021276595%;*width:14.840425531914894%;}
+.row-fluid .span1{width:6.382978723404255%;*width:6.329787234042553%;}
+.row-fluid .offset12{margin-left:104.25531914893617%;*margin-left:104.14893617021275%;}
+.row-fluid .offset12:first-child{margin-left:102.12765957446808%;*margin-left:102.02127659574467%;}
+.row-fluid .offset11{margin-left:95.74468085106382%;*margin-left:95.6382978723404%;}
+.row-fluid .offset11:first-child{margin-left:93.61702127659574%;*margin-left:93.51063829787232%;}
+.row-fluid .offset10{margin-left:87.23404255319149%;*margin-left:87.12765957446807%;}
+.row-fluid .offset10:first-child{margin-left:85.1063829787234%;*margin-left:84.99999999999999%;}
+.row-fluid .offset9{margin-left:78.72340425531914%;*margin-left:78.61702127659572%;}
+.row-fluid .offset9:first-child{margin-left:76.59574468085106%;*margin-left:76.48936170212764%;}
+.row-fluid .offset8{margin-left:70.2127659574468%;*margin-left:70.10638297872339%;}
+.row-fluid .offset8:first-child{margin-left:68.08510638297872%;*margin-left:67.9787234042553%;}
+.row-fluid .offset7{margin-left:61.70212765957446%;*margin-left:61.59574468085106%;}
+.row-fluid .offset7:first-child{margin-left:59.574468085106375%;*margin-left:59.46808510638297%;}
+.row-fluid .offset6{margin-left:53.191489361702125%;*margin-left:53.085106382978715%;}
+.row-fluid .offset6:first-child{margin-left:51.063829787234035%;*margin-left:50.95744680851063%;}
+.row-fluid .offset5{margin-left:44.68085106382979%;*margin-left:44.57446808510638%;}
+.row-fluid .offset5:first-child{margin-left:42.5531914893617%;*margin-left:42.4468085106383%;}
+.row-fluid .offset4{margin-left:36.170212765957444%;*margin-left:36.06382978723405%;}
+.row-fluid .offset4:first-child{margin-left:34.04255319148936%;*margin-left:33.93617021276596%;}
+.row-fluid .offset3{margin-left:27.659574468085104%;*margin-left:27.5531914893617%;}
+.row-fluid .offset3:first-child{margin-left:25.53191489361702%;*margin-left:25.425531914893618%;}
+.row-fluid .offset2{margin-left:19.148936170212764%;*margin-left:19.04255319148936%;}
+.row-fluid .offset2:first-child{margin-left:17.02127659574468%;*margin-left:16.914893617021278%;}
+.row-fluid .offset1{margin-left:10.638297872340425%;*margin-left:10.53191489361702%;}
+.row-fluid .offset1:first-child{margin-left:8.51063829787234%;*margin-left:8.404255319148938%;}
+[class*="span"].hide,.row-fluid [class*="span"].hide{display:none;}
+[class*="span"].pull-right,.row-fluid [class*="span"].pull-right{float:right;}
+.container{margin-right:auto;margin-left:auto;*zoom:1;}.container:before,.container:after{display:table;content:"";line-height:0;}
+.container:after{clear:both;}
+.container-fluid{padding-right:20px;padding-left:20px;*zoom:1;}.container-fluid:before,.container-fluid:after{display:table;content:"";line-height:0;}
+.container-fluid:after{clear:both;}
+p{margin:0 0 10px;}
+.lead{margin-bottom:20px;font-size:21px;font-weight:200;line-height:30px;}
+small{font-size:85%;}
+strong{font-weight:bold;}
+em{font-style:italic;}
+cite{font-style:normal;}
+.muted{color:#999999;}
+a.muted:hover,a.muted:focus{color:#808080;}
+.text-warning{color:#c09853;}
+a.text-warning:hover,a.text-warning:focus{color:#a47e3c;}
+.text-error{color:#b94a48;}
+a.text-error:hover,a.text-error:focus{color:#953b39;}
+.text-info{color:#3a87ad;}
+a.text-info:hover,a.text-info:focus{color:#2d6987;}
+.text-success{color:#468847;}
+a.text-success:hover,a.text-success:focus{color:#356635;}
+.text-left{text-align:left;}
+.text-right{text-align:right;}
+.text-center{text-align:center;}
+h1,h2,h3,h4,h5,h6{margin:10px 0;font-family:inherit;font-weight:bold;line-height:20px;color:inherit;text-rendering:optimizelegibility;}h1 small,h2 small,h3 small,h4 small,h5 small,h6 small{font-weight:normal;line-height:1;color:#999999;}
+h1,h2,h3{line-height:40px;}
+h1{font-size:38.5px;}
+h2{font-size:31.5px;}
+h3{font-size:24.5px;}
+h4{font-size:17.5px;}
+h5{font-size:14px;}
+h6{font-size:11.9px;}
+h1 small{font-size:24.5px;}
+h2 small{font-size:17.5px;}
+h3 small{font-size:14px;}
+h4 small{font-size:14px;}
+.page-header{padding-bottom:9px;margin:20px 0 30px;border-bottom:1px solid #eeeeee;}
+ul,ol{padding:0;margin:0 0 10px 25px;}
+ul ul,ul ol,ol ol,ol ul{margin-bottom:0;}
+li{line-height:20px;}
+ul.unstyled,ol.unstyled{margin-left:0;list-style:none;}
+ul.inline,ol.inline{margin-left:0;list-style:none;}ul.inline>li,ol.inline>li{display:inline-block;*display:inline;*zoom:1;padding-left:5px;padding-right:5px;}
+dl{margin-bottom:20px;}
+dt,dd{line-height:20px;}
+dt{font-weight:bold;}
+dd{margin-left:10px;}
+.dl-horizontal{*zoom:1;}.dl-horizontal:before,.dl-horizontal:after{display:table;content:"";line-height:0;}
+.dl-horizontal:after{clear:both;}
+.dl-horizontal dt{float:left;width:160px;clear:left;text-align:right;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;}
+.dl-horizontal dd{margin-left:180px;}
+hr{margin:20px 0;border:0;border-top:1px solid #eeeeee;border-bottom:1px solid #ffffff;}
+abbr[title],abbr[data-original-title]{cursor:help;border-bottom:1px dotted #999999;}
+abbr.initialism{font-size:90%;text-transform:uppercase;}
+blockquote{padding:0 0 0 15px;margin:0 0 20px;border-left:5px solid #eeeeee;}blockquote p{margin-bottom:0;font-size:17.5px;font-weight:300;line-height:1.25;}
+blockquote small{display:block;line-height:20px;color:#999999;}blockquote small:before{content:'\2014 \00A0';}
+blockquote.pull-right{float:right;padding-right:15px;padding-left:0;border-right:5px solid #eeeeee;border-left:0;}blockquote.pull-right p,blockquote.pull-right small{text-align:right;}
+blockquote.pull-right small:before{content:'';}
+blockquote.pull-right small:after{content:'\00A0 \2014';}
+q:before,q:after,blockquote:before,blockquote:after{content:"";}
+address{display:block;margin-bottom:20px;font-style:normal;line-height:20px;}
+code,pre{padding:0 3px 2px;font-family:Monaco,Menlo,Consolas,"Courier New",monospace;font-size:12px;color:#333333;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;}
+code{padding:2px 4px;color:#d14;background-color:#f7f7f9;border:1px solid #e1e1e8;white-space:nowrap;}
+pre{display:block;padding:9.5px;margin:0 0 10px;font-size:13px;line-height:20px;word-break:break-all;word-wrap:break-word;white-space:pre;white-space:pre-wrap;background-color:#f5f5f5;border:1px solid #ccc;border:1px solid rgba(0, 0, 0, 0.15);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}pre.prettyprint{margin-bottom:20px;}
+pre code{padding:0;color:inherit;white-space:pre;white-space:pre-wrap;background-color:transparent;border:0;}
+.pre-scrollable{max-height:340px;overflow-y:scroll;}
+.label,.badge{display:inline-block;padding:2px 4px;font-size:11.844px;font-weight:bold;line-height:14px;color:#ffffff;vertical-align:baseline;white-space:nowrap;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#999999;}
+.label{-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;}
+.badge{padding-left:9px;padding-right:9px;-webkit-border-radius:9px;-moz-border-radius:9px;border-radius:9px;}
+.label:empty,.badge:empty{display:none;}
+a.label:hover,a.label:focus,a.badge:hover,a.badge:focus{color:#ffffff;text-decoration:none;cursor:pointer;}
+.label-important,.badge-important{background-color:#b94a48;}
+.label-important[href],.badge-important[href]{background-color:#953b39;}
+.label-warning,.badge-warning{background-color:#f89406;}
+.label-warning[href],.badge-warning[href]{background-color:#c67605;}
+.label-success,.badge-success{background-color:#468847;}
+.label-success[href],.badge-success[href]{background-color:#356635;}
+.label-info,.badge-info{background-color:#3a87ad;}
+.label-info[href],.badge-info[href]{background-color:#2d6987;}
+.label-inverse,.badge-inverse{background-color:#333333;}
+.label-inverse[href],.badge-inverse[href]{background-color:#1a1a1a;}
+.btn .label,.btn .badge{position:relative;top:-1px;}
+.btn-mini .label,.btn-mini .badge{top:0;}
+table{max-width:100%;background-color:transparent;border-collapse:collapse;border-spacing:0;}
+.table{width:100%;margin-bottom:20px;}.table th,.table td{padding:8px;line-height:20px;text-align:left;vertical-align:top;border-top:1px solid #dddddd;}
+.table th{font-weight:bold;}
+.table thead th{vertical-align:bottom;}
+.table caption+thead tr:first-child th,.table caption+thead tr:first-child td,.table colgroup+thead tr:first-child th,.table colgroup+thead tr:first-child td,.table thead:first-child tr:first-child th,.table thead:first-child tr:first-child td{border-top:0;}
+.table tbody+tbody{border-top:2px solid #dddddd;}
+.table .table{background-color:#ffffff;}
+.table-condensed th,.table-condensed td{padding:4px 5px;}
+.table-bordered{border:1px solid #dddddd;border-collapse:separate;*border-collapse:collapse;border-left:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}.table-bordered th,.table-bordered td{border-left:1px solid #dddddd;}
+.table-bordered caption+thead tr:first-child th,.table-bordered caption+tbody tr:first-child th,.table-bordered caption+tbody tr:first-child td,.table-bordered colgroup+thead tr:first-child th,.table-bordered colgroup+tbody tr:first-child th,.table-bordered colgroup+tbody tr:first-child td,.table-bordered thead:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child td{border-top:0;}
+.table-bordered thead:first-child tr:first-child>th:first-child,.table-bordered tbody:first-child tr:first-child>td:first-child,.table-bordered tbody:first-child tr:first-child>th:first-child{-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;}
+.table-bordered thead:first-child tr:first-child>th:last-child,.table-bordered tbody:first-child tr:first-child>td:last-child,.table-bordered tbody:first-child tr:first-child>th:last-child{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;}
+.table-bordered thead:last-child tr:last-child>th:first-child,.table-bordered tbody:last-child tr:last-child>td:first-child,.table-bordered tbody:last-child tr:last-child>th:first-child,.table-bordered tfoot:last-child tr:last-child>td:first-child,.table-bordered tfoot:last-child tr:last-child>th:first-child{-webkit-border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px;border-bottom-left-radius:4px;}
+.table-bordered thead:last-child tr:last-child>th:last-child,.table-bordered tbody:last-child tr:last-child>td:last-child,.table-bordered tbody:last-child tr:last-child>th:last-child,.table-bordered tfoot:last-child tr:last-child>td:last-child,.table-bordered tfoot:last-child tr:last-child>th:last-child{-webkit-border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px;border-bottom-right-radius:4px;}
+.table-bordered tfoot+tbody:last-child tr:last-child td:first-child{-webkit-border-bottom-left-radius:0;-moz-border-radius-bottomleft:0;border-bottom-left-radius:0;}
+.table-bordered tfoot+tbody:last-child tr:last-child td:last-child{-webkit-border-bottom-right-radius:0;-moz-border-radius-bottomright:0;border-bottom-right-radius:0;}
+.table-bordered caption+thead tr:first-child th:first-child,.table-bordered caption+tbody tr:first-child td:first-child,.table-bordered colgroup+thead tr:first-child th:first-child,.table-bordered colgroup+tbody tr:first-child td:first-child{-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;}
+.table-bordered caption+thead tr:first-child th:last-child,.table-bordered caption+tbody tr:first-child td:last-child,.table-bordered colgroup+thead tr:first-child th:last-child,.table-bordered colgroup+tbody tr:first-child td:last-child{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;}
+.table-striped tbody>tr:nth-child(odd)>td,.table-striped tbody>tr:nth-child(odd)>th{background-color:#f9f9f9;}
+.table-hover tbody tr:hover>td,.table-hover tbody tr:hover>th{background-color:#f5f5f5;}
+table td[class*="span"],table th[class*="span"],.row-fluid table td[class*="span"],.row-fluid table th[class*="span"]{display:table-cell;float:none;margin-left:0;}
+.table td.span1,.table th.span1{float:none;width:44px;margin-left:0;}
+.table td.span2,.table th.span2{float:none;width:124px;margin-left:0;}
+.table td.span3,.table th.span3{float:none;width:204px;margin-left:0;}
+.table td.span4,.table th.span4{float:none;width:284px;margin-left:0;}
+.table td.span5,.table th.span5{float:none;width:364px;margin-left:0;}
+.table td.span6,.table th.span6{float:none;width:444px;margin-left:0;}
+.table td.span7,.table th.span7{float:none;width:524px;margin-left:0;}
+.table td.span8,.table th.span8{float:none;width:604px;margin-left:0;}
+.table td.span9,.table th.span9{float:none;width:684px;margin-left:0;}
+.table td.span10,.table th.span10{float:none;width:764px;margin-left:0;}
+.table td.span11,.table th.span11{float:none;width:844px;margin-left:0;}
+.table td.span12,.table th.span12{float:none;width:924px;margin-left:0;}
+.table tbody tr.success>td{background-color:#dff0d8;}
+.table tbody tr.error>td{background-color:#f2dede;}
+.table tbody tr.warning>td{background-color:#fcf8e3;}
+.table tbody tr.info>td{background-color:#d9edf7;}
+.table-hover tbody tr.success:hover>td{background-color:#d0e9c6;}
+.table-hover tbody tr.error:hover>td{background-color:#ebcccc;}
+.table-hover tbody tr.warning:hover>td{background-color:#faf2cc;}
+.table-hover tbody tr.info:hover>td{background-color:#c4e3f3;}
+form{margin:0 0 20px;}
+fieldset{padding:0;margin:0;border:0;}
+legend{display:block;width:100%;padding:0;margin-bottom:20px;font-size:21px;line-height:40px;color:#333333;border:0;border-bottom:1px solid #e5e5e5;}legend small{font-size:15px;color:#999999;}
+label,input,button,select,textarea{font-size:14px;font-weight:normal;line-height:20px;}
+input,button,select,textarea{font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;}
+label{display:block;margin-bottom:5px;}
+select,textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{display:inline-block;height:20px;padding:4px 6px;margin-bottom:10px;font-size:14px;line-height:20px;color:#555555;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;vertical-align:middle;}
+input,textarea,.uneditable-input{width:206px;}
+textarea{height:auto;}
+textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{background-color:#ffffff;border:1px solid #cccccc;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-webkit-transition:border linear .2s, box-shadow linear .2s;-moz-transition:border linear .2s, box-shadow linear .2s;-o-transition:border linear .2s, box-shadow linear .2s;transition:border linear .2s, box-shadow linear .2s;}textarea:focus,input[type="text"]:focus,input[type="password"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus,.uneditable-input:focus{border-color:rgba(82, 168, 236, 0.8);outline:0;outline:thin dotted \9;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(82,168,236,.6);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(82,168,236,.6);box-shadow:inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(82,168,236,.6);}
+input[type="radio"],input[type="checkbox"]{margin:4px 0 0;*margin-top:0;margin-top:1px \9;line-height:normal;}
+input[type="file"],input[type="image"],input[type="submit"],input[type="reset"],input[type="button"],input[type="radio"],input[type="checkbox"]{width:auto;}
+select,input[type="file"]{height:30px;*margin-top:4px;line-height:30px;}
+select{width:220px;border:1px solid #cccccc;background-color:#ffffff;}
+select[multiple],select[size]{height:auto;}
+select:focus,input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px;}
+.uneditable-input,.uneditable-textarea{color:#999999;background-color:#fcfcfc;border-color:#cccccc;-webkit-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.025);-moz-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.025);box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.025);cursor:not-allowed;}
+.uneditable-input{overflow:hidden;white-space:nowrap;}
+.uneditable-textarea{width:auto;height:auto;}
+input:-moz-placeholder,textarea:-moz-placeholder{color:#999999;}
+input:-ms-input-placeholder,textarea:-ms-input-placeholder{color:#999999;}
+input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#999999;}
+.radio,.checkbox{min-height:20px;padding-left:20px;}
+.radio input[type="radio"],.checkbox input[type="checkbox"]{float:left;margin-left:-20px;}
+.controls>.radio:first-child,.controls>.checkbox:first-child{padding-top:5px;}
+.radio.inline,.checkbox.inline{display:inline-block;padding-top:5px;margin-bottom:0;vertical-align:middle;}
+.radio.inline+.radio.inline,.checkbox.inline+.checkbox.inline{margin-left:10px;}
+.input-mini{width:60px;}
+.input-small{width:90px;}
+.input-medium{width:150px;}
+.input-large{width:210px;}
+.input-xlarge{width:270px;}
+.input-xxlarge{width:530px;}
+input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"]{float:none;margin-left:0;}
+.input-append input[class*="span"],.input-append .uneditable-input[class*="span"],.input-prepend input[class*="span"],.input-prepend .uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"],.row-fluid .input-prepend [class*="span"],.row-fluid .input-append [class*="span"]{display:inline-block;}
+input,textarea,.uneditable-input{margin-left:0;}
+.controls-row [class*="span"]+[class*="span"]{margin-left:20px;}
+input.span12,textarea.span12,.uneditable-input.span12{width:926px;}
+input.span11,textarea.span11,.uneditable-input.span11{width:846px;}
+input.span10,textarea.span10,.uneditable-input.span10{width:766px;}
+input.span9,textarea.span9,.uneditable-input.span9{width:686px;}
+input.span8,textarea.span8,.uneditable-input.span8{width:606px;}
+input.span7,textarea.span7,.uneditable-input.span7{width:526px;}
+input.span6,textarea.span6,.uneditable-input.span6{width:446px;}
+input.span5,textarea.span5,.uneditable-input.span5{width:366px;}
+input.span4,textarea.span4,.uneditable-input.span4{width:286px;}
+input.span3,textarea.span3,.uneditable-input.span3{width:206px;}
+input.span2,textarea.span2,.uneditable-input.span2{width:126px;}
+input.span1,textarea.span1,.uneditable-input.span1{width:46px;}
+.controls-row{*zoom:1;}.controls-row:before,.controls-row:after{display:table;content:"";line-height:0;}
+.controls-row:after{clear:both;}
+.controls-row [class*="span"],.row-fluid .controls-row [class*="span"]{float:left;}
+.controls-row .checkbox[class*="span"],.controls-row .radio[class*="span"]{padding-top:5px;}
+input[disabled],select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#eeeeee;}
+input[type="radio"][disabled],input[type="checkbox"][disabled],input[type="radio"][readonly],input[type="checkbox"][readonly]{background-color:transparent;}
+.control-group.warning .control-label,.control-group.warning .help-block,.control-group.warning .help-inline{color:#c09853;}
+.control-group.warning .checkbox,.control-group.warning .radio,.control-group.warning input,.control-group.warning select,.control-group.warning textarea{color:#c09853;}
+.control-group.warning input,.control-group.warning select,.control-group.warning textarea{border-color:#c09853;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);}.control-group.warning input:focus,.control-group.warning select:focus,.control-group.warning textarea:focus{border-color:#a47e3c;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #dbc59e;-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #dbc59e;box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #dbc59e;}
+.control-group.warning .input-prepend .add-on,.control-group.warning .input-append .add-on{color:#c09853;background-color:#fcf8e3;border-color:#c09853;}
+.control-group.error .control-label,.control-group.error .help-block,.control-group.error .help-inline{color:#b94a48;}
+.control-group.error .checkbox,.control-group.error .radio,.control-group.error input,.control-group.error select,.control-group.error textarea{color:#b94a48;}
+.control-group.error input,.control-group.error select,.control-group.error textarea{border-color:#b94a48;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);}.control-group.error input:focus,.control-group.error select:focus,.control-group.error textarea:focus{border-color:#953b39;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #d59392;-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #d59392;box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #d59392;}
+.control-group.error .input-prepend .add-on,.control-group.error .input-append .add-on{color:#b94a48;background-color:#f2dede;border-color:#b94a48;}
+.control-group.success .control-label,.control-group.success .help-block,.control-group.success .help-inline{color:#468847;}
+.control-group.success .checkbox,.control-group.success .radio,.control-group.success input,.control-group.success select,.control-group.success textarea{color:#468847;}
+.control-group.success input,.control-group.success select,.control-group.success textarea{border-color:#468847;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);}.control-group.success input:focus,.control-group.success select:focus,.control-group.success textarea:focus{border-color:#356635;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7aba7b;-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7aba7b;box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7aba7b;}
+.control-group.success .input-prepend .add-on,.control-group.success .input-append .add-on{color:#468847;background-color:#dff0d8;border-color:#468847;}
+.control-group.info .control-label,.control-group.info .help-block,.control-group.info .help-inline{color:#3a87ad;}
+.control-group.info .checkbox,.control-group.info .radio,.control-group.info input,.control-group.info select,.control-group.info textarea{color:#3a87ad;}
+.control-group.info input,.control-group.info select,.control-group.info textarea{border-color:#3a87ad;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);}.control-group.info input:focus,.control-group.info select:focus,.control-group.info textarea:focus{border-color:#2d6987;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7ab5d3;-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7ab5d3;box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7ab5d3;}
+.control-group.info .input-prepend .add-on,.control-group.info .input-append .add-on{color:#3a87ad;background-color:#d9edf7;border-color:#3a87ad;}
+input:focus:invalid,textarea:focus:invalid,select:focus:invalid{color:#b94a48;border-color:#ee5f5b;}input:focus:invalid:focus,textarea:focus:invalid:focus,select:focus:invalid:focus{border-color:#e9322d;-webkit-box-shadow:0 0 6px #f8b9b7;-moz-box-shadow:0 0 6px #f8b9b7;box-shadow:0 0 6px #f8b9b7;}
+.form-actions{padding:19px 20px 20px;margin-top:20px;margin-bottom:20px;background-color:#f5f5f5;border-top:1px solid #e5e5e5;*zoom:1;}.form-actions:before,.form-actions:after{display:table;content:"";line-height:0;}
+.form-actions:after{clear:both;}
+.help-block,.help-inline{color:#595959;}
+.help-block{display:block;margin-bottom:10px;}
+.help-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle;padding-left:5px;}
+.input-append,.input-prepend{display:inline-block;margin-bottom:10px;vertical-align:middle;font-size:0;white-space:nowrap;}.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input,.input-append .dropdown-menu,.input-prepend .dropdown-menu,.input-append .popover,.input-prepend .popover{font-size:14px;}
+.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input{position:relative;margin-bottom:0;*margin-left:0;vertical-align:top;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}.input-append input:focus,.input-prepend input:focus,.input-append select:focus,.input-prepend select:focus,.input-append .uneditable-input:focus,.input-prepend .uneditable-input:focus{z-index:2;}
+.input-append .add-on,.input-prepend .add-on{display:inline-block;width:auto;height:20px;min-width:16px;padding:4px 5px;font-size:14px;font-weight:normal;line-height:20px;text-align:center;text-shadow:0 1px 0 #ffffff;background-color:#eeeeee;border:1px solid #ccc;}
+.input-append .add-on,.input-prepend .add-on,.input-append .btn,.input-prepend .btn,.input-append .btn-group>.dropdown-toggle,.input-prepend .btn-group>.dropdown-toggle{vertical-align:top;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.input-append .active,.input-prepend .active{background-color:#a9dba9;border-color:#46a546;}
+.input-prepend .add-on,.input-prepend .btn{margin-right:-1px;}
+.input-prepend .add-on:first-child,.input-prepend .btn:first-child{-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px;}
+.input-append input,.input-append select,.input-append .uneditable-input{-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px;}.input-append input+.btn-group .btn:last-child,.input-append select+.btn-group .btn:last-child,.input-append .uneditable-input+.btn-group .btn:last-child{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.input-append .add-on,.input-append .btn,.input-append .btn-group{margin-left:-1px;}
+.input-append .add-on:last-child,.input-append .btn:last-child,.input-append .btn-group:last-child>.dropdown-toggle{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.input-prepend.input-append input,.input-prepend.input-append select,.input-prepend.input-append .uneditable-input{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}.input-prepend.input-append input+.btn-group .btn,.input-prepend.input-append select+.btn-group .btn,.input-prepend.input-append .uneditable-input+.btn-group .btn{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.input-prepend.input-append .add-on:first-child,.input-prepend.input-append .btn:first-child{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px;}
+.input-prepend.input-append .add-on:last-child,.input-prepend.input-append .btn:last-child{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.input-prepend.input-append .btn-group:first-child{margin-left:0;}
+input.search-query{padding-right:14px;padding-right:4px \9;padding-left:14px;padding-left:4px \9;margin-bottom:0;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px;}
+.form-search .input-append .search-query,.form-search .input-prepend .search-query{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.form-search .input-append .search-query{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px;}
+.form-search .input-append .btn{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0;}
+.form-search .input-prepend .search-query{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0;}
+.form-search .input-prepend .btn{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px;}
+.form-search input,.form-inline input,.form-horizontal input,.form-search textarea,.form-inline textarea,.form-horizontal textarea,.form-search select,.form-inline select,.form-horizontal select,.form-search .help-inline,.form-inline .help-inline,.form-horizontal .help-inline,.form-search .uneditable-input,.form-inline .uneditable-input,.form-horizontal .uneditable-input,.form-search .input-prepend,.form-inline .input-prepend,.form-horizontal .input-prepend,.form-search .input-append,.form-inline .input-append,.form-horizontal .input-append{display:inline-block;*display:inline;*zoom:1;margin-bottom:0;vertical-align:middle;}
+.form-search .hide,.form-inline .hide,.form-horizontal .hide{display:none;}
+.form-search label,.form-inline label,.form-search .btn-group,.form-inline .btn-group{display:inline-block;}
+.form-search .input-append,.form-inline .input-append,.form-search .input-prepend,.form-inline .input-prepend{margin-bottom:0;}
+.form-search .radio,.form-search .checkbox,.form-inline .radio,.form-inline .checkbox{padding-left:0;margin-bottom:0;vertical-align:middle;}
+.form-search .radio input[type="radio"],.form-search .checkbox input[type="checkbox"],.form-inline .radio input[type="radio"],.form-inline .checkbox input[type="checkbox"]{float:left;margin-right:3px;margin-left:0;}
+.control-group{margin-bottom:10px;}
+legend+.control-group{margin-top:20px;-webkit-margin-top-collapse:separate;}
+.form-horizontal .control-group{margin-bottom:20px;*zoom:1;}.form-horizontal .control-group:before,.form-horizontal .control-group:after{display:table;content:"";line-height:0;}
+.form-horizontal .control-group:after{clear:both;}
+.form-horizontal .control-label{float:left;width:160px;padding-top:5px;text-align:right;}
+.form-horizontal .controls{*display:inline-block;*padding-left:20px;margin-left:180px;*margin-left:0;}.form-horizontal .controls:first-child{*padding-left:180px;}
+.form-horizontal .help-block{margin-bottom:0;}
+.form-horizontal input+.help-block,.form-horizontal select+.help-block,.form-horizontal textarea+.help-block,.form-horizontal .uneditable-input+.help-block,.form-horizontal .input-prepend+.help-block,.form-horizontal .input-append+.help-block{margin-top:10px;}
+.form-horizontal .form-actions{padding-left:180px;}
+.btn{display:inline-block;*display:inline;*zoom:1;padding:4px 12px;margin-bottom:0;font-size:14px;line-height:20px;text-align:center;vertical-align:middle;cursor:pointer;color:#333333;text-shadow:0 1px 1px rgba(255, 255, 255, 0.75);background-color:#f5f5f5;background-image:-moz-linear-gradient(top, #ffffff, #e6e6e6);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ffffff), to(#e6e6e6));background-image:-webkit-linear-gradient(top, #ffffff, #e6e6e6);background-image:-o-linear-gradient(top, #ffffff, #e6e6e6);background-image:linear-gradient(to bottom, #ffffff, #e6e6e6);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff', endColorstr='#ffe6e6e6', GradientType=0);border-color:#e6e6e6 #e6e6e6 #bfbfbf;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#e6e6e6;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);border:1px solid #cccccc;*border:0;border-bottom-color:#b3b3b3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;*margin-left:.3em;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);}.btn:hover,.btn:focus,.btn:active,.btn.active,.btn.disabled,.btn[disabled]{color:#333333;background-color:#e6e6e6;*background-color:#d9d9d9;}
+.btn:active,.btn.active{background-color:#cccccc \9;}
+.btn:first-child{*margin-left:0;}
+.btn:hover,.btn:focus{color:#333333;text-decoration:none;background-position:0 -15px;-webkit-transition:background-position 0.1s linear;-moz-transition:background-position 0.1s linear;-o-transition:background-position 0.1s linear;transition:background-position 0.1s linear;}
+.btn:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px;}
+.btn.active,.btn:active{background-image:none;outline:0;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);}
+.btn.disabled,.btn[disabled]{cursor:default;background-image:none;opacity:0.65;filter:alpha(opacity=65);-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;}
+.btn-large{padding:11px 19px;font-size:17.5px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;}
+.btn-large [class^="icon-"],.btn-large [class*=" icon-"]{margin-top:4px;}
+.btn-small{padding:2px 10px;font-size:11.9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;}
+.btn-small [class^="icon-"],.btn-small [class*=" icon-"]{margin-top:0;}
+.btn-mini [class^="icon-"],.btn-mini [class*=" icon-"]{margin-top:-1px;}
+.btn-mini{padding:0 6px;font-size:10.5px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;}
+.btn-block{display:block;width:100%;padding-left:0;padding-right:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;}
+.btn-block+.btn-block{margin-top:5px;}
+input[type="submit"].btn-block,input[type="reset"].btn-block,input[type="button"].btn-block{width:100%;}
+.btn-primary.active,.btn-warning.active,.btn-danger.active,.btn-success.active,.btn-info.active,.btn-inverse.active{color:rgba(255, 255, 255, 0.75);}
+.btn-primary{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#006dcc;background-image:-moz-linear-gradient(top, #0088cc, #0044cc);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#0088cc), to(#0044cc));background-image:-webkit-linear-gradient(top, #0088cc, #0044cc);background-image:-o-linear-gradient(top, #0088cc, #0044cc);background-image:linear-gradient(to bottom, #0088cc, #0044cc);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc', endColorstr='#ff0044cc', GradientType=0);border-color:#0044cc #0044cc #002a80;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#0044cc;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-primary:hover,.btn-primary:focus,.btn-primary:active,.btn-primary.active,.btn-primary.disabled,.btn-primary[disabled]{color:#ffffff;background-color:#0044cc;*background-color:#003bb3;}
+.btn-primary:active,.btn-primary.active{background-color:#003399 \9;}
+.btn-warning{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#faa732;background-image:-moz-linear-gradient(top, #fbb450, #f89406);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#fbb450), to(#f89406));background-image:-webkit-linear-gradient(top, #fbb450, #f89406);background-image:-o-linear-gradient(top, #fbb450, #f89406);background-image:linear-gradient(to bottom, #fbb450, #f89406);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffbb450', endColorstr='#fff89406', GradientType=0);border-color:#f89406 #f89406 #ad6704;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#f89406;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-warning:hover,.btn-warning:focus,.btn-warning:active,.btn-warning.active,.btn-warning.disabled,.btn-warning[disabled]{color:#ffffff;background-color:#f89406;*background-color:#df8505;}
+.btn-warning:active,.btn-warning.active{background-color:#c67605 \9;}
+.btn-danger{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#da4f49;background-image:-moz-linear-gradient(top, #ee5f5b, #bd362f);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ee5f5b), to(#bd362f));background-image:-webkit-linear-gradient(top, #ee5f5b, #bd362f);background-image:-o-linear-gradient(top, #ee5f5b, #bd362f);background-image:linear-gradient(to bottom, #ee5f5b, #bd362f);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffee5f5b', endColorstr='#ffbd362f', GradientType=0);border-color:#bd362f #bd362f #802420;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#bd362f;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-danger:hover,.btn-danger:focus,.btn-danger:active,.btn-danger.active,.btn-danger.disabled,.btn-danger[disabled]{color:#ffffff;background-color:#bd362f;*background-color:#a9302a;}
+.btn-danger:active,.btn-danger.active{background-color:#942a25 \9;}
+.btn-success{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#5bb75b;background-image:-moz-linear-gradient(top, #62c462, #51a351);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#62c462), to(#51a351));background-image:-webkit-linear-gradient(top, #62c462, #51a351);background-image:-o-linear-gradient(top, #62c462, #51a351);background-image:linear-gradient(to bottom, #62c462, #51a351);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff62c462', endColorstr='#ff51a351', GradientType=0);border-color:#51a351 #51a351 #387038;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#51a351;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-success:hover,.btn-success:focus,.btn-success:active,.btn-success.active,.btn-success.disabled,.btn-success[disabled]{color:#ffffff;background-color:#51a351;*background-color:#499249;}
+.btn-success:active,.btn-success.active{background-color:#408140 \9;}
+.btn-info{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#49afcd;background-image:-moz-linear-gradient(top, #5bc0de, #2f96b4);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#5bc0de), to(#2f96b4));background-image:-webkit-linear-gradient(top, #5bc0de, #2f96b4);background-image:-o-linear-gradient(top, #5bc0de, #2f96b4);background-image:linear-gradient(to bottom, #5bc0de, #2f96b4);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de', endColorstr='#ff2f96b4', GradientType=0);border-color:#2f96b4 #2f96b4 #1f6377;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#2f96b4;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-info:hover,.btn-info:focus,.btn-info:active,.btn-info.active,.btn-info.disabled,.btn-info[disabled]{color:#ffffff;background-color:#2f96b4;*background-color:#2a85a0;}
+.btn-info:active,.btn-info.active{background-color:#24748c \9;}
+.btn-inverse{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#363636;background-image:-moz-linear-gradient(top, #444444, #222222);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#444444), to(#222222));background-image:-webkit-linear-gradient(top, #444444, #222222);background-image:-o-linear-gradient(top, #444444, #222222);background-image:linear-gradient(to bottom, #444444, #222222);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff444444', endColorstr='#ff222222', GradientType=0);border-color:#222222 #222222 #000000;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#222222;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-inverse:hover,.btn-inverse:focus,.btn-inverse:active,.btn-inverse.active,.btn-inverse.disabled,.btn-inverse[disabled]{color:#ffffff;background-color:#222222;*background-color:#151515;}
+.btn-inverse:active,.btn-inverse.active{background-color:#080808 \9;}
+button.btn,input[type="submit"].btn{*padding-top:3px;*padding-bottom:3px;}button.btn::-moz-focus-inner,input[type="submit"].btn::-moz-focus-inner{padding:0;border:0;}
+button.btn.btn-large,input[type="submit"].btn.btn-large{*padding-top:7px;*padding-bottom:7px;}
+button.btn.btn-small,input[type="submit"].btn.btn-small{*padding-top:3px;*padding-bottom:3px;}
+button.btn.btn-mini,input[type="submit"].btn.btn-mini{*padding-top:1px;*padding-bottom:1px;}
+.btn-link,.btn-link:active,.btn-link[disabled]{background-color:transparent;background-image:none;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;}
+.btn-link{border-color:transparent;cursor:pointer;color:#0088cc;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.btn-link:hover,.btn-link:focus{color:#005580;text-decoration:underline;background-color:transparent;}
+.btn-link[disabled]:hover,.btn-link[disabled]:focus{color:#333333;text-decoration:none;}
+[class^="icon-"],[class*=" icon-"]{display:inline-block;width:14px;height:14px;*margin-right:.3em;line-height:14px;vertical-align:text-top;background-image:url("../img/glyphicons-halflings.png");background-position:14px 14px;background-repeat:no-repeat;margin-top:1px;}
+.icon-white,.nav-pills>.active>a>[class^="icon-"],.nav-pills>.active>a>[class*=" icon-"],.nav-list>.active>a>[class^="icon-"],.nav-list>.active>a>[class*=" icon-"],.navbar-inverse .nav>.active>a>[class^="icon-"],.navbar-inverse .nav>.active>a>[class*=" icon-"],.dropdown-menu>li>a:hover>[class^="icon-"],.dropdown-menu>li>a:focus>[class^="icon-"],.dropdown-menu>li>a:hover>[class*=" icon-"],.dropdown-menu>li>a:focus>[class*=" icon-"],.dropdown-menu>.active>a>[class^="icon-"],.dropdown-menu>.active>a>[class*=" icon-"],.dropdown-submenu:hover>a>[class^="icon-"],.dropdown-submenu:focus>a>[class^="icon-"],.dropdown-submenu:hover>a>[class*=" icon-"],.dropdown-submenu:focus>a>[class*=" icon-"]{background-image:url("../img/glyphicons-halflings-white.png");}
+.icon-glass{background-position:0 0;}
+.icon-music{background-position:-24px 0;}
+.icon-search{background-position:-48px 0;}
+.icon-envelope{background-position:-72px 0;}
+.icon-heart{background-position:-96px 0;}
+.icon-star{background-position:-120px 0;}
+.icon-star-empty{background-position:-144px 0;}
+.icon-user{background-position:-168px 0;}
+.icon-film{background-position:-192px 0;}
+.icon-th-large{background-position:-216px 0;}
+.icon-th{background-position:-240px 0;}
+.icon-th-list{background-position:-264px 0;}
+.icon-ok{background-position:-288px 0;}
+.icon-remove{background-position:-312px 0;}
+.icon-zoom-in{background-position:-336px 0;}
+.icon-zoom-out{background-position:-360px 0;}
+.icon-off{background-position:-384px 0;}
+.icon-signal{background-position:-408px 0;}
+.icon-cog{background-position:-432px 0;}
+.icon-trash{background-position:-456px 0;}
+.icon-home{background-position:0 -24px;}
+.icon-file{background-position:-24px -24px;}
+.icon-time{background-position:-48px -24px;}
+.icon-road{background-position:-72px -24px;}
+.icon-download-alt{background-position:-96px -24px;}
+.icon-download{background-position:-120px -24px;}
+.icon-upload{background-position:-144px -24px;}
+.icon-inbox{background-position:-168px -24px;}
+.icon-play-circle{background-position:-192px -24px;}
+.icon-repeat{background-position:-216px -24px;}
+.icon-refresh{background-position:-240px -24px;}
+.icon-list-alt{background-position:-264px -24px;}
+.icon-lock{background-position:-287px -24px;}
+.icon-flag{background-position:-312px -24px;}
+.icon-headphones{background-position:-336px -24px;}
+.icon-volume-off{background-position:-360px -24px;}
+.icon-volume-down{background-position:-384px -24px;}
+.icon-volume-up{background-position:-408px -24px;}
+.icon-qrcode{background-position:-432px -24px;}
+.icon-barcode{background-position:-456px -24px;}
+.icon-tag{background-position:0 -48px;}
+.icon-tags{background-position:-25px -48px;}
+.icon-book{background-position:-48px -48px;}
+.icon-bookmark{background-position:-72px -48px;}
+.icon-print{background-position:-96px -48px;}
+.icon-camera{background-position:-120px -48px;}
+.icon-font{background-position:-144px -48px;}
+.icon-bold{background-position:-167px -48px;}
+.icon-italic{background-position:-192px -48px;}
+.icon-text-height{background-position:-216px -48px;}
+.icon-text-width{background-position:-240px -48px;}
+.icon-align-left{background-position:-264px -48px;}
+.icon-align-center{background-position:-288px -48px;}
+.icon-align-right{background-position:-312px -48px;}
+.icon-align-justify{background-position:-336px -48px;}
+.icon-list{background-position:-360px -48px;}
+.icon-indent-left{background-position:-384px -48px;}
+.icon-indent-right{background-position:-408px -48px;}
+.icon-facetime-video{background-position:-432px -48px;}
+.icon-picture{background-position:-456px -48px;}
+.icon-pencil{background-position:0 -72px;}
+.icon-map-marker{background-position:-24px -72px;}
+.icon-adjust{background-position:-48px -72px;}
+.icon-tint{background-position:-72px -72px;}
+.icon-edit{background-position:-96px -72px;}
+.icon-share{background-position:-120px -72px;}
+.icon-check{background-position:-144px -72px;}
+.icon-move{background-position:-168px -72px;}
+.icon-step-backward{background-position:-192px -72px;}
+.icon-fast-backward{background-position:-216px -72px;}
+.icon-backward{background-position:-240px -72px;}
+.icon-play{background-position:-264px -72px;}
+.icon-pause{background-position:-288px -72px;}
+.icon-stop{background-position:-312px -72px;}
+.icon-forward{background-position:-336px -72px;}
+.icon-fast-forward{background-position:-360px -72px;}
+.icon-step-forward{background-position:-384px -72px;}
+.icon-eject{background-position:-408px -72px;}
+.icon-chevron-left{background-position:-432px -72px;}
+.icon-chevron-right{background-position:-456px -72px;}
+.icon-plus-sign{background-position:0 -96px;}
+.icon-minus-sign{background-position:-24px -96px;}
+.icon-remove-sign{background-position:-48px -96px;}
+.icon-ok-sign{background-position:-72px -96px;}
+.icon-question-sign{background-position:-96px -96px;}
+.icon-info-sign{background-position:-120px -96px;}
+.icon-screenshot{background-position:-144px -96px;}
+.icon-remove-circle{background-position:-168px -96px;}
+.icon-ok-circle{background-position:-192px -96px;}
+.icon-ban-circle{background-position:-216px -96px;}
+.icon-arrow-left{background-position:-240px -96px;}
+.icon-arrow-right{background-position:-264px -96px;}
+.icon-arrow-up{background-position:-289px -96px;}
+.icon-arrow-down{background-position:-312px -96px;}
+.icon-share-alt{background-position:-336px -96px;}
+.icon-resize-full{background-position:-360px -96px;}
+.icon-resize-small{background-position:-384px -96px;}
+.icon-plus{background-position:-408px -96px;}
+.icon-minus{background-position:-433px -96px;}
+.icon-asterisk{background-position:-456px -96px;}
+.icon-exclamation-sign{background-position:0 -120px;}
+.icon-gift{background-position:-24px -120px;}
+.icon-leaf{background-position:-48px -120px;}
+.icon-fire{background-position:-72px -120px;}
+.icon-eye-open{background-position:-96px -120px;}
+.icon-eye-close{background-position:-120px -120px;}
+.icon-warning-sign{background-position:-144px -120px;}
+.icon-plane{background-position:-168px -120px;}
+.icon-calendar{background-position:-192px -120px;}
+.icon-random{background-position:-216px -120px;width:16px;}
+.icon-comment{background-position:-240px -120px;}
+.icon-magnet{background-position:-264px -120px;}
+.icon-chevron-up{background-position:-288px -120px;}
+.icon-chevron-down{background-position:-313px -119px;}
+.icon-retweet{background-position:-336px -120px;}
+.icon-shopping-cart{background-position:-360px -120px;}
+.icon-folder-close{background-position:-384px -120px;width:16px;}
+.icon-folder-open{background-position:-408px -120px;width:16px;}
+.icon-resize-vertical{background-position:-432px -119px;}
+.icon-resize-horizontal{background-position:-456px -118px;}
+.icon-hdd{background-position:0 -144px;}
+.icon-bullhorn{background-position:-24px -144px;}
+.icon-bell{background-position:-48px -144px;}
+.icon-certificate{background-position:-72px -144px;}
+.icon-thumbs-up{background-position:-96px -144px;}
+.icon-thumbs-down{background-position:-120px -144px;}
+.icon-hand-right{background-position:-144px -144px;}
+.icon-hand-left{background-position:-168px -144px;}
+.icon-hand-up{background-position:-192px -144px;}
+.icon-hand-down{background-position:-216px -144px;}
+.icon-circle-arrow-right{background-position:-240px -144px;}
+.icon-circle-arrow-left{background-position:-264px -144px;}
+.icon-circle-arrow-up{background-position:-288px -144px;}
+.icon-circle-arrow-down{background-position:-312px -144px;}
+.icon-globe{background-position:-336px -144px;}
+.icon-wrench{background-position:-360px -144px;}
+.icon-tasks{background-position:-384px -144px;}
+.icon-filter{background-position:-408px -144px;}
+.icon-briefcase{background-position:-432px -144px;}
+.icon-fullscreen{background-position:-456px -144px;}
+.btn-group{position:relative;display:inline-block;*display:inline;*zoom:1;font-size:0;vertical-align:middle;white-space:nowrap;*margin-left:.3em;}.btn-group:first-child{*margin-left:0;}
+.btn-group+.btn-group{margin-left:5px;}
+.btn-toolbar{font-size:0;margin-top:10px;margin-bottom:10px;}.btn-toolbar>.btn+.btn,.btn-toolbar>.btn-group+.btn,.btn-toolbar>.btn+.btn-group{margin-left:5px;}
+.btn-group>.btn{position:relative;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.btn-group>.btn+.btn{margin-left:-1px;}
+.btn-group>.btn,.btn-group>.dropdown-menu,.btn-group>.popover{font-size:14px;}
+.btn-group>.btn-mini{font-size:10.5px;}
+.btn-group>.btn-small{font-size:11.9px;}
+.btn-group>.btn-large{font-size:17.5px;}
+.btn-group>.btn:first-child{margin-left:0;-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;-webkit-border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px;border-bottom-left-radius:4px;}
+.btn-group>.btn:last-child,.btn-group>.dropdown-toggle{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px;border-bottom-right-radius:4px;}
+.btn-group>.btn.large:first-child{margin-left:0;-webkit-border-top-left-radius:6px;-moz-border-radius-topleft:6px;border-top-left-radius:6px;-webkit-border-bottom-left-radius:6px;-moz-border-radius-bottomleft:6px;border-bottom-left-radius:6px;}
+.btn-group>.btn.large:last-child,.btn-group>.large.dropdown-toggle{-webkit-border-top-right-radius:6px;-moz-border-radius-topright:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;-moz-border-radius-bottomright:6px;border-bottom-right-radius:6px;}
+.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active{z-index:2;}
+.btn-group .dropdown-toggle:active,.btn-group.open .dropdown-toggle{outline:0;}
+.btn-group>.btn+.dropdown-toggle{padding-left:8px;padding-right:8px;-webkit-box-shadow:inset 1px 0 0 rgba(255,255,255,.125), inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 1px 0 0 rgba(255,255,255,.125), inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 1px 0 0 rgba(255,255,255,.125), inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);*padding-top:5px;*padding-bottom:5px;}
+.btn-group>.btn-mini+.dropdown-toggle{padding-left:5px;padding-right:5px;*padding-top:2px;*padding-bottom:2px;}
+.btn-group>.btn-small+.dropdown-toggle{*padding-top:5px;*padding-bottom:4px;}
+.btn-group>.btn-large+.dropdown-toggle{padding-left:12px;padding-right:12px;*padding-top:7px;*padding-bottom:7px;}
+.btn-group.open .dropdown-toggle{background-image:none;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);}
+.btn-group.open .btn.dropdown-toggle{background-color:#e6e6e6;}
+.btn-group.open .btn-primary.dropdown-toggle{background-color:#0044cc;}
+.btn-group.open .btn-warning.dropdown-toggle{background-color:#f89406;}
+.btn-group.open .btn-danger.dropdown-toggle{background-color:#bd362f;}
+.btn-group.open .btn-success.dropdown-toggle{background-color:#51a351;}
+.btn-group.open .btn-info.dropdown-toggle{background-color:#2f96b4;}
+.btn-group.open .btn-inverse.dropdown-toggle{background-color:#222222;}
+.btn .caret{margin-top:8px;margin-left:0;}
+.btn-large .caret{margin-top:6px;}
+.btn-large .caret{border-left-width:5px;border-right-width:5px;border-top-width:5px;}
+.btn-mini .caret,.btn-small .caret{margin-top:8px;}
+.dropup .btn-large .caret{border-bottom-width:5px;}
+.btn-primary .caret,.btn-warning .caret,.btn-danger .caret,.btn-info .caret,.btn-success .caret,.btn-inverse .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;}
+.btn-group-vertical{display:inline-block;*display:inline;*zoom:1;}
+.btn-group-vertical>.btn{display:block;float:none;max-width:100%;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.btn-group-vertical>.btn+.btn{margin-left:0;margin-top:-1px;}
+.btn-group-vertical>.btn:first-child{-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0;}
+.btn-group-vertical>.btn:last-child{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px;}
+.btn-group-vertical>.btn-large:first-child{-webkit-border-radius:6px 6px 0 0;-moz-border-radius:6px 6px 0 0;border-radius:6px 6px 0 0;}
+.btn-group-vertical>.btn-large:last-child{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;}
+.nav{margin-left:0;margin-bottom:20px;list-style:none;}
+.nav>li>a{display:block;}
+.nav>li>a:hover,.nav>li>a:focus{text-decoration:none;background-color:#eeeeee;}
+.nav>li>a>img{max-width:none;}
+.nav>.pull-right{float:right;}
+.nav-header{display:block;padding:3px 15px;font-size:11px;font-weight:bold;line-height:20px;color:#999999;text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);text-transform:uppercase;}
+.nav li+.nav-header{margin-top:9px;}
+.nav-list{padding-left:15px;padding-right:15px;margin-bottom:0;}
+.nav-list>li>a,.nav-list .nav-header{margin-left:-15px;margin-right:-15px;text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);}
+.nav-list>li>a{padding:3px 15px;}
+.nav-list>.active>a,.nav-list>.active>a:hover,.nav-list>.active>a:focus{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.2);background-color:#0088cc;}
+.nav-list [class^="icon-"],.nav-list [class*=" icon-"]{margin-right:2px;}
+.nav-list .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #ffffff;}
+.nav-tabs,.nav-pills{*zoom:1;}.nav-tabs:before,.nav-pills:before,.nav-tabs:after,.nav-pills:after{display:table;content:"";line-height:0;}
+.nav-tabs:after,.nav-pills:after{clear:both;}
+.nav-tabs>li,.nav-pills>li{float:left;}
+.nav-tabs>li>a,.nav-pills>li>a{padding-right:12px;padding-left:12px;margin-right:2px;line-height:14px;}
+.nav-tabs{border-bottom:1px solid #ddd;}
+.nav-tabs>li{margin-bottom:-1px;}
+.nav-tabs>li>a{padding-top:8px;padding-bottom:8px;line-height:20px;border:1px solid transparent;-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0;}.nav-tabs>li>a:hover,.nav-tabs>li>a:focus{border-color:#eeeeee #eeeeee #dddddd;}
+.nav-tabs>.active>a,.nav-tabs>.active>a:hover,.nav-tabs>.active>a:focus{color:#555555;background-color:#ffffff;border:1px solid #ddd;border-bottom-color:transparent;cursor:default;}
+.nav-pills>li>a{padding-top:8px;padding-bottom:8px;margin-top:2px;margin-bottom:2px;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px;}
+.nav-pills>.active>a,.nav-pills>.active>a:hover,.nav-pills>.active>a:focus{color:#ffffff;background-color:#0088cc;}
+.nav-stacked>li{float:none;}
+.nav-stacked>li>a{margin-right:0;}
+.nav-tabs.nav-stacked{border-bottom:0;}
+.nav-tabs.nav-stacked>li>a{border:1px solid #ddd;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.nav-tabs.nav-stacked>li:first-child>a{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;}
+.nav-tabs.nav-stacked>li:last-child>a{-webkit-border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px;border-bottom-right-radius:4px;-webkit-border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px;border-bottom-left-radius:4px;}
+.nav-tabs.nav-stacked>li>a:hover,.nav-tabs.nav-stacked>li>a:focus{border-color:#ddd;z-index:2;}
+.nav-pills.nav-stacked>li>a{margin-bottom:3px;}
+.nav-pills.nav-stacked>li:last-child>a{margin-bottom:1px;}
+.nav-tabs .dropdown-menu{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;}
+.nav-pills .dropdown-menu{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;}
+.nav .dropdown-toggle .caret{border-top-color:#0088cc;border-bottom-color:#0088cc;margin-top:6px;}
+.nav .dropdown-toggle:hover .caret,.nav .dropdown-toggle:focus .caret{border-top-color:#005580;border-bottom-color:#005580;}
+.nav-tabs .dropdown-toggle .caret{margin-top:8px;}
+.nav .active .dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff;}
+.nav-tabs .active .dropdown-toggle .caret{border-top-color:#555555;border-bottom-color:#555555;}
+.nav>.dropdown.active>a:hover,.nav>.dropdown.active>a:focus{cursor:pointer;}
+.nav-tabs .open .dropdown-toggle,.nav-pills .open .dropdown-toggle,.nav>li.dropdown.open.active>a:hover,.nav>li.dropdown.open.active>a:focus{color:#ffffff;background-color:#999999;border-color:#999999;}
+.nav li.dropdown.open .caret,.nav li.dropdown.open.active .caret,.nav li.dropdown.open a:hover .caret,.nav li.dropdown.open a:focus .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;opacity:1;filter:alpha(opacity=100);}
+.tabs-stacked .open>a:hover,.tabs-stacked .open>a:focus{border-color:#999999;}
+.tabbable{*zoom:1;}.tabbable:before,.tabbable:after{display:table;content:"";line-height:0;}
+.tabbable:after{clear:both;}
+.tab-content{overflow:auto;}
+.tabs-below>.nav-tabs,.tabs-right>.nav-tabs,.tabs-left>.nav-tabs{border-bottom:0;}
+.tab-content>.tab-pane,.pill-content>.pill-pane{display:none;}
+.tab-content>.active,.pill-content>.active{display:block;}
+.tabs-below>.nav-tabs{border-top:1px solid #ddd;}
+.tabs-below>.nav-tabs>li{margin-top:-1px;margin-bottom:0;}
+.tabs-below>.nav-tabs>li>a{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px;}.tabs-below>.nav-tabs>li>a:hover,.tabs-below>.nav-tabs>li>a:focus{border-bottom-color:transparent;border-top-color:#ddd;}
+.tabs-below>.nav-tabs>.active>a,.tabs-below>.nav-tabs>.active>a:hover,.tabs-below>.nav-tabs>.active>a:focus{border-color:transparent #ddd #ddd #ddd;}
+.tabs-left>.nav-tabs>li,.tabs-right>.nav-tabs>li{float:none;}
+.tabs-left>.nav-tabs>li>a,.tabs-right>.nav-tabs>li>a{min-width:74px;margin-right:0;margin-bottom:3px;}
+.tabs-left>.nav-tabs{float:left;margin-right:19px;border-right:1px solid #ddd;}
+.tabs-left>.nav-tabs>li>a{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px;}
+.tabs-left>.nav-tabs>li>a:hover,.tabs-left>.nav-tabs>li>a:focus{border-color:#eeeeee #dddddd #eeeeee #eeeeee;}
+.tabs-left>.nav-tabs .active>a,.tabs-left>.nav-tabs .active>a:hover,.tabs-left>.nav-tabs .active>a:focus{border-color:#ddd transparent #ddd #ddd;*border-right-color:#ffffff;}
+.tabs-right>.nav-tabs{float:right;margin-left:19px;border-left:1px solid #ddd;}
+.tabs-right>.nav-tabs>li>a{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.tabs-right>.nav-tabs>li>a:hover,.tabs-right>.nav-tabs>li>a:focus{border-color:#eeeeee #eeeeee #eeeeee #dddddd;}
+.tabs-right>.nav-tabs .active>a,.tabs-right>.nav-tabs .active>a:hover,.tabs-right>.nav-tabs .active>a:focus{border-color:#ddd #ddd #ddd transparent;*border-left-color:#ffffff;}
+.nav>.disabled>a{color:#999999;}
+.nav>.disabled>a:hover,.nav>.disabled>a:focus{text-decoration:none;background-color:transparent;cursor:default;}
+.navbar{overflow:visible;margin-bottom:20px;*position:relative;*z-index:2;}
+.navbar-inner{min-height:40px;padding-left:20px;padding-right:20px;background-color:#fafafa;background-image:-moz-linear-gradient(top, #ffffff, #f2f2f2);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ffffff), to(#f2f2f2));background-image:-webkit-linear-gradient(top, #ffffff, #f2f2f2);background-image:-o-linear-gradient(top, #ffffff, #f2f2f2);background-image:linear-gradient(to bottom, #ffffff, #f2f2f2);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff', endColorstr='#fff2f2f2', GradientType=0);border:1px solid #d4d4d4;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 4px rgba(0, 0, 0, 0.065);-moz-box-shadow:0 1px 4px rgba(0, 0, 0, 0.065);box-shadow:0 1px 4px rgba(0, 0, 0, 0.065);*zoom:1;}.navbar-inner:before,.navbar-inner:after{display:table;content:"";line-height:0;}
+.navbar-inner:after{clear:both;}
+.navbar .container{width:auto;}
+.nav-collapse.collapse{height:auto;overflow:visible;}
+.navbar .brand{float:left;display:block;padding:10px 20px 10px;margin-left:-20px;font-size:20px;font-weight:200;color:#777777;text-shadow:0 1px 0 #ffffff;}.navbar .brand:hover,.navbar .brand:focus{text-decoration:none;}
+.navbar-text{margin-bottom:0;line-height:40px;color:#777777;}
+.navbar-link{color:#777777;}.navbar-link:hover,.navbar-link:focus{color:#333333;}
+.navbar .divider-vertical{height:40px;margin:0 9px;border-left:1px solid #f2f2f2;border-right:1px solid #ffffff;}
+.navbar .btn,.navbar .btn-group{margin-top:5px;}
+.navbar .btn-group .btn,.navbar .input-prepend .btn,.navbar .input-append .btn,.navbar .input-prepend .btn-group,.navbar .input-append .btn-group{margin-top:0;}
+.navbar-form{margin-bottom:0;*zoom:1;}.navbar-form:before,.navbar-form:after{display:table;content:"";line-height:0;}
+.navbar-form:after{clear:both;}
+.navbar-form input,.navbar-form select,.navbar-form .radio,.navbar-form .checkbox{margin-top:5px;}
+.navbar-form input,.navbar-form select,.navbar-form .btn{display:inline-block;margin-bottom:0;}
+.navbar-form input[type="image"],.navbar-form input[type="checkbox"],.navbar-form input[type="radio"]{margin-top:3px;}
+.navbar-form .input-append,.navbar-form .input-prepend{margin-top:5px;white-space:nowrap;}.navbar-form .input-append input,.navbar-form .input-prepend input{margin-top:0;}
+.navbar-search{position:relative;float:left;margin-top:5px;margin-bottom:0;}.navbar-search .search-query{margin-bottom:0;padding:4px 14px;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:13px;font-weight:normal;line-height:1;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px;}
+.navbar-static-top{position:static;margin-bottom:0;}.navbar-static-top .navbar-inner{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.navbar-fixed-top,.navbar-fixed-bottom{position:fixed;right:0;left:0;z-index:1030;margin-bottom:0;}
+.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{border-width:0 0 1px;}
+.navbar-fixed-bottom .navbar-inner{border-width:1px 0 0;}
+.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding-left:0;padding-right:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px;}
+.navbar-fixed-top{top:0;}
+.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{-webkit-box-shadow:0 1px 10px rgba(0,0,0,.1);-moz-box-shadow:0 1px 10px rgba(0,0,0,.1);box-shadow:0 1px 10px rgba(0,0,0,.1);}
+.navbar-fixed-bottom{bottom:0;}.navbar-fixed-bottom .navbar-inner{-webkit-box-shadow:0 -1px 10px rgba(0,0,0,.1);-moz-box-shadow:0 -1px 10px rgba(0,0,0,.1);box-shadow:0 -1px 10px rgba(0,0,0,.1);}
+.navbar .nav{position:relative;left:0;display:block;float:left;margin:0 10px 0 0;}
+.navbar .nav.pull-right{float:right;margin-right:0;}
+.navbar .nav>li{float:left;}
+.navbar .nav>li>a{float:none;padding:10px 15px 10px;color:#777777;text-decoration:none;text-shadow:0 1px 0 #ffffff;}
+.navbar .nav .dropdown-toggle .caret{margin-top:8px;}
+.navbar .nav>li>a:focus,.navbar .nav>li>a:hover{background-color:transparent;color:#333333;text-decoration:none;}
+.navbar .nav>.active>a,.navbar .nav>.active>a:hover,.navbar .nav>.active>a:focus{color:#555555;text-decoration:none;background-color:#e5e5e5;-webkit-box-shadow:inset 0 3px 8px rgba(0, 0, 0, 0.125);-moz-box-shadow:inset 0 3px 8px rgba(0, 0, 0, 0.125);box-shadow:inset 0 3px 8px rgba(0, 0, 0, 0.125);}
+.navbar .btn-navbar{display:none;float:right;padding:7px 10px;margin-left:5px;margin-right:5px;color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#ededed;background-image:-moz-linear-gradient(top, #f2f2f2, #e5e5e5);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#f2f2f2), to(#e5e5e5));background-image:-webkit-linear-gradient(top, #f2f2f2, #e5e5e5);background-image:-o-linear-gradient(top, #f2f2f2, #e5e5e5);background-image:linear-gradient(to bottom, #f2f2f2, #e5e5e5);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff2f2f2', endColorstr='#ffe5e5e5', GradientType=0);border-color:#e5e5e5 #e5e5e5 #bfbfbf;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#e5e5e5;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.075);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.075);box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.075);}.navbar .btn-navbar:hover,.navbar .btn-navbar:focus,.navbar .btn-navbar:active,.navbar .btn-navbar.active,.navbar .btn-navbar.disabled,.navbar .btn-navbar[disabled]{color:#ffffff;background-color:#e5e5e5;*background-color:#d9d9d9;}
+.navbar .btn-navbar:active,.navbar .btn-navbar.active{background-color:#cccccc \9;}
+.navbar .btn-navbar .icon-bar{display:block;width:18px;height:2px;background-color:#f5f5f5;-webkit-border-radius:1px;-moz-border-radius:1px;border-radius:1px;-webkit-box-shadow:0 1px 0 rgba(0, 0, 0, 0.25);-moz-box-shadow:0 1px 0 rgba(0, 0, 0, 0.25);box-shadow:0 1px 0 rgba(0, 0, 0, 0.25);}
+.btn-navbar .icon-bar+.icon-bar{margin-top:3px;}
+.navbar .nav>li>.dropdown-menu:before{content:'';display:inline-block;border-left:7px solid transparent;border-right:7px solid transparent;border-bottom:7px solid #ccc;border-bottom-color:rgba(0, 0, 0, 0.2);position:absolute;top:-7px;left:9px;}
+.navbar .nav>li>.dropdown-menu:after{content:'';display:inline-block;border-left:6px solid transparent;border-right:6px solid transparent;border-bottom:6px solid #ffffff;position:absolute;top:-6px;left:10px;}
+.navbar-fixed-bottom .nav>li>.dropdown-menu:before{border-top:7px solid #ccc;border-top-color:rgba(0, 0, 0, 0.2);border-bottom:0;bottom:-7px;top:auto;}
+.navbar-fixed-bottom .nav>li>.dropdown-menu:after{border-top:6px solid #ffffff;border-bottom:0;bottom:-6px;top:auto;}
+.navbar .nav li.dropdown>a:hover .caret,.navbar .nav li.dropdown>a:focus .caret{border-top-color:#333333;border-bottom-color:#333333;}
+.navbar .nav li.dropdown.open>.dropdown-toggle,.navbar .nav li.dropdown.active>.dropdown-toggle,.navbar .nav li.dropdown.open.active>.dropdown-toggle{background-color:#e5e5e5;color:#555555;}
+.navbar .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#777777;border-bottom-color:#777777;}
+.navbar .nav li.dropdown.open>.dropdown-toggle .caret,.navbar .nav li.dropdown.active>.dropdown-toggle .caret,.navbar .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#555555;border-bottom-color:#555555;}
+.navbar .pull-right>li>.dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right{left:auto;right:0;}.navbar .pull-right>li>.dropdown-menu:before,.navbar .nav>li>.dropdown-menu.pull-right:before{left:auto;right:12px;}
+.navbar .pull-right>li>.dropdown-menu:after,.navbar .nav>li>.dropdown-menu.pull-right:after{left:auto;right:13px;}
+.navbar .pull-right>li>.dropdown-menu .dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right .dropdown-menu{left:auto;right:100%;margin-left:0;margin-right:-1px;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px;}
+.navbar-inverse .navbar-inner{background-color:#1b1b1b;background-image:-moz-linear-gradient(top, #222222, #111111);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#222222), to(#111111));background-image:-webkit-linear-gradient(top, #222222, #111111);background-image:-o-linear-gradient(top, #222222, #111111);background-image:linear-gradient(to bottom, #222222, #111111);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff222222', endColorstr='#ff111111', GradientType=0);border-color:#252525;}
+.navbar-inverse .brand,.navbar-inverse .nav>li>a{color:#999999;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);}.navbar-inverse .brand:hover,.navbar-inverse .nav>li>a:hover,.navbar-inverse .brand:focus,.navbar-inverse .nav>li>a:focus{color:#ffffff;}
+.navbar-inverse .brand{color:#999999;}
+.navbar-inverse .navbar-text{color:#999999;}
+.navbar-inverse .nav>li>a:focus,.navbar-inverse .nav>li>a:hover{background-color:transparent;color:#ffffff;}
+.navbar-inverse .nav .active>a,.navbar-inverse .nav .active>a:hover,.navbar-inverse .nav .active>a:focus{color:#ffffff;background-color:#111111;}
+.navbar-inverse .navbar-link{color:#999999;}.navbar-inverse .navbar-link:hover,.navbar-inverse .navbar-link:focus{color:#ffffff;}
+.navbar-inverse .divider-vertical{border-left-color:#111111;border-right-color:#222222;}
+.navbar-inverse .nav li.dropdown.open>.dropdown-toggle,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle{background-color:#111111;color:#ffffff;}
+.navbar-inverse .nav li.dropdown>a:hover .caret,.navbar-inverse .nav li.dropdown>a:focus .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;}
+.navbar-inverse .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#999999;border-bottom-color:#999999;}
+.navbar-inverse .nav li.dropdown.open>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;}
+.navbar-inverse .navbar-search .search-query{color:#ffffff;background-color:#515151;border-color:#111111;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,.1), 0 1px 0 rgba(255,255,255,.15);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,.1), 0 1px 0 rgba(255,255,255,.15);box-shadow:inset 0 1px 2px rgba(0,0,0,.1), 0 1px 0 rgba(255,255,255,.15);-webkit-transition:none;-moz-transition:none;-o-transition:none;transition:none;}.navbar-inverse .navbar-search .search-query:-moz-placeholder{color:#cccccc;}
+.navbar-inverse .navbar-search .search-query:-ms-input-placeholder{color:#cccccc;}
+.navbar-inverse .navbar-search .search-query::-webkit-input-placeholder{color:#cccccc;}
+.navbar-inverse .navbar-search .search-query:focus,.navbar-inverse .navbar-search .search-query.focused{padding:5px 15px;color:#333333;text-shadow:0 1px 0 #ffffff;background-color:#ffffff;border:0;-webkit-box-shadow:0 0 3px rgba(0, 0, 0, 0.15);-moz-box-shadow:0 0 3px rgba(0, 0, 0, 0.15);box-shadow:0 0 3px rgba(0, 0, 0, 0.15);outline:0;}
+.navbar-inverse .btn-navbar{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#0e0e0e;background-image:-moz-linear-gradient(top, #151515, #040404);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#151515), to(#040404));background-image:-webkit-linear-gradient(top, #151515, #040404);background-image:-o-linear-gradient(top, #151515, #040404);background-image:linear-gradient(to bottom, #151515, #040404);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff151515', endColorstr='#ff040404', GradientType=0);border-color:#040404 #040404 #000000;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#040404;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.navbar-inverse .btn-navbar:hover,.navbar-inverse .btn-navbar:focus,.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active,.navbar-inverse .btn-navbar.disabled,.navbar-inverse .btn-navbar[disabled]{color:#ffffff;background-color:#040404;*background-color:#000000;}
+.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active{background-color:#000000 \9;}
+.breadcrumb{padding:8px 15px;margin:0 0 20px;list-style:none;background-color:#f5f5f5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}.breadcrumb>li{display:inline-block;*display:inline;*zoom:1;text-shadow:0 1px 0 #ffffff;}.breadcrumb>li>.divider{padding:0 5px;color:#ccc;}
+.breadcrumb>.active{color:#999999;}
+.pagination{margin:20px 0;}
+.pagination ul{display:inline-block;*display:inline;*zoom:1;margin-left:0;margin-bottom:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 2px rgba(0, 0, 0, 0.05);-moz-box-shadow:0 1px 2px rgba(0, 0, 0, 0.05);box-shadow:0 1px 2px rgba(0, 0, 0, 0.05);}
+.pagination ul>li{display:inline;}
+.pagination ul>li>a,.pagination ul>li>span{float:left;padding:4px 12px;line-height:20px;text-decoration:none;background-color:#ffffff;border:1px solid #dddddd;border-left-width:0;}
+.pagination ul>li>a:hover,.pagination ul>li>a:focus,.pagination ul>.active>a,.pagination ul>.active>span{background-color:#f5f5f5;}
+.pagination ul>.active>a,.pagination ul>.active>span{color:#999999;cursor:default;}
+.pagination ul>.disabled>span,.pagination ul>.disabled>a,.pagination ul>.disabled>a:hover,.pagination ul>.disabled>a:focus{color:#999999;background-color:transparent;cursor:default;}
+.pagination ul>li:first-child>a,.pagination ul>li:first-child>span{border-left-width:1px;-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;-webkit-border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px;border-bottom-left-radius:4px;}
+.pagination ul>li:last-child>a,.pagination ul>li:last-child>span{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px;border-bottom-right-radius:4px;}
+.pagination-centered{text-align:center;}
+.pagination-right{text-align:right;}
+.pagination-large ul>li>a,.pagination-large ul>li>span{padding:11px 19px;font-size:17.5px;}
+.pagination-large ul>li:first-child>a,.pagination-large ul>li:first-child>span{-webkit-border-top-left-radius:6px;-moz-border-radius-topleft:6px;border-top-left-radius:6px;-webkit-border-bottom-left-radius:6px;-moz-border-radius-bottomleft:6px;border-bottom-left-radius:6px;}
+.pagination-large ul>li:last-child>a,.pagination-large ul>li:last-child>span{-webkit-border-top-right-radius:6px;-moz-border-radius-topright:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;-moz-border-radius-bottomright:6px;border-bottom-right-radius:6px;}
+.pagination-mini ul>li:first-child>a,.pagination-small ul>li:first-child>a,.pagination-mini ul>li:first-child>span,.pagination-small ul>li:first-child>span{-webkit-border-top-left-radius:3px;-moz-border-radius-topleft:3px;border-top-left-radius:3px;-webkit-border-bottom-left-radius:3px;-moz-border-radius-bottomleft:3px;border-bottom-left-radius:3px;}
+.pagination-mini ul>li:last-child>a,.pagination-small ul>li:last-child>a,.pagination-mini ul>li:last-child>span,.pagination-small ul>li:last-child>span{-webkit-border-top-right-radius:3px;-moz-border-radius-topright:3px;border-top-right-radius:3px;-webkit-border-bottom-right-radius:3px;-moz-border-radius-bottomright:3px;border-bottom-right-radius:3px;}
+.pagination-small ul>li>a,.pagination-small ul>li>span{padding:2px 10px;font-size:11.9px;}
+.pagination-mini ul>li>a,.pagination-mini ul>li>span{padding:0 6px;font-size:10.5px;}
+.pager{margin:20px 0;list-style:none;text-align:center;*zoom:1;}.pager:before,.pager:after{display:table;content:"";line-height:0;}
+.pager:after{clear:both;}
+.pager li{display:inline;}
+.pager li>a,.pager li>span{display:inline-block;padding:5px 14px;background-color:#fff;border:1px solid #ddd;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px;}
+.pager li>a:hover,.pager li>a:focus{text-decoration:none;background-color:#f5f5f5;}
+.pager .next>a,.pager .next>span{float:right;}
+.pager .previous>a,.pager .previous>span{float:left;}
+.pager .disabled>a,.pager .disabled>a:hover,.pager .disabled>a:focus,.pager .disabled>span{color:#999999;background-color:#fff;cursor:default;}
+.thumbnails{margin-left:-20px;list-style:none;*zoom:1;}.thumbnails:before,.thumbnails:after{display:table;content:"";line-height:0;}
+.thumbnails:after{clear:both;}
+.row-fluid .thumbnails{margin-left:0;}
+.thumbnails>li{float:left;margin-bottom:20px;margin-left:20px;}
+.thumbnail{display:block;padding:4px;line-height:20px;border:1px solid #ddd;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 3px rgba(0, 0, 0, 0.055);-moz-box-shadow:0 1px 3px rgba(0, 0, 0, 0.055);box-shadow:0 1px 3px rgba(0, 0, 0, 0.055);-webkit-transition:all 0.2s ease-in-out;-moz-transition:all 0.2s ease-in-out;-o-transition:all 0.2s ease-in-out;transition:all 0.2s ease-in-out;}
+a.thumbnail:hover,a.thumbnail:focus{border-color:#0088cc;-webkit-box-shadow:0 1px 4px rgba(0, 105, 214, 0.25);-moz-box-shadow:0 1px 4px rgba(0, 105, 214, 0.25);box-shadow:0 1px 4px rgba(0, 105, 214, 0.25);}
+.thumbnail>img{display:block;max-width:100%;margin-left:auto;margin-right:auto;}
+.thumbnail .caption{padding:9px;color:#555555;}
+.alert{padding:8px 35px 8px 14px;margin-bottom:20px;text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);background-color:#fcf8e3;border:1px solid #fbeed5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}
+.alert,.alert h4{color:#c09853;}
+.alert h4{margin:0;}
+.alert .close{position:relative;top:-2px;right:-21px;line-height:20px;}
+.alert-success{background-color:#dff0d8;border-color:#d6e9c6;color:#468847;}
+.alert-success h4{color:#468847;}
+.alert-danger,.alert-error{background-color:#f2dede;border-color:#eed3d7;color:#b94a48;}
+.alert-danger h4,.alert-error h4{color:#b94a48;}
+.alert-info{background-color:#d9edf7;border-color:#bce8f1;color:#3a87ad;}
+.alert-info h4{color:#3a87ad;}
+.alert-block{padding-top:14px;padding-bottom:14px;}
+.alert-block>p,.alert-block>ul{margin-bottom:0;}
+.alert-block p+p{margin-top:5px;}
+@-webkit-keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}@-moz-keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}@-ms-keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}@-o-keyframes progress-bar-stripes{from{background-position:0 0;} to{background-position:40px 0;}}@keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}.progress{overflow:hidden;height:20px;margin-bottom:20px;background-color:#f7f7f7;background-image:-moz-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#f5f5f5), to(#f9f9f9));background-image:-webkit-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:-o-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:linear-gradient(to bottom, #f5f5f5, #f9f9f9);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff5f5f5', endColorstr='#fff9f9f9', GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.1);-moz-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.1);box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.1);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}
+.progress .bar{width:0%;height:100%;color:#ffffff;float:left;font-size:12px;text-align:center;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#0e90d2;background-image:-moz-linear-gradient(top, #149bdf, #0480be);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#149bdf), to(#0480be));background-image:-webkit-linear-gradient(top, #149bdf, #0480be);background-image:-o-linear-gradient(top, #149bdf, #0480be);background-image:linear-gradient(to bottom, #149bdf, #0480be);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff149bdf', endColorstr='#ff0480be', GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0, 0, 0, 0.15);-moz-box-shadow:inset 0 -1px 0 rgba(0, 0, 0, 0.15);box-shadow:inset 0 -1px 0 rgba(0, 0, 0, 0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;-webkit-transition:width 0.6s ease;-moz-transition:width 0.6s ease;-o-transition:width 0.6s ease;transition:width 0.6s ease;}
+.progress .bar+.bar{-webkit-box-shadow:inset 1px 0 0 rgba(0,0,0,.15), inset 0 -1px 0 rgba(0,0,0,.15);-moz-box-shadow:inset 1px 0 0 rgba(0,0,0,.15), inset 0 -1px 0 rgba(0,0,0,.15);box-shadow:inset 1px 0 0 rgba(0,0,0,.15), inset 0 -1px 0 rgba(0,0,0,.15);}
+.progress-striped .bar{background-color:#149bdf;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);-webkit-background-size:40px 40px;-moz-background-size:40px 40px;-o-background-size:40px 40px;background-size:40px 40px;}
+.progress.active .bar{-webkit-animation:progress-bar-stripes 2s linear infinite;-moz-animation:progress-bar-stripes 2s linear infinite;-ms-animation:progress-bar-stripes 2s linear infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite;}
+.progress-danger .bar,.progress .bar-danger{background-color:#dd514c;background-image:-moz-linear-gradient(top, #ee5f5b, #c43c35);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ee5f5b), to(#c43c35));background-image:-webkit-linear-gradient(top, #ee5f5b, #c43c35);background-image:-o-linear-gradient(top, #ee5f5b, #c43c35);background-image:linear-gradient(to bottom, #ee5f5b, #c43c35);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffee5f5b', endColorstr='#ffc43c35', GradientType=0);}
+.progress-danger.progress-striped .bar,.progress-striped .bar-danger{background-color:#ee5f5b;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);}
+.progress-success .bar,.progress .bar-success{background-color:#5eb95e;background-image:-moz-linear-gradient(top, #62c462, #57a957);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#62c462), to(#57a957));background-image:-webkit-linear-gradient(top, #62c462, #57a957);background-image:-o-linear-gradient(top, #62c462, #57a957);background-image:linear-gradient(to bottom, #62c462, #57a957);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff62c462', endColorstr='#ff57a957', GradientType=0);}
+.progress-success.progress-striped .bar,.progress-striped .bar-success{background-color:#62c462;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);}
+.progress-info .bar,.progress .bar-info{background-color:#4bb1cf;background-image:-moz-linear-gradient(top, #5bc0de, #339bb9);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#5bc0de), to(#339bb9));background-image:-webkit-linear-gradient(top, #5bc0de, #339bb9);background-image:-o-linear-gradient(top, #5bc0de, #339bb9);background-image:linear-gradient(to bottom, #5bc0de, #339bb9);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de', endColorstr='#ff339bb9', GradientType=0);}
+.progress-info.progress-striped .bar,.progress-striped .bar-info{background-color:#5bc0de;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);}
+.progress-warning .bar,.progress .bar-warning{background-color:#faa732;background-image:-moz-linear-gradient(top, #fbb450, #f89406);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#fbb450), to(#f89406));background-image:-webkit-linear-gradient(top, #fbb450, #f89406);background-image:-o-linear-gradient(top, #fbb450, #f89406);background-image:linear-gradient(to bottom, #fbb450, #f89406);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffbb450', endColorstr='#fff89406', GradientType=0);}
+.progress-warning.progress-striped .bar,.progress-striped .bar-warning{background-color:#fbb450;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);}
+.hero-unit{padding:60px;margin-bottom:30px;font-size:18px;font-weight:200;line-height:30px;color:inherit;background-color:#eeeeee;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;}.hero-unit h1{margin-bottom:0;font-size:60px;line-height:1;color:inherit;letter-spacing:-1px;}
+.hero-unit li{line-height:30px;}
+.media,.media-body{overflow:hidden;*overflow:visible;zoom:1;}
+.media,.media .media{margin-top:15px;}
+.media:first-child{margin-top:0;}
+.media-object{display:block;}
+.media-heading{margin:0 0 5px;}
+.media>.pull-left{margin-right:10px;}
+.media>.pull-right{margin-left:10px;}
+.media-list{margin-left:0;list-style:none;}
+.tooltip{position:absolute;z-index:1030;display:block;visibility:visible;font-size:11px;line-height:1.4;opacity:0;filter:alpha(opacity=0);}.tooltip.in{opacity:0.8;filter:alpha(opacity=80);}
+.tooltip.top{margin-top:-3px;padding:5px 0;}
+.tooltip.right{margin-left:3px;padding:0 5px;}
+.tooltip.bottom{margin-top:3px;padding:5px 0;}
+.tooltip.left{margin-left:-3px;padding:0 5px;}
+.tooltip-inner{max-width:200px;padding:8px;color:#ffffff;text-align:center;text-decoration:none;background-color:#000000;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}
+.tooltip-arrow{position:absolute;width:0;height:0;border-color:transparent;border-style:solid;}
+.tooltip.top .tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-width:5px 5px 0;border-top-color:#000000;}
+.tooltip.right .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-width:5px 5px 5px 0;border-right-color:#000000;}
+.tooltip.left .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-width:5px 0 5px 5px;border-left-color:#000000;}
+.tooltip.bottom .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-width:0 5px 5px;border-bottom-color:#000000;}
+.popover{position:absolute;top:0;left:0;z-index:1010;display:none;max-width:276px;padding:1px;text-align:left;background-color:#ffffff;-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box;border:1px solid #ccc;border:1px solid rgba(0, 0, 0, 0.2);-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);-moz-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);white-space:normal;}.popover.top{margin-top:-10px;}
+.popover.right{margin-left:10px;}
+.popover.bottom{margin-top:10px;}
+.popover.left{margin-left:-10px;}
+.popover-title{margin:0;padding:8px 14px;font-size:14px;font-weight:normal;line-height:18px;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;-webkit-border-radius:5px 5px 0 0;-moz-border-radius:5px 5px 0 0;border-radius:5px 5px 0 0;}.popover-title:empty{display:none;}
+.popover-content{padding:9px 14px;}
+.popover .arrow,.popover .arrow:after{position:absolute;display:block;width:0;height:0;border-color:transparent;border-style:solid;}
+.popover .arrow{border-width:11px;}
+.popover .arrow:after{border-width:10px;content:"";}
+.popover.top .arrow{left:50%;margin-left:-11px;border-bottom-width:0;border-top-color:#999;border-top-color:rgba(0, 0, 0, 0.25);bottom:-11px;}.popover.top .arrow:after{bottom:1px;margin-left:-10px;border-bottom-width:0;border-top-color:#ffffff;}
+.popover.right .arrow{top:50%;left:-11px;margin-top:-11px;border-left-width:0;border-right-color:#999;border-right-color:rgba(0, 0, 0, 0.25);}.popover.right .arrow:after{left:1px;bottom:-10px;border-left-width:0;border-right-color:#ffffff;}
+.popover.bottom .arrow{left:50%;margin-left:-11px;border-top-width:0;border-bottom-color:#999;border-bottom-color:rgba(0, 0, 0, 0.25);top:-11px;}.popover.bottom .arrow:after{top:1px;margin-left:-10px;border-top-width:0;border-bottom-color:#ffffff;}
+.popover.left .arrow{top:50%;right:-11px;margin-top:-11px;border-right-width:0;border-left-color:#999;border-left-color:rgba(0, 0, 0, 0.25);}.popover.left .arrow:after{right:1px;border-right-width:0;border-left-color:#ffffff;bottom:-10px;}
+.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000000;}.modal-backdrop.fade{opacity:0;}
+.modal-backdrop,.modal-backdrop.fade.in{opacity:0.8;filter:alpha(opacity=80);}
+.modal{position:fixed;top:10%;left:50%;z-index:1050;width:560px;margin-left:-280px;background-color:#ffffff;border:1px solid #999;border:1px solid rgba(0, 0, 0, 0.3);*border:1px solid #999;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);-moz-box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box;outline:none;}.modal.fade{-webkit-transition:opacity .3s linear, top .3s ease-out;-moz-transition:opacity .3s linear, top .3s ease-out;-o-transition:opacity .3s linear, top .3s ease-out;transition:opacity .3s linear, top .3s ease-out;top:-25%;}
+.modal.fade.in{top:10%;}
+.modal-header{padding:9px 15px;border-bottom:1px solid #eee;}.modal-header .close{margin-top:2px;}
+.modal-header h3{margin:0;line-height:30px;}
+.modal-body{position:relative;overflow-y:auto;max-height:400px;padding:15px;}
+.modal-form{margin-bottom:0;}
+.modal-footer{padding:14px 15px 15px;margin-bottom:0;text-align:right;background-color:#f5f5f5;border-top:1px solid #ddd;-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;-webkit-box-shadow:inset 0 1px 0 #ffffff;-moz-box-shadow:inset 0 1px 0 #ffffff;box-shadow:inset 0 1px 0 #ffffff;*zoom:1;}.modal-footer:before,.modal-footer:after{display:table;content:"";line-height:0;}
+.modal-footer:after{clear:both;}
+.modal-footer .btn+.btn{margin-left:5px;margin-bottom:0;}
+.modal-footer .btn-group .btn+.btn{margin-left:-1px;}
+.modal-footer .btn-block+.btn-block{margin-left:0;}
+.dropup,.dropdown{position:relative;}
+.dropdown-toggle{*margin-bottom:-3px;}
+.dropdown-toggle:active,.open .dropdown-toggle{outline:0;}
+.caret{display:inline-block;width:0;height:0;vertical-align:top;border-top:4px solid #000000;border-right:4px solid transparent;border-left:4px solid transparent;content:"";}
+.dropdown .caret{margin-top:8px;margin-left:2px;}
+.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:160px;padding:5px 0;margin:2px 0 0;list-style:none;background-color:#ffffff;border:1px solid #ccc;border:1px solid rgba(0, 0, 0, 0.2);*border-right-width:2px;*border-bottom-width:2px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);-moz-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box;}.dropdown-menu.pull-right{right:0;left:auto;}
+.dropdown-menu .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #ffffff;}
+.dropdown-menu>li>a{display:block;padding:3px 20px;clear:both;font-weight:normal;line-height:20px;color:#333333;white-space:nowrap;}
+.dropdown-menu>li>a:hover,.dropdown-menu>li>a:focus,.dropdown-submenu:hover>a,.dropdown-submenu:focus>a{text-decoration:none;color:#ffffff;background-color:#0081c2;background-image:-moz-linear-gradient(top, #0088cc, #0077b3);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#0088cc), to(#0077b3));background-image:-webkit-linear-gradient(top, #0088cc, #0077b3);background-image:-o-linear-gradient(top, #0088cc, #0077b3);background-image:linear-gradient(to bottom, #0088cc, #0077b3);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc', endColorstr='#ff0077b3', GradientType=0);}
+.dropdown-menu>.active>a,.dropdown-menu>.active>a:hover,.dropdown-menu>.active>a:focus{color:#ffffff;text-decoration:none;outline:0;background-color:#0081c2;background-image:-moz-linear-gradient(top, #0088cc, #0077b3);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#0088cc), to(#0077b3));background-image:-webkit-linear-gradient(top, #0088cc, #0077b3);background-image:-o-linear-gradient(top, #0088cc, #0077b3);background-image:linear-gradient(to bottom, #0088cc, #0077b3);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc', endColorstr='#ff0077b3', GradientType=0);}
+.dropdown-menu>.disabled>a,.dropdown-menu>.disabled>a:hover,.dropdown-menu>.disabled>a:focus{color:#999999;}
+.dropdown-menu>.disabled>a:hover,.dropdown-menu>.disabled>a:focus{text-decoration:none;background-color:transparent;background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);cursor:default;}
+.open{*z-index:1000;}.open>.dropdown-menu{display:block;}
+.dropdown-backdrop{position:fixed;left:0;right:0;bottom:0;top:0;z-index:990;}
+.pull-right>.dropdown-menu{right:0;left:auto;}
+.dropup .caret,.navbar-fixed-bottom .dropdown .caret{border-top:0;border-bottom:4px solid #000000;content:"";}
+.dropup .dropdown-menu,.navbar-fixed-bottom .dropdown .dropdown-menu{top:auto;bottom:100%;margin-bottom:1px;}
+.dropdown-submenu{position:relative;}
+.dropdown-submenu>.dropdown-menu{top:0;left:100%;margin-top:-6px;margin-left:-1px;-webkit-border-radius:0 6px 6px 6px;-moz-border-radius:0 6px 6px 6px;border-radius:0 6px 6px 6px;}
+.dropdown-submenu:hover>.dropdown-menu{display:block;}
+.dropup .dropdown-submenu>.dropdown-menu{top:auto;bottom:0;margin-top:0;margin-bottom:-2px;-webkit-border-radius:5px 5px 5px 0;-moz-border-radius:5px 5px 5px 0;border-radius:5px 5px 5px 0;}
+.dropdown-submenu>a:after{display:block;content:" ";float:right;width:0;height:0;border-color:transparent;border-style:solid;border-width:5px 0 5px 5px;border-left-color:#cccccc;margin-top:5px;margin-right:-10px;}
+.dropdown-submenu:hover>a:after{border-left-color:#ffffff;}
+.dropdown-submenu.pull-left{float:none;}.dropdown-submenu.pull-left>.dropdown-menu{left:-100%;margin-left:10px;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px;}
+.dropdown .dropdown-menu .nav-header{padding-left:20px;padding-right:20px;}
+.typeahead{z-index:1051;margin-top:2px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}
+.accordion{margin-bottom:20px;}
+.accordion-group{margin-bottom:2px;border:1px solid #e5e5e5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}
+.accordion-heading{border-bottom:0;}
+.accordion-heading .accordion-toggle{display:block;padding:8px 15px;}
+.accordion-toggle{cursor:pointer;}
+.accordion-inner{padding:9px 15px;border-top:1px solid #e5e5e5;}
+.carousel{position:relative;margin-bottom:20px;line-height:1;}
+.carousel-inner{overflow:hidden;width:100%;position:relative;}
+.carousel-inner>.item{display:none;position:relative;-webkit-transition:0.6s ease-in-out left;-moz-transition:0.6s ease-in-out left;-o-transition:0.6s ease-in-out left;transition:0.6s ease-in-out left;}.carousel-inner>.item>img,.carousel-inner>.item>a>img{display:block;line-height:1;}
+.carousel-inner>.active,.carousel-inner>.next,.carousel-inner>.prev{display:block;}
+.carousel-inner>.active{left:0;}
+.carousel-inner>.next,.carousel-inner>.prev{position:absolute;top:0;width:100%;}
+.carousel-inner>.next{left:100%;}
+.carousel-inner>.prev{left:-100%;}
+.carousel-inner>.next.left,.carousel-inner>.prev.right{left:0;}
+.carousel-inner>.active.left{left:-100%;}
+.carousel-inner>.active.right{left:100%;}
+.carousel-control{position:absolute;top:40%;left:15px;width:40px;height:40px;margin-top:-20px;font-size:60px;font-weight:100;line-height:30px;color:#ffffff;text-align:center;background:#222222;border:3px solid #ffffff;-webkit-border-radius:23px;-moz-border-radius:23px;border-radius:23px;opacity:0.5;filter:alpha(opacity=50);}.carousel-control.right{left:auto;right:15px;}
+.carousel-control:hover,.carousel-control:focus{color:#ffffff;text-decoration:none;opacity:0.9;filter:alpha(opacity=90);}
+.carousel-indicators{position:absolute;top:15px;right:15px;z-index:5;margin:0;list-style:none;}.carousel-indicators li{display:block;float:left;width:10px;height:10px;margin-left:5px;text-indent:-999px;background-color:#ccc;background-color:rgba(255, 255, 255, 0.25);border-radius:5px;}
+.carousel-indicators .active{background-color:#fff;}
+.carousel-caption{position:absolute;left:0;right:0;bottom:0;padding:15px;background:#333333;background:rgba(0, 0, 0, 0.75);}
+.carousel-caption h4,.carousel-caption p{color:#ffffff;line-height:20px;}
+.carousel-caption h4{margin:0 0 5px;}
+.carousel-caption p{margin-bottom:0;}
+.well{min-height:20px;padding:19px;margin-bottom:20px;background-color:#f5f5f5;border:1px solid #e3e3e3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.05);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.05);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.05);}.well blockquote{border-color:#ddd;border-color:rgba(0, 0, 0, 0.15);}
+.well-large{padding:24px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;}
+.well-small{padding:9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;}
+.close{float:right;font-size:20px;font-weight:bold;line-height:20px;color:#000000;text-shadow:0 1px 0 #ffffff;opacity:0.2;filter:alpha(opacity=20);}.close:hover,.close:focus{color:#000000;text-decoration:none;cursor:pointer;opacity:0.4;filter:alpha(opacity=40);}
+button.close{padding:0;cursor:pointer;background:transparent;border:0;-webkit-appearance:none;}
+.pull-right{float:right;}
+.pull-left{float:left;}
+.hide{display:none;}
+.show{display:block;}
+.invisible{visibility:hidden;}
+.affix{position:fixed;}
+.fade{opacity:0;-webkit-transition:opacity 0.15s linear;-moz-transition:opacity 0.15s linear;-o-transition:opacity 0.15s linear;transition:opacity 0.15s linear;}.fade.in{opacity:1;}
+.collapse{position:relative;height:0;overflow:hidden;-webkit-transition:height 0.35s ease;-moz-transition:height 0.35s ease;-o-transition:height 0.35s ease;transition:height 0.35s ease;}.collapse.in{height:auto;}
+@-ms-viewport{width:device-width;}.hidden{display:none;visibility:hidden;}
+.visible-phone{display:none !important;}
+.visible-tablet{display:none !important;}
+.hidden-desktop{display:none !important;}
+.visible-desktop{display:inherit !important;}
+@media (min-width:768px) and (max-width:979px){.hidden-desktop{display:inherit !important;} .visible-desktop{display:none !important ;} .visible-tablet{display:inherit !important;} .hidden-tablet{display:none !important;}}@media (max-width:767px){.hidden-desktop{display:inherit !important;} .visible-desktop{display:none !important;} .visible-phone{display:inherit !important;} .hidden-phone{display:none !important;}}.visible-print{display:none !important;}
+@media print{.visible-print{display:inherit !important;} .hidden-print{display:none !important;}}@media (max-width:767px){body{padding-left:20px;padding-right:20px;} .navbar-fixed-top,.navbar-fixed-bottom,.navbar-static-top{margin-left:-20px;margin-right:-20px;} .container-fluid{padding:0;} .dl-horizontal dt{float:none;clear:none;width:auto;text-align:left;} .dl-horizontal dd{margin-left:0;} .container{width:auto;} .row-fluid{width:100%;} .row,.thumbnails{margin-left:0;} .thumbnails>li{float:none;margin-left:0;} [class*="span"],.uneditable-input[class*="span"],.row-fluid [class*="span"]{float:none;display:block;width:100%;margin-left:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;} .span12,.row-fluid .span12{width:100%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;} .row-fluid [class*="offset"]:first-child{margin-left:0;} .input-large,.input-xlarge,.input-xxlarge,input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;} .input-prepend input,.input-append input,.input-prepend input[class*="span"],.input-append input[class*="span"]{display:inline-block;width:auto;} .controls-row [class*="span"]+[class*="span"]{margin-left:0;} .modal{position:fixed;top:20px;left:20px;right:20px;width:auto;margin:0;}.modal.fade{top:-100px;} .modal.fade.in{top:20px;}}@media (max-width:480px){.nav-collapse{-webkit-transform:translate3d(0, 0, 0);} .page-header h1 small{display:block;line-height:20px;} input[type="checkbox"],input[type="radio"]{border:1px solid #ccc;} .form-horizontal .control-label{float:none;width:auto;padding-top:0;text-align:left;} .form-horizontal .controls{margin-left:0;} .form-horizontal .control-list{padding-top:0;} .form-horizontal .form-actions{padding-left:10px;padding-right:10px;} .media .pull-left,.media .pull-right{float:none;display:block;margin-bottom:10px;} .media-object{margin-right:0;margin-left:0;} .modal{top:10px;left:10px;right:10px;} .modal-header .close{padding:10px;margin:-10px;} .carousel-caption{position:static;}}@media (min-width:768px) and (max-width:979px){.row{margin-left:-20px;*zoom:1;}.row:before,.row:after{display:table;content:"";line-height:0;} .row:after{clear:both;} [class*="span"]{float:left;min-height:1px;margin-left:20px;} .container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:724px;} .span12{width:724px;} .span11{width:662px;} .span10{width:600px;} .span9{width:538px;} .span8{width:476px;} .span7{width:414px;} .span6{width:352px;} .span5{width:290px;} .span4{width:228px;} .span3{width:166px;} .span2{width:104px;} .span1{width:42px;} .offset12{margin-left:764px;} .offset11{margin-left:702px;} .offset10{margin-left:640px;} .offset9{margin-left:578px;} .offset8{margin-left:516px;} .offset7{margin-left:454px;} .offset6{margin-left:392px;} .offset5{margin-left:330px;} .offset4{margin-left:268px;} .offset3{margin-left:206px;} .offset2{margin-left:144px;} .offset1{margin-left:82px;} .row-fluid{width:100%;*zoom:1;}.row-fluid:before,.row-fluid:after{display:table;content:"";line-height:0;} .row-fluid:after{clear:both;} .row-fluid [class*="span"]{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;float:left;margin-left:2.7624309392265194%;*margin-left:2.709239449864817%;} .row-fluid [class*="span"]:first-child{margin-left:0;} .row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.7624309392265194%;} .row-fluid .span12{width:100%;*width:99.94680851063829%;} .row-fluid .span11{width:91.43646408839778%;*width:91.38327259903608%;} .row-fluid .span10{width:82.87292817679558%;*width:82.81973668743387%;} .row-fluid .span9{width:74.30939226519337%;*width:74.25620077583166%;} .row-fluid .span8{width:65.74585635359117%;*width:65.69266486422946%;} .row-fluid .span7{width:57.18232044198895%;*width:57.12912895262725%;} .row-fluid .span6{width:48.61878453038674%;*width:48.56559304102504%;} .row-fluid .span5{width:40.05524861878453%;*width:40.00205712942283%;} .row-fluid .span4{width:31.491712707182323%;*width:31.43852121782062%;} .row-fluid .span3{width:22.92817679558011%;*width:22.87498530621841%;} .row-fluid .span2{width:14.3646408839779%;*width:14.311449394616199%;} .row-fluid .span1{width:5.801104972375691%;*width:5.747913483013988%;} .row-fluid .offset12{margin-left:105.52486187845304%;*margin-left:105.41847889972962%;} .row-fluid .offset12:first-child{margin-left:102.76243093922652%;*margin-left:102.6560479605031%;} .row-fluid .offset11{margin-left:96.96132596685082%;*margin-left:96.8549429881274%;} .row-fluid .offset11:first-child{margin-left:94.1988950276243%;*margin-left:94.09251204890089%;} .row-fluid .offset10{margin-left:88.39779005524862%;*margin-left:88.2914070765252%;} .row-fluid .offset10:first-child{margin-left:85.6353591160221%;*margin-left:85.52897613729868%;} .row-fluid .offset9{margin-left:79.8342541436464%;*margin-left:79.72787116492299%;} .row-fluid .offset9:first-child{margin-left:77.07182320441989%;*margin-left:76.96544022569647%;} .row-fluid .offset8{margin-left:71.2707182320442%;*margin-left:71.16433525332079%;} .row-fluid .offset8:first-child{margin-left:68.50828729281768%;*margin-left:68.40190431409427%;} .row-fluid .offset7{margin-left:62.70718232044199%;*margin-left:62.600799341718584%;} .row-fluid .offset7:first-child{margin-left:59.94475138121547%;*margin-left:59.838368402492065%;} .row-fluid .offset6{margin-left:54.14364640883978%;*margin-left:54.037263430116376%;} .row-fluid .offset6:first-child{margin-left:51.38121546961326%;*margin-left:51.27483249088986%;} .row-fluid .offset5{margin-left:45.58011049723757%;*margin-left:45.47372751851417%;} .row-fluid .offset5:first-child{margin-left:42.81767955801105%;*margin-left:42.71129657928765%;} .row-fluid .offset4{margin-left:37.01657458563536%;*margin-left:36.91019160691196%;} .row-fluid .offset4:first-child{margin-left:34.25414364640884%;*margin-left:34.14776066768544%;} .row-fluid .offset3{margin-left:28.45303867403315%;*margin-left:28.346655695309746%;} .row-fluid .offset3:first-child{margin-left:25.69060773480663%;*margin-left:25.584224756083227%;} .row-fluid .offset2{margin-left:19.88950276243094%;*margin-left:19.783119783707537%;} .row-fluid .offset2:first-child{margin-left:17.12707182320442%;*margin-left:17.02068884448102%;} .row-fluid .offset1{margin-left:11.32596685082873%;*margin-left:11.219583872105325%;} .row-fluid .offset1:first-child{margin-left:8.56353591160221%;*margin-left:8.457152932878806%;} input,textarea,.uneditable-input{margin-left:0;} .controls-row [class*="span"]+[class*="span"]{margin-left:20px;} input.span12,textarea.span12,.uneditable-input.span12{width:710px;} input.span11,textarea.span11,.uneditable-input.span11{width:648px;} input.span10,textarea.span10,.uneditable-input.span10{width:586px;} input.span9,textarea.span9,.uneditable-input.span9{width:524px;} input.span8,textarea.span8,.uneditable-input.span8{width:462px;} input.span7,textarea.span7,.uneditable-input.span7{width:400px;} input.span6,textarea.span6,.uneditable-input.span6{width:338px;} input.span5,textarea.span5,.uneditable-input.span5{width:276px;} input.span4,textarea.span4,.uneditable-input.span4{width:214px;} input.span3,textarea.span3,.uneditable-input.span3{width:152px;} input.span2,textarea.span2,.uneditable-input.span2{width:90px;} input.span1,textarea.span1,.uneditable-input.span1{width:28px;}}@media (min-width:1200px){.row{margin-left:-30px;*zoom:1;}.row:before,.row:after{display:table;content:"";line-height:0;} .row:after{clear:both;} [class*="span"]{float:left;min-height:1px;margin-left:30px;} .container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:1170px;} .span12{width:1170px;} .span11{width:1070px;} .span10{width:970px;} .span9{width:870px;} .span8{width:770px;} .span7{width:670px;} .span6{width:570px;} .span5{width:470px;} .span4{width:370px;} .span3{width:270px;} .span2{width:170px;} .span1{width:70px;} .offset12{margin-left:1230px;} .offset11{margin-left:1130px;} .offset10{margin-left:1030px;} .offset9{margin-left:930px;} .offset8{margin-left:830px;} .offset7{margin-left:730px;} .offset6{margin-left:630px;} .offset5{margin-left:530px;} .offset4{margin-left:430px;} .offset3{margin-left:330px;} .offset2{margin-left:230px;} .offset1{margin-left:130px;} .row-fluid{width:100%;*zoom:1;}.row-fluid:before,.row-fluid:after{display:table;content:"";line-height:0;} .row-fluid:after{clear:both;} .row-fluid [class*="span"]{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;float:left;margin-left:2.564102564102564%;*margin-left:2.5109110747408616%;} .row-fluid [class*="span"]:first-child{margin-left:0;} .row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.564102564102564%;} .row-fluid .span12{width:100%;*width:99.94680851063829%;} .row-fluid .span11{width:91.45299145299145%;*width:91.39979996362975%;} .row-fluid .span10{width:82.90598290598291%;*width:82.8527914166212%;} .row-fluid .span9{width:74.35897435897436%;*width:74.30578286961266%;} .row-fluid .span8{width:65.81196581196582%;*width:65.75877432260411%;} .row-fluid .span7{width:57.26495726495726%;*width:57.21176577559556%;} .row-fluid .span6{width:48.717948717948715%;*width:48.664757228587014%;} .row-fluid .span5{width:40.17094017094017%;*width:40.11774868157847%;} .row-fluid .span4{width:31.623931623931625%;*width:31.570740134569924%;} .row-fluid .span3{width:23.076923076923077%;*width:23.023731587561375%;} .row-fluid .span2{width:14.52991452991453%;*width:14.476723040552828%;} .row-fluid .span1{width:5.982905982905983%;*width:5.929714493544281%;} .row-fluid .offset12{margin-left:105.12820512820512%;*margin-left:105.02182214948171%;} .row-fluid .offset12:first-child{margin-left:102.56410256410257%;*margin-left:102.45771958537915%;} .row-fluid .offset11{margin-left:96.58119658119658%;*margin-left:96.47481360247316%;} .row-fluid .offset11:first-child{margin-left:94.01709401709402%;*margin-left:93.91071103837061%;} .row-fluid .offset10{margin-left:88.03418803418803%;*margin-left:87.92780505546462%;} .row-fluid .offset10:first-child{margin-left:85.47008547008548%;*margin-left:85.36370249136206%;} .row-fluid .offset9{margin-left:79.48717948717949%;*margin-left:79.38079650845607%;} .row-fluid .offset9:first-child{margin-left:76.92307692307693%;*margin-left:76.81669394435352%;} .row-fluid .offset8{margin-left:70.94017094017094%;*margin-left:70.83378796144753%;} .row-fluid .offset8:first-child{margin-left:68.37606837606839%;*margin-left:68.26968539734497%;} .row-fluid .offset7{margin-left:62.393162393162385%;*margin-left:62.28677941443899%;} .row-fluid .offset7:first-child{margin-left:59.82905982905982%;*margin-left:59.72267685033642%;} .row-fluid .offset6{margin-left:53.84615384615384%;*margin-left:53.739770867430444%;} .row-fluid .offset6:first-child{margin-left:51.28205128205128%;*margin-left:51.175668303327875%;} .row-fluid .offset5{margin-left:45.299145299145295%;*margin-left:45.1927623204219%;} .row-fluid .offset5:first-child{margin-left:42.73504273504273%;*margin-left:42.62865975631933%;} .row-fluid .offset4{margin-left:36.75213675213675%;*margin-left:36.645753773413354%;} .row-fluid .offset4:first-child{margin-left:34.18803418803419%;*margin-left:34.081651209310785%;} .row-fluid .offset3{margin-left:28.205128205128204%;*margin-left:28.0987452264048%;} .row-fluid .offset3:first-child{margin-left:25.641025641025642%;*margin-left:25.53464266230224%;} .row-fluid .offset2{margin-left:19.65811965811966%;*margin-left:19.551736679396257%;} .row-fluid .offset2:first-child{margin-left:17.094017094017094%;*margin-left:16.98763411529369%;} .row-fluid .offset1{margin-left:11.11111111111111%;*margin-left:11.004728132387708%;} .row-fluid .offset1:first-child{margin-left:8.547008547008547%;*margin-left:8.440625568285142%;} input,textarea,.uneditable-input{margin-left:0;} .controls-row [class*="span"]+[class*="span"]{margin-left:30px;} input.span12,textarea.span12,.uneditable-input.span12{width:1156px;} input.span11,textarea.span11,.uneditable-input.span11{width:1056px;} input.span10,textarea.span10,.uneditable-input.span10{width:956px;} input.span9,textarea.span9,.uneditable-input.span9{width:856px;} input.span8,textarea.span8,.uneditable-input.span8{width:756px;} input.span7,textarea.span7,.uneditable-input.span7{width:656px;} input.span6,textarea.span6,.uneditable-input.span6{width:556px;} input.span5,textarea.span5,.uneditable-input.span5{width:456px;} input.span4,textarea.span4,.uneditable-input.span4{width:356px;} input.span3,textarea.span3,.uneditable-input.span3{width:256px;} input.span2,textarea.span2,.uneditable-input.span2{width:156px;} input.span1,textarea.span1,.uneditable-input.span1{width:56px;} .thumbnails{margin-left:-30px;} .thumbnails>li{margin-left:30px;} .row-fluid .thumbnails{margin-left:0;}}@media (max-width:979px){body{padding-top:0;} .navbar-fixed-top,.navbar-fixed-bottom{position:static;} .navbar-fixed-top{margin-bottom:20px;} .navbar-fixed-bottom{margin-top:20px;} .navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding:5px;} .navbar .container{width:auto;padding:0;} .navbar .brand{padding-left:10px;padding-right:10px;margin:0 0 0 -5px;} .nav-collapse{clear:both;} .nav-collapse .nav{float:none;margin:0 0 10px;} .nav-collapse .nav>li{float:none;} .nav-collapse .nav>li>a{margin-bottom:2px;} .nav-collapse .nav>.divider-vertical{display:none;} .nav-collapse .nav .nav-header{color:#777777;text-shadow:none;} .nav-collapse .nav>li>a,.nav-collapse .dropdown-menu a{padding:9px 15px;font-weight:bold;color:#777777;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;} .nav-collapse .btn{padding:4px 10px 4px;font-weight:normal;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} .nav-collapse .dropdown-menu li+li a{margin-bottom:2px;} .nav-collapse .nav>li>a:hover,.nav-collapse .nav>li>a:focus,.nav-collapse .dropdown-menu a:hover,.nav-collapse .dropdown-menu a:focus{background-color:#f2f2f2;} .navbar-inverse .nav-collapse .nav>li>a,.navbar-inverse .nav-collapse .dropdown-menu a{color:#999999;} .navbar-inverse .nav-collapse .nav>li>a:hover,.navbar-inverse .nav-collapse .nav>li>a:focus,.navbar-inverse .nav-collapse .dropdown-menu a:hover,.navbar-inverse .nav-collapse .dropdown-menu a:focus{background-color:#111111;} .nav-collapse.in .btn-group{margin-top:5px;padding:0;} .nav-collapse .dropdown-menu{position:static;top:auto;left:auto;float:none;display:none;max-width:none;margin:0 15px;padding:0;background-color:transparent;border:none;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;} .nav-collapse .open>.dropdown-menu{display:block;} .nav-collapse .dropdown-menu:before,.nav-collapse .dropdown-menu:after{display:none;} .nav-collapse .dropdown-menu .divider{display:none;} .nav-collapse .nav>li>.dropdown-menu:before,.nav-collapse .nav>li>.dropdown-menu:after{display:none;} .nav-collapse .navbar-form,.nav-collapse .navbar-search{float:none;padding:10px 15px;margin:10px 0;border-top:1px solid #f2f2f2;border-bottom:1px solid #f2f2f2;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.1);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.1);box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.1);} .navbar-inverse .nav-collapse .navbar-form,.navbar-inverse .nav-collapse .navbar-search{border-top-color:#111111;border-bottom-color:#111111;} .navbar .nav-collapse .nav.pull-right{float:none;margin-left:0;} .nav-collapse,.nav-collapse.collapse{overflow:hidden;height:0;} .navbar .btn-navbar{display:block;} .navbar-static .navbar-inner{padding-left:10px;padding-right:10px;}}@media (min-width:980px){.nav-collapse.collapse{height:auto !important;overflow:visible !important;}}
diff --git a/core/src/main/resources/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
index 7abb9011cc..7abb9011cc 100644
--- a/core/src/main/resources/spark/ui/static/sorttable.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
diff --git a/core/src/main/resources/spark/ui/static/spark-logo-77x50px-hd.png b/core/src/main/resources/org/apache/spark/ui/static/spark-logo-77x50px-hd.png
index 6c5f0993c4..6c5f0993c4 100644
--- a/core/src/main/resources/spark/ui/static/spark-logo-77x50px-hd.png
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark-logo-77x50px-hd.png
Binary files differ
diff --git a/core/src/main/resources/spark/ui/static/spark_logo.png b/core/src/main/resources/org/apache/spark/ui/static/spark_logo.png
index 4b18734779..4b18734779 100644
--- a/core/src/main/resources/spark/ui/static/spark_logo.png
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark_logo.png
Binary files differ
diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css
new file mode 100644
index 0000000000..a600c06c04
--- /dev/null
+++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css
@@ -0,0 +1,63 @@
+.navbar {
+ height: 50px;
+ font-size: 15px;
+ margin-bottom: 15px;
+}
+
+.navbar .navbar-inner {
+ height: 50px;
+}
+
+.navbar .brand {
+ margin-right: 20px;
+ margin-bottom: 0;
+ margin-top: 0;
+ margin-left: 10px;
+ padding: 0;
+}
+
+.navbar .nav > li {
+ height: 50px;
+}
+
+.navbar .nav > li a {
+ height: 30px;
+ line-height: 30px;
+}
+
+.navbar-text {
+ height: 50px;
+ line-height: 50px;
+}
+
+table.sortable thead {
+ cursor: pointer;
+}
+
+.progress {
+ margin-bottom: 0px; position: relative
+}
+
+.progress-completed .bar,
+.progress .bar-completed {
+ background-color: #3EC0FF;
+ background-image: -moz-linear-gradient(top, #44CBFF, #34B0EE);
+ background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#44CBFF), to(#34B0EE));
+ background-image: -webkit-linear-gradient(top, #44CBFF, #34B0EE);
+ background-image: -o-linear-gradient(top, #44CBFF, #34B0EE);
+ background-image: linear-gradient(to bottom, #64CBFF, #54B0EE);
+ background-repeat: repeat-x;
+ filter: progid:dximagetransform.microsoft.gradient(startColorstr='#FF44CBFF', endColorstr='#FF34B0EE', GradientType=0);
+}
+
+.progress-running .bar,
+.progress .bar-running {
+ background-color: #A0DFFF;
+ background-image: -moz-linear-gradient(top, #A4EDFF, #94DDFF);
+ background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#A4EDFF), to(#94DDFF));
+ background-image: -webkit-linear-gradient(top, #A4EDFF, #94DDFF);
+ background-image: -o-linear-gradient(top, #A4EDFF, #94DDFF);
+ background-image: linear-gradient(to bottom, #A4EDFF, #94DDFF);
+ background-repeat: repeat-x;
+ filter: progid:dximagetransform.microsoft.gradient(startColorstr='#FFA4EDFF', endColorstr='#FF94DDFF', GradientType=0);
+}
diff --git a/core/src/main/resources/spark/ui/static/bootstrap-responsive.min.css b/core/src/main/resources/spark/ui/static/bootstrap-responsive.min.css
deleted file mode 100644
index f4ede63f32..0000000000
--- a/core/src/main/resources/spark/ui/static/bootstrap-responsive.min.css
+++ /dev/null
@@ -1,9 +0,0 @@
-/*!
- * Bootstrap Responsive v2.3.2
- *
- * Copyright 2012 Twitter, Inc
- * Licensed under the Apache License v2.0
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Designed and built with all the love in the world @twitter by @mdo and @fat.
- */.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;line-height:0;content:""}.clearfix:after{clear:both}.hide-text{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.input-block-level{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}@-ms-viewport{width:device-width}.hidden{display:none;visibility:hidden}.visible-phone{display:none!important}.visible-tablet{display:none!important}.hidden-desktop{display:none!important}.visible-desktop{display:inherit!important}@media(min-width:768px) and (max-width:979px){.hidden-desktop{display:inherit!important}.visible-desktop{display:none!important}.visible-tablet{display:inherit!important}.hidden-tablet{display:none!important}}@media(max-width:767px){.hidden-desktop{display:inherit!important}.visible-desktop{display:none!important}.visible-phone{display:inherit!important}.hidden-phone{display:none!important}}.visible-print{display:none!important}@media print{.visible-print{display:inherit!important}.hidden-print{display:none!important}}@media(min-width:1200px){.row{margin-left:-30px;*zoom:1}.row:before,.row:after{display:table;line-height:0;content:""}.row:after{clear:both}[class*="span"]{float:left;min-height:1px;margin-left:30px}.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:1170px}.span12{width:1170px}.span11{width:1070px}.span10{width:970px}.span9{width:870px}.span8{width:770px}.span7{width:670px}.span6{width:570px}.span5{width:470px}.span4{width:370px}.span3{width:270px}.span2{width:170px}.span1{width:70px}.offset12{margin-left:1230px}.offset11{margin-left:1130px}.offset10{margin-left:1030px}.offset9{margin-left:930px}.offset8{margin-left:830px}.offset7{margin-left:730px}.offset6{margin-left:630px}.offset5{margin-left:530px}.offset4{margin-left:430px}.offset3{margin-left:330px}.offset2{margin-left:230px}.offset1{margin-left:130px}.row-fluid{width:100%;*zoom:1}.row-fluid:before,.row-fluid:after{display:table;line-height:0;content:""}.row-fluid:after{clear:both}.row-fluid [class*="span"]{display:block;float:left;width:100%;min-height:30px;margin-left:2.564102564102564%;*margin-left:2.5109110747408616%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="span"]:first-child{margin-left:0}.row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.564102564102564%}.row-fluid .span12{width:100%;*width:99.94680851063829%}.row-fluid .span11{width:91.45299145299145%;*width:91.39979996362975%}.row-fluid .span10{width:82.90598290598291%;*width:82.8527914166212%}.row-fluid .span9{width:74.35897435897436%;*width:74.30578286961266%}.row-fluid .span8{width:65.81196581196582%;*width:65.75877432260411%}.row-fluid .span7{width:57.26495726495726%;*width:57.21176577559556%}.row-fluid .span6{width:48.717948717948715%;*width:48.664757228587014%}.row-fluid .span5{width:40.17094017094017%;*width:40.11774868157847%}.row-fluid .span4{width:31.623931623931625%;*width:31.570740134569924%}.row-fluid .span3{width:23.076923076923077%;*width:23.023731587561375%}.row-fluid .span2{width:14.52991452991453%;*width:14.476723040552828%}.row-fluid .span1{width:5.982905982905983%;*width:5.929714493544281%}.row-fluid .offset12{margin-left:105.12820512820512%;*margin-left:105.02182214948171%}.row-fluid .offset12:first-child{margin-left:102.56410256410257%;*margin-left:102.45771958537915%}.row-fluid .offset11{margin-left:96.58119658119658%;*margin-left:96.47481360247316%}.row-fluid .offset11:first-child{margin-left:94.01709401709402%;*margin-left:93.91071103837061%}.row-fluid .offset10{margin-left:88.03418803418803%;*margin-left:87.92780505546462%}.row-fluid .offset10:first-child{margin-left:85.47008547008548%;*margin-left:85.36370249136206%}.row-fluid .offset9{margin-left:79.48717948717949%;*margin-left:79.38079650845607%}.row-fluid .offset9:first-child{margin-left:76.92307692307693%;*margin-left:76.81669394435352%}.row-fluid .offset8{margin-left:70.94017094017094%;*margin-left:70.83378796144753%}.row-fluid .offset8:first-child{margin-left:68.37606837606839%;*margin-left:68.26968539734497%}.row-fluid .offset7{margin-left:62.393162393162385%;*margin-left:62.28677941443899%}.row-fluid .offset7:first-child{margin-left:59.82905982905982%;*margin-left:59.72267685033642%}.row-fluid .offset6{margin-left:53.84615384615384%;*margin-left:53.739770867430444%}.row-fluid .offset6:first-child{margin-left:51.28205128205128%;*margin-left:51.175668303327875%}.row-fluid .offset5{margin-left:45.299145299145295%;*margin-left:45.1927623204219%}.row-fluid .offset5:first-child{margin-left:42.73504273504273%;*margin-left:42.62865975631933%}.row-fluid .offset4{margin-left:36.75213675213675%;*margin-left:36.645753773413354%}.row-fluid .offset4:first-child{margin-left:34.18803418803419%;*margin-left:34.081651209310785%}.row-fluid .offset3{margin-left:28.205128205128204%;*margin-left:28.0987452264048%}.row-fluid .offset3:first-child{margin-left:25.641025641025642%;*margin-left:25.53464266230224%}.row-fluid .offset2{margin-left:19.65811965811966%;*margin-left:19.551736679396257%}.row-fluid .offset2:first-child{margin-left:17.094017094017094%;*margin-left:16.98763411529369%}.row-fluid .offset1{margin-left:11.11111111111111%;*margin-left:11.004728132387708%}.row-fluid .offset1:first-child{margin-left:8.547008547008547%;*margin-left:8.440625568285142%}input,textarea,.uneditable-input{margin-left:0}.controls-row [class*="span"]+[class*="span"]{margin-left:30px}input.span12,textarea.span12,.uneditable-input.span12{width:1156px}input.span11,textarea.span11,.uneditable-input.span11{width:1056px}input.span10,textarea.span10,.uneditable-input.span10{width:956px}input.span9,textarea.span9,.uneditable-input.span9{width:856px}input.span8,textarea.span8,.uneditable-input.span8{width:756px}input.span7,textarea.span7,.uneditable-input.span7{width:656px}input.span6,textarea.span6,.uneditable-input.span6{width:556px}input.span5,textarea.span5,.uneditable-input.span5{width:456px}input.span4,textarea.span4,.uneditable-input.span4{width:356px}input.span3,textarea.span3,.uneditable-input.span3{width:256px}input.span2,textarea.span2,.uneditable-input.span2{width:156px}input.span1,textarea.span1,.uneditable-input.span1{width:56px}.thumbnails{margin-left:-30px}.thumbnails>li{margin-left:30px}.row-fluid .thumbnails{margin-left:0}}@media(min-width:768px) and (max-width:979px){.row{margin-left:-20px;*zoom:1}.row:before,.row:after{display:table;line-height:0;content:""}.row:after{clear:both}[class*="span"]{float:left;min-height:1px;margin-left:20px}.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:724px}.span12{width:724px}.span11{width:662px}.span10{width:600px}.span9{width:538px}.span8{width:476px}.span7{width:414px}.span6{width:352px}.span5{width:290px}.span4{width:228px}.span3{width:166px}.span2{width:104px}.span1{width:42px}.offset12{margin-left:764px}.offset11{margin-left:702px}.offset10{margin-left:640px}.offset9{margin-left:578px}.offset8{margin-left:516px}.offset7{margin-left:454px}.offset6{margin-left:392px}.offset5{margin-left:330px}.offset4{margin-left:268px}.offset3{margin-left:206px}.offset2{margin-left:144px}.offset1{margin-left:82px}.row-fluid{width:100%;*zoom:1}.row-fluid:before,.row-fluid:after{display:table;line-height:0;content:""}.row-fluid:after{clear:both}.row-fluid [class*="span"]{display:block;float:left;width:100%;min-height:30px;margin-left:2.7624309392265194%;*margin-left:2.709239449864817%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="span"]:first-child{margin-left:0}.row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.7624309392265194%}.row-fluid .span12{width:100%;*width:99.94680851063829%}.row-fluid .span11{width:91.43646408839778%;*width:91.38327259903608%}.row-fluid .span10{width:82.87292817679558%;*width:82.81973668743387%}.row-fluid .span9{width:74.30939226519337%;*width:74.25620077583166%}.row-fluid .span8{width:65.74585635359117%;*width:65.69266486422946%}.row-fluid .span7{width:57.18232044198895%;*width:57.12912895262725%}.row-fluid .span6{width:48.61878453038674%;*width:48.56559304102504%}.row-fluid .span5{width:40.05524861878453%;*width:40.00205712942283%}.row-fluid .span4{width:31.491712707182323%;*width:31.43852121782062%}.row-fluid .span3{width:22.92817679558011%;*width:22.87498530621841%}.row-fluid .span2{width:14.3646408839779%;*width:14.311449394616199%}.row-fluid .span1{width:5.801104972375691%;*width:5.747913483013988%}.row-fluid .offset12{margin-left:105.52486187845304%;*margin-left:105.41847889972962%}.row-fluid .offset12:first-child{margin-left:102.76243093922652%;*margin-left:102.6560479605031%}.row-fluid .offset11{margin-left:96.96132596685082%;*margin-left:96.8549429881274%}.row-fluid .offset11:first-child{margin-left:94.1988950276243%;*margin-left:94.09251204890089%}.row-fluid .offset10{margin-left:88.39779005524862%;*margin-left:88.2914070765252%}.row-fluid .offset10:first-child{margin-left:85.6353591160221%;*margin-left:85.52897613729868%}.row-fluid .offset9{margin-left:79.8342541436464%;*margin-left:79.72787116492299%}.row-fluid .offset9:first-child{margin-left:77.07182320441989%;*margin-left:76.96544022569647%}.row-fluid .offset8{margin-left:71.2707182320442%;*margin-left:71.16433525332079%}.row-fluid .offset8:first-child{margin-left:68.50828729281768%;*margin-left:68.40190431409427%}.row-fluid .offset7{margin-left:62.70718232044199%;*margin-left:62.600799341718584%}.row-fluid .offset7:first-child{margin-left:59.94475138121547%;*margin-left:59.838368402492065%}.row-fluid .offset6{margin-left:54.14364640883978%;*margin-left:54.037263430116376%}.row-fluid .offset6:first-child{margin-left:51.38121546961326%;*margin-left:51.27483249088986%}.row-fluid .offset5{margin-left:45.58011049723757%;*margin-left:45.47372751851417%}.row-fluid .offset5:first-child{margin-left:42.81767955801105%;*margin-left:42.71129657928765%}.row-fluid .offset4{margin-left:37.01657458563536%;*margin-left:36.91019160691196%}.row-fluid .offset4:first-child{margin-left:34.25414364640884%;*margin-left:34.14776066768544%}.row-fluid .offset3{margin-left:28.45303867403315%;*margin-left:28.346655695309746%}.row-fluid .offset3:first-child{margin-left:25.69060773480663%;*margin-left:25.584224756083227%}.row-fluid .offset2{margin-left:19.88950276243094%;*margin-left:19.783119783707537%}.row-fluid .offset2:first-child{margin-left:17.12707182320442%;*margin-left:17.02068884448102%}.row-fluid .offset1{margin-left:11.32596685082873%;*margin-left:11.219583872105325%}.row-fluid .offset1:first-child{margin-left:8.56353591160221%;*margin-left:8.457152932878806%}input,textarea,.uneditable-input{margin-left:0}.controls-row [class*="span"]+[class*="span"]{margin-left:20px}input.span12,textarea.span12,.uneditable-input.span12{width:710px}input.span11,textarea.span11,.uneditable-input.span11{width:648px}input.span10,textarea.span10,.uneditable-input.span10{width:586px}input.span9,textarea.span9,.uneditable-input.span9{width:524px}input.span8,textarea.span8,.uneditable-input.span8{width:462px}input.span7,textarea.span7,.uneditable-input.span7{width:400px}input.span6,textarea.span6,.uneditable-input.span6{width:338px}input.span5,textarea.span5,.uneditable-input.span5{width:276px}input.span4,textarea.span4,.uneditable-input.span4{width:214px}input.span3,textarea.span3,.uneditable-input.span3{width:152px}input.span2,textarea.span2,.uneditable-input.span2{width:90px}input.span1,textarea.span1,.uneditable-input.span1{width:28px}}@media(max-width:767px){body{padding-right:20px;padding-left:20px}.navbar-fixed-top,.navbar-fixed-bottom,.navbar-static-top{margin-right:-20px;margin-left:-20px}.container-fluid{padding:0}.dl-horizontal dt{float:none;width:auto;clear:none;text-align:left}.dl-horizontal dd{margin-left:0}.container{width:auto}.row-fluid{width:100%}.row,.thumbnails{margin-left:0}.thumbnails>li{float:none;margin-left:0}[class*="span"],.uneditable-input[class*="span"],.row-fluid [class*="span"]{display:block;float:none;width:100%;margin-left:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.span12,.row-fluid .span12{width:100%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="offset"]:first-child{margin-left:0}.input-large,.input-xlarge,.input-xxlarge,input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.input-prepend input,.input-append input,.input-prepend input[class*="span"],.input-append input[class*="span"]{display:inline-block;width:auto}.controls-row [class*="span"]+[class*="span"]{margin-left:0}.modal{position:fixed;top:20px;right:20px;left:20px;width:auto;margin:0}.modal.fade{top:-100px}.modal.fade.in{top:20px}}@media(max-width:480px){.nav-collapse{-webkit-transform:translate3d(0,0,0)}.page-header h1 small{display:block;line-height:20px}input[type="checkbox"],input[type="radio"]{border:1px solid #ccc}.form-horizontal .control-label{float:none;width:auto;padding-top:0;text-align:left}.form-horizontal .controls{margin-left:0}.form-horizontal .control-list{padding-top:0}.form-horizontal .form-actions{padding-right:10px;padding-left:10px}.media .pull-left,.media .pull-right{display:block;float:none;margin-bottom:10px}.media-object{margin-right:0;margin-left:0}.modal{top:10px;right:10px;left:10px}.modal-header .close{padding:10px;margin:-10px}.carousel-caption{position:static}}@media(max-width:979px){body{padding-top:0}.navbar-fixed-top,.navbar-fixed-bottom{position:static}.navbar-fixed-top{margin-bottom:20px}.navbar-fixed-bottom{margin-top:20px}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding:5px}.navbar .container{width:auto;padding:0}.navbar .brand{padding-right:10px;padding-left:10px;margin:0 0 0 -5px}.nav-collapse{clear:both}.nav-collapse .nav{float:none;margin:0 0 10px}.nav-collapse .nav>li{float:none}.nav-collapse .nav>li>a{margin-bottom:2px}.nav-collapse .nav>.divider-vertical{display:none}.nav-collapse .nav .nav-header{color:#777;text-shadow:none}.nav-collapse .nav>li>a,.nav-collapse .dropdown-menu a{padding:9px 15px;font-weight:bold;color:#777;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.nav-collapse .btn{padding:4px 10px 4px;font-weight:normal;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.nav-collapse .dropdown-menu li+li a{margin-bottom:2px}.nav-collapse .nav>li>a:hover,.nav-collapse .nav>li>a:focus,.nav-collapse .dropdown-menu a:hover,.nav-collapse .dropdown-menu a:focus{background-color:#f2f2f2}.navbar-inverse .nav-collapse .nav>li>a,.navbar-inverse .nav-collapse .dropdown-menu a{color:#999}.navbar-inverse .nav-collapse .nav>li>a:hover,.navbar-inverse .nav-collapse .nav>li>a:focus,.navbar-inverse .nav-collapse .dropdown-menu a:hover,.navbar-inverse .nav-collapse .dropdown-menu a:focus{background-color:#111}.nav-collapse.in .btn-group{padding:0;margin-top:5px}.nav-collapse .dropdown-menu{position:static;top:auto;left:auto;display:none;float:none;max-width:none;padding:0;margin:0 15px;background-color:transparent;border:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.nav-collapse .open>.dropdown-menu{display:block}.nav-collapse .dropdown-menu:before,.nav-collapse .dropdown-menu:after{display:none}.nav-collapse .dropdown-menu .divider{display:none}.nav-collapse .nav>li>.dropdown-menu:before,.nav-collapse .nav>li>.dropdown-menu:after{display:none}.nav-collapse .navbar-form,.nav-collapse .navbar-search{float:none;padding:10px 15px;margin:10px 0;border-top:1px solid #f2f2f2;border-bottom:1px solid #f2f2f2;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.1);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.1);box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.1)}.navbar-inverse .nav-collapse .navbar-form,.navbar-inverse .nav-collapse .navbar-search{border-top-color:#111;border-bottom-color:#111}.navbar .nav-collapse .nav.pull-right{float:none;margin-left:0}.nav-collapse,.nav-collapse.collapse{height:0;overflow:hidden}.navbar .btn-navbar{display:block}.navbar-static .navbar-inner{padding-right:10px;padding-left:10px}}@media(min-width:980px){.nav-collapse.collapse{height:auto!important;overflow:visible!important}}
diff --git a/core/src/main/resources/spark/ui/static/bootstrap.min.css b/core/src/main/resources/spark/ui/static/bootstrap.min.css
deleted file mode 100644
index b6428e6958..0000000000
--- a/core/src/main/resources/spark/ui/static/bootstrap.min.css
+++ /dev/null
@@ -1,9 +0,0 @@
-/*!
- * Bootstrap v2.3.2
- *
- * Copyright 2012 Twitter, Inc
- * Licensed under the Apache License v2.0
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Designed and built with all the love in the world @twitter by @mdo and @fat.
- */.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;line-height:0;content:""}.clearfix:after{clear:both}.hide-text{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.input-block-level{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}audio:not([controls]){display:none}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}a:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}a:hover,a:active{outline:0}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sup{top:-0.5em}sub{bottom:-0.25em}img{width:auto\9;height:auto;max-width:100%;vertical-align:middle;border:0;-ms-interpolation-mode:bicubic}#map_canvas img,.google-maps img{max-width:none}button,input,select,textarea{margin:0;font-size:100%;vertical-align:middle}button,input{*overflow:visible;line-height:normal}button::-moz-focus-inner,input::-moz-focus-inner{padding:0;border:0}button,html input[type="button"],input[type="reset"],input[type="submit"]{cursor:pointer;-webkit-appearance:button}label,select,button,input[type="button"],input[type="reset"],input[type="submit"],input[type="radio"],input[type="checkbox"]{cursor:pointer}input[type="search"]{-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;-webkit-appearance:textfield}input[type="search"]::-webkit-search-decoration,input[type="search"]::-webkit-search-cancel-button{-webkit-appearance:none}textarea{overflow:auto;vertical-align:top}@media print{*{color:#000!important;text-shadow:none!important;background:transparent!important;box-shadow:none!important}a,a:visited{text-decoration:underline}a[href]:after{content:" (" attr(href) ")"}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100%!important}@page{margin:.5cm}p,h2,h3{orphans:3;widows:3}h2,h3{page-break-after:avoid}}body{margin:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;line-height:20px;color:#333;background-color:#fff}a{color:#08c;text-decoration:none}a:hover,a:focus{color:#005580;text-decoration:underline}.img-rounded{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.img-polaroid{padding:4px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.1);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.1);box-shadow:0 1px 3px rgba(0,0,0,0.1)}.img-circle{-webkit-border-radius:500px;-moz-border-radius:500px;border-radius:500px}.row{margin-left:-20px;*zoom:1}.row:before,.row:after{display:table;line-height:0;content:""}.row:after{clear:both}[class*="span"]{float:left;min-height:1px;margin-left:20px}.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.span12{width:940px}.span11{width:860px}.span10{width:780px}.span9{width:700px}.span8{width:620px}.span7{width:540px}.span6{width:460px}.span5{width:380px}.span4{width:300px}.span3{width:220px}.span2{width:140px}.span1{width:60px}.offset12{margin-left:980px}.offset11{margin-left:900px}.offset10{margin-left:820px}.offset9{margin-left:740px}.offset8{margin-left:660px}.offset7{margin-left:580px}.offset6{margin-left:500px}.offset5{margin-left:420px}.offset4{margin-left:340px}.offset3{margin-left:260px}.offset2{margin-left:180px}.offset1{margin-left:100px}.row-fluid{width:100%;*zoom:1}.row-fluid:before,.row-fluid:after{display:table;line-height:0;content:""}.row-fluid:after{clear:both}.row-fluid [class*="span"]{display:block;float:left;width:100%;min-height:30px;margin-left:2.127659574468085%;*margin-left:2.074468085106383%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="span"]:first-child{margin-left:0}.row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.127659574468085%}.row-fluid .span12{width:100%;*width:99.94680851063829%}.row-fluid .span11{width:91.48936170212765%;*width:91.43617021276594%}.row-fluid .span10{width:82.97872340425532%;*width:82.92553191489361%}.row-fluid .span9{width:74.46808510638297%;*width:74.41489361702126%}.row-fluid .span8{width:65.95744680851064%;*width:65.90425531914893%}.row-fluid .span7{width:57.44680851063829%;*width:57.39361702127659%}.row-fluid .span6{width:48.93617021276595%;*width:48.88297872340425%}.row-fluid .span5{width:40.42553191489362%;*width:40.37234042553192%}.row-fluid .span4{width:31.914893617021278%;*width:31.861702127659576%}.row-fluid .span3{width:23.404255319148934%;*width:23.351063829787233%}.row-fluid .span2{width:14.893617021276595%;*width:14.840425531914894%}.row-fluid .span1{width:6.382978723404255%;*width:6.329787234042553%}.row-fluid .offset12{margin-left:104.25531914893617%;*margin-left:104.14893617021275%}.row-fluid .offset12:first-child{margin-left:102.12765957446808%;*margin-left:102.02127659574467%}.row-fluid .offset11{margin-left:95.74468085106382%;*margin-left:95.6382978723404%}.row-fluid .offset11:first-child{margin-left:93.61702127659574%;*margin-left:93.51063829787232%}.row-fluid .offset10{margin-left:87.23404255319149%;*margin-left:87.12765957446807%}.row-fluid .offset10:first-child{margin-left:85.1063829787234%;*margin-left:84.99999999999999%}.row-fluid .offset9{margin-left:78.72340425531914%;*margin-left:78.61702127659572%}.row-fluid .offset9:first-child{margin-left:76.59574468085106%;*margin-left:76.48936170212764%}.row-fluid .offset8{margin-left:70.2127659574468%;*margin-left:70.10638297872339%}.row-fluid .offset8:first-child{margin-left:68.08510638297872%;*margin-left:67.9787234042553%}.row-fluid .offset7{margin-left:61.70212765957446%;*margin-left:61.59574468085106%}.row-fluid .offset7:first-child{margin-left:59.574468085106375%;*margin-left:59.46808510638297%}.row-fluid .offset6{margin-left:53.191489361702125%;*margin-left:53.085106382978715%}.row-fluid .offset6:first-child{margin-left:51.063829787234035%;*margin-left:50.95744680851063%}.row-fluid .offset5{margin-left:44.68085106382979%;*margin-left:44.57446808510638%}.row-fluid .offset5:first-child{margin-left:42.5531914893617%;*margin-left:42.4468085106383%}.row-fluid .offset4{margin-left:36.170212765957444%;*margin-left:36.06382978723405%}.row-fluid .offset4:first-child{margin-left:34.04255319148936%;*margin-left:33.93617021276596%}.row-fluid .offset3{margin-left:27.659574468085104%;*margin-left:27.5531914893617%}.row-fluid .offset3:first-child{margin-left:25.53191489361702%;*margin-left:25.425531914893618%}.row-fluid .offset2{margin-left:19.148936170212764%;*margin-left:19.04255319148936%}.row-fluid .offset2:first-child{margin-left:17.02127659574468%;*margin-left:16.914893617021278%}.row-fluid .offset1{margin-left:10.638297872340425%;*margin-left:10.53191489361702%}.row-fluid .offset1:first-child{margin-left:8.51063829787234%;*margin-left:8.404255319148938%}[class*="span"].hide,.row-fluid [class*="span"].hide{display:none}[class*="span"].pull-right,.row-fluid [class*="span"].pull-right{float:right}.container{margin-right:auto;margin-left:auto;*zoom:1}.container:before,.container:after{display:table;line-height:0;content:""}.container:after{clear:both}.container-fluid{padding-right:20px;padding-left:20px;*zoom:1}.container-fluid:before,.container-fluid:after{display:table;line-height:0;content:""}.container-fluid:after{clear:both}p{margin:0 0 10px}.lead{margin-bottom:20px;font-size:21px;font-weight:200;line-height:30px}small{font-size:85%}strong{font-weight:bold}em{font-style:italic}cite{font-style:normal}.muted{color:#999}a.muted:hover,a.muted:focus{color:#808080}.text-warning{color:#c09853}a.text-warning:hover,a.text-warning:focus{color:#a47e3c}.text-error{color:#b94a48}a.text-error:hover,a.text-error:focus{color:#953b39}.text-info{color:#3a87ad}a.text-info:hover,a.text-info:focus{color:#2d6987}.text-success{color:#468847}a.text-success:hover,a.text-success:focus{color:#356635}.text-left{text-align:left}.text-right{text-align:right}.text-center{text-align:center}h1,h2,h3,h4,h5,h6{margin:10px 0;font-family:inherit;font-weight:bold;line-height:20px;color:inherit;text-rendering:optimizelegibility}h1 small,h2 small,h3 small,h4 small,h5 small,h6 small{font-weight:normal;line-height:1;color:#999}h1,h2,h3{line-height:40px}h1{font-size:38.5px}h2{font-size:31.5px}h3{font-size:24.5px}h4{font-size:17.5px}h5{font-size:14px}h6{font-size:11.9px}h1 small{font-size:24.5px}h2 small{font-size:17.5px}h3 small{font-size:14px}h4 small{font-size:14px}.page-header{padding-bottom:9px;margin:20px 0 30px;border-bottom:1px solid #eee}ul,ol{padding:0;margin:0 0 10px 25px}ul ul,ul ol,ol ol,ol ul{margin-bottom:0}li{line-height:20px}ul.unstyled,ol.unstyled{margin-left:0;list-style:none}ul.inline,ol.inline{margin-left:0;list-style:none}ul.inline>li,ol.inline>li{display:inline-block;*display:inline;padding-right:5px;padding-left:5px;*zoom:1}dl{margin-bottom:20px}dt,dd{line-height:20px}dt{font-weight:bold}dd{margin-left:10px}.dl-horizontal{*zoom:1}.dl-horizontal:before,.dl-horizontal:after{display:table;line-height:0;content:""}.dl-horizontal:after{clear:both}.dl-horizontal dt{float:left;width:160px;overflow:hidden;clear:left;text-align:right;text-overflow:ellipsis;white-space:nowrap}.dl-horizontal dd{margin-left:180px}hr{margin:20px 0;border:0;border-top:1px solid #eee;border-bottom:1px solid #fff}abbr[title],abbr[data-original-title]{cursor:help;border-bottom:1px dotted #999}abbr.initialism{font-size:90%;text-transform:uppercase}blockquote{padding:0 0 0 15px;margin:0 0 20px;border-left:5px solid #eee}blockquote p{margin-bottom:0;font-size:17.5px;font-weight:300;line-height:1.25}blockquote small{display:block;line-height:20px;color:#999}blockquote small:before{content:'\2014 \00A0'}blockquote.pull-right{float:right;padding-right:15px;padding-left:0;border-right:5px solid #eee;border-left:0}blockquote.pull-right p,blockquote.pull-right small{text-align:right}blockquote.pull-right small:before{content:''}blockquote.pull-right small:after{content:'\00A0 \2014'}q:before,q:after,blockquote:before,blockquote:after{content:""}address{display:block;margin-bottom:20px;font-style:normal;line-height:20px}code,pre{padding:0 3px 2px;font-family:Monaco,Menlo,Consolas,"Courier New",monospace;font-size:12px;color:#333;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}code{padding:2px 4px;color:#d14;white-space:nowrap;background-color:#f7f7f9;border:1px solid #e1e1e8}pre{display:block;padding:9.5px;margin:0 0 10px;font-size:13px;line-height:20px;word-break:break-all;word-wrap:break-word;white-space:pre;white-space:pre-wrap;background-color:#f5f5f5;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.15);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}pre.prettyprint{margin-bottom:20px}pre code{padding:0;color:inherit;white-space:pre;white-space:pre-wrap;background-color:transparent;border:0}.pre-scrollable{max-height:340px;overflow-y:scroll}form{margin:0 0 20px}fieldset{padding:0;margin:0;border:0}legend{display:block;width:100%;padding:0;margin-bottom:20px;font-size:21px;line-height:40px;color:#333;border:0;border-bottom:1px solid #e5e5e5}legend small{font-size:15px;color:#999}label,input,button,select,textarea{font-size:14px;font-weight:normal;line-height:20px}input,button,select,textarea{font-family:"Helvetica Neue",Helvetica,Arial,sans-serif}label{display:block;margin-bottom:5px}select,textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{display:inline-block;height:20px;padding:4px 6px;margin-bottom:10px;font-size:14px;line-height:20px;color:#555;vertical-align:middle;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}input,textarea,.uneditable-input{width:206px}textarea{height:auto}textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{background-color:#fff;border:1px solid #ccc;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-webkit-transition:border linear .2s,box-shadow linear .2s;-moz-transition:border linear .2s,box-shadow linear .2s;-o-transition:border linear .2s,box-shadow linear .2s;transition:border linear .2s,box-shadow linear .2s}textarea:focus,input[type="text"]:focus,input[type="password"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus,.uneditable-input:focus{border-color:rgba(82,168,236,0.8);outline:0;outline:thin dotted \9;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6)}input[type="radio"],input[type="checkbox"]{margin:4px 0 0;margin-top:1px \9;*margin-top:0;line-height:normal}input[type="file"],input[type="image"],input[type="submit"],input[type="reset"],input[type="button"],input[type="radio"],input[type="checkbox"]{width:auto}select,input[type="file"]{height:30px;*margin-top:4px;line-height:30px}select{width:220px;background-color:#fff;border:1px solid #ccc}select[multiple],select[size]{height:auto}select:focus,input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.uneditable-input,.uneditable-textarea{color:#999;cursor:not-allowed;background-color:#fcfcfc;border-color:#ccc;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);box-shadow:inset 0 1px 2px rgba(0,0,0,0.025)}.uneditable-input{overflow:hidden;white-space:nowrap}.uneditable-textarea{width:auto;height:auto}input:-moz-placeholder,textarea:-moz-placeholder{color:#999}input:-ms-input-placeholder,textarea:-ms-input-placeholder{color:#999}input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#999}.radio,.checkbox{min-height:20px;padding-left:20px}.radio input[type="radio"],.checkbox input[type="checkbox"]{float:left;margin-left:-20px}.controls>.radio:first-child,.controls>.checkbox:first-child{padding-top:5px}.radio.inline,.checkbox.inline{display:inline-block;padding-top:5px;margin-bottom:0;vertical-align:middle}.radio.inline+.radio.inline,.checkbox.inline+.checkbox.inline{margin-left:10px}.input-mini{width:60px}.input-small{width:90px}.input-medium{width:150px}.input-large{width:210px}.input-xlarge{width:270px}.input-xxlarge{width:530px}input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"]{float:none;margin-left:0}.input-append input[class*="span"],.input-append .uneditable-input[class*="span"],.input-prepend input[class*="span"],.input-prepend .uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"],.row-fluid .input-prepend [class*="span"],.row-fluid .input-append [class*="span"]{display:inline-block}input,textarea,.uneditable-input{margin-left:0}.controls-row [class*="span"]+[class*="span"]{margin-left:20px}input.span12,textarea.span12,.uneditable-input.span12{width:926px}input.span11,textarea.span11,.uneditable-input.span11{width:846px}input.span10,textarea.span10,.uneditable-input.span10{width:766px}input.span9,textarea.span9,.uneditable-input.span9{width:686px}input.span8,textarea.span8,.uneditable-input.span8{width:606px}input.span7,textarea.span7,.uneditable-input.span7{width:526px}input.span6,textarea.span6,.uneditable-input.span6{width:446px}input.span5,textarea.span5,.uneditable-input.span5{width:366px}input.span4,textarea.span4,.uneditable-input.span4{width:286px}input.span3,textarea.span3,.uneditable-input.span3{width:206px}input.span2,textarea.span2,.uneditable-input.span2{width:126px}input.span1,textarea.span1,.uneditable-input.span1{width:46px}.controls-row{*zoom:1}.controls-row:before,.controls-row:after{display:table;line-height:0;content:""}.controls-row:after{clear:both}.controls-row [class*="span"],.row-fluid .controls-row [class*="span"]{float:left}.controls-row .checkbox[class*="span"],.controls-row .radio[class*="span"]{padding-top:5px}input[disabled],select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#eee}input[type="radio"][disabled],input[type="checkbox"][disabled],input[type="radio"][readonly],input[type="checkbox"][readonly]{background-color:transparent}.control-group.warning .control-label,.control-group.warning .help-block,.control-group.warning .help-inline{color:#c09853}.control-group.warning .checkbox,.control-group.warning .radio,.control-group.warning input,.control-group.warning select,.control-group.warning textarea{color:#c09853}.control-group.warning input,.control-group.warning select,.control-group.warning textarea{border-color:#c09853;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.warning input:focus,.control-group.warning select:focus,.control-group.warning textarea:focus{border-color:#a47e3c;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e}.control-group.warning .input-prepend .add-on,.control-group.warning .input-append .add-on{color:#c09853;background-color:#fcf8e3;border-color:#c09853}.control-group.error .control-label,.control-group.error .help-block,.control-group.error .help-inline{color:#b94a48}.control-group.error .checkbox,.control-group.error .radio,.control-group.error input,.control-group.error select,.control-group.error textarea{color:#b94a48}.control-group.error input,.control-group.error select,.control-group.error textarea{border-color:#b94a48;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.error input:focus,.control-group.error select:focus,.control-group.error textarea:focus{border-color:#953b39;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392}.control-group.error .input-prepend .add-on,.control-group.error .input-append .add-on{color:#b94a48;background-color:#f2dede;border-color:#b94a48}.control-group.success .control-label,.control-group.success .help-block,.control-group.success .help-inline{color:#468847}.control-group.success .checkbox,.control-group.success .radio,.control-group.success input,.control-group.success select,.control-group.success textarea{color:#468847}.control-group.success input,.control-group.success select,.control-group.success textarea{border-color:#468847;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.success input:focus,.control-group.success select:focus,.control-group.success textarea:focus{border-color:#356635;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b}.control-group.success .input-prepend .add-on,.control-group.success .input-append .add-on{color:#468847;background-color:#dff0d8;border-color:#468847}.control-group.info .control-label,.control-group.info .help-block,.control-group.info .help-inline{color:#3a87ad}.control-group.info .checkbox,.control-group.info .radio,.control-group.info input,.control-group.info select,.control-group.info textarea{color:#3a87ad}.control-group.info input,.control-group.info select,.control-group.info textarea{border-color:#3a87ad;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.info input:focus,.control-group.info select:focus,.control-group.info textarea:focus{border-color:#2d6987;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7ab5d3;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7ab5d3;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7ab5d3}.control-group.info .input-prepend .add-on,.control-group.info .input-append .add-on{color:#3a87ad;background-color:#d9edf7;border-color:#3a87ad}input:focus:invalid,textarea:focus:invalid,select:focus:invalid{color:#b94a48;border-color:#ee5f5b}input:focus:invalid:focus,textarea:focus:invalid:focus,select:focus:invalid:focus{border-color:#e9322d;-webkit-box-shadow:0 0 6px #f8b9b7;-moz-box-shadow:0 0 6px #f8b9b7;box-shadow:0 0 6px #f8b9b7}.form-actions{padding:19px 20px 20px;margin-top:20px;margin-bottom:20px;background-color:#f5f5f5;border-top:1px solid #e5e5e5;*zoom:1}.form-actions:before,.form-actions:after{display:table;line-height:0;content:""}.form-actions:after{clear:both}.help-block,.help-inline{color:#595959}.help-block{display:block;margin-bottom:10px}.help-inline{display:inline-block;*display:inline;padding-left:5px;vertical-align:middle;*zoom:1}.input-append,.input-prepend{display:inline-block;margin-bottom:10px;font-size:0;white-space:nowrap;vertical-align:middle}.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input,.input-append .dropdown-menu,.input-prepend .dropdown-menu,.input-append .popover,.input-prepend .popover{font-size:14px}.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input{position:relative;margin-bottom:0;*margin-left:0;vertical-align:top;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-append input:focus,.input-prepend input:focus,.input-append select:focus,.input-prepend select:focus,.input-append .uneditable-input:focus,.input-prepend .uneditable-input:focus{z-index:2}.input-append .add-on,.input-prepend .add-on{display:inline-block;width:auto;height:20px;min-width:16px;padding:4px 5px;font-size:14px;font-weight:normal;line-height:20px;text-align:center;text-shadow:0 1px 0 #fff;background-color:#eee;border:1px solid #ccc}.input-append .add-on,.input-prepend .add-on,.input-append .btn,.input-prepend .btn,.input-append .btn-group>.dropdown-toggle,.input-prepend .btn-group>.dropdown-toggle{vertical-align:top;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-append .active,.input-prepend .active{background-color:#a9dba9;border-color:#46a546}.input-prepend .add-on,.input-prepend .btn{margin-right:-1px}.input-prepend .add-on:first-child,.input-prepend .btn:first-child{-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.input-append input,.input-append select,.input-append .uneditable-input{-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.input-append input+.btn-group .btn:last-child,.input-append select+.btn-group .btn:last-child,.input-append .uneditable-input+.btn-group .btn:last-child{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-append .add-on,.input-append .btn,.input-append .btn-group{margin-left:-1px}.input-append .add-on:last-child,.input-append .btn:last-child,.input-append .btn-group:last-child>.dropdown-toggle{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-prepend.input-append input,.input-prepend.input-append select,.input-prepend.input-append .uneditable-input{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-prepend.input-append input+.btn-group .btn,.input-prepend.input-append select+.btn-group .btn,.input-prepend.input-append .uneditable-input+.btn-group .btn{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-prepend.input-append .add-on:first-child,.input-prepend.input-append .btn:first-child{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.input-prepend.input-append .add-on:last-child,.input-prepend.input-append .btn:last-child{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-prepend.input-append .btn-group:first-child{margin-left:0}input.search-query{padding-right:14px;padding-right:4px \9;padding-left:14px;padding-left:4px \9;margin-bottom:0;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.form-search .input-append .search-query,.form-search .input-prepend .search-query{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.form-search .input-append .search-query{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px}.form-search .input-append .btn{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .search-query{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .btn{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px}.form-search input,.form-inline input,.form-horizontal input,.form-search textarea,.form-inline textarea,.form-horizontal textarea,.form-search select,.form-inline select,.form-horizontal select,.form-search .help-inline,.form-inline .help-inline,.form-horizontal .help-inline,.form-search .uneditable-input,.form-inline .uneditable-input,.form-horizontal .uneditable-input,.form-search .input-prepend,.form-inline .input-prepend,.form-horizontal .input-prepend,.form-search .input-append,.form-inline .input-append,.form-horizontal .input-append{display:inline-block;*display:inline;margin-bottom:0;vertical-align:middle;*zoom:1}.form-search .hide,.form-inline .hide,.form-horizontal .hide{display:none}.form-search label,.form-inline label,.form-search .btn-group,.form-inline .btn-group{display:inline-block}.form-search .input-append,.form-inline .input-append,.form-search .input-prepend,.form-inline .input-prepend{margin-bottom:0}.form-search .radio,.form-search .checkbox,.form-inline .radio,.form-inline .checkbox{padding-left:0;margin-bottom:0;vertical-align:middle}.form-search .radio input[type="radio"],.form-search .checkbox input[type="checkbox"],.form-inline .radio input[type="radio"],.form-inline .checkbox input[type="checkbox"]{float:left;margin-right:3px;margin-left:0}.control-group{margin-bottom:10px}legend+.control-group{margin-top:20px;-webkit-margin-top-collapse:separate}.form-horizontal .control-group{margin-bottom:20px;*zoom:1}.form-horizontal .control-group:before,.form-horizontal .control-group:after{display:table;line-height:0;content:""}.form-horizontal .control-group:after{clear:both}.form-horizontal .control-label{float:left;width:160px;padding-top:5px;text-align:right}.form-horizontal .controls{*display:inline-block;*padding-left:20px;margin-left:180px;*margin-left:0}.form-horizontal .controls:first-child{*padding-left:180px}.form-horizontal .help-block{margin-bottom:0}.form-horizontal input+.help-block,.form-horizontal select+.help-block,.form-horizontal textarea+.help-block,.form-horizontal .uneditable-input+.help-block,.form-horizontal .input-prepend+.help-block,.form-horizontal .input-append+.help-block{margin-top:10px}.form-horizontal .form-actions{padding-left:180px}table{max-width:100%;background-color:transparent;border-collapse:collapse;border-spacing:0}.table{width:100%;margin-bottom:20px}.table th,.table td{padding:8px;line-height:20px;text-align:left;vertical-align:top;border-top:1px solid #ddd}.table th{font-weight:bold}.table thead th{vertical-align:bottom}.table caption+thead tr:first-child th,.table caption+thead tr:first-child td,.table colgroup+thead tr:first-child th,.table colgroup+thead tr:first-child td,.table thead:first-child tr:first-child th,.table thead:first-child tr:first-child td{border-top:0}.table tbody+tbody{border-top:2px solid #ddd}.table .table{background-color:#fff}.table-condensed th,.table-condensed td{padding:4px 5px}.table-bordered{border:1px solid #ddd;border-collapse:separate;*border-collapse:collapse;border-left:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.table-bordered th,.table-bordered td{border-left:1px solid #ddd}.table-bordered caption+thead tr:first-child th,.table-bordered caption+tbody tr:first-child th,.table-bordered caption+tbody tr:first-child td,.table-bordered colgroup+thead tr:first-child th,.table-bordered colgroup+tbody tr:first-child th,.table-bordered colgroup+tbody tr:first-child td,.table-bordered thead:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child td{border-top:0}.table-bordered thead:first-child tr:first-child>th:first-child,.table-bordered tbody:first-child tr:first-child>td:first-child,.table-bordered tbody:first-child tr:first-child>th:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered thead:first-child tr:first-child>th:last-child,.table-bordered tbody:first-child tr:first-child>td:last-child,.table-bordered tbody:first-child tr:first-child>th:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-radius-topright:4px}.table-bordered thead:last-child tr:last-child>th:first-child,.table-bordered tbody:last-child tr:last-child>td:first-child,.table-bordered tbody:last-child tr:last-child>th:first-child,.table-bordered tfoot:last-child tr:last-child>td:first-child,.table-bordered tfoot:last-child tr:last-child>th:first-child{-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px}.table-bordered thead:last-child tr:last-child>th:last-child,.table-bordered tbody:last-child tr:last-child>td:last-child,.table-bordered tbody:last-child tr:last-child>th:last-child,.table-bordered tfoot:last-child tr:last-child>td:last-child,.table-bordered tfoot:last-child tr:last-child>th:last-child{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px}.table-bordered tfoot+tbody:last-child tr:last-child td:first-child{-webkit-border-bottom-left-radius:0;border-bottom-left-radius:0;-moz-border-radius-bottomleft:0}.table-bordered tfoot+tbody:last-child tr:last-child td:last-child{-webkit-border-bottom-right-radius:0;border-bottom-right-radius:0;-moz-border-radius-bottomright:0}.table-bordered caption+thead tr:first-child th:first-child,.table-bordered caption+tbody tr:first-child td:first-child,.table-bordered colgroup+thead tr:first-child th:first-child,.table-bordered colgroup+tbody tr:first-child td:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered caption+thead tr:first-child th:last-child,.table-bordered caption+tbody tr:first-child td:last-child,.table-bordered colgroup+thead tr:first-child th:last-child,.table-bordered colgroup+tbody tr:first-child td:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-radius-topright:4px}.table-striped tbody>tr:nth-child(odd)>td,.table-striped tbody>tr:nth-child(odd)>th{background-color:#f9f9f9}.table-hover tbody tr:hover>td,.table-hover tbody tr:hover>th{background-color:#f5f5f5}table td[class*="span"],table th[class*="span"],.row-fluid table td[class*="span"],.row-fluid table th[class*="span"]{display:table-cell;float:none;margin-left:0}.table td.span1,.table th.span1{float:none;width:44px;margin-left:0}.table td.span2,.table th.span2{float:none;width:124px;margin-left:0}.table td.span3,.table th.span3{float:none;width:204px;margin-left:0}.table td.span4,.table th.span4{float:none;width:284px;margin-left:0}.table td.span5,.table th.span5{float:none;width:364px;margin-left:0}.table td.span6,.table th.span6{float:none;width:444px;margin-left:0}.table td.span7,.table th.span7{float:none;width:524px;margin-left:0}.table td.span8,.table th.span8{float:none;width:604px;margin-left:0}.table td.span9,.table th.span9{float:none;width:684px;margin-left:0}.table td.span10,.table th.span10{float:none;width:764px;margin-left:0}.table td.span11,.table th.span11{float:none;width:844px;margin-left:0}.table td.span12,.table th.span12{float:none;width:924px;margin-left:0}.table tbody tr.success>td{background-color:#dff0d8}.table tbody tr.error>td{background-color:#f2dede}.table tbody tr.warning>td{background-color:#fcf8e3}.table tbody tr.info>td{background-color:#d9edf7}.table-hover tbody tr.success:hover>td{background-color:#d0e9c6}.table-hover tbody tr.error:hover>td{background-color:#ebcccc}.table-hover tbody tr.warning:hover>td{background-color:#faf2cc}.table-hover tbody tr.info:hover>td{background-color:#c4e3f3}[class^="icon-"],[class*=" icon-"]{display:inline-block;width:14px;height:14px;margin-top:1px;*margin-right:.3em;line-height:14px;vertical-align:text-top;background-image:url("../img/glyphicons-halflings.png");background-position:14px 14px;background-repeat:no-repeat}.icon-white,.nav-pills>.active>a>[class^="icon-"],.nav-pills>.active>a>[class*=" icon-"],.nav-list>.active>a>[class^="icon-"],.nav-list>.active>a>[class*=" icon-"],.navbar-inverse .nav>.active>a>[class^="icon-"],.navbar-inverse .nav>.active>a>[class*=" icon-"],.dropdown-menu>li>a:hover>[class^="icon-"],.dropdown-menu>li>a:focus>[class^="icon-"],.dropdown-menu>li>a:hover>[class*=" icon-"],.dropdown-menu>li>a:focus>[class*=" icon-"],.dropdown-menu>.active>a>[class^="icon-"],.dropdown-menu>.active>a>[class*=" icon-"],.dropdown-submenu:hover>a>[class^="icon-"],.dropdown-submenu:focus>a>[class^="icon-"],.dropdown-submenu:hover>a>[class*=" icon-"],.dropdown-submenu:focus>a>[class*=" icon-"]{background-image:url("../img/glyphicons-halflings-white.png")}.icon-glass{background-position:0 0}.icon-music{background-position:-24px 0}.icon-search{background-position:-48px 0}.icon-envelope{background-position:-72px 0}.icon-heart{background-position:-96px 0}.icon-star{background-position:-120px 0}.icon-star-empty{background-position:-144px 0}.icon-user{background-position:-168px 0}.icon-film{background-position:-192px 0}.icon-th-large{background-position:-216px 0}.icon-th{background-position:-240px 0}.icon-th-list{background-position:-264px 0}.icon-ok{background-position:-288px 0}.icon-remove{background-position:-312px 0}.icon-zoom-in{background-position:-336px 0}.icon-zoom-out{background-position:-360px 0}.icon-off{background-position:-384px 0}.icon-signal{background-position:-408px 0}.icon-cog{background-position:-432px 0}.icon-trash{background-position:-456px 0}.icon-home{background-position:0 -24px}.icon-file{background-position:-24px -24px}.icon-time{background-position:-48px -24px}.icon-road{background-position:-72px -24px}.icon-download-alt{background-position:-96px -24px}.icon-download{background-position:-120px -24px}.icon-upload{background-position:-144px -24px}.icon-inbox{background-position:-168px -24px}.icon-play-circle{background-position:-192px -24px}.icon-repeat{background-position:-216px -24px}.icon-refresh{background-position:-240px -24px}.icon-list-alt{background-position:-264px -24px}.icon-lock{background-position:-287px -24px}.icon-flag{background-position:-312px -24px}.icon-headphones{background-position:-336px -24px}.icon-volume-off{background-position:-360px -24px}.icon-volume-down{background-position:-384px -24px}.icon-volume-up{background-position:-408px -24px}.icon-qrcode{background-position:-432px -24px}.icon-barcode{background-position:-456px -24px}.icon-tag{background-position:0 -48px}.icon-tags{background-position:-25px -48px}.icon-book{background-position:-48px -48px}.icon-bookmark{background-position:-72px -48px}.icon-print{background-position:-96px -48px}.icon-camera{background-position:-120px -48px}.icon-font{background-position:-144px -48px}.icon-bold{background-position:-167px -48px}.icon-italic{background-position:-192px -48px}.icon-text-height{background-position:-216px -48px}.icon-text-width{background-position:-240px -48px}.icon-align-left{background-position:-264px -48px}.icon-align-center{background-position:-288px -48px}.icon-align-right{background-position:-312px -48px}.icon-align-justify{background-position:-336px -48px}.icon-list{background-position:-360px -48px}.icon-indent-left{background-position:-384px -48px}.icon-indent-right{background-position:-408px -48px}.icon-facetime-video{background-position:-432px -48px}.icon-picture{background-position:-456px -48px}.icon-pencil{background-position:0 -72px}.icon-map-marker{background-position:-24px -72px}.icon-adjust{background-position:-48px -72px}.icon-tint{background-position:-72px -72px}.icon-edit{background-position:-96px -72px}.icon-share{background-position:-120px -72px}.icon-check{background-position:-144px -72px}.icon-move{background-position:-168px -72px}.icon-step-backward{background-position:-192px -72px}.icon-fast-backward{background-position:-216px -72px}.icon-backward{background-position:-240px -72px}.icon-play{background-position:-264px -72px}.icon-pause{background-position:-288px -72px}.icon-stop{background-position:-312px -72px}.icon-forward{background-position:-336px -72px}.icon-fast-forward{background-position:-360px -72px}.icon-step-forward{background-position:-384px -72px}.icon-eject{background-position:-408px -72px}.icon-chevron-left{background-position:-432px -72px}.icon-chevron-right{background-position:-456px -72px}.icon-plus-sign{background-position:0 -96px}.icon-minus-sign{background-position:-24px -96px}.icon-remove-sign{background-position:-48px -96px}.icon-ok-sign{background-position:-72px -96px}.icon-question-sign{background-position:-96px -96px}.icon-info-sign{background-position:-120px -96px}.icon-screenshot{background-position:-144px -96px}.icon-remove-circle{background-position:-168px -96px}.icon-ok-circle{background-position:-192px -96px}.icon-ban-circle{background-position:-216px -96px}.icon-arrow-left{background-position:-240px -96px}.icon-arrow-right{background-position:-264px -96px}.icon-arrow-up{background-position:-289px -96px}.icon-arrow-down{background-position:-312px -96px}.icon-share-alt{background-position:-336px -96px}.icon-resize-full{background-position:-360px -96px}.icon-resize-small{background-position:-384px -96px}.icon-plus{background-position:-408px -96px}.icon-minus{background-position:-433px -96px}.icon-asterisk{background-position:-456px -96px}.icon-exclamation-sign{background-position:0 -120px}.icon-gift{background-position:-24px -120px}.icon-leaf{background-position:-48px -120px}.icon-fire{background-position:-72px -120px}.icon-eye-open{background-position:-96px -120px}.icon-eye-close{background-position:-120px -120px}.icon-warning-sign{background-position:-144px -120px}.icon-plane{background-position:-168px -120px}.icon-calendar{background-position:-192px -120px}.icon-random{width:16px;background-position:-216px -120px}.icon-comment{background-position:-240px -120px}.icon-magnet{background-position:-264px -120px}.icon-chevron-up{background-position:-288px -120px}.icon-chevron-down{background-position:-313px -119px}.icon-retweet{background-position:-336px -120px}.icon-shopping-cart{background-position:-360px -120px}.icon-folder-close{width:16px;background-position:-384px -120px}.icon-folder-open{width:16px;background-position:-408px -120px}.icon-resize-vertical{background-position:-432px -119px}.icon-resize-horizontal{background-position:-456px -118px}.icon-hdd{background-position:0 -144px}.icon-bullhorn{background-position:-24px -144px}.icon-bell{background-position:-48px -144px}.icon-certificate{background-position:-72px -144px}.icon-thumbs-up{background-position:-96px -144px}.icon-thumbs-down{background-position:-120px -144px}.icon-hand-right{background-position:-144px -144px}.icon-hand-left{background-position:-168px -144px}.icon-hand-up{background-position:-192px -144px}.icon-hand-down{background-position:-216px -144px}.icon-circle-arrow-right{background-position:-240px -144px}.icon-circle-arrow-left{background-position:-264px -144px}.icon-circle-arrow-up{background-position:-288px -144px}.icon-circle-arrow-down{background-position:-312px -144px}.icon-globe{background-position:-336px -144px}.icon-wrench{background-position:-360px -144px}.icon-tasks{background-position:-384px -144px}.icon-filter{background-position:-408px -144px}.icon-briefcase{background-position:-432px -144px}.icon-fullscreen{background-position:-456px -144px}.dropup,.dropdown{position:relative}.dropdown-toggle{*margin-bottom:-3px}.dropdown-toggle:active,.open .dropdown-toggle{outline:0}.caret{display:inline-block;width:0;height:0;vertical-align:top;border-top:4px solid #000;border-right:4px solid transparent;border-left:4px solid transparent;content:""}.dropdown .caret{margin-top:8px;margin-left:2px}.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:160px;padding:5px 0;margin:2px 0 0;list-style:none;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);*border-right-width:2px;*border-bottom-width:2px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.dropdown-menu.pull-right{right:0;left:auto}.dropdown-menu .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.dropdown-menu>li>a{display:block;padding:3px 20px;clear:both;font-weight:normal;line-height:20px;color:#333;white-space:nowrap}.dropdown-menu>li>a:hover,.dropdown-menu>li>a:focus,.dropdown-submenu:hover>a,.dropdown-submenu:focus>a{color:#fff;text-decoration:none;background-color:#0081c2;background-image:-moz-linear-gradient(top,#08c,#0077b3);background-image:-webkit-gradient(linear,0 0,0 100%,from(#08c),to(#0077b3));background-image:-webkit-linear-gradient(top,#08c,#0077b3);background-image:-o-linear-gradient(top,#08c,#0077b3);background-image:linear-gradient(to bottom,#08c,#0077b3);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0077b3',GradientType=0)}.dropdown-menu>.active>a,.dropdown-menu>.active>a:hover,.dropdown-menu>.active>a:focus{color:#fff;text-decoration:none;background-color:#0081c2;background-image:-moz-linear-gradient(top,#08c,#0077b3);background-image:-webkit-gradient(linear,0 0,0 100%,from(#08c),to(#0077b3));background-image:-webkit-linear-gradient(top,#08c,#0077b3);background-image:-o-linear-gradient(top,#08c,#0077b3);background-image:linear-gradient(to bottom,#08c,#0077b3);background-repeat:repeat-x;outline:0;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0077b3',GradientType=0)}.dropdown-menu>.disabled>a,.dropdown-menu>.disabled>a:hover,.dropdown-menu>.disabled>a:focus{color:#999}.dropdown-menu>.disabled>a:hover,.dropdown-menu>.disabled>a:focus{text-decoration:none;cursor:default;background-color:transparent;background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.open{*z-index:1000}.open>.dropdown-menu{display:block}.dropdown-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:990}.pull-right>.dropdown-menu{right:0;left:auto}.dropup .caret,.navbar-fixed-bottom .dropdown .caret{border-top:0;border-bottom:4px solid #000;content:""}.dropup .dropdown-menu,.navbar-fixed-bottom .dropdown .dropdown-menu{top:auto;bottom:100%;margin-bottom:1px}.dropdown-submenu{position:relative}.dropdown-submenu>.dropdown-menu{top:0;left:100%;margin-top:-6px;margin-left:-1px;-webkit-border-radius:0 6px 6px 6px;-moz-border-radius:0 6px 6px 6px;border-radius:0 6px 6px 6px}.dropdown-submenu:hover>.dropdown-menu{display:block}.dropup .dropdown-submenu>.dropdown-menu{top:auto;bottom:0;margin-top:0;margin-bottom:-2px;-webkit-border-radius:5px 5px 5px 0;-moz-border-radius:5px 5px 5px 0;border-radius:5px 5px 5px 0}.dropdown-submenu>a:after{display:block;float:right;width:0;height:0;margin-top:5px;margin-right:-10px;border-color:transparent;border-left-color:#ccc;border-style:solid;border-width:5px 0 5px 5px;content:" "}.dropdown-submenu:hover>a:after{border-left-color:#fff}.dropdown-submenu.pull-left{float:none}.dropdown-submenu.pull-left>.dropdown-menu{left:-100%;margin-left:10px;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px}.dropdown .dropdown-menu .nav-header{padding-right:20px;padding-left:20px}.typeahead{z-index:1051;margin-top:2px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.well{min-height:20px;padding:19px;margin-bottom:20px;background-color:#f5f5f5;border:1px solid #e3e3e3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);box-shadow:inset 0 1px 1px rgba(0,0,0,0.05)}.well blockquote{border-color:#ddd;border-color:rgba(0,0,0,0.15)}.well-large{padding:24px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.well-small{padding:9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.fade{opacity:0;-webkit-transition:opacity .15s linear;-moz-transition:opacity .15s linear;-o-transition:opacity .15s linear;transition:opacity .15s linear}.fade.in{opacity:1}.collapse{position:relative;height:0;overflow:hidden;-webkit-transition:height .35s ease;-moz-transition:height .35s ease;-o-transition:height .35s ease;transition:height .35s ease}.collapse.in{height:auto}.close{float:right;font-size:20px;font-weight:bold;line-height:20px;color:#000;text-shadow:0 1px 0 #fff;opacity:.2;filter:alpha(opacity=20)}.close:hover,.close:focus{color:#000;text-decoration:none;cursor:pointer;opacity:.4;filter:alpha(opacity=40)}button.close{padding:0;cursor:pointer;background:transparent;border:0;-webkit-appearance:none}.btn{display:inline-block;*display:inline;padding:4px 12px;margin-bottom:0;*margin-left:.3em;font-size:14px;line-height:20px;color:#333;text-align:center;text-shadow:0 1px 1px rgba(255,255,255,0.75);vertical-align:middle;cursor:pointer;background-color:#f5f5f5;*background-color:#e6e6e6;background-image:-moz-linear-gradient(top,#fff,#e6e6e6);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#e6e6e6));background-image:-webkit-linear-gradient(top,#fff,#e6e6e6);background-image:-o-linear-gradient(top,#fff,#e6e6e6);background-image:linear-gradient(to bottom,#fff,#e6e6e6);background-repeat:repeat-x;border:1px solid #ccc;*border:0;border-color:#e6e6e6 #e6e6e6 #bfbfbf;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);border-bottom-color:#b3b3b3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff',endColorstr='#ffe6e6e6',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);*zoom:1;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05)}.btn:hover,.btn:focus,.btn:active,.btn.active,.btn.disabled,.btn[disabled]{color:#333;background-color:#e6e6e6;*background-color:#d9d9d9}.btn:active,.btn.active{background-color:#ccc \9}.btn:first-child{*margin-left:0}.btn:hover,.btn:focus{color:#333;text-decoration:none;background-position:0 -15px;-webkit-transition:background-position .1s linear;-moz-transition:background-position .1s linear;-o-transition:background-position .1s linear;transition:background-position .1s linear}.btn:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.btn.active,.btn:active{background-image:none;outline:0;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn.disabled,.btn[disabled]{cursor:default;background-image:none;opacity:.65;filter:alpha(opacity=65);-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-large{padding:11px 19px;font-size:17.5px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.btn-large [class^="icon-"],.btn-large [class*=" icon-"]{margin-top:4px}.btn-small{padding:2px 10px;font-size:11.9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.btn-small [class^="icon-"],.btn-small [class*=" icon-"]{margin-top:0}.btn-mini [class^="icon-"],.btn-mini [class*=" icon-"]{margin-top:-1px}.btn-mini{padding:0 6px;font-size:10.5px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.btn-block{display:block;width:100%;padding-right:0;padding-left:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.btn-block+.btn-block{margin-top:5px}input[type="submit"].btn-block,input[type="reset"].btn-block,input[type="button"].btn-block{width:100%}.btn-primary.active,.btn-warning.active,.btn-danger.active,.btn-success.active,.btn-info.active,.btn-inverse.active{color:rgba(255,255,255,0.75)}.btn-primary{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#006dcc;*background-color:#04c;background-image:-moz-linear-gradient(top,#08c,#04c);background-image:-webkit-gradient(linear,0 0,0 100%,from(#08c),to(#04c));background-image:-webkit-linear-gradient(top,#08c,#04c);background-image:-o-linear-gradient(top,#08c,#04c);background-image:linear-gradient(to bottom,#08c,#04c);background-repeat:repeat-x;border-color:#04c #04c #002a80;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0044cc',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-primary:hover,.btn-primary:focus,.btn-primary:active,.btn-primary.active,.btn-primary.disabled,.btn-primary[disabled]{color:#fff;background-color:#04c;*background-color:#003bb3}.btn-primary:active,.btn-primary.active{background-color:#039 \9}.btn-warning{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#faa732;*background-color:#f89406;background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-repeat:repeat-x;border-color:#f89406 #f89406 #ad6704;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-warning:hover,.btn-warning:focus,.btn-warning:active,.btn-warning.active,.btn-warning.disabled,.btn-warning[disabled]{color:#fff;background-color:#f89406;*background-color:#df8505}.btn-warning:active,.btn-warning.active{background-color:#c67605 \9}.btn-danger{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#da4f49;*background-color:#bd362f;background-image:-moz-linear-gradient(top,#ee5f5b,#bd362f);background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#bd362f));background-image:-webkit-linear-gradient(top,#ee5f5b,#bd362f);background-image:-o-linear-gradient(top,#ee5f5b,#bd362f);background-image:linear-gradient(to bottom,#ee5f5b,#bd362f);background-repeat:repeat-x;border-color:#bd362f #bd362f #802420;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffbd362f',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-danger:hover,.btn-danger:focus,.btn-danger:active,.btn-danger.active,.btn-danger.disabled,.btn-danger[disabled]{color:#fff;background-color:#bd362f;*background-color:#a9302a}.btn-danger:active,.btn-danger.active{background-color:#942a25 \9}.btn-success{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#5bb75b;*background-color:#51a351;background-image:-moz-linear-gradient(top,#62c462,#51a351);background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#51a351));background-image:-webkit-linear-gradient(top,#62c462,#51a351);background-image:-o-linear-gradient(top,#62c462,#51a351);background-image:linear-gradient(to bottom,#62c462,#51a351);background-repeat:repeat-x;border-color:#51a351 #51a351 #387038;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff51a351',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-success:hover,.btn-success:focus,.btn-success:active,.btn-success.active,.btn-success.disabled,.btn-success[disabled]{color:#fff;background-color:#51a351;*background-color:#499249}.btn-success:active,.btn-success.active{background-color:#408140 \9}.btn-info{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#49afcd;*background-color:#2f96b4;background-image:-moz-linear-gradient(top,#5bc0de,#2f96b4);background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#2f96b4));background-image:-webkit-linear-gradient(top,#5bc0de,#2f96b4);background-image:-o-linear-gradient(top,#5bc0de,#2f96b4);background-image:linear-gradient(to bottom,#5bc0de,#2f96b4);background-repeat:repeat-x;border-color:#2f96b4 #2f96b4 #1f6377;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff2f96b4',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-info:hover,.btn-info:focus,.btn-info:active,.btn-info.active,.btn-info.disabled,.btn-info[disabled]{color:#fff;background-color:#2f96b4;*background-color:#2a85a0}.btn-info:active,.btn-info.active{background-color:#24748c \9}.btn-inverse{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#363636;*background-color:#222;background-image:-moz-linear-gradient(top,#444,#222);background-image:-webkit-gradient(linear,0 0,0 100%,from(#444),to(#222));background-image:-webkit-linear-gradient(top,#444,#222);background-image:-o-linear-gradient(top,#444,#222);background-image:linear-gradient(to bottom,#444,#222);background-repeat:repeat-x;border-color:#222 #222 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff444444',endColorstr='#ff222222',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-inverse:hover,.btn-inverse:focus,.btn-inverse:active,.btn-inverse.active,.btn-inverse.disabled,.btn-inverse[disabled]{color:#fff;background-color:#222;*background-color:#151515}.btn-inverse:active,.btn-inverse.active{background-color:#080808 \9}button.btn,input[type="submit"].btn{*padding-top:3px;*padding-bottom:3px}button.btn::-moz-focus-inner,input[type="submit"].btn::-moz-focus-inner{padding:0;border:0}button.btn.btn-large,input[type="submit"].btn.btn-large{*padding-top:7px;*padding-bottom:7px}button.btn.btn-small,input[type="submit"].btn.btn-small{*padding-top:3px;*padding-bottom:3px}button.btn.btn-mini,input[type="submit"].btn.btn-mini{*padding-top:1px;*padding-bottom:1px}.btn-link,.btn-link:active,.btn-link[disabled]{background-color:transparent;background-image:none;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-link{color:#08c;cursor:pointer;border-color:transparent;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-link:hover,.btn-link:focus{color:#005580;text-decoration:underline;background-color:transparent}.btn-link[disabled]:hover,.btn-link[disabled]:focus{color:#333;text-decoration:none}.btn-group{position:relative;display:inline-block;*display:inline;*margin-left:.3em;font-size:0;white-space:nowrap;vertical-align:middle;*zoom:1}.btn-group:first-child{*margin-left:0}.btn-group+.btn-group{margin-left:5px}.btn-toolbar{margin-top:10px;margin-bottom:10px;font-size:0}.btn-toolbar>.btn+.btn,.btn-toolbar>.btn-group+.btn,.btn-toolbar>.btn+.btn-group{margin-left:5px}.btn-group>.btn{position:relative;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group>.btn+.btn{margin-left:-1px}.btn-group>.btn,.btn-group>.dropdown-menu,.btn-group>.popover{font-size:14px}.btn-group>.btn-mini{font-size:10.5px}.btn-group>.btn-small{font-size:11.9px}.btn-group>.btn-large{font-size:17.5px}.btn-group>.btn:first-child{margin-left:0;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-bottomleft:4px;-moz-border-radius-topleft:4px}.btn-group>.btn:last-child,.btn-group>.dropdown-toggle{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-bottomright:4px}.btn-group>.btn.large:first-child{margin-left:0;-webkit-border-bottom-left-radius:6px;border-bottom-left-radius:6px;-webkit-border-top-left-radius:6px;border-top-left-radius:6px;-moz-border-radius-bottomleft:6px;-moz-border-radius-topleft:6px}.btn-group>.btn.large:last-child,.btn-group>.large.dropdown-toggle{-webkit-border-top-right-radius:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;border-bottom-right-radius:6px;-moz-border-radius-topright:6px;-moz-border-radius-bottomright:6px}.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active{z-index:2}.btn-group .dropdown-toggle:active,.btn-group.open .dropdown-toggle{outline:0}.btn-group>.btn+.dropdown-toggle{*padding-top:5px;padding-right:8px;*padding-bottom:5px;padding-left:8px;-webkit-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05)}.btn-group>.btn-mini+.dropdown-toggle{*padding-top:2px;padding-right:5px;*padding-bottom:2px;padding-left:5px}.btn-group>.btn-small+.dropdown-toggle{*padding-top:5px;*padding-bottom:4px}.btn-group>.btn-large+.dropdown-toggle{*padding-top:7px;padding-right:12px;*padding-bottom:7px;padding-left:12px}.btn-group.open .dropdown-toggle{background-image:none;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn-group.open .btn.dropdown-toggle{background-color:#e6e6e6}.btn-group.open .btn-primary.dropdown-toggle{background-color:#04c}.btn-group.open .btn-warning.dropdown-toggle{background-color:#f89406}.btn-group.open .btn-danger.dropdown-toggle{background-color:#bd362f}.btn-group.open .btn-success.dropdown-toggle{background-color:#51a351}.btn-group.open .btn-info.dropdown-toggle{background-color:#2f96b4}.btn-group.open .btn-inverse.dropdown-toggle{background-color:#222}.btn .caret{margin-top:8px;margin-left:0}.btn-large .caret{margin-top:6px}.btn-large .caret{border-top-width:5px;border-right-width:5px;border-left-width:5px}.btn-mini .caret,.btn-small .caret{margin-top:8px}.dropup .btn-large .caret{border-bottom-width:5px}.btn-primary .caret,.btn-warning .caret,.btn-danger .caret,.btn-info .caret,.btn-success .caret,.btn-inverse .caret{border-top-color:#fff;border-bottom-color:#fff}.btn-group-vertical{display:inline-block;*display:inline;*zoom:1}.btn-group-vertical>.btn{display:block;float:none;max-width:100%;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group-vertical>.btn+.btn{margin-top:-1px;margin-left:0}.btn-group-vertical>.btn:first-child{-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.btn-group-vertical>.btn:last-child{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.btn-group-vertical>.btn-large:first-child{-webkit-border-radius:6px 6px 0 0;-moz-border-radius:6px 6px 0 0;border-radius:6px 6px 0 0}.btn-group-vertical>.btn-large:last-child{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.alert{padding:8px 35px 8px 14px;margin-bottom:20px;text-shadow:0 1px 0 rgba(255,255,255,0.5);background-color:#fcf8e3;border:1px solid #fbeed5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.alert,.alert h4{color:#c09853}.alert h4{margin:0}.alert .close{position:relative;top:-2px;right:-21px;line-height:20px}.alert-success{color:#468847;background-color:#dff0d8;border-color:#d6e9c6}.alert-success h4{color:#468847}.alert-danger,.alert-error{color:#b94a48;background-color:#f2dede;border-color:#eed3d7}.alert-danger h4,.alert-error h4{color:#b94a48}.alert-info{color:#3a87ad;background-color:#d9edf7;border-color:#bce8f1}.alert-info h4{color:#3a87ad}.alert-block{padding-top:14px;padding-bottom:14px}.alert-block>p,.alert-block>ul{margin-bottom:0}.alert-block p+p{margin-top:5px}.nav{margin-bottom:20px;margin-left:0;list-style:none}.nav>li>a{display:block}.nav>li>a:hover,.nav>li>a:focus{text-decoration:none;background-color:#eee}.nav>li>a>img{max-width:none}.nav>.pull-right{float:right}.nav-header{display:block;padding:3px 15px;font-size:11px;font-weight:bold;line-height:20px;color:#999;text-shadow:0 1px 0 rgba(255,255,255,0.5);text-transform:uppercase}.nav li+.nav-header{margin-top:9px}.nav-list{padding-right:15px;padding-left:15px;margin-bottom:0}.nav-list>li>a,.nav-list .nav-header{margin-right:-15px;margin-left:-15px;text-shadow:0 1px 0 rgba(255,255,255,0.5)}.nav-list>li>a{padding:3px 15px}.nav-list>.active>a,.nav-list>.active>a:hover,.nav-list>.active>a:focus{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.2);background-color:#08c}.nav-list [class^="icon-"],.nav-list [class*=" icon-"]{margin-right:2px}.nav-list .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.nav-tabs,.nav-pills{*zoom:1}.nav-tabs:before,.nav-pills:before,.nav-tabs:after,.nav-pills:after{display:table;line-height:0;content:""}.nav-tabs:after,.nav-pills:after{clear:both}.nav-tabs>li,.nav-pills>li{float:left}.nav-tabs>li>a,.nav-pills>li>a{padding-right:12px;padding-left:12px;margin-right:2px;line-height:14px}.nav-tabs{border-bottom:1px solid #ddd}.nav-tabs>li{margin-bottom:-1px}.nav-tabs>li>a{padding-top:8px;padding-bottom:8px;line-height:20px;border:1px solid transparent;-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.nav-tabs>li>a:hover,.nav-tabs>li>a:focus{border-color:#eee #eee #ddd}.nav-tabs>.active>a,.nav-tabs>.active>a:hover,.nav-tabs>.active>a:focus{color:#555;cursor:default;background-color:#fff;border:1px solid #ddd;border-bottom-color:transparent}.nav-pills>li>a{padding-top:8px;padding-bottom:8px;margin-top:2px;margin-bottom:2px;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px}.nav-pills>.active>a,.nav-pills>.active>a:hover,.nav-pills>.active>a:focus{color:#fff;background-color:#08c}.nav-stacked>li{float:none}.nav-stacked>li>a{margin-right:0}.nav-tabs.nav-stacked{border-bottom:0}.nav-tabs.nav-stacked>li>a{border:1px solid #ddd;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.nav-tabs.nav-stacked>li:first-child>a{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-topleft:4px}.nav-tabs.nav-stacked>li:last-child>a{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomright:4px;-moz-border-radius-bottomleft:4px}.nav-tabs.nav-stacked>li>a:hover,.nav-tabs.nav-stacked>li>a:focus{z-index:2;border-color:#ddd}.nav-pills.nav-stacked>li>a{margin-bottom:3px}.nav-pills.nav-stacked>li:last-child>a{margin-bottom:1px}.nav-tabs .dropdown-menu{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.nav-pills .dropdown-menu{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.nav .dropdown-toggle .caret{margin-top:6px;border-top-color:#08c;border-bottom-color:#08c}.nav .dropdown-toggle:hover .caret,.nav .dropdown-toggle:focus .caret{border-top-color:#005580;border-bottom-color:#005580}.nav-tabs .dropdown-toggle .caret{margin-top:8px}.nav .active .dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.nav-tabs .active .dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.nav>.dropdown.active>a:hover,.nav>.dropdown.active>a:focus{cursor:pointer}.nav-tabs .open .dropdown-toggle,.nav-pills .open .dropdown-toggle,.nav>li.dropdown.open.active>a:hover,.nav>li.dropdown.open.active>a:focus{color:#fff;background-color:#999;border-color:#999}.nav li.dropdown.open .caret,.nav li.dropdown.open.active .caret,.nav li.dropdown.open a:hover .caret,.nav li.dropdown.open a:focus .caret{border-top-color:#fff;border-bottom-color:#fff;opacity:1;filter:alpha(opacity=100)}.tabs-stacked .open>a:hover,.tabs-stacked .open>a:focus{border-color:#999}.tabbable{*zoom:1}.tabbable:before,.tabbable:after{display:table;line-height:0;content:""}.tabbable:after{clear:both}.tab-content{overflow:auto}.tabs-below>.nav-tabs,.tabs-right>.nav-tabs,.tabs-left>.nav-tabs{border-bottom:0}.tab-content>.tab-pane,.pill-content>.pill-pane{display:none}.tab-content>.active,.pill-content>.active{display:block}.tabs-below>.nav-tabs{border-top:1px solid #ddd}.tabs-below>.nav-tabs>li{margin-top:-1px;margin-bottom:0}.tabs-below>.nav-tabs>li>a{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.tabs-below>.nav-tabs>li>a:hover,.tabs-below>.nav-tabs>li>a:focus{border-top-color:#ddd;border-bottom-color:transparent}.tabs-below>.nav-tabs>.active>a,.tabs-below>.nav-tabs>.active>a:hover,.tabs-below>.nav-tabs>.active>a:focus{border-color:transparent #ddd #ddd #ddd}.tabs-left>.nav-tabs>li,.tabs-right>.nav-tabs>li{float:none}.tabs-left>.nav-tabs>li>a,.tabs-right>.nav-tabs>li>a{min-width:74px;margin-right:0;margin-bottom:3px}.tabs-left>.nav-tabs{float:left;margin-right:19px;border-right:1px solid #ddd}.tabs-left>.nav-tabs>li>a{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.tabs-left>.nav-tabs>li>a:hover,.tabs-left>.nav-tabs>li>a:focus{border-color:#eee #ddd #eee #eee}.tabs-left>.nav-tabs .active>a,.tabs-left>.nav-tabs .active>a:hover,.tabs-left>.nav-tabs .active>a:focus{border-color:#ddd transparent #ddd #ddd;*border-right-color:#fff}.tabs-right>.nav-tabs{float:right;margin-left:19px;border-left:1px solid #ddd}.tabs-right>.nav-tabs>li>a{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.tabs-right>.nav-tabs>li>a:hover,.tabs-right>.nav-tabs>li>a:focus{border-color:#eee #eee #eee #ddd}.tabs-right>.nav-tabs .active>a,.tabs-right>.nav-tabs .active>a:hover,.tabs-right>.nav-tabs .active>a:focus{border-color:#ddd #ddd #ddd transparent;*border-left-color:#fff}.nav>.disabled>a{color:#999}.nav>.disabled>a:hover,.nav>.disabled>a:focus{text-decoration:none;cursor:default;background-color:transparent}.navbar{*position:relative;*z-index:2;margin-bottom:20px;overflow:visible}.navbar-inner{min-height:40px;padding-right:20px;padding-left:20px;background-color:#fafafa;background-image:-moz-linear-gradient(top,#fff,#f2f2f2);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#f2f2f2));background-image:-webkit-linear-gradient(top,#fff,#f2f2f2);background-image:-o-linear-gradient(top,#fff,#f2f2f2);background-image:linear-gradient(to bottom,#fff,#f2f2f2);background-repeat:repeat-x;border:1px solid #d4d4d4;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff',endColorstr='#fff2f2f2',GradientType=0);*zoom:1;-webkit-box-shadow:0 1px 4px rgba(0,0,0,0.065);-moz-box-shadow:0 1px 4px rgba(0,0,0,0.065);box-shadow:0 1px 4px rgba(0,0,0,0.065)}.navbar-inner:before,.navbar-inner:after{display:table;line-height:0;content:""}.navbar-inner:after{clear:both}.navbar .container{width:auto}.nav-collapse.collapse{height:auto;overflow:visible}.navbar .brand{display:block;float:left;padding:10px 20px 10px;margin-left:-20px;font-size:20px;font-weight:200;color:#777;text-shadow:0 1px 0 #fff}.navbar .brand:hover,.navbar .brand:focus{text-decoration:none}.navbar-text{margin-bottom:0;line-height:40px;color:#777}.navbar-link{color:#777}.navbar-link:hover,.navbar-link:focus{color:#333}.navbar .divider-vertical{height:40px;margin:0 9px;border-right:1px solid #fff;border-left:1px solid #f2f2f2}.navbar .btn,.navbar .btn-group{margin-top:5px}.navbar .btn-group .btn,.navbar .input-prepend .btn,.navbar .input-append .btn,.navbar .input-prepend .btn-group,.navbar .input-append .btn-group{margin-top:0}.navbar-form{margin-bottom:0;*zoom:1}.navbar-form:before,.navbar-form:after{display:table;line-height:0;content:""}.navbar-form:after{clear:both}.navbar-form input,.navbar-form select,.navbar-form .radio,.navbar-form .checkbox{margin-top:5px}.navbar-form input,.navbar-form select,.navbar-form .btn{display:inline-block;margin-bottom:0}.navbar-form input[type="image"],.navbar-form input[type="checkbox"],.navbar-form input[type="radio"]{margin-top:3px}.navbar-form .input-append,.navbar-form .input-prepend{margin-top:5px;white-space:nowrap}.navbar-form .input-append input,.navbar-form .input-prepend input{margin-top:0}.navbar-search{position:relative;float:left;margin-top:5px;margin-bottom:0}.navbar-search .search-query{padding:4px 14px;margin-bottom:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:13px;font-weight:normal;line-height:1;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.navbar-static-top{position:static;margin-bottom:0}.navbar-static-top .navbar-inner{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-fixed-top,.navbar-fixed-bottom{position:fixed;right:0;left:0;z-index:1030;margin-bottom:0}.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{border-width:0 0 1px}.navbar-fixed-bottom .navbar-inner{border-width:1px 0 0}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding-right:0;padding-left:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.navbar-fixed-top{top:0}.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{-webkit-box-shadow:0 1px 10px rgba(0,0,0,0.1);-moz-box-shadow:0 1px 10px rgba(0,0,0,0.1);box-shadow:0 1px 10px rgba(0,0,0,0.1)}.navbar-fixed-bottom{bottom:0}.navbar-fixed-bottom .navbar-inner{-webkit-box-shadow:0 -1px 10px rgba(0,0,0,0.1);-moz-box-shadow:0 -1px 10px rgba(0,0,0,0.1);box-shadow:0 -1px 10px rgba(0,0,0,0.1)}.navbar .nav{position:relative;left:0;display:block;float:left;margin:0 10px 0 0}.navbar .nav.pull-right{float:right;margin-right:0}.navbar .nav>li{float:left}.navbar .nav>li>a{float:none;padding:10px 15px 10px;color:#777;text-decoration:none;text-shadow:0 1px 0 #fff}.navbar .nav .dropdown-toggle .caret{margin-top:8px}.navbar .nav>li>a:focus,.navbar .nav>li>a:hover{color:#333;text-decoration:none;background-color:transparent}.navbar .nav>.active>a,.navbar .nav>.active>a:hover,.navbar .nav>.active>a:focus{color:#555;text-decoration:none;background-color:#e5e5e5;-webkit-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);-moz-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);box-shadow:inset 0 3px 8px rgba(0,0,0,0.125)}.navbar .btn-navbar{display:none;float:right;padding:7px 10px;margin-right:5px;margin-left:5px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#ededed;*background-color:#e5e5e5;background-image:-moz-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:-webkit-gradient(linear,0 0,0 100%,from(#f2f2f2),to(#e5e5e5));background-image:-webkit-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:-o-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:linear-gradient(to bottom,#f2f2f2,#e5e5e5);background-repeat:repeat-x;border-color:#e5e5e5 #e5e5e5 #bfbfbf;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff2f2f2',endColorstr='#ffe5e5e5',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075)}.navbar .btn-navbar:hover,.navbar .btn-navbar:focus,.navbar .btn-navbar:active,.navbar .btn-navbar.active,.navbar .btn-navbar.disabled,.navbar .btn-navbar[disabled]{color:#fff;background-color:#e5e5e5;*background-color:#d9d9d9}.navbar .btn-navbar:active,.navbar .btn-navbar.active{background-color:#ccc \9}.navbar .btn-navbar .icon-bar{display:block;width:18px;height:2px;background-color:#f5f5f5;-webkit-border-radius:1px;-moz-border-radius:1px;border-radius:1px;-webkit-box-shadow:0 1px 0 rgba(0,0,0,0.25);-moz-box-shadow:0 1px 0 rgba(0,0,0,0.25);box-shadow:0 1px 0 rgba(0,0,0,0.25)}.btn-navbar .icon-bar+.icon-bar{margin-top:3px}.navbar .nav>li>.dropdown-menu:before{position:absolute;top:-7px;left:9px;display:inline-block;border-right:7px solid transparent;border-bottom:7px solid #ccc;border-left:7px solid transparent;border-bottom-color:rgba(0,0,0,0.2);content:''}.navbar .nav>li>.dropdown-menu:after{position:absolute;top:-6px;left:10px;display:inline-block;border-right:6px solid transparent;border-bottom:6px solid #fff;border-left:6px solid transparent;content:''}.navbar-fixed-bottom .nav>li>.dropdown-menu:before{top:auto;bottom:-7px;border-top:7px solid #ccc;border-bottom:0;border-top-color:rgba(0,0,0,0.2)}.navbar-fixed-bottom .nav>li>.dropdown-menu:after{top:auto;bottom:-6px;border-top:6px solid #fff;border-bottom:0}.navbar .nav li.dropdown>a:hover .caret,.navbar .nav li.dropdown>a:focus .caret{border-top-color:#333;border-bottom-color:#333}.navbar .nav li.dropdown.open>.dropdown-toggle,.navbar .nav li.dropdown.active>.dropdown-toggle,.navbar .nav li.dropdown.open.active>.dropdown-toggle{color:#555;background-color:#e5e5e5}.navbar .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#777;border-bottom-color:#777}.navbar .nav li.dropdown.open>.dropdown-toggle .caret,.navbar .nav li.dropdown.active>.dropdown-toggle .caret,.navbar .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .pull-right>li>.dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right{right:0;left:auto}.navbar .pull-right>li>.dropdown-menu:before,.navbar .nav>li>.dropdown-menu.pull-right:before{right:12px;left:auto}.navbar .pull-right>li>.dropdown-menu:after,.navbar .nav>li>.dropdown-menu.pull-right:after{right:13px;left:auto}.navbar .pull-right>li>.dropdown-menu .dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right .dropdown-menu{right:100%;left:auto;margin-right:-1px;margin-left:0;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px}.navbar-inverse .navbar-inner{background-color:#1b1b1b;background-image:-moz-linear-gradient(top,#222,#111);background-image:-webkit-gradient(linear,0 0,0 100%,from(#222),to(#111));background-image:-webkit-linear-gradient(top,#222,#111);background-image:-o-linear-gradient(top,#222,#111);background-image:linear-gradient(to bottom,#222,#111);background-repeat:repeat-x;border-color:#252525;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff222222',endColorstr='#ff111111',GradientType=0)}.navbar-inverse .brand,.navbar-inverse .nav>li>a{color:#999;text-shadow:0 -1px 0 rgba(0,0,0,0.25)}.navbar-inverse .brand:hover,.navbar-inverse .nav>li>a:hover,.navbar-inverse .brand:focus,.navbar-inverse .nav>li>a:focus{color:#fff}.navbar-inverse .brand{color:#999}.navbar-inverse .navbar-text{color:#999}.navbar-inverse .nav>li>a:focus,.navbar-inverse .nav>li>a:hover{color:#fff;background-color:transparent}.navbar-inverse .nav .active>a,.navbar-inverse .nav .active>a:hover,.navbar-inverse .nav .active>a:focus{color:#fff;background-color:#111}.navbar-inverse .navbar-link{color:#999}.navbar-inverse .navbar-link:hover,.navbar-inverse .navbar-link:focus{color:#fff}.navbar-inverse .divider-vertical{border-right-color:#222;border-left-color:#111}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle{color:#fff;background-color:#111}.navbar-inverse .nav li.dropdown>a:hover .caret,.navbar-inverse .nav li.dropdown>a:focus .caret{border-top-color:#fff;border-bottom-color:#fff}.navbar-inverse .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#999;border-bottom-color:#999}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.navbar-inverse .navbar-search .search-query{color:#fff;background-color:#515151;border-color:#111;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-webkit-transition:none;-moz-transition:none;-o-transition:none;transition:none}.navbar-inverse .navbar-search .search-query:-moz-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:-ms-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query::-webkit-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:focus,.navbar-inverse .navbar-search .search-query.focused{padding:5px 15px;color:#333;text-shadow:0 1px 0 #fff;background-color:#fff;border:0;outline:0;-webkit-box-shadow:0 0 3px rgba(0,0,0,0.15);-moz-box-shadow:0 0 3px rgba(0,0,0,0.15);box-shadow:0 0 3px rgba(0,0,0,0.15)}.navbar-inverse .btn-navbar{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e0e0e;*background-color:#040404;background-image:-moz-linear-gradient(top,#151515,#040404);background-image:-webkit-gradient(linear,0 0,0 100%,from(#151515),to(#040404));background-image:-webkit-linear-gradient(top,#151515,#040404);background-image:-o-linear-gradient(top,#151515,#040404);background-image:linear-gradient(to bottom,#151515,#040404);background-repeat:repeat-x;border-color:#040404 #040404 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff151515',endColorstr='#ff040404',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.navbar-inverse .btn-navbar:hover,.navbar-inverse .btn-navbar:focus,.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active,.navbar-inverse .btn-navbar.disabled,.navbar-inverse .btn-navbar[disabled]{color:#fff;background-color:#040404;*background-color:#000}.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active{background-color:#000 \9}.breadcrumb{padding:8px 15px;margin:0 0 20px;list-style:none;background-color:#f5f5f5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.breadcrumb>li{display:inline-block;*display:inline;text-shadow:0 1px 0 #fff;*zoom:1}.breadcrumb>li>.divider{padding:0 5px;color:#ccc}.breadcrumb>.active{color:#999}.pagination{margin:20px 0}.pagination ul{display:inline-block;*display:inline;margin-bottom:0;margin-left:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;*zoom:1;-webkit-box-shadow:0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:0 1px 2px rgba(0,0,0,0.05);box-shadow:0 1px 2px rgba(0,0,0,0.05)}.pagination ul>li{display:inline}.pagination ul>li>a,.pagination ul>li>span{float:left;padding:4px 12px;line-height:20px;text-decoration:none;background-color:#fff;border:1px solid #ddd;border-left-width:0}.pagination ul>li>a:hover,.pagination ul>li>a:focus,.pagination ul>.active>a,.pagination ul>.active>span{background-color:#f5f5f5}.pagination ul>.active>a,.pagination ul>.active>span{color:#999;cursor:default}.pagination ul>.disabled>span,.pagination ul>.disabled>a,.pagination ul>.disabled>a:hover,.pagination ul>.disabled>a:focus{color:#999;cursor:default;background-color:transparent}.pagination ul>li:first-child>a,.pagination ul>li:first-child>span{border-left-width:1px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-bottomleft:4px;-moz-border-radius-topleft:4px}.pagination ul>li:last-child>a,.pagination ul>li:last-child>span{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-bottomright:4px}.pagination-centered{text-align:center}.pagination-right{text-align:right}.pagination-large ul>li>a,.pagination-large ul>li>span{padding:11px 19px;font-size:17.5px}.pagination-large ul>li:first-child>a,.pagination-large ul>li:first-child>span{-webkit-border-bottom-left-radius:6px;border-bottom-left-radius:6px;-webkit-border-top-left-radius:6px;border-top-left-radius:6px;-moz-border-radius-bottomleft:6px;-moz-border-radius-topleft:6px}.pagination-large ul>li:last-child>a,.pagination-large ul>li:last-child>span{-webkit-border-top-right-radius:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;border-bottom-right-radius:6px;-moz-border-radius-topright:6px;-moz-border-radius-bottomright:6px}.pagination-mini ul>li:first-child>a,.pagination-small ul>li:first-child>a,.pagination-mini ul>li:first-child>span,.pagination-small ul>li:first-child>span{-webkit-border-bottom-left-radius:3px;border-bottom-left-radius:3px;-webkit-border-top-left-radius:3px;border-top-left-radius:3px;-moz-border-radius-bottomleft:3px;-moz-border-radius-topleft:3px}.pagination-mini ul>li:last-child>a,.pagination-small ul>li:last-child>a,.pagination-mini ul>li:last-child>span,.pagination-small ul>li:last-child>span{-webkit-border-top-right-radius:3px;border-top-right-radius:3px;-webkit-border-bottom-right-radius:3px;border-bottom-right-radius:3px;-moz-border-radius-topright:3px;-moz-border-radius-bottomright:3px}.pagination-small ul>li>a,.pagination-small ul>li>span{padding:2px 10px;font-size:11.9px}.pagination-mini ul>li>a,.pagination-mini ul>li>span{padding:0 6px;font-size:10.5px}.pager{margin:20px 0;text-align:center;list-style:none;*zoom:1}.pager:before,.pager:after{display:table;line-height:0;content:""}.pager:after{clear:both}.pager li{display:inline}.pager li>a,.pager li>span{display:inline-block;padding:5px 14px;background-color:#fff;border:1px solid #ddd;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.pager li>a:hover,.pager li>a:focus{text-decoration:none;background-color:#f5f5f5}.pager .next>a,.pager .next>span{float:right}.pager .previous>a,.pager .previous>span{float:left}.pager .disabled>a,.pager .disabled>a:hover,.pager .disabled>a:focus,.pager .disabled>span{color:#999;cursor:default;background-color:#fff}.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000}.modal-backdrop.fade{opacity:0}.modal-backdrop,.modal-backdrop.fade.in{opacity:.8;filter:alpha(opacity=80)}.modal{position:fixed;top:10%;left:50%;z-index:1050;width:560px;margin-left:-280px;background-color:#fff;border:1px solid #999;border:1px solid rgba(0,0,0,0.3);*border:1px solid #999;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;outline:0;-webkit-box-shadow:0 3px 7px rgba(0,0,0,0.3);-moz-box-shadow:0 3px 7px rgba(0,0,0,0.3);box-shadow:0 3px 7px rgba(0,0,0,0.3);-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box}.modal.fade{top:-25%;-webkit-transition:opacity .3s linear,top .3s ease-out;-moz-transition:opacity .3s linear,top .3s ease-out;-o-transition:opacity .3s linear,top .3s ease-out;transition:opacity .3s linear,top .3s ease-out}.modal.fade.in{top:10%}.modal-header{padding:9px 15px;border-bottom:1px solid #eee}.modal-header .close{margin-top:2px}.modal-header h3{margin:0;line-height:30px}.modal-body{position:relative;max-height:400px;padding:15px;overflow-y:auto}.modal-form{margin-bottom:0}.modal-footer{padding:14px 15px 15px;margin-bottom:0;text-align:right;background-color:#f5f5f5;border-top:1px solid #ddd;-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;*zoom:1;-webkit-box-shadow:inset 0 1px 0 #fff;-moz-box-shadow:inset 0 1px 0 #fff;box-shadow:inset 0 1px 0 #fff}.modal-footer:before,.modal-footer:after{display:table;line-height:0;content:""}.modal-footer:after{clear:both}.modal-footer .btn+.btn{margin-bottom:0;margin-left:5px}.modal-footer .btn-group .btn+.btn{margin-left:-1px}.modal-footer .btn-block+.btn-block{margin-left:0}.tooltip{position:absolute;z-index:1030;display:block;font-size:11px;line-height:1.4;opacity:0;filter:alpha(opacity=0);visibility:visible}.tooltip.in{opacity:.8;filter:alpha(opacity=80)}.tooltip.top{padding:5px 0;margin-top:-3px}.tooltip.right{padding:0 5px;margin-left:3px}.tooltip.bottom{padding:5px 0;margin-top:3px}.tooltip.left{padding:0 5px;margin-left:-3px}.tooltip-inner{max-width:200px;padding:8px;color:#fff;text-align:center;text-decoration:none;background-color:#000;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.tooltip-arrow{position:absolute;width:0;height:0;border-color:transparent;border-style:solid}.tooltip.top .tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-top-color:#000;border-width:5px 5px 0}.tooltip.right .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-right-color:#000;border-width:5px 5px 5px 0}.tooltip.left .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-left-color:#000;border-width:5px 0 5px 5px}.tooltip.bottom .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-bottom-color:#000;border-width:0 5px 5px}.popover{position:absolute;top:0;left:0;z-index:1010;display:none;max-width:276px;padding:1px;text-align:left;white-space:normal;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.popover.top{margin-top:-10px}.popover.right{margin-left:10px}.popover.bottom{margin-top:10px}.popover.left{margin-left:-10px}.popover-title{padding:8px 14px;margin:0;font-size:14px;font-weight:normal;line-height:18px;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;-webkit-border-radius:5px 5px 0 0;-moz-border-radius:5px 5px 0 0;border-radius:5px 5px 0 0}.popover-title:empty{display:none}.popover-content{padding:9px 14px}.popover .arrow,.popover .arrow:after{position:absolute;display:block;width:0;height:0;border-color:transparent;border-style:solid}.popover .arrow{border-width:11px}.popover .arrow:after{border-width:10px;content:""}.popover.top .arrow{bottom:-11px;left:50%;margin-left:-11px;border-top-color:#999;border-top-color:rgba(0,0,0,0.25);border-bottom-width:0}.popover.top .arrow:after{bottom:1px;margin-left:-10px;border-top-color:#fff;border-bottom-width:0}.popover.right .arrow{top:50%;left:-11px;margin-top:-11px;border-right-color:#999;border-right-color:rgba(0,0,0,0.25);border-left-width:0}.popover.right .arrow:after{bottom:-10px;left:1px;border-right-color:#fff;border-left-width:0}.popover.bottom .arrow{top:-11px;left:50%;margin-left:-11px;border-bottom-color:#999;border-bottom-color:rgba(0,0,0,0.25);border-top-width:0}.popover.bottom .arrow:after{top:1px;margin-left:-10px;border-bottom-color:#fff;border-top-width:0}.popover.left .arrow{top:50%;right:-11px;margin-top:-11px;border-left-color:#999;border-left-color:rgba(0,0,0,0.25);border-right-width:0}.popover.left .arrow:after{right:1px;bottom:-10px;border-left-color:#fff;border-right-width:0}.thumbnails{margin-left:-20px;list-style:none;*zoom:1}.thumbnails:before,.thumbnails:after{display:table;line-height:0;content:""}.thumbnails:after{clear:both}.row-fluid .thumbnails{margin-left:0}.thumbnails>li{float:left;margin-bottom:20px;margin-left:20px}.thumbnail{display:block;padding:4px;line-height:20px;border:1px solid #ddd;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.055);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.055);box-shadow:0 1px 3px rgba(0,0,0,0.055);-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;-o-transition:all .2s ease-in-out;transition:all .2s ease-in-out}a.thumbnail:hover,a.thumbnail:focus{border-color:#08c;-webkit-box-shadow:0 1px 4px rgba(0,105,214,0.25);-moz-box-shadow:0 1px 4px rgba(0,105,214,0.25);box-shadow:0 1px 4px rgba(0,105,214,0.25)}.thumbnail>img{display:block;max-width:100%;margin-right:auto;margin-left:auto}.thumbnail .caption{padding:9px;color:#555}.media,.media-body{overflow:hidden;*overflow:visible;zoom:1}.media,.media .media{margin-top:15px}.media:first-child{margin-top:0}.media-object{display:block}.media-heading{margin:0 0 5px}.media>.pull-left{margin-right:10px}.media>.pull-right{margin-left:10px}.media-list{margin-left:0;list-style:none}.label,.badge{display:inline-block;padding:2px 4px;font-size:11.844px;font-weight:bold;line-height:14px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);white-space:nowrap;vertical-align:baseline;background-color:#999}.label{-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.badge{padding-right:9px;padding-left:9px;-webkit-border-radius:9px;-moz-border-radius:9px;border-radius:9px}.label:empty,.badge:empty{display:none}a.label:hover,a.label:focus,a.badge:hover,a.badge:focus{color:#fff;text-decoration:none;cursor:pointer}.label-important,.badge-important{background-color:#b94a48}.label-important[href],.badge-important[href]{background-color:#953b39}.label-warning,.badge-warning{background-color:#f89406}.label-warning[href],.badge-warning[href]{background-color:#c67605}.label-success,.badge-success{background-color:#468847}.label-success[href],.badge-success[href]{background-color:#356635}.label-info,.badge-info{background-color:#3a87ad}.label-info[href],.badge-info[href]{background-color:#2d6987}.label-inverse,.badge-inverse{background-color:#333}.label-inverse[href],.badge-inverse[href]{background-color:#1a1a1a}.btn .label,.btn .badge{position:relative;top:-1px}.btn-mini .label,.btn-mini .badge{top:0}@-webkit-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-moz-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-ms-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-o-keyframes progress-bar-stripes{from{background-position:0 0}to{background-position:40px 0}}@keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}.progress{height:20px;margin-bottom:20px;overflow:hidden;background-color:#f7f7f7;background-image:-moz-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#f5f5f5),to(#f9f9f9));background-image:-webkit-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-o-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:linear-gradient(to bottom,#f5f5f5,#f9f9f9);background-repeat:repeat-x;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff5f5f5',endColorstr='#fff9f9f9',GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1)}.progress .bar{float:left;width:0;height:100%;font-size:12px;color:#fff;text-align:center;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e90d2;background-image:-moz-linear-gradient(top,#149bdf,#0480be);background-image:-webkit-gradient(linear,0 0,0 100%,from(#149bdf),to(#0480be));background-image:-webkit-linear-gradient(top,#149bdf,#0480be);background-image:-o-linear-gradient(top,#149bdf,#0480be);background-image:linear-gradient(to bottom,#149bdf,#0480be);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff149bdf',endColorstr='#ff0480be',GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;-webkit-transition:width .6s ease;-moz-transition:width .6s ease;-o-transition:width .6s ease;transition:width .6s ease}.progress .bar+.bar{-webkit-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15)}.progress-striped .bar{background-color:#149bdf;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);-webkit-background-size:40px 40px;-moz-background-size:40px 40px;-o-background-size:40px 40px;background-size:40px 40px}.progress.active .bar{-webkit-animation:progress-bar-stripes 2s linear infinite;-moz-animation:progress-bar-stripes 2s linear infinite;-ms-animation:progress-bar-stripes 2s linear infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite}.progress-danger .bar,.progress .bar-danger{background-color:#dd514c;background-image:-moz-linear-gradient(top,#ee5f5b,#c43c35);background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#c43c35));background-image:-webkit-linear-gradient(top,#ee5f5b,#c43c35);background-image:-o-linear-gradient(top,#ee5f5b,#c43c35);background-image:linear-gradient(to bottom,#ee5f5b,#c43c35);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffc43c35',GradientType=0)}.progress-danger.progress-striped .bar,.progress-striped .bar-danger{background-color:#ee5f5b;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-success .bar,.progress .bar-success{background-color:#5eb95e;background-image:-moz-linear-gradient(top,#62c462,#57a957);background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#57a957));background-image:-webkit-linear-gradient(top,#62c462,#57a957);background-image:-o-linear-gradient(top,#62c462,#57a957);background-image:linear-gradient(to bottom,#62c462,#57a957);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff57a957',GradientType=0)}.progress-success.progress-striped .bar,.progress-striped .bar-success{background-color:#62c462;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-info .bar,.progress .bar-info{background-color:#4bb1cf;background-image:-moz-linear-gradient(top,#5bc0de,#339bb9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#339bb9));background-image:-webkit-linear-gradient(top,#5bc0de,#339bb9);background-image:-o-linear-gradient(top,#5bc0de,#339bb9);background-image:linear-gradient(to bottom,#5bc0de,#339bb9);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff339bb9',GradientType=0)}.progress-info.progress-striped .bar,.progress-striped .bar-info{background-color:#5bc0de;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-warning .bar,.progress .bar-warning{background-color:#faa732;background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0)}.progress-warning.progress-striped .bar,.progress-striped .bar-warning{background-color:#fbb450;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.accordion{margin-bottom:20px}.accordion-group{margin-bottom:2px;border:1px solid #e5e5e5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.accordion-heading{border-bottom:0}.accordion-heading .accordion-toggle{display:block;padding:8px 15px}.accordion-toggle{cursor:pointer}.accordion-inner{padding:9px 15px;border-top:1px solid #e5e5e5}.carousel{position:relative;margin-bottom:20px;line-height:1}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel-inner>.item{position:relative;display:none;-webkit-transition:.6s ease-in-out left;-moz-transition:.6s ease-in-out left;-o-transition:.6s ease-in-out left;transition:.6s ease-in-out left}.carousel-inner>.item>img,.carousel-inner>.item>a>img{display:block;line-height:1}.carousel-inner>.active,.carousel-inner>.next,.carousel-inner>.prev{display:block}.carousel-inner>.active{left:0}.carousel-inner>.next,.carousel-inner>.prev{position:absolute;top:0;width:100%}.carousel-inner>.next{left:100%}.carousel-inner>.prev{left:-100%}.carousel-inner>.next.left,.carousel-inner>.prev.right{left:0}.carousel-inner>.active.left{left:-100%}.carousel-inner>.active.right{left:100%}.carousel-control{position:absolute;top:40%;left:15px;width:40px;height:40px;margin-top:-20px;font-size:60px;font-weight:100;line-height:30px;color:#fff;text-align:center;background:#222;border:3px solid #fff;-webkit-border-radius:23px;-moz-border-radius:23px;border-radius:23px;opacity:.5;filter:alpha(opacity=50)}.carousel-control.right{right:15px;left:auto}.carousel-control:hover,.carousel-control:focus{color:#fff;text-decoration:none;opacity:.9;filter:alpha(opacity=90)}.carousel-indicators{position:absolute;top:15px;right:15px;z-index:5;margin:0;list-style:none}.carousel-indicators li{display:block;float:left;width:10px;height:10px;margin-left:5px;text-indent:-999px;background-color:#ccc;background-color:rgba(255,255,255,0.25);border-radius:5px}.carousel-indicators .active{background-color:#fff}.carousel-caption{position:absolute;right:0;bottom:0;left:0;padding:15px;background:#333;background:rgba(0,0,0,0.75)}.carousel-caption h4,.carousel-caption p{line-height:20px;color:#fff}.carousel-caption h4{margin:0 0 5px}.carousel-caption p{margin-bottom:0}.hero-unit{padding:60px;margin-bottom:30px;font-size:18px;font-weight:200;line-height:30px;color:inherit;background-color:#eee;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.hero-unit h1{margin-bottom:0;font-size:60px;line-height:1;letter-spacing:-1px;color:inherit}.hero-unit li{line-height:30px}.pull-right{float:right}.pull-left{float:left}.hide{display:none}.show{display:block}.invisible{visibility:hidden}.affix{position:fixed}
diff --git a/core/src/main/resources/spark/ui/static/webui.css b/core/src/main/resources/spark/ui/static/webui.css
deleted file mode 100644
index f7537bb766..0000000000
--- a/core/src/main/resources/spark/ui/static/webui.css
+++ /dev/null
@@ -1,49 +0,0 @@
-.navbar .brand {
- height: 50px;
- width: 110px;
- margin-left: 1px;
- padding: 0;
-}
-
-.version {
- line-height: 30px;
- vertical-align: bottom;
- font-size: 12px;
- padding: 0;
- margin: 0;
- font-weight: bold;
- color: #777;
-}
-
-.navbar-inner {
- padding-top: 2px;
- height: 50px;
-}
-
-.navbar-inner .nav {
- margin-top: 5px;
- font-size: 15px;
-}
-
-
-#infolist {
- margin-left: 400px;
- margin-top: 14px;
-}
-
-#infolist li {
- display: inline;
- list-style-type: none;
- list-style-position: outside;
- padding-right: 20px;
- padding-top: 10px;
- padding-bottom: 10px;
-}
-
-.progress-cell {
- width: 134px;
- border-right: 0;
- padding: 0;
- padding-top: 7px;
- padding-left: 4px;
-}
diff --git a/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala b/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala
new file mode 100644
index 0000000000..f87460039b
--- /dev/null
+++ b/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred
+
+trait SparkHadoopMapRedUtil {
+ def newJobContext(conf: JobConf, jobId: JobID): JobContext = {
+ val klass = firstAvailableClass("org.apache.hadoop.mapred.JobContextImpl", "org.apache.hadoop.mapred.JobContext");
+ val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[org.apache.hadoop.mapreduce.JobID])
+ ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
+ }
+
+ def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = {
+ val klass = firstAvailableClass("org.apache.hadoop.mapred.TaskAttemptContextImpl", "org.apache.hadoop.mapred.TaskAttemptContext")
+ val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[TaskAttemptID])
+ ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
+ }
+
+ def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = {
+ new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, attemptId)
+ }
+
+ private def firstAvailableClass(first: String, second: String): Class[_] = {
+ try {
+ Class.forName(first)
+ } catch {
+ case e: ClassNotFoundException =>
+ Class.forName(second)
+ }
+ }
+}
diff --git a/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala
new file mode 100644
index 0000000000..93180307fa
--- /dev/null
+++ b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce
+
+import org.apache.hadoop.conf.Configuration
+import java.lang.{Integer => JInteger, Boolean => JBoolean}
+
+trait SparkHadoopMapReduceUtil {
+ def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
+ val klass = firstAvailableClass(
+ "org.apache.hadoop.mapreduce.task.JobContextImpl", // hadoop2, hadoop2-yarn
+ "org.apache.hadoop.mapreduce.JobContext") // hadoop1
+ val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
+ ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
+ }
+
+ def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
+ val klass = firstAvailableClass(
+ "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", // hadoop2, hadoop2-yarn
+ "org.apache.hadoop.mapreduce.TaskAttemptContext") // hadoop1
+ val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
+ ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
+ }
+
+ def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = {
+ val klass = Class.forName("org.apache.hadoop.mapreduce.TaskAttemptID");
+ try {
+ // first, attempt to use the old-style constructor that takes a boolean isMap (not available in YARN)
+ val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], classOf[Boolean],
+ classOf[Int], classOf[Int])
+ ctor.newInstance(jtIdentifier, new JInteger(jobId), new JBoolean(isMap), new JInteger(taskId), new
+ JInteger(attemptId)).asInstanceOf[TaskAttemptID]
+ } catch {
+ case exc: NoSuchMethodException => {
+ // failed, look for the new ctor that takes a TaskType (not available in 1.x)
+ val taskTypeClass = Class.forName("org.apache.hadoop.mapreduce.TaskType").asInstanceOf[Class[Enum[_]]]
+ val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke(taskTypeClass, if(isMap) "MAP" else "REDUCE")
+ val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass,
+ classOf[Int], classOf[Int])
+ ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId), new
+ JInteger(attemptId)).asInstanceOf[TaskAttemptID]
+ }
+ }
+ }
+
+ private def firstAvailableClass(first: String, second: String): Class[_] = {
+ try {
+ Class.forName(first)
+ } catch {
+ case e: ClassNotFoundException =>
+ Class.forName(second)
+ }
+ }
+}
diff --git a/core/src/main/scala/spark/Accumulators.scala b/core/src/main/scala/org/apache/spark/Accumulators.scala
index 6ff92ce833..6e922a612a 100644
--- a/core/src/main/scala/spark/Accumulators.scala
+++ b/core/src/main/scala/org/apache/spark/Accumulators.scala
@@ -15,12 +15,13 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.io._
import scala.collection.mutable.Map
import scala.collection.generic.Growable
+import org.apache.spark.serializer.JavaSerializer
/**
* A datatype that can be accumulated, i.e. has an commutative and associative "add" operation,
@@ -28,7 +29,7 @@ import scala.collection.generic.Growable
*
* You must define how to add data, and how to merge two of these together. For some datatypes,
* such as a counter, these might be the same operation. In that case, you can use the simpler
- * [[spark.Accumulator]]. They won't always be the same, though -- e.g., imagine you are
+ * [[org.apache.spark.Accumulator]]. They won't always be the same, though -- e.g., imagine you are
* accumulating a set. You will add items to the set, and you will union two sets together.
*
* @param initialValue initial value of accumulator
@@ -176,7 +177,7 @@ class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Ser
def zero(initialValue: R): R = {
// We need to clone initialValue, but it's hard to specify that R should also be Cloneable.
// Instead we'll serialize it to a buffer and load it back.
- val ser = (new spark.JavaSerializer).newInstance()
+ val ser = new JavaSerializer().newInstance()
val copy = ser.deserialize[R](ser.serialize(initialValue))
copy.clear() // In case it contained stuff
copy
@@ -184,7 +185,7 @@ class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Ser
}
/**
- * A simpler value of [[spark.Accumulable]] where the result type being accumulated is the same
+ * A simpler value of [[org.apache.spark.Accumulable]] where the result type being accumulated is the same
* as the types of elements being merged.
*
* @param initialValue initial value of accumulator
@@ -195,7 +196,7 @@ class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T])
extends Accumulable[T,T](initialValue, param)
/**
- * A simpler version of [[spark.AccumulableParam]] where the only datatype you can add in is the same type
+ * A simpler version of [[org.apache.spark.AccumulableParam]] where the only datatype you can add in is the same type
* as the accumulated value. An implicit AccumulatorParam object needs to be available when you create
* Accumulators of a specific type.
*
diff --git a/core/src/main/scala/spark/Aggregator.scala b/core/src/main/scala/org/apache/spark/Aggregator.scala
index 136b4da61e..3ef402926e 100644
--- a/core/src/main/scala/spark/Aggregator.scala
+++ b/core/src/main/scala/org/apache/spark/Aggregator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.util.{HashMap => JHashMap}
@@ -28,18 +28,18 @@ import scala.collection.JavaConversions._
* @param mergeCombiners function to merge outputs from multiple mergeValue function.
*/
case class Aggregator[K, V, C] (
- val createCombiner: V => C,
- val mergeValue: (C, V) => C,
- val mergeCombiners: (C, C) => C) {
+ createCombiner: V => C,
+ mergeValue: (C, V) => C,
+ mergeCombiners: (C, C) => C) {
- def combineValuesByKey(iter: Iterator[(K, V)]) : Iterator[(K, C)] = {
+ def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]) : Iterator[(K, C)] = {
val combiners = new JHashMap[K, C]
- for ((k, v) <- iter) {
- val oldC = combiners.get(k)
+ for (kv <- iter) {
+ val oldC = combiners.get(kv._1)
if (oldC == null) {
- combiners.put(k, createCombiner(v))
+ combiners.put(kv._1, createCombiner(kv._2))
} else {
- combiners.put(k, mergeValue(oldC, v))
+ combiners.put(kv._1, mergeValue(oldC, kv._2))
}
}
combiners.iterator
@@ -47,7 +47,7 @@ case class Aggregator[K, V, C] (
def combineCombinersByKey(iter: Iterator[(K, C)]) : Iterator[(K, C)] = {
val combiners = new JHashMap[K, C]
- for ((k, c) <- iter) {
+ iter.foreach { case(k, c) =>
val oldC = combiners.get(k)
if (oldC == null) {
combiners.put(k, c)
diff --git a/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala
index 8f6953b1f5..908ff56a6b 100644
--- a/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala
+++ b/core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala
@@ -15,21 +15,22 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
-import spark.executor.{ShuffleReadMetrics, TaskMetrics}
-import spark.serializer.Serializer
-import spark.storage.BlockManagerId
-import spark.util.CompletionIterator
+import org.apache.spark.executor.{ShuffleReadMetrics, TaskMetrics}
+import org.apache.spark.serializer.Serializer
+import org.apache.spark.storage.BlockManagerId
+import org.apache.spark.util.CompletionIterator
private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Logging {
- override def fetch[K, V](
- shuffleId: Int, reduceId: Int, metrics: TaskMetrics, serializer: Serializer) = {
+ override def fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, serializer: Serializer)
+ : Iterator[T] =
+ {
logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId))
val blockManager = SparkEnv.get.blockManager
@@ -49,12 +50,12 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin
(address, splits.map(s => ("shuffle_%d_%d_%d".format(shuffleId, s._1, reduceId), s._2)))
}
- def unpackBlock(blockPair: (String, Option[Iterator[Any]])) : Iterator[(K, V)] = {
+ def unpackBlock(blockPair: (String, Option[Iterator[Any]])) : Iterator[T] = {
val blockId = blockPair._1
val blockOption = blockPair._2
blockOption match {
case Some(block) => {
- block.asInstanceOf[Iterator[(K, V)]]
+ block.asInstanceOf[Iterator[T]]
}
case None => {
val regex = "shuffle_([0-9]*)_([0-9]*)_([0-9]*)".r
@@ -73,7 +74,7 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin
val blockFetcherItr = blockManager.getMultiple(blocksByAddress, serializer)
val itr = blockFetcherItr.flatMap(unpackBlock)
- CompletionIterator[(K,V), Iterator[(K,V)]](itr, {
+ CompletionIterator[T, Iterator[T]](itr, {
val shuffleMetrics = new ShuffleReadMetrics
shuffleMetrics.shuffleFinishTime = System.currentTimeMillis
shuffleMetrics.remoteFetchTime = blockFetcherItr.remoteFetchTime
diff --git a/core/src/main/scala/spark/CacheManager.scala b/core/src/main/scala/org/apache/spark/CacheManager.scala
index 81314805a9..e299a106ee 100644
--- a/core/src/main/scala/spark/CacheManager.scala
+++ b/core/src/main/scala/org/apache/spark/CacheManager.scala
@@ -15,10 +15,11 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import scala.collection.mutable.{ArrayBuffer, HashSet}
-import spark.storage.{BlockManager, StorageLevel}
+import org.apache.spark.storage.{BlockManager, StorageLevel}
+import org.apache.spark.rdd.RDD
/** Spark class responsible for passing RDDs split contents to the BlockManager and making
diff --git a/core/src/main/scala/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala
index d17e70a4fa..cc30105940 100644
--- a/core/src/main/scala/spark/Dependency.scala
+++ b/core/src/main/scala/org/apache/spark/Dependency.scala
@@ -15,7 +15,9 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
+
+import org.apache.spark.rdd.RDD
/**
* Base class for dependencies.
@@ -39,16 +41,15 @@ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) {
/**
* Represents a dependency on the output of a shuffle stage.
- * @param shuffleId the shuffle id
* @param rdd the parent RDD
* @param partitioner partitioner used to partition the shuffle output
* @param serializerClass class name of the serializer to use
*/
class ShuffleDependency[K, V](
- @transient rdd: RDD[(K, V)],
+ @transient rdd: RDD[_ <: Product2[K, V]],
val partitioner: Partitioner,
val serializerClass: String = null)
- extends Dependency(rdd) {
+ extends Dependency(rdd.asInstanceOf[RDD[Product2[K, V]]]) {
val shuffleId: Int = rdd.context.newShuffleId()
}
diff --git a/core/src/main/scala/spark/FetchFailedException.scala b/core/src/main/scala/org/apache/spark/FetchFailedException.scala
index a2dae6cae9..d242047502 100644
--- a/core/src/main/scala/spark/FetchFailedException.scala
+++ b/core/src/main/scala/org/apache/spark/FetchFailedException.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
-import spark.storage.BlockManagerId
+import org.apache.spark.storage.BlockManagerId
private[spark] class FetchFailedException(
taskEndReason: TaskEndReason,
diff --git a/core/src/main/scala/spark/HttpFileServer.scala b/core/src/main/scala/org/apache/spark/HttpFileServer.scala
index a13a7a2859..ad1ee20045 100644
--- a/core/src/main/scala/spark/HttpFileServer.scala
+++ b/core/src/main/scala/org/apache/spark/HttpFileServer.scala
@@ -15,10 +15,11 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.io.{File}
import com.google.common.io.Files
+import org.apache.spark.util.Utils
private[spark] class HttpFileServer extends Logging {
diff --git a/core/src/main/scala/spark/HttpServer.scala b/core/src/main/scala/org/apache/spark/HttpServer.scala
index c9dffbc631..cdfc9dd54e 100644
--- a/core/src/main/scala/spark/HttpServer.scala
+++ b/core/src/main/scala/org/apache/spark/HttpServer.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.io.File
import java.net.InetAddress
@@ -26,6 +26,7 @@ import org.eclipse.jetty.server.handler.DefaultHandler
import org.eclipse.jetty.server.handler.HandlerList
import org.eclipse.jetty.server.handler.ResourceHandler
import org.eclipse.jetty.util.thread.QueuedThreadPool
+import org.apache.spark.util.Utils
/**
* Exception type thrown by HttpServer when it is in the wrong state for an operation.
diff --git a/core/src/main/scala/spark/Logging.scala b/core/src/main/scala/org/apache/spark/Logging.scala
index 79b0362830..6a973ea495 100644
--- a/core/src/main/scala/spark/Logging.scala
+++ b/core/src/main/scala/org/apache/spark/Logging.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.slf4j.Logger
import org.slf4j.LoggerFactory
diff --git a/core/src/main/scala/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 2c417e31db..ae7cf2a893 100644
--- a/core/src/main/scala/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.io._
import java.util.zip.{GZIPInputStream, GZIPOutputStream}
@@ -30,9 +30,9 @@ import akka.remote._
import akka.util.Duration
-import spark.scheduler.MapStatus
-import spark.storage.BlockManagerId
-import spark.util.{MetadataCleaner, TimeStampedHashMap}
+import org.apache.spark.scheduler.MapStatus
+import org.apache.spark.storage.BlockManagerId
+import org.apache.spark.util.{Utils, MetadataCleaner, TimeStampedHashMap}
private[spark] sealed trait MapOutputTrackerMessage
@@ -64,11 +64,11 @@ private[spark] class MapOutputTracker extends Logging {
// Incremented every time a fetch fails so that client nodes know to clear
// their cache of map output locations if this happens.
- private var generation: Long = 0
- private val generationLock = new java.lang.Object
+ private var epoch: Long = 0
+ private val epochLock = new java.lang.Object
// Cache a serialized version of the output statuses for each shuffle to send them out faster
- var cacheGeneration = generation
+ var cacheEpoch = epoch
private val cachedSerializedStatuses = new TimeStampedHashMap[Int, Array[Byte]]
val metadataCleaner = new MetadataCleaner("MapOutputTracker", this.cleanup)
@@ -108,10 +108,10 @@ private[spark] class MapOutputTracker extends Logging {
def registerMapOutputs(
shuffleId: Int,
statuses: Array[MapStatus],
- changeGeneration: Boolean = false) {
+ changeEpoch: Boolean = false) {
mapStatuses.put(shuffleId, Array[MapStatus]() ++ statuses)
- if (changeGeneration) {
- incrementGeneration()
+ if (changeEpoch) {
+ incrementEpoch()
}
}
@@ -124,7 +124,7 @@ private[spark] class MapOutputTracker extends Logging {
array(mapId) = null
}
}
- incrementGeneration()
+ incrementEpoch()
} else {
throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID")
}
@@ -206,58 +206,58 @@ private[spark] class MapOutputTracker extends Logging {
trackerActor = null
}
- // Called on master to increment the generation number
- def incrementGeneration() {
- generationLock.synchronized {
- generation += 1
- logDebug("Increasing generation to " + generation)
+ // Called on master to increment the epoch number
+ def incrementEpoch() {
+ epochLock.synchronized {
+ epoch += 1
+ logDebug("Increasing epoch to " + epoch)
}
}
- // Called on master or workers to get current generation number
- def getGeneration: Long = {
- generationLock.synchronized {
- return generation
+ // Called on master or workers to get current epoch number
+ def getEpoch: Long = {
+ epochLock.synchronized {
+ return epoch
}
}
- // Called on workers to update the generation number, potentially clearing old outputs
- // because of a fetch failure. (Each Mesos task calls this with the latest generation
+ // Called on workers to update the epoch number, potentially clearing old outputs
+ // because of a fetch failure. (Each worker task calls this with the latest epoch
// number on the master at the time it was created.)
- def updateGeneration(newGen: Long) {
- generationLock.synchronized {
- if (newGen > generation) {
- logInfo("Updating generation to " + newGen + " and clearing cache")
+ def updateEpoch(newEpoch: Long) {
+ epochLock.synchronized {
+ if (newEpoch > epoch) {
+ logInfo("Updating epoch to " + newEpoch + " and clearing cache")
// mapStatuses = new TimeStampedHashMap[Int, Array[MapStatus]]
mapStatuses.clear()
- generation = newGen
+ epoch = newEpoch
}
}
}
def getSerializedLocations(shuffleId: Int): Array[Byte] = {
var statuses: Array[MapStatus] = null
- var generationGotten: Long = -1
- generationLock.synchronized {
- if (generation > cacheGeneration) {
+ var epochGotten: Long = -1
+ epochLock.synchronized {
+ if (epoch > cacheEpoch) {
cachedSerializedStatuses.clear()
- cacheGeneration = generation
+ cacheEpoch = epoch
}
cachedSerializedStatuses.get(shuffleId) match {
case Some(bytes) =>
return bytes
case None =>
statuses = mapStatuses(shuffleId)
- generationGotten = generation
+ epochGotten = epoch
}
}
// If we got here, we failed to find the serialized locations in the cache, so we pulled
// out a snapshot of the locations as "locs"; let's serialize and return that
val bytes = serializeStatuses(statuses)
logInfo("Size of output statuses for shuffle %d is %d bytes".format(shuffleId, bytes.length))
- // Add them into the table only if the generation hasn't changed while we were working
- generationLock.synchronized {
- if (generation == generationGotten) {
+ // Add them into the table only if the epoch hasn't changed while we were working
+ epochLock.synchronized {
+ if (epoch == epochGotten) {
cachedSerializedStatuses(shuffleId) = bytes
}
}
diff --git a/core/src/main/scala/spark/Partition.scala b/core/src/main/scala/org/apache/spark/Partition.scala
index 2a4edcec98..87914a061f 100644
--- a/core/src/main/scala/spark/Partition.scala
+++ b/core/src/main/scala/org/apache/spark/Partition.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
/**
* A partition of an RDD.
diff --git a/core/src/main/scala/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala
index 660af70d52..0e2c987a59 100644
--- a/core/src/main/scala/spark/Partitioner.scala
+++ b/core/src/main/scala/org/apache/spark/Partitioner.scala
@@ -15,7 +15,10 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
+
+import org.apache.spark.util.Utils
+import org.apache.spark.rdd.RDD
/**
* An object that defines how the elements in a key-value pair RDD are partitioned by key.
@@ -56,7 +59,7 @@ object Partitioner {
}
/**
- * A [[spark.Partitioner]] that implements hash-based partitioning using Java's `Object.hashCode`.
+ * A [[org.apache.spark.Partitioner]] that implements hash-based partitioning using Java's `Object.hashCode`.
*
* Java arrays have hashCodes that are based on the arrays' identities rather than their contents,
* so attempting to partition an RDD[Array[_]] or RDD[(Array[_], _)] using a HashPartitioner will
@@ -65,17 +68,9 @@ object Partitioner {
class HashPartitioner(partitions: Int) extends Partitioner {
def numPartitions = partitions
- def getPartition(key: Any): Int = {
- if (key == null) {
- return 0
- } else {
- val mod = key.hashCode % partitions
- if (mod < 0) {
- mod + partitions
- } else {
- mod // Guard against negative hash codes
- }
- }
+ def getPartition(key: Any): Int = key match {
+ case null => 0
+ case _ => Utils.nonNegativeMod(key.hashCode, numPartitions)
}
override def equals(other: Any): Boolean = other match {
@@ -87,12 +82,12 @@ class HashPartitioner(partitions: Int) extends Partitioner {
}
/**
- * A [[spark.Partitioner]] that partitions sortable records by range into roughly equal ranges.
+ * A [[org.apache.spark.Partitioner]] that partitions sortable records by range into roughly equal ranges.
* Determines the ranges by sampling the RDD passed in.
*/
class RangePartitioner[K <% Ordered[K]: ClassManifest, V](
partitions: Int,
- @transient rdd: RDD[(K,V)],
+ @transient rdd: RDD[_ <: Product2[K,V]],
private val ascending: Boolean = true)
extends Partitioner {
diff --git a/core/src/main/scala/spark/SerializableWritable.scala b/core/src/main/scala/org/apache/spark/SerializableWritable.scala
index 0236611ef9..fdd4c24e23 100644
--- a/core/src/main/scala/spark/SerializableWritable.scala
+++ b/core/src/main/scala/org/apache/spark/SerializableWritable.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.io._
import org.apache.hadoop.io.ObjectWritable
import org.apache.hadoop.io.Writable
-import org.apache.hadoop.mapred.JobConf
+import org.apache.hadoop.conf.Configuration
class SerializableWritable[T <: Writable](@transient var t: T) extends Serializable {
def value = t
@@ -35,7 +35,7 @@ class SerializableWritable[T <: Writable](@transient var t: T) extends Serializa
private def readObject(in: ObjectInputStream) {
in.defaultReadObject()
val ow = new ObjectWritable()
- ow.setConf(new JobConf())
+ ow.setConf(new Configuration())
ow.readFields(in)
t = ow.get().asInstanceOf[T]
}
diff --git a/core/src/main/scala/spark/ShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/ShuffleFetcher.scala
index dcced035e7..307c383a89 100644
--- a/core/src/main/scala/spark/ShuffleFetcher.scala
+++ b/core/src/main/scala/org/apache/spark/ShuffleFetcher.scala
@@ -15,19 +15,20 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
-import spark.executor.TaskMetrics
-import spark.serializer.Serializer
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.serializer.Serializer
private[spark] abstract class ShuffleFetcher {
+
/**
* Fetch the shuffle outputs for a given ShuffleDependency.
* @return An iterator over the elements of the fetched shuffle outputs.
*/
- def fetch[K, V](shuffleId: Int, reduceId: Int, metrics: TaskMetrics,
- serializer: Serializer = SparkEnv.get.serializerManager.default): Iterator[(K,V)]
+ def fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics,
+ serializer: Serializer = SparkEnv.get.serializerManager.default): Iterator[T]
/** Stop the fetcher */
def stop() {}
diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 46b9935cb7..faf0c2362a 100644
--- a/core/src/main/scala/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -15,23 +15,19 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.io._
import java.net.URI
import java.util.Properties
-import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.atomic.AtomicInteger
-import scala.collection.JavaConversions._
import scala.collection.Map
import scala.collection.generic.Growable
-import scala.collection.mutable.HashMap
import scala.collection.JavaConversions._
+import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.HashMap
import scala.util.DynamicVariable
-import scala.collection.mutable.{ConcurrentMap, HashMap}
-
-import akka.actor.Actor._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
@@ -53,20 +49,23 @@ import org.apache.hadoop.mapred.TextInputFormat
import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
import org.apache.hadoop.mapreduce.{Job => NewHadoopJob}
import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFormat}
-import org.apache.hadoop.security.UserGroupInformation
import org.apache.mesos.MesosNativeLibrary
-import spark.deploy.{LocalSparkCluster, SparkHadoopUtil}
-import spark.partial.{ApproximateEvaluator, PartialResult}
-import spark.rdd.{CheckpointRDD, HadoopRDD, NewHadoopRDD, UnionRDD, ParallelCollectionRDD}
-import spark.scheduler.{DAGScheduler, ResultTask, ShuffleMapTask, SparkListener, SplitInfo, Stage, StageInfo, TaskScheduler}
-import spark.scheduler.cluster.{StandaloneSchedulerBackend, SparkDeploySchedulerBackend, ClusterScheduler}
-import spark.scheduler.local.LocalScheduler
-import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend}
-import spark.storage.{StorageStatus, StorageUtils, RDDInfo}
-import spark.util.{MetadataCleaner, TimeStampedHashMap}
-import ui.{SparkUI}
+import org.apache.spark.deploy.LocalSparkCluster
+import org.apache.spark.partial.{ApproximateEvaluator, PartialResult}
+import org.apache.spark.rdd._
+import org.apache.spark.scheduler._
+import org.apache.spark.scheduler.cluster.{StandaloneSchedulerBackend, SparkDeploySchedulerBackend,
+ ClusterScheduler, Schedulable, SchedulingMode}
+import org.apache.spark.scheduler.local.LocalScheduler
+import org.apache.spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend}
+import org.apache.spark.storage.{StorageUtils, BlockManagerSource}
+import org.apache.spark.ui.SparkUI
+import org.apache.spark.util.{ClosureCleaner, Utils, MetadataCleaner, TimeStampedHashMap}
+import org.apache.spark.scheduler.StageInfo
+import org.apache.spark.storage.RDDInfo
+import org.apache.spark.storage.StorageStatus
/**
* Main entry point for Spark functionality. A SparkContext represents the connection to a Spark
@@ -101,7 +100,7 @@ class SparkContext(
System.setProperty("spark.driver.port", "0")
}
- private val isLocal = (master == "local" || master.startsWith("local["))
+ val isLocal = (master == "local" || master.startsWith("local["))
// Create the Spark execution environment (cache, map output tracker, etc)
private[spark] val env = SparkEnv.createFromSystemProperties(
@@ -124,6 +123,8 @@ class SparkContext(
private[spark] val ui = new SparkUI(this)
ui.bind()
+ val startTime = System.currentTimeMillis()
+
// Add each JAR given through the constructor
if (jars != null) {
jars.foreach { addJar(_) }
@@ -235,7 +236,8 @@ class SparkContext(
/** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */
val hadoopConfiguration = {
- val conf = SparkHadoopUtil.newConfiguration()
+ val env = SparkEnv.get
+ val conf = env.hadoop.newConfiguration()
// Explicitly check for S3 environment variables
if (System.getenv("AWS_ACCESS_KEY_ID") != null && System.getenv("AWS_SECRET_ACCESS_KEY") != null) {
conf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID"))
@@ -261,15 +263,35 @@ class SparkContext(
localProperties.value = new Properties()
}
- def addLocalProperties(key: String, value: String) {
- if(localProperties.value == null) {
+ def setLocalProperty(key: String, value: String) {
+ if (localProperties.value == null) {
localProperties.value = new Properties()
}
- localProperties.value.setProperty(key,value)
+ if (value == null) {
+ localProperties.value.remove(key)
+ } else {
+ localProperties.value.setProperty(key, value)
+ }
+ }
+
+ /** Set a human readable description of the current job. */
+ def setJobDescription(value: String) {
+ setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, value)
}
+
// Post init
taskScheduler.postStartHook()
+ val dagSchedulerSource = new DAGSchedulerSource(this.dagScheduler)
+ val blockManagerSource = new BlockManagerSource(SparkEnv.get.blockManager)
+
+ def initDriverMetrics() {
+ SparkEnv.get.metricsSystem.registerSource(dagSchedulerSource)
+ SparkEnv.get.metricsSystem.registerSource(blockManagerSource)
+ }
+
+ initDriverMetrics()
+
// Methods for creating RDDs
/** Distribute a local Scala collection to form an RDD. */
@@ -470,14 +492,14 @@ class SparkContext(
// Methods for creating shared variables
/**
- * Create an [[spark.Accumulator]] variable of a given type, which tasks can "add" values
+ * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add" values
* to using the `+=` method. Only the driver can access the accumulator's `value`.
*/
def accumulator[T](initialValue: T)(implicit param: AccumulatorParam[T]) =
new Accumulator(initialValue, param)
/**
- * Create an [[spark.Accumulable]] shared variable, to which tasks can add values with `+=`.
+ * Create an [[org.apache.spark.Accumulable]] shared variable, to which tasks can add values with `+=`.
* Only the driver can access the accumuable's `value`.
* @tparam T accumulator type
* @tparam R type that can be added to the accumulator
@@ -497,7 +519,7 @@ class SparkContext(
}
/**
- * Broadcast a read-only variable to the cluster, returning a [[spark.broadcast.Broadcast]] object for
+ * Broadcast a read-only variable to the cluster, returning a [[org.apache.spark.broadcast.Broadcast]] object for
* reading it in distributed functions. The variable will be sent to each cluster only once.
*/
def broadcast[T](value: T) = env.broadcastManager.newBroadcast[T](value, isLocal)
@@ -525,7 +547,7 @@ class SparkContext(
}
def addSparkListener(listener: SparkListener) {
- dagScheduler.sparkListeners += listener
+ dagScheduler.addSparkListener(listener)
}
/**
@@ -546,6 +568,12 @@ class SparkContext(
StorageUtils.rddInfoFromStorageStatus(getExecutorStorageStatus, this)
}
+ /**
+ * Returns an immutable map of RDDs that have marked themselves as persistent via cache() call.
+ * Note that this does not necessarily mean the caching or computation was successful.
+ */
+ def getPersistentRDDs: Map[Int, RDD[_]] = persistentRdds.toMap
+
def getStageInfo: Map[Stage,StageInfo] = {
dagScheduler.stageToInfos
}
@@ -558,6 +586,28 @@ class SparkContext(
}
/**
+ * Return pools for fair scheduler
+ * TODO(xiajunluan): We should take nested pools into account
+ */
+ def getAllPools: ArrayBuffer[Schedulable] = {
+ taskScheduler.rootPool.schedulableQueue
+ }
+
+ /**
+ * Return the pool associated with the given name, if one exists
+ */
+ def getPoolForName(pool: String): Option[Schedulable] = {
+ taskScheduler.rootPool.schedulableNameToSchedulable.get(pool)
+ }
+
+ /**
+ * Return current scheduling mode
+ */
+ def getSchedulingMode: SchedulingMode.SchedulingMode = {
+ taskScheduler.schedulingMode
+ }
+
+ /**
* Clear the job's list of files added by `addFile` so that they do not get downloaded to
* any new nodes.
*/
@@ -566,6 +616,16 @@ class SparkContext(
}
/**
+ * Gets the locality information associated with the partition in a particular rdd
+ * @param rdd of interest
+ * @param partition to be looked up for locality
+ * @return list of preferred locations for the partition
+ */
+ private [spark] def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = {
+ dagScheduler.getPreferredLocs(rdd, partition)
+ }
+
+ /**
* Adds a JAR dependency for all tasks to be executed on this SparkContext in the future.
* The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported
* filesystems), or an HTTP, HTTPS or FTP URI.
@@ -575,9 +635,15 @@ class SparkContext(
logWarning("null specified as parameter to addJar",
new SparkException("null specified as parameter to addJar"))
} else {
+ val env = SparkEnv.get
val uri = new URI(path)
val key = uri.getScheme match {
- case null | "file" => env.httpFileServer.addJar(new File(uri.getPath))
+ case null | "file" =>
+ if (env.hadoop.isYarnMode()) {
+ logWarning("local jar specified as parameter to addJar under Yarn mode")
+ return
+ }
+ env.httpFileServer.addJar(new File(uri.getPath))
case _ => path
}
addedJars(key) = System.currentTimeMillis
@@ -756,8 +822,9 @@ class SparkContext(
* prevent accidental overriding of checkpoint files in the existing directory.
*/
def setCheckpointDir(dir: String, useExisting: Boolean = false) {
+ val env = SparkEnv.get
val path = new Path(dir)
- val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration())
+ val fs = path.getFileSystem(env.hadoop.newConfiguration())
if (!useExisting) {
if (fs.exists(path)) {
throw new Exception("Checkpoint directory '" + path + "' already exists.")
@@ -774,11 +841,11 @@ class SparkContext(
/** Default min number of partitions for Hadoop RDDs when not given by user */
def defaultMinSplits: Int = math.min(defaultParallelism, 2)
- private var nextShuffleId = new AtomicInteger(0)
+ private val nextShuffleId = new AtomicInteger(0)
private[spark] def newShuffleId(): Int = nextShuffleId.getAndIncrement()
- private var nextRddId = new AtomicInteger(0)
+ private val nextRddId = new AtomicInteger(0)
/** Register a new RDD, returning its RDD ID */
private[spark] def newRddId(): Int = nextRddId.getAndIncrement()
@@ -794,6 +861,7 @@ class SparkContext(
* various Spark features.
*/
object SparkContext {
+ val SPARK_JOB_DESCRIPTION = "spark.job.description"
implicit object DoubleAccumulatorParam extends AccumulatorParam[Double] {
def addInPlace(t1: Double, t2: Double): Double = t1 + t2
@@ -826,7 +894,7 @@ object SparkContext {
implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest](
rdd: RDD[(K, V)]) =
- new OrderedRDDFunctions(rdd)
+ new OrderedRDDFunctions[K, V, (K, V)](rdd)
implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = new DoubleRDDFunctions(rdd)
@@ -911,7 +979,6 @@ object SparkContext {
}
}
-
/**
* A class encapsulating how to convert some type T to Writable. It stores both the Writable class
* corresponding to T (e.g. IntWritable for Int) and a function for doing the conversion.
@@ -923,3 +990,4 @@ private[spark] class WritableConverter[T](
val writableClass: ClassManifest[T] => Class[_ <: Writable],
val convert: Writable => T)
extends Serializable
+
diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala
index f2bdc11bdb..478e5a0aaf 100644
--- a/core/src/main/scala/spark/SparkEnv.scala
+++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import collection.mutable
import serializer.Serializer
@@ -23,13 +23,14 @@ import serializer.Serializer
import akka.actor.{Actor, ActorRef, Props, ActorSystemImpl, ActorSystem}
import akka.remote.RemoteActorRefProvider
-import spark.broadcast.BroadcastManager
-import spark.storage.BlockManager
-import spark.storage.BlockManagerMaster
-import spark.network.ConnectionManager
-import spark.serializer.{Serializer, SerializerManager}
-import spark.util.AkkaUtils
-import spark.api.python.PythonWorkerFactory
+import org.apache.spark.broadcast.BroadcastManager
+import org.apache.spark.metrics.MetricsSystem
+import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.storage.{BlockManagerMasterActor, BlockManager, BlockManagerMaster}
+import org.apache.spark.network.ConnectionManager
+import org.apache.spark.serializer.{Serializer, SerializerManager}
+import org.apache.spark.util.{Utils, AkkaUtils}
+import org.apache.spark.api.python.PythonWorkerFactory
/**
@@ -53,13 +54,23 @@ class SparkEnv (
val connectionManager: ConnectionManager,
val httpFileServer: HttpFileServer,
val sparkFilesDir: String,
- // To be set only as part of initialization of SparkContext.
- // (executorId, defaultHostPort) => executorHostPort
- // If executorId is NOT found, return defaultHostPort
- var executorIdToHostPort: Option[(String, String) => String]) {
+ val metricsSystem: MetricsSystem) {
private val pythonWorkers = mutable.HashMap[(String, Map[String, String]), PythonWorkerFactory]()
+ val hadoop = {
+ val yarnMode = java.lang.Boolean.valueOf(System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE")))
+ if(yarnMode) {
+ try {
+ Class.forName("spark.deploy.yarn.YarnSparkHadoopUtil").newInstance.asInstanceOf[SparkHadoopUtil]
+ } catch {
+ case th: Throwable => throw new SparkException("Unable to load YARN support", th)
+ }
+ } else {
+ new SparkHadoopUtil
+ }
+ }
+
def stop() {
pythonWorkers.foreach { case(key, worker) => worker.stop() }
httpFileServer.stop()
@@ -68,6 +79,7 @@ class SparkEnv (
broadcastManager.stop()
blockManager.stop()
blockManager.master.stop()
+ metricsSystem.stop()
actorSystem.shutdown()
// Unfortunately Akka's awaitTermination doesn't actually wait for the Netty server to shut
// down, but let's call it anyway in case it gets fixed in a later release
@@ -80,27 +92,30 @@ class SparkEnv (
pythonWorkers.getOrElseUpdate(key, new PythonWorkerFactory(pythonExec, envVars)).create()
}
}
-
- def resolveExecutorIdToHostPort(executorId: String, defaultHostPort: String): String = {
- val env = SparkEnv.get
- if (env.executorIdToHostPort.isEmpty) {
- // default to using host, not host port. Relevant to non cluster modes.
- return defaultHostPort
- }
-
- env.executorIdToHostPort.get(executorId, defaultHostPort)
- }
}
object SparkEnv extends Logging {
private val env = new ThreadLocal[SparkEnv]
+ @volatile private var lastSetSparkEnv : SparkEnv = _
def set(e: SparkEnv) {
+ lastSetSparkEnv = e
env.set(e)
}
+ /**
+ * Returns the ThreadLocal SparkEnv, if non-null. Else returns the SparkEnv
+ * previously set in any thread.
+ */
def get: SparkEnv = {
- env.get()
+ Option(env.get()).getOrElse(lastSetSparkEnv)
+ }
+
+ /**
+ * Returns the ThreadLocal SparkEnv.
+ */
+ def getThreadLocal : SparkEnv = {
+ env.get()
}
def createFromSystemProperties(
@@ -140,10 +155,10 @@ object SparkEnv extends Logging {
val serializerManager = new SerializerManager
val serializer = serializerManager.setDefault(
- System.getProperty("spark.serializer", "spark.JavaSerializer"))
+ System.getProperty("spark.serializer", "org.apache.spark.serializer.JavaSerializer"))
val closureSerializer = serializerManager.get(
- System.getProperty("spark.closure.serializer", "spark.JavaSerializer"))
+ System.getProperty("spark.closure.serializer", "org.apache.spark.serializer.JavaSerializer"))
def registerOrLookup(name: String, newActor: => Actor): ActorRef = {
if (isDriver) {
@@ -161,7 +176,7 @@ object SparkEnv extends Logging {
val blockManagerMaster = new BlockManagerMaster(registerOrLookup(
"BlockManagerMaster",
- new spark.storage.BlockManagerMasterActor(isLocal)))
+ new BlockManagerMasterActor(isLocal)))
val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster, serializer)
val connectionManager = blockManager.connectionManager
@@ -178,12 +193,19 @@ object SparkEnv extends Logging {
new MapOutputTrackerActor(mapOutputTracker))
val shuffleFetcher = instantiateClass[ShuffleFetcher](
- "spark.shuffle.fetcher", "spark.BlockStoreShuffleFetcher")
+ "spark.shuffle.fetcher", "org.apache.spark.BlockStoreShuffleFetcher")
val httpFileServer = new HttpFileServer()
httpFileServer.initialize()
System.setProperty("spark.fileserver.uri", httpFileServer.serverUri)
+ val metricsSystem = if (isDriver) {
+ MetricsSystem.createMetricsSystem("driver")
+ } else {
+ MetricsSystem.createMetricsSystem("executor")
+ }
+ metricsSystem.start()
+
// Set the sparkFiles directory, used when downloading dependencies. In local mode,
// this is a temporary directory; in distributed mode, this is the executor's current working
// directory.
@@ -213,6 +235,6 @@ object SparkEnv extends Logging {
connectionManager,
httpFileServer,
sparkFilesDir,
- None)
+ metricsSystem)
}
}
diff --git a/core/src/main/scala/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala
index b7045eea63..d34e47e8ca 100644
--- a/core/src/main/scala/spark/SparkException.scala
+++ b/core/src/main/scala/org/apache/spark/SparkException.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
class SparkException(message: String, cause: Throwable)
extends Exception(message, cause) {
diff --git a/core/src/main/scala/spark/SparkFiles.java b/core/src/main/scala/org/apache/spark/SparkFiles.java
index f9b3f7965e..af9cf85e37 100644
--- a/core/src/main/scala/spark/SparkFiles.java
+++ b/core/src/main/scala/org/apache/spark/SparkFiles.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark;
+package org.apache.spark;
import java.io.File;
diff --git a/core/src/main/scala/spark/HadoopWriter.scala b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala
index b1fe0075a3..2bab9d6e3d 100644
--- a/core/src/main/scala/spark/HadoopWriter.scala
+++ b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala
@@ -25,8 +25,8 @@ import java.text.NumberFormat
import java.io.IOException
import java.util.Date
-import spark.Logging
-import spark.SerializableWritable
+import org.apache.spark.Logging
+import org.apache.spark.SerializableWritable
/**
* Internal helper class that saves an RDD using a Hadoop OutputFormat. This is only public
@@ -36,7 +36,7 @@ import spark.SerializableWritable
* Saves the RDD using a JobConf, which should contain an output key class, an output value class,
* a filename to write to, etc, exactly like in a Hadoop MapReduce job.
*/
-class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRedUtil with Serializable {
+class SparkHadoopWriter(@transient jobConf: JobConf) extends Logging with SparkHadoopMapRedUtil with Serializable {
private val now = new Date()
private val conf = new SerializableWritable(jobConf)
@@ -165,7 +165,7 @@ class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRe
splitID = splitid
attemptID = attemptid
- jID = new SerializableWritable[JobID](HadoopWriter.createJobID(now, jobid))
+ jID = new SerializableWritable[JobID](SparkHadoopWriter.createJobID(now, jobid))
taID = new SerializableWritable[TaskAttemptID](
new TaskAttemptID(new TaskID(jID.value, true, splitID), attemptID))
}
@@ -179,7 +179,7 @@ class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRe
}
}
-object HadoopWriter {
+object SparkHadoopWriter {
def createJobID(time: Date, id: Int): JobID = {
val formatter = new SimpleDateFormat("yyyyMMddHHmm")
val jobtrackerID = formatter.format(new Date())
diff --git a/core/src/main/scala/spark/TaskContext.scala b/core/src/main/scala/org/apache/spark/TaskContext.scala
index b79f4ca813..b2dd668330 100644
--- a/core/src/main/scala/spark/TaskContext.scala
+++ b/core/src/main/scala/org/apache/spark/TaskContext.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import executor.TaskMetrics
import scala.collection.mutable.ArrayBuffer
diff --git a/core/src/main/scala/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala
index 3ad665da34..03bf268863 100644
--- a/core/src/main/scala/spark/TaskEndReason.scala
+++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
-import spark.executor.TaskMetrics
-import spark.storage.BlockManagerId
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.storage.BlockManagerId
/**
* Various possible reasons why a task ended. The low-level TaskScheduler is supposed to retry
diff --git a/core/src/main/scala/spark/TaskState.scala b/core/src/main/scala/org/apache/spark/TaskState.scala
index 9df7d8277b..19ce8369d9 100644
--- a/core/src/main/scala/spark/TaskState.scala
+++ b/core/src/main/scala/org/apache/spark/TaskState.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.apache.mesos.Protos.{TaskState => MesosTaskState}
@@ -24,9 +24,11 @@ private[spark] object TaskState
val LAUNCHING, RUNNING, FINISHED, FAILED, KILLED, LOST = Value
+ val FINISHED_STATES = Set(FINISHED, FAILED, KILLED, LOST)
+
type TaskState = Value
- def isFinished(state: TaskState) = Seq(FINISHED, FAILED, LOST).contains(state)
+ def isFinished(state: TaskState) = FINISHED_STATES.contains(state)
def toMesos(state: TaskState): MesosTaskState = state match {
case LAUNCHING => MesosTaskState.TASK_STARTING
diff --git a/core/src/main/scala/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
index 392556f261..5fd1fab580 100644
--- a/core/src/main/scala/spark/api/java/JavaDoubleRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
@@ -15,16 +15,16 @@
* limitations under the License.
*/
-package spark.api.java
-
-import spark.RDD
-import spark.SparkContext.doubleRDDToDoubleRDDFunctions
-import spark.api.java.function.{Function => JFunction}
-import spark.util.StatCounter
-import spark.partial.{BoundedDouble, PartialResult}
-import spark.storage.StorageLevel
+package org.apache.spark.api.java
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext.doubleRDDToDoubleRDDFunctions
+import org.apache.spark.api.java.function.{Function => JFunction}
+import org.apache.spark.util.StatCounter
+import org.apache.spark.partial.{BoundedDouble, PartialResult}
+import org.apache.spark.storage.StorageLevel
import java.lang.Double
-import spark.Partitioner
+import org.apache.spark.Partitioner
class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, JavaDoubleRDD] {
@@ -115,33 +115,48 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, Jav
// Double RDD functions
- /** Return the sum of the elements in this RDD. */
+ /** Add up the elements in this RDD. */
def sum(): Double = srdd.sum()
- /** Return a [[spark.StatCounter]] describing the elements in this RDD. */
+ /**
+ * Return a [[org.apache.spark.util.StatCounter]] object that captures the mean, variance and count
+ * of the RDD's elements in one operation.
+ */
def stats(): StatCounter = srdd.stats()
- /** Return the mean of the elements in this RDD. */
+ /** Compute the mean of this RDD's elements. */
def mean(): Double = srdd.mean()
- /** Return the variance of the elements in this RDD. */
+ /** Compute the variance of this RDD's elements. */
def variance(): Double = srdd.variance()
- /** Return the standard deviation of the elements in this RDD. */
+ /** Compute the standard deviation of this RDD's elements. */
def stdev(): Double = srdd.stdev()
+ /**
+ * Compute the sample standard deviation of this RDD's elements (which corrects for bias in
+ * estimating the standard deviation by dividing by N-1 instead of N).
+ */
+ def sampleStdev(): Double = srdd.sampleStdev()
+
+ /**
+ * Compute the sample variance of this RDD's elements (which corrects for bias in
+ * estimating the standard variance by dividing by N-1 instead of N).
+ */
+ def sampleVariance(): Double = srdd.sampleVariance()
+
/** Return the approximate mean of the elements in this RDD. */
def meanApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble] =
srdd.meanApprox(timeout, confidence)
- /** Return the approximate mean of the elements in this RDD. */
+ /** (Experimental) Approximate operation to return the mean within a timeout. */
def meanApprox(timeout: Long): PartialResult[BoundedDouble] = srdd.meanApprox(timeout)
- /** Return the approximate sum of the elements in this RDD. */
+ /** (Experimental) Approximate operation to return the sum within a timeout. */
def sumApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble] =
srdd.sumApprox(timeout, confidence)
-
- /** Return the approximate sum of the elements in this RDD. */
+
+ /** (Experimental) Approximate operation to return the sum within a timeout. */
def sumApprox(timeout: Long): PartialResult[BoundedDouble] = srdd.sumApprox(timeout)
}
diff --git a/core/src/main/scala/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index ccc511dc5f..a6518abf45 100644
--- a/core/src/main/scala/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java
+package org.apache.spark.api.java
import java.util.{List => JList}
import java.util.Comparator
@@ -23,23 +23,25 @@ import java.util.Comparator
import scala.Tuple2
import scala.collection.JavaConversions._
+import com.google.common.base.Optional
import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.OutputFormat
import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat}
import org.apache.hadoop.conf.Configuration
-import spark.api.java.function.{Function2 => JFunction2}
-import spark.api.java.function.{Function => JFunction}
-import spark.partial.BoundedDouble
-import spark.partial.PartialResult
-import spark.OrderedRDDFunctions
-import spark.storage.StorageLevel
-import spark.HashPartitioner
-import spark.Partitioner
-import spark.Partitioner._
-import spark.RDD
-import spark.SparkContext.rddToPairRDDFunctions
+import org.apache.spark.HashPartitioner
+import org.apache.spark.Partitioner
+import org.apache.spark.Partitioner._
+import org.apache.spark.SparkContext.rddToPairRDDFunctions
+import org.apache.spark.api.java.function.{Function2 => JFunction2}
+import org.apache.spark.api.java.function.{Function => JFunction}
+import org.apache.spark.partial.BoundedDouble
+import org.apache.spark.partial.PartialResult
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.OrderedRDDFunctions
+import org.apache.spark.storage.StorageLevel
+
class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManifest[K],
implicit val vManifest: ClassManifest[V]) extends JavaRDDLike[(K, V), JavaPairRDD[K, V]] {
@@ -252,11 +254,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif
fromRDD(rdd.subtract(other, p))
/**
- * Return a copy of the RDD partitioned using the specified partitioner. If `mapSideCombine`
- * is true, Spark will group values of the same key together on the map side before the
- * repartitioning, to only send each key over the network once. If a large number of
- * duplicated keys are expected, and the size of the keys are large, `mapSideCombine` should
- * be set to true.
+ * Return a copy of the RDD partitioned using the specified partitioner.
*/
def partitionBy(partitioner: Partitioner): JavaPairRDD[K, V] =
fromRDD(rdd.partitionBy(partitioner))
@@ -276,8 +274,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif
* partition the output RDD.
*/
def leftOuterJoin[W](other: JavaPairRDD[K, W], partitioner: Partitioner)
- : JavaPairRDD[K, (V, Option[W])] =
- fromRDD(rdd.leftOuterJoin(other, partitioner))
+ : JavaPairRDD[K, (V, Optional[W])] = {
+ val joinResult = rdd.leftOuterJoin(other, partitioner)
+ fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))})
+ }
/**
* Perform a right outer join of `this` and `other`. For each element (k, w) in `other`, the
@@ -286,8 +286,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif
* partition the output RDD.
*/
def rightOuterJoin[W](other: JavaPairRDD[K, W], partitioner: Partitioner)
- : JavaPairRDD[K, (Option[V], W)] =
- fromRDD(rdd.rightOuterJoin(other, partitioner))
+ : JavaPairRDD[K, (Optional[V], W)] = {
+ val joinResult = rdd.rightOuterJoin(other, partitioner)
+ fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)})
+ }
/**
* Simplified version of combineByKey that hash-partitions the resulting RDD using the existing
@@ -340,8 +342,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif
* pair (k, (v, None)) if no elements in `other` have key k. Hash-partitions the output
* using the existing partitioner/parallelism level.
*/
- def leftOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (V, Option[W])] =
- fromRDD(rdd.leftOuterJoin(other))
+ def leftOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (V, Optional[W])] = {
+ val joinResult = rdd.leftOuterJoin(other)
+ fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))})
+ }
/**
* Perform a left outer join of `this` and `other`. For each element (k, v) in `this`, the
@@ -349,8 +353,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif
* pair (k, (v, None)) if no elements in `other` have key k. Hash-partitions the output
* into `numPartitions` partitions.
*/
- def leftOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (V, Option[W])] =
- fromRDD(rdd.leftOuterJoin(other, numPartitions))
+ def leftOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (V, Optional[W])] = {
+ val joinResult = rdd.leftOuterJoin(other, numPartitions)
+ fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))})
+ }
/**
* Perform a right outer join of `this` and `other`. For each element (k, w) in `other`, the
@@ -358,8 +364,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif
* pair (k, (None, w)) if no elements in `this` have key k. Hash-partitions the resulting
* RDD using the existing partitioner/parallelism level.
*/
- def rightOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Option[V], W)] =
- fromRDD(rdd.rightOuterJoin(other))
+ def rightOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Optional[V], W)] = {
+ val joinResult = rdd.rightOuterJoin(other)
+ fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)})
+ }
/**
* Perform a right outer join of `this` and `other`. For each element (k, w) in `other`, the
@@ -367,8 +375,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif
* pair (k, (None, w)) if no elements in `this` have key k. Hash-partitions the resulting
* RDD into the given number of partitions.
*/
- def rightOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (Option[V], W)] =
- fromRDD(rdd.rightOuterJoin(other, numPartitions))
+ def rightOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (Optional[V], W)] = {
+ val joinResult = rdd.rightOuterJoin(other, numPartitions)
+ fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)})
+ }
/**
* Return the key-value pairs in this RDD to the master as a Map.
@@ -554,7 +564,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif
override def compare(b: K) = comp.compare(a, b)
}
implicit def toOrdered(x: K): Ordered[K] = new KeyOrdering(x)
- fromRDD(new OrderedRDDFunctions(rdd).sortByKey(ascending))
+ fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending))
}
/**
diff --git a/core/src/main/scala/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
index c0bf2cf568..eec58abdd6 100644
--- a/core/src/main/scala/spark/api/java/JavaRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
@@ -15,11 +15,12 @@
* limitations under the License.
*/
-package spark.api.java
+package org.apache.spark.api.java
-import spark._
-import spark.api.java.function.{Function => JFunction}
-import spark.storage.StorageLevel
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.api.java.function.{Function => JFunction}
+import org.apache.spark.storage.StorageLevel
class JavaRDD[T](val rdd: RDD[T])(implicit val classManifest: ClassManifest[T]) extends
JavaRDDLike[T, JavaRDD[T]] {
diff --git a/core/src/main/scala/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index 21b5abf053..7e6e691f11 100644
--- a/core/src/main/scala/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -15,19 +15,21 @@
* limitations under the License.
*/
-package spark.api.java
+package org.apache.spark.api.java
import java.util.{List => JList, Comparator}
import scala.Tuple2
import scala.collection.JavaConversions._
-import org.apache.hadoop.io.compress.CompressionCodec
-import spark.{SparkContext, Partition, RDD, TaskContext}
-import spark.api.java.JavaPairRDD._
-import spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _}
-import spark.partial.{PartialResult, BoundedDouble}
-import spark.storage.StorageLevel
import com.google.common.base.Optional
+import org.apache.hadoop.io.compress.CompressionCodec
+
+import org.apache.spark.{SparkContext, Partition, TaskContext}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.api.java.JavaPairRDD._
+import org.apache.spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _}
+import org.apache.spark.partial.{PartialResult, BoundedDouble}
+import org.apache.spark.storage.StorageLevel
trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
@@ -40,7 +42,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/** Set of partitions in this RDD. */
def splits: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq)
- /** The [[spark.SparkContext]] that this RDD was created on. */
+ /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */
def context: SparkContext = rdd.context
/** A unique ID for this RDD (within its SparkContext). */
@@ -207,12 +209,12 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
* of elements in each partition.
*/
def zipPartitions[U, V](
- f: FlatMapFunction2[java.util.Iterator[T], java.util.Iterator[U], V],
- other: JavaRDDLike[U, _]): JavaRDD[V] = {
+ other: JavaRDDLike[U, _],
+ f: FlatMapFunction2[java.util.Iterator[T], java.util.Iterator[U], V]): JavaRDD[V] = {
def fn = (x: Iterator[T], y: Iterator[U]) => asScalaIterator(
f.apply(asJavaIterator(x), asJavaIterator(y)).iterator())
JavaRDD.fromRDD(
- rdd.zipPartitions(fn, other.rdd)(other.classManifest, f.elementType()))(f.elementType())
+ rdd.zipPartitions(other.rdd)(fn)(other.classManifest, f.elementType()))(f.elementType())
}
// Actions (launch a job to return a value to the user program)
@@ -366,10 +368,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
* Gets the name of the file to which this RDD was checkpointed
*/
def getCheckpointFile(): Optional[String] = {
- rdd.getCheckpointFile match {
- case Some(file) => Optional.of(file)
- case _ => Optional.absent()
- }
+ JavaUtils.optionToOptional(rdd.getCheckpointFile)
}
/** A description of this RDD and its recursive dependencies for debugging. */
diff --git a/core/src/main/scala/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index fe182e7ab6..8869e072bf 100644
--- a/core/src/main/scala/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java
+package org.apache.spark.api.java
import java.util.{Map => JMap}
@@ -26,14 +26,16 @@ import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.InputFormat
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
+import com.google.common.base.Optional
-import spark.{Accumulable, AccumulableParam, Accumulator, AccumulatorParam, RDD, SparkContext}
-import spark.SparkContext.IntAccumulatorParam
-import spark.SparkContext.DoubleAccumulatorParam
-import spark.broadcast.Broadcast
+import org.apache.spark.{Accumulable, AccumulableParam, Accumulator, AccumulatorParam, SparkContext}
+import org.apache.spark.SparkContext.IntAccumulatorParam
+import org.apache.spark.SparkContext.DoubleAccumulatorParam
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.rdd.RDD
/**
- * A Java-friendly version of [[spark.SparkContext]] that returns [[spark.api.java.JavaRDD]]s and
+ * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns [[org.apache.spark.api.java.JavaRDD]]s and
* works with Java collections instead of Scala ones.
*/
class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWorkaround {
@@ -281,48 +283,48 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
}
/**
- * Create an [[spark.Accumulator]] integer variable, which tasks can "add" values
+ * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values
* to using the `add` method. Only the master can access the accumulator's `value`.
*/
def intAccumulator(initialValue: Int): Accumulator[java.lang.Integer] =
sc.accumulator(initialValue)(IntAccumulatorParam).asInstanceOf[Accumulator[java.lang.Integer]]
/**
- * Create an [[spark.Accumulator]] double variable, which tasks can "add" values
+ * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
* to using the `add` method. Only the master can access the accumulator's `value`.
*/
def doubleAccumulator(initialValue: Double): Accumulator[java.lang.Double] =
sc.accumulator(initialValue)(DoubleAccumulatorParam).asInstanceOf[Accumulator[java.lang.Double]]
/**
- * Create an [[spark.Accumulator]] integer variable, which tasks can "add" values
+ * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values
* to using the `add` method. Only the master can access the accumulator's `value`.
*/
def accumulator(initialValue: Int): Accumulator[java.lang.Integer] = intAccumulator(initialValue)
/**
- * Create an [[spark.Accumulator]] double variable, which tasks can "add" values
+ * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
* to using the `add` method. Only the master can access the accumulator's `value`.
*/
def accumulator(initialValue: Double): Accumulator[java.lang.Double] =
doubleAccumulator(initialValue)
/**
- * Create an [[spark.Accumulator]] variable of a given type, which tasks can "add" values
+ * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add" values
* to using the `add` method. Only the master can access the accumulator's `value`.
*/
def accumulator[T](initialValue: T, accumulatorParam: AccumulatorParam[T]): Accumulator[T] =
sc.accumulator(initialValue)(accumulatorParam)
/**
- * Create an [[spark.Accumulable]] shared variable of the given type, to which tasks can
+ * Create an [[org.apache.spark.Accumulable]] shared variable of the given type, to which tasks can
* "add" values with `add`. Only the master can access the accumuable's `value`.
*/
def accumulable[T, R](initialValue: T, param: AccumulableParam[T, R]): Accumulable[T, R] =
sc.accumulable(initialValue)(param)
/**
- * Broadcast a read-only variable to the cluster, returning a [[spark.Broadcast]] object for
+ * Broadcast a read-only variable to the cluster, returning a [[org.apache.spark.Broadcast]] object for
* reading it in distributed functions. The variable will be sent to each cluster only once.
*/
def broadcast[T](value: T): Broadcast[T] = sc.broadcast(value)
@@ -337,7 +339,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
* or the spark.home Java property, or the SPARK_HOME environment variable
* (in that order of preference). If neither of these is set, return None.
*/
- def getSparkHome(): Option[String] = sc.getSparkHome()
+ def getSparkHome(): Optional[String] = JavaUtils.optionToOptional(sc.getSparkHome())
/**
* Add a file to be downloaded with this Spark job on every node.
diff --git a/core/src/main/scala/spark/api/java/JavaSparkContextVarargsWorkaround.java b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java
index 42b1de01b1..c9cbce5624 100644
--- a/core/src/main/scala/spark/api/java/JavaSparkContextVarargsWorkaround.java
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java;
+package org.apache.spark.api.java;
import java.util.Arrays;
import java.util.ArrayList;
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala
new file mode 100644
index 0000000000..ecbf18849a
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.api.java
+
+import com.google.common.base.Optional
+
+object JavaUtils {
+ def optionToOptional[T](option: Option[T]): Optional[T] =
+ option match {
+ case Some(value) => Optional.of(value)
+ case None => Optional.absent()
+ }
+}
diff --git a/core/src/main/scala/spark/api/java/StorageLevels.java b/core/src/main/scala/org/apache/spark/api/java/StorageLevels.java
index f385636e83..0744269773 100644
--- a/core/src/main/scala/spark/api/java/StorageLevels.java
+++ b/core/src/main/scala/org/apache/spark/api/java/StorageLevels.java
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.api.java;
+package org.apache.spark.api.java;
-import spark.storage.StorageLevel;
+import org.apache.spark.storage.StorageLevel;
/**
* Expose some commonly useful storage level constants.
diff --git a/core/src/main/scala/spark/api/java/function/DoubleFlatMapFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.java
index 8bc88d757f..4830067f7a 100644
--- a/core/src/main/scala/spark/api/java/function/DoubleFlatMapFunction.java
+++ b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function;
+package org.apache.spark.api.java.function;
import scala.runtime.AbstractFunction1;
diff --git a/core/src/main/scala/spark/api/java/function/DoubleFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.java
index 1aa1e5dae0..db34cd190a 100644
--- a/core/src/main/scala/spark/api/java/function/DoubleFunction.java
+++ b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function;
+package org.apache.spark.api.java.function;
import scala.runtime.AbstractFunction1;
diff --git a/core/src/main/scala/spark/api/java/function/FlatMapFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala
index 9eb0cfe3f9..158539a846 100644
--- a/core/src/main/scala/spark/api/java/function/FlatMapFunction.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function
+package org.apache.spark.api.java.function
/**
* A function that returns zero or more output records from each input record.
diff --git a/core/src/main/scala/spark/api/java/function/FlatMapFunction2.scala b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala
index dda98710c2..5ef6a814f5 100644
--- a/core/src/main/scala/spark/api/java/function/FlatMapFunction2.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function
+package org.apache.spark.api.java.function
/**
* A function that takes two inputs and returns zero or more output records.
diff --git a/core/src/main/scala/spark/api/java/function/Function.java b/core/src/main/scala/org/apache/spark/api/java/function/Function.java
index 2a2ea0aacf..b9070cfd83 100644
--- a/core/src/main/scala/spark/api/java/function/Function.java
+++ b/core/src/main/scala/org/apache/spark/api/java/function/Function.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function;
+package org.apache.spark.api.java.function;
import scala.reflect.ClassManifest;
import scala.reflect.ClassManifest$;
diff --git a/core/src/main/scala/spark/api/java/function/Function2.java b/core/src/main/scala/org/apache/spark/api/java/function/Function2.java
index 952d31ece4..d4c9154869 100644
--- a/core/src/main/scala/spark/api/java/function/Function2.java
+++ b/core/src/main/scala/org/apache/spark/api/java/function/Function2.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function;
+package org.apache.spark.api.java.function;
import scala.reflect.ClassManifest;
import scala.reflect.ClassManifest$;
diff --git a/core/src/main/scala/spark/api/java/function/PairFlatMapFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java
index 4aad602da3..c0e5544b7d 100644
--- a/core/src/main/scala/spark/api/java/function/PairFlatMapFunction.java
+++ b/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function;
+package org.apache.spark.api.java.function;
import scala.Tuple2;
import scala.reflect.ClassManifest;
diff --git a/core/src/main/scala/spark/api/java/function/PairFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java
index ccfe64ecf1..40480fe8e8 100644
--- a/core/src/main/scala/spark/api/java/function/PairFunction.java
+++ b/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function;
+package org.apache.spark.api.java.function;
import scala.Tuple2;
import scala.reflect.ClassManifest;
diff --git a/core/src/main/scala/spark/api/java/function/VoidFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala
index f6fc0b0f7d..ea94313a4a 100644
--- a/core/src/main/scala/spark/api/java/function/VoidFunction.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function
+package org.apache.spark.api.java.function
/**
* A function with no return value.
diff --git a/core/src/main/scala/spark/api/java/function/WrappedFunction1.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala
index 1758a38c4e..cfe694f65d 100644
--- a/core/src/main/scala/spark/api/java/function/WrappedFunction1.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function
+package org.apache.spark.api.java.function
import scala.runtime.AbstractFunction1
diff --git a/core/src/main/scala/spark/api/java/function/WrappedFunction2.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala
index b093567d2c..eb9277c6fb 100644
--- a/core/src/main/scala/spark/api/java/function/WrappedFunction2.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.java.function
+package org.apache.spark.api.java.function
import scala.runtime.AbstractFunction2
diff --git a/core/src/main/scala/spark/api/python/PythonPartitioner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonPartitioner.scala
index 31a719fbff..b090c6edf3 100644
--- a/core/src/main/scala/spark/api/python/PythonPartitioner.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonPartitioner.scala
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package spark.api.python
-
-import spark.Partitioner
+package org.apache.spark.api.python
+import org.apache.spark.Partitioner
import java.util.Arrays
+import org.apache.spark.util.Utils
/**
- * A [[spark.Partitioner]] that performs handling of byte arrays, for use by the Python API.
+ * A [[org.apache.spark.Partitioner]] that performs handling of byte arrays, for use by the Python API.
*
* Stores the unique id() of the Python-side partitioning function so that it is incorporated into
* equality comparisons. Correctness requires that the id is a unique identifier for the
@@ -35,25 +35,10 @@ private[spark] class PythonPartitioner(
val pyPartitionFunctionId: Long)
extends Partitioner {
- override def getPartition(key: Any): Int = {
- if (key == null) {
- return 0
- }
- else {
- val hashCode = {
- if (key.isInstanceOf[Array[Byte]]) {
- Arrays.hashCode(key.asInstanceOf[Array[Byte]])
- } else {
- key.hashCode()
- }
- }
- val mod = hashCode % numPartitions
- if (mod < 0) {
- mod + numPartitions
- } else {
- mod // Guard against negative hash codes
- }
- }
+ override def getPartition(key: Any): Int = key match {
+ case null => 0
+ case key: Array[Byte] => Utils.nonNegativeMod(Arrays.hashCode(key), numPartitions)
+ case _ => Utils.nonNegativeMod(key.hashCode(), numPartitions)
}
override def equals(other: Any): Boolean = other match {
diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index af10822dbd..ccd3833964 100644
--- a/core/src/main/scala/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.api.python
+package org.apache.spark.api.python
import java.io._
import java.net._
@@ -23,16 +23,19 @@ import java.util.{List => JList, ArrayList => JArrayList, Map => JMap, Collectio
import scala.collection.JavaConversions._
-import spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD}
-import spark.broadcast.Broadcast
-import spark._
-import spark.rdd.PipedRDD
+import org.apache.spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD}
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.PipedRDD
+import org.apache.spark.util.Utils
private[spark] class PythonRDD[T: ClassManifest](
parent: RDD[T],
command: Seq[String],
envVars: JMap[String, String],
+ pythonIncludes: JList[String],
preservePartitoning: Boolean,
pythonExec: String,
broadcastVars: JList[Broadcast[Array[Byte]]],
@@ -44,10 +47,11 @@ private[spark] class PythonRDD[T: ClassManifest](
// Similar to Runtime.exec(), if we are given a single string, split it into words
// using a standard StringTokenizer (i.e. by spaces)
def this(parent: RDD[T], command: String, envVars: JMap[String, String],
+ pythonIncludes: JList[String],
preservePartitoning: Boolean, pythonExec: String,
broadcastVars: JList[Broadcast[Array[Byte]]],
accumulator: Accumulator[JList[Array[Byte]]]) =
- this(parent, PipedRDD.tokenize(command), envVars, preservePartitoning, pythonExec,
+ this(parent, PipedRDD.tokenize(command), envVars, pythonIncludes, preservePartitoning, pythonExec,
broadcastVars, accumulator)
override def getPartitions = parent.partitions
@@ -63,34 +67,47 @@ private[spark] class PythonRDD[T: ClassManifest](
// Start a thread to feed the process input from our parent's iterator
new Thread("stdin writer for " + pythonExec) {
override def run() {
- SparkEnv.set(env)
- val stream = new BufferedOutputStream(worker.getOutputStream, bufferSize)
- val dataOut = new DataOutputStream(stream)
- val printOut = new PrintWriter(stream)
- // Partition index
- dataOut.writeInt(split.index)
- // sparkFilesDir
- PythonRDD.writeAsPickle(SparkFiles.getRootDirectory, dataOut)
- // Broadcast variables
- dataOut.writeInt(broadcastVars.length)
- for (broadcast <- broadcastVars) {
- dataOut.writeLong(broadcast.id)
- dataOut.writeInt(broadcast.value.length)
- dataOut.write(broadcast.value)
- }
- dataOut.flush()
- // Serialized user code
- for (elem <- command) {
- printOut.println(elem)
- }
- printOut.flush()
- // Data values
- for (elem <- parent.iterator(split, context)) {
- PythonRDD.writeAsPickle(elem, dataOut)
+ try {
+ SparkEnv.set(env)
+ val stream = new BufferedOutputStream(worker.getOutputStream, bufferSize)
+ val dataOut = new DataOutputStream(stream)
+ val printOut = new PrintWriter(stream)
+ // Partition index
+ dataOut.writeInt(split.index)
+ // sparkFilesDir
+ PythonRDD.writeAsPickle(SparkFiles.getRootDirectory, dataOut)
+ // Broadcast variables
+ dataOut.writeInt(broadcastVars.length)
+ for (broadcast <- broadcastVars) {
+ dataOut.writeLong(broadcast.id)
+ dataOut.writeInt(broadcast.value.length)
+ dataOut.write(broadcast.value)
+ }
+ // Python includes (*.zip and *.egg files)
+ dataOut.writeInt(pythonIncludes.length)
+ for (f <- pythonIncludes) {
+ PythonRDD.writeAsPickle(f, dataOut)
+ }
+ dataOut.flush()
+ // Serialized user code
+ for (elem <- command) {
+ printOut.println(elem)
+ }
+ printOut.flush()
+ // Data values
+ for (elem <- parent.iterator(split, context)) {
+ PythonRDD.writeAsPickle(elem, dataOut)
+ }
+ dataOut.flush()
+ printOut.flush()
+ worker.shutdownOutput()
+ } catch {
+ case e: IOException =>
+ // This can happen for legitimate reasons if the Python code stops returning data before we are done
+ // passing elements through, e.g., for take(). Just log a message to say it happened.
+ logInfo("stdin writer to Python finished early")
+ logDebug("stdin writer to Python finished early", e)
}
- dataOut.flush()
- printOut.flush()
- worker.shutdownOutput()
}
}.start()
@@ -283,7 +300,7 @@ private object Pickle {
val APPENDS: Byte = 'e'
}
-private class BytesToString extends spark.api.java.function.Function[Array[Byte], String] {
+private class BytesToString extends org.apache.spark.api.java.function.Function[Array[Byte], String] {
override def call(arr: Array[Byte]) : String = new String(arr, "UTF-8")
}
@@ -297,7 +314,7 @@ class PythonAccumulatorParam(@transient serverHost: String, serverPort: Int)
Utils.checkHost(serverHost, "Expected hostname")
val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt
-
+
override def zero(value: JList[Array[Byte]]): JList[Array[Byte]] = new JArrayList
override def addInPlace(val1: JList[Array[Byte]], val2: JList[Array[Byte]])
diff --git a/core/src/main/scala/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
index 078ad45ce8..08e3f670f5 100644
--- a/core/src/main/scala/spark/api/python/PythonWorkerFactory.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package spark.api.python
+package org.apache.spark.api.python
-import java.io.{DataInputStream, IOException}
+import java.io.{File, DataInputStream, IOException}
import java.net.{Socket, SocketException, InetAddress}
import scala.collection.JavaConversions._
-import spark._
+import org.apache.spark._
private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String, String])
extends Logging {
@@ -67,6 +67,8 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
val pb = new ProcessBuilder(Seq(pythonExec, sparkHome + "/python/pyspark/daemon.py"))
val workerEnv = pb.environment()
workerEnv.putAll(envVars)
+ val pythonPath = sparkHome + "/python/" + File.pathSeparator + workerEnv.get("PYTHONPATH")
+ workerEnv.put("PYTHONPATH", pythonPath)
daemon = pb.start()
// Redirect the stderr to ours
diff --git a/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala
index 6f7d385379..93e7815ab5 100644
--- a/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.broadcast
+package org.apache.spark.broadcast
import java.io._
import java.net._
@@ -25,8 +25,9 @@ import java.util.concurrent.atomic.AtomicInteger
import scala.collection.mutable.{ListBuffer, Map, Set}
import scala.math
-import spark._
-import spark.storage.StorageLevel
+import org.apache.spark._
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.Utils
private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long)
extends Broadcast[T](id)
diff --git a/core/src/main/scala/spark/broadcast/Broadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala
index aba56a60ca..43c18294c5 100644
--- a/core/src/main/scala/spark/broadcast/Broadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.broadcast
+package org.apache.spark.broadcast
import java.io._
import java.util.concurrent.atomic.AtomicLong
-import spark._
+import org.apache.spark._
abstract class Broadcast[T](private[spark] val id: Long) extends Serializable {
def value: T
@@ -28,7 +28,7 @@ abstract class Broadcast[T](private[spark] val id: Long) extends Serializable {
// We cannot have an abstract readObject here due to some weird issues with
// readObject having to be 'private' in sub-classes.
- override def toString = "spark.Broadcast(" + id + ")"
+ override def toString = "Broadcast(" + id + ")"
}
private[spark]
@@ -44,7 +44,7 @@ class BroadcastManager(val _isDriver: Boolean) extends Logging with Serializable
synchronized {
if (!initialized) {
val broadcastFactoryClass = System.getProperty(
- "spark.broadcast.factory", "spark.broadcast.HttpBroadcastFactory")
+ "spark.broadcast.factory", "org.apache.spark.broadcast.HttpBroadcastFactory")
broadcastFactory =
Class.forName(broadcastFactoryClass).newInstance.asInstanceOf[BroadcastFactory]
diff --git a/core/src/main/scala/spark/broadcast/BroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
index d33d95c7d9..68bff75b90 100644
--- a/core/src/main/scala/spark/broadcast/BroadcastFactory.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.broadcast
+package org.apache.spark.broadcast
/**
* An interface for all the broadcast implementations in Spark (to allow
diff --git a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
index c565876950..9db26ae6de 100644
--- a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
@@ -15,23 +15,22 @@
* limitations under the License.
*/
-package spark.broadcast
+package org.apache.spark.broadcast
-import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream}
-
-import java.io._
-import java.net._
-import java.util.UUID
+import java.io.{File, FileOutputStream, ObjectInputStream, OutputStream}
+import java.net.URL
import it.unimi.dsi.fastutil.io.FastBufferedInputStream
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream
-import spark._
-import spark.storage.StorageLevel
-import util.{MetadataCleaner, TimeStampedHashSet}
+import org.apache.spark.{HttpServer, Logging, SparkEnv}
+import org.apache.spark.io.CompressionCodec
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.{Utils, MetadataCleaner, TimeStampedHashSet}
+
private[spark] class HttpBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long)
-extends Broadcast[T](id) with Logging with Serializable {
+ extends Broadcast[T](id) with Logging with Serializable {
def value = value_
@@ -85,6 +84,7 @@ private object HttpBroadcast extends Logging {
private val files = new TimeStampedHashSet[String]
private val cleaner = new MetadataCleaner("HttpBroadcast", cleanup)
+ private lazy val compressionCodec = CompressionCodec.createCodec()
def initialize(isDriver: Boolean) {
synchronized {
@@ -122,10 +122,12 @@ private object HttpBroadcast extends Logging {
def write(id: Long, value: Any) {
val file = new File(broadcastDir, "broadcast-" + id)
- val out: OutputStream = if (compress) {
- new LZFOutputStream(new FileOutputStream(file)) // Does its own buffering
- } else {
- new FastBufferedOutputStream(new FileOutputStream(file), bufferSize)
+ val out: OutputStream = {
+ if (compress) {
+ compressionCodec.compressedOutputStream(new FileOutputStream(file))
+ } else {
+ new FastBufferedOutputStream(new FileOutputStream(file), bufferSize)
+ }
}
val ser = SparkEnv.get.serializer.newInstance()
val serOut = ser.serializeStream(out)
@@ -136,10 +138,12 @@ private object HttpBroadcast extends Logging {
def read[T](id: Long): T = {
val url = serverUri + "/broadcast-" + id
- var in = if (compress) {
- new LZFInputStream(new URL(url).openStream()) // Does its own buffering
- } else {
- new FastBufferedInputStream(new URL(url).openStream(), bufferSize)
+ val in = {
+ if (compress) {
+ compressionCodec.compressedInputStream(new URL(url).openStream())
+ } else {
+ new FastBufferedInputStream(new URL(url).openStream(), bufferSize)
+ }
}
val ser = SparkEnv.get.serializer.newInstance()
val serIn = ser.deserializeStream(in)
diff --git a/core/src/main/scala/spark/broadcast/MultiTracker.scala b/core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala
index 7855d44e9b..21ec94659e 100644
--- a/core/src/main/scala/spark/broadcast/MultiTracker.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.broadcast
+package org.apache.spark.broadcast
import java.io._
import java.net._
@@ -23,7 +23,8 @@ import java.util.Random
import scala.collection.mutable.Map
-import spark._
+import org.apache.spark._
+import org.apache.spark.util.Utils
private object MultiTracker
extends Logging {
diff --git a/core/src/main/scala/spark/broadcast/SourceInfo.scala b/core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala
index b17ae63b5c..baa1fd6da4 100644
--- a/core/src/main/scala/spark/broadcast/SourceInfo.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.broadcast
+package org.apache.spark.broadcast
import java.util.BitSet
-import spark._
+import org.apache.spark._
/**
* Used to keep and pass around information of peers involved in a broadcast
diff --git a/core/src/main/scala/spark/broadcast/TreeBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala
index ea1e9a12c1..80c97ca073 100644
--- a/core/src/main/scala/spark/broadcast/TreeBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.broadcast
+package org.apache.spark.broadcast
import java.io._
import java.net._
@@ -24,8 +24,9 @@ import java.util.{Comparator, Random, UUID}
import scala.collection.mutable.{ListBuffer, Map, Set}
import scala.math
-import spark._
-import spark.storage.StorageLevel
+import org.apache.spark._
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.Utils
private[spark] class TreeBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long)
extends Broadcast[T](id) with Logging with Serializable {
diff --git a/core/src/main/scala/spark/deploy/ApplicationDescription.scala b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
index a8b22fbef8..19d393a0db 100644
--- a/core/src/main/scala/spark/deploy/ApplicationDescription.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy
+package org.apache.spark.deploy
private[spark] class ApplicationDescription(
val name: String,
diff --git a/core/src/main/scala/spark/deploy/Command.scala b/core/src/main/scala/org/apache/spark/deploy/Command.scala
index bad629e965..fa8af9a646 100644
--- a/core/src/main/scala/spark/deploy/Command.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/Command.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy
+package org.apache.spark.deploy
import scala.collection.Map
diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala
new file mode 100644
index 0000000000..c31619db27
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy
+
+import scala.collection.immutable.List
+
+import org.apache.spark.deploy.ExecutorState.ExecutorState
+import org.apache.spark.deploy.master.{WorkerInfo, ApplicationInfo}
+import org.apache.spark.deploy.worker.ExecutorRunner
+import org.apache.spark.util.Utils
+
+
+private[deploy] sealed trait DeployMessage extends Serializable
+
+private[deploy] object DeployMessages {
+
+ // Worker to Master
+
+ case class RegisterWorker(
+ id: String,
+ host: String,
+ port: Int,
+ cores: Int,
+ memory: Int,
+ webUiPort: Int,
+ publicAddress: String)
+ extends DeployMessage {
+ Utils.checkHost(host, "Required hostname")
+ assert (port > 0)
+ }
+
+ case class ExecutorStateChanged(
+ appId: String,
+ execId: Int,
+ state: ExecutorState,
+ message: Option[String],
+ exitStatus: Option[Int])
+ extends DeployMessage
+
+ case class Heartbeat(workerId: String) extends DeployMessage
+
+ // Master to Worker
+
+ case class RegisteredWorker(masterWebUiUrl: String) extends DeployMessage
+
+ case class RegisterWorkerFailed(message: String) extends DeployMessage
+
+ case class KillExecutor(appId: String, execId: Int) extends DeployMessage
+
+ case class LaunchExecutor(
+ appId: String,
+ execId: Int,
+ appDesc: ApplicationDescription,
+ cores: Int,
+ memory: Int,
+ sparkHome: String)
+ extends DeployMessage
+
+ // Client to Master
+
+ case class RegisterApplication(appDescription: ApplicationDescription)
+ extends DeployMessage
+
+ // Master to Client
+
+ case class RegisteredApplication(appId: String) extends DeployMessage
+
+ // TODO(matei): replace hostPort with host
+ case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) {
+ Utils.checkHostPort(hostPort, "Required hostport")
+ }
+
+ case class ExecutorUpdated(id: Int, state: ExecutorState, message: Option[String],
+ exitStatus: Option[Int])
+
+ case class ApplicationRemoved(message: String)
+
+ // Internal message in Client
+
+ case object StopClient
+
+ // MasterWebUI To Master
+
+ case object RequestMasterState
+
+ // Master to MasterWebUI
+
+ case class MasterStateResponse(host: String, port: Int, workers: Array[WorkerInfo],
+ activeApps: Array[ApplicationInfo], completedApps: Array[ApplicationInfo]) {
+
+ Utils.checkHost(host, "Required hostname")
+ assert (port > 0)
+
+ def uri = "spark://" + host + ":" + port
+ }
+
+ // WorkerWebUI to Worker
+
+ case object RequestWorkerState
+
+ // Worker to WorkerWebUI
+
+ case class WorkerStateResponse(host: String, port: Int, workerId: String,
+ executors: List[ExecutorRunner], finishedExecutors: List[ExecutorRunner], masterUrl: String,
+ cores: Int, memory: Int, coresUsed: Int, memoryUsed: Int, masterWebUiUrl: String) {
+
+ Utils.checkHost(host, "Required hostname")
+ assert (port > 0)
+ }
+
+ // Actor System to Master
+
+ case object CheckForWorkerTimeOut
+
+}
diff --git a/core/src/main/scala/spark/deploy/ExecutorState.scala b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala
index 08c9a3b725..fcfea96ad6 100644
--- a/core/src/main/scala/spark/deploy/ExecutorState.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy
+package org.apache.spark.deploy
private[spark] object ExecutorState
extends Enumeration("LAUNCHING", "LOADING", "RUNNING", "KILLED", "FAILED", "LOST") {
diff --git a/core/src/main/scala/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
index 64f89623e1..a6be8efef1 100644
--- a/core/src/main/scala/spark/deploy/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
@@ -15,11 +15,14 @@
* limitations under the License.
*/
-package spark.deploy
+package org.apache.spark.deploy
-import master.{ApplicationInfo, WorkerInfo}
import net.liftweb.json.JsonDSL._
-import worker.ExecutorRunner
+
+import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse}
+import org.apache.spark.deploy.master.{ApplicationInfo, WorkerInfo}
+import org.apache.spark.deploy.worker.ExecutorRunner
+
private[spark] object JsonProtocol {
def writeWorkerInfo(obj: WorkerInfo) = {
@@ -30,7 +33,8 @@ private[spark] object JsonProtocol {
("cores" -> obj.cores) ~
("coresused" -> obj.coresUsed) ~
("memory" -> obj.memory) ~
- ("memoryused" -> obj.memoryUsed)
+ ("memoryused" -> obj.memoryUsed) ~
+ ("state" -> obj.state.toString)
}
def writeApplicationInfo(obj: ApplicationInfo) = {
@@ -57,7 +61,7 @@ private[spark] object JsonProtocol {
("appdesc" -> writeApplicationDescription(obj.appDesc))
}
- def writeMasterState(obj: MasterState) = {
+ def writeMasterState(obj: MasterStateResponse) = {
("url" -> ("spark://" + obj.uri)) ~
("workers" -> obj.workers.toList.map(writeWorkerInfo)) ~
("cores" -> obj.workers.map(_.cores).sum) ~
@@ -68,7 +72,7 @@ private[spark] object JsonProtocol {
("completedapps" -> obj.completedApps.toList.map(writeApplicationInfo))
}
- def writeWorkerState(obj: WorkerState) = {
+ def writeWorkerState(obj: WorkerStateResponse) = {
("id" -> obj.workerId) ~
("masterurl" -> obj.masterUrl) ~
("masterwebuiurl" -> obj.masterWebUiUrl) ~
diff --git a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala
index 6b8e9f27af..78e3747ad8 100644
--- a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package spark.deploy
+package org.apache.spark.deploy
import akka.actor.{ActorRef, Props, Actor, ActorSystem, Terminated}
-import spark.deploy.worker.Worker
-import spark.deploy.master.Master
-import spark.util.AkkaUtils
-import spark.{Logging, Utils}
+import org.apache.spark.deploy.worker.Worker
+import org.apache.spark.deploy.master.Master
+import org.apache.spark.util.{Utils, AkkaUtils}
+import org.apache.spark.{Logging}
import scala.collection.mutable.ArrayBuffer
diff --git a/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index df55be1254..0a5f4c368f 100644
--- a/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy
+package org.apache.spark.deploy
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.JobConf
@@ -23,22 +23,14 @@ import org.apache.hadoop.mapred.JobConf
/**
* Contains util methods to interact with Hadoop from spark.
*/
-object SparkHadoopUtil {
-
- def getUserNameFromEnvironment(): String = {
- // defaulting to -D ...
- System.getProperty("user.name")
- }
-
- def runAsUser(func: (Product) => Unit, args: Product) {
-
- // Add support, if exists - for now, simply run func !
- func(args)
- }
+class SparkHadoopUtil {
// Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems
def newConfiguration(): Configuration = new Configuration()
// add any user credentials to the job conf which are necessary for running on a secure Hadoop cluster
def addCredentials(conf: JobConf) {}
+
+ def isYarnMode(): Boolean = { false }
+
}
diff --git a/core/src/main/scala/spark/deploy/WebUI.scala b/core/src/main/scala/org/apache/spark/deploy/WebUI.scala
index 8ea7792ef4..ae258b58b9 100644
--- a/core/src/main/scala/spark/deploy/WebUI.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/WebUI.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy
+package org.apache.spark.deploy
import java.text.SimpleDateFormat
import java.util.Date
diff --git a/core/src/main/scala/spark/deploy/client/Client.scala b/core/src/main/scala/org/apache/spark/deploy/client/Client.scala
index edefa0292d..a342dd724a 100644
--- a/core/src/main/scala/spark/deploy/client/Client.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/Client.scala
@@ -15,23 +15,25 @@
* limitations under the License.
*/
-package spark.deploy.client
+package org.apache.spark.deploy.client
+
+import java.util.concurrent.TimeoutException
-import spark.deploy._
import akka.actor._
+import akka.actor.Terminated
import akka.pattern.ask
import akka.util.Duration
-import akka.util.duration._
-import akka.pattern.AskTimeoutException
-import spark.{SparkException, Logging}
+import akka.remote.RemoteClientDisconnected
import akka.remote.RemoteClientLifeCycleEvent
import akka.remote.RemoteClientShutdown
-import spark.deploy.RegisterApplication
-import spark.deploy.master.Master
-import akka.remote.RemoteClientDisconnected
-import akka.actor.Terminated
import akka.dispatch.Await
+import org.apache.spark.Logging
+import org.apache.spark.deploy.{ApplicationDescription, ExecutorState}
+import org.apache.spark.deploy.DeployMessages._
+import org.apache.spark.deploy.master.Master
+
+
/**
* The main class used to talk to a Spark deploy cluster. Takes a master URL, an app description,
* and a listener for cluster events, and calls back the listener when various events occur.
@@ -134,7 +136,8 @@ private[spark] class Client(
val future = actor.ask(StopClient)(timeout)
Await.result(future, timeout)
} catch {
- case e: AskTimeoutException => // Ignore it, maybe master went away
+ case e: TimeoutException =>
+ logInfo("Stop request to Master timed out; it may already be shut down.")
}
actor = null
}
diff --git a/core/src/main/scala/spark/deploy/client/ClientListener.scala b/core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala
index 064024455e..4605368c11 100644
--- a/core/src/main/scala/spark/deploy/client/ClientListener.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy.client
+package org.apache.spark.deploy.client
/**
* Callbacks invoked by deploy client when various events happen. There are currently four events:
diff --git a/core/src/main/scala/spark/deploy/client/TestClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
index 4f4daa141a..d5e9a0e095 100644
--- a/core/src/main/scala/spark/deploy/client/TestClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.deploy.client
+package org.apache.spark.deploy.client
-import spark.util.AkkaUtils
-import spark.{Logging, Utils}
-import spark.deploy.{Command, ApplicationDescription}
+import org.apache.spark.util.{Utils, AkkaUtils}
+import org.apache.spark.{Logging}
+import org.apache.spark.deploy.{Command, ApplicationDescription}
private[spark] object TestClient {
diff --git a/core/src/main/scala/spark/deploy/client/TestExecutor.scala b/core/src/main/scala/org/apache/spark/deploy/client/TestExecutor.scala
index 8a22b6b89f..c5ac45c673 100644
--- a/core/src/main/scala/spark/deploy/client/TestExecutor.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/TestExecutor.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy.client
+package org.apache.spark.deploy.client
private[spark] object TestExecutor {
def main(args: Array[String]) {
diff --git a/core/src/main/scala/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala
index 15ff919738..bd5327627a 100644
--- a/core/src/main/scala/spark/deploy/master/ApplicationInfo.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.deploy.master
+package org.apache.spark.deploy.master
-import spark.deploy.ApplicationDescription
+import org.apache.spark.deploy.ApplicationDescription
import java.util.Date
import akka.actor.ActorRef
import scala.collection.mutable
@@ -34,6 +34,7 @@ private[spark] class ApplicationInfo(
var executors = new mutable.HashMap[Int, ExecutorInfo]
var coresGranted = 0
var endTime = -1L
+ val appSource = new ApplicationSource(this)
private var nextExecutorId = 0
@@ -51,8 +52,10 @@ private[spark] class ApplicationInfo(
}
def removeExecutor(exec: ExecutorInfo) {
- executors -= exec.id
- coresGranted -= exec.cores
+ if (executors.contains(exec.id)) {
+ executors -= exec.id
+ coresGranted -= exec.cores
+ }
}
def coresLeft: Int = desc.maxCores - coresGranted
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala
new file mode 100644
index 0000000000..2d75ad5a2c
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala
@@ -0,0 +1,24 @@
+package org.apache.spark.deploy.master
+
+import com.codahale.metrics.{Gauge, MetricRegistry}
+
+import org.apache.spark.metrics.source.Source
+
+class ApplicationSource(val application: ApplicationInfo) extends Source {
+ val metricRegistry = new MetricRegistry()
+ val sourceName = "%s.%s.%s".format("application", application.desc.name,
+ System.currentTimeMillis())
+
+ metricRegistry.register(MetricRegistry.name("status"), new Gauge[String] {
+ override def getValue: String = application.state.toString
+ })
+
+ metricRegistry.register(MetricRegistry.name("runtime_ms"), new Gauge[Long] {
+ override def getValue: Long = application.duration
+ })
+
+ metricRegistry.register(MetricRegistry.name("cores", "number"), new Gauge[Int] {
+ override def getValue: Int = application.coresGranted
+ })
+
+}
diff --git a/core/src/main/scala/spark/deploy/master/ApplicationState.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala
index 94f0ad8bae..7e804223cf 100644
--- a/core/src/main/scala/spark/deploy/master/ApplicationState.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy.master
+package org.apache.spark.deploy.master
private[spark] object ApplicationState
extends Enumeration("WAITING", "RUNNING", "FINISHED", "FAILED") {
diff --git a/core/src/main/scala/spark/deploy/master/ExecutorInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala
index 99b60f7d09..cf384a985e 100644
--- a/core/src/main/scala/spark/deploy/master/ExecutorInfo.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.deploy.master
+package org.apache.spark.deploy.master
-import spark.deploy.ExecutorState
+import org.apache.spark.deploy.ExecutorState
private[spark] class ExecutorInfo(
val id: Int,
diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index e5a7a87e2e..7cf0a7754f 100644
--- a/core/src/main/scala/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -15,28 +15,32 @@
* limitations under the License.
*/
-package spark.deploy.master
-
-import akka.actor._
-import akka.actor.Terminated
-import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientDisconnected, RemoteClientShutdown}
-import akka.util.duration._
+package org.apache.spark.deploy.master
import java.text.SimpleDateFormat
import java.util.Date
import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
-import spark.deploy._
-import spark.{Logging, SparkException, Utils}
-import spark.util.AkkaUtils
-import ui.MasterWebUI
+import akka.actor._
+import akka.actor.Terminated
+import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientDisconnected, RemoteClientShutdown}
+import akka.util.duration._
+
+import org.apache.spark.{Logging, SparkException}
+import org.apache.spark.deploy.{ApplicationDescription, ExecutorState}
+import org.apache.spark.deploy.DeployMessages._
+import org.apache.spark.deploy.master.ui.MasterWebUI
+import org.apache.spark.metrics.MetricsSystem
+import org.apache.spark.util.{Utils, AkkaUtils}
private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Actor with Logging {
val DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs
val WORKER_TIMEOUT = System.getProperty("spark.worker.timeout", "60").toLong * 1000
-
+ val RETAINED_APPLICATIONS = System.getProperty("spark.deploy.retainedApplications", "200").toInt
+ val REAPER_ITERATIONS = System.getProperty("spark.dead.worker.persistence", "15").toInt
+
var nextAppNumber = 0
val workers = new HashSet[WorkerInfo]
val idToWorker = new HashMap[String, WorkerInfo]
@@ -53,10 +57,14 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
var firstApp: Option[ApplicationInfo] = None
- val webUi = new MasterWebUI(self)
-
Utils.checkHost(host, "Expected hostname")
+ val masterMetricsSystem = MetricsSystem.createMetricsSystem("master")
+ val applicationMetricsSystem = MetricsSystem.createMetricsSystem("applications")
+ val masterSource = new MasterSource(this)
+
+ val webUi = new MasterWebUI(this, webUiPort)
+
val masterPublicAddress = {
val envVar = System.getenv("SPARK_PUBLIC_DNS")
if (envVar != null) envVar else host
@@ -72,17 +80,23 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
// Listen for remote client disconnection events, since they don't go through Akka's watch()
context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent])
webUi.start()
- context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis)(timeOutDeadWorkers())
+ context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis, self, CheckForWorkerTimeOut)
+
+ masterMetricsSystem.registerSource(masterSource)
+ masterMetricsSystem.start()
+ applicationMetricsSystem.start()
}
override def postStop() {
webUi.stop()
+ masterMetricsSystem.stop()
+ applicationMetricsSystem.stop()
}
override def receive = {
case RegisterWorker(id, host, workerPort, cores, memory, worker_webUiPort, publicAddress) => {
logInfo("Registering worker %s:%d with %d cores, %s RAM".format(
- host, workerPort, cores, Utils.memoryMegabytesToString(memory)))
+ host, workerPort, cores, Utils.megabytesToString(memory)))
if (idToWorker.contains(id)) {
sender ! RegisterWorkerFailed("Duplicate worker ID")
} else {
@@ -160,7 +174,11 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
}
case RequestMasterState => {
- sender ! MasterState(host, port, workers.toArray, apps.toArray, completedApps.toArray)
+ sender ! MasterStateResponse(host, port, workers.toArray, apps.toArray, completedApps.toArray)
+ }
+
+ case CheckForWorkerTimeOut => {
+ timeOutDeadWorkers()
}
}
@@ -225,20 +243,27 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo, sparkHome: String) {
logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
worker.addExecutor(exec)
- worker.actor ! LaunchExecutor(exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory, sparkHome)
- exec.application.driver ! ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory)
+ worker.actor ! LaunchExecutor(
+ exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory, sparkHome)
+ exec.application.driver ! ExecutorAdded(
+ exec.id, worker.id, worker.hostPort, exec.cores, exec.memory)
}
def addWorker(id: String, host: String, port: Int, cores: Int, memory: Int, webUiPort: Int,
publicAddress: String): WorkerInfo = {
- // There may be one or more refs to dead workers on this same node (w/ different ID's), remove them.
- workers.filter(w => (w.host == host && w.port == port) && (w.state == WorkerState.DEAD)).foreach(workers -= _)
+ // There may be one or more refs to dead workers on this same node (w/ different ID's),
+ // remove them.
+ workers.filter { w =>
+ (w.host == host && w.port == port) && (w.state == WorkerState.DEAD)
+ }.foreach { w =>
+ workers -= w
+ }
val worker = new WorkerInfo(id, host, port, cores, memory, sender, webUiPort, publicAddress)
workers += worker
idToWorker(worker.id) = worker
actorToWorker(sender) = worker
addressToWorker(sender.path.address) = worker
- return worker
+ worker
}
def removeWorker(worker: WorkerInfo) {
@@ -249,7 +274,8 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
addressToWorker -= worker.actor.path.address
for (exec <- worker.executors.values) {
logInfo("Telling app of lost executor: " + exec.id)
- exec.application.driver ! ExecutorUpdated(exec.id, ExecutorState.LOST, Some("worker lost"), None)
+ exec.application.driver ! ExecutorUpdated(
+ exec.id, ExecutorState.LOST, Some("worker lost"), None)
exec.application.removeExecutor(exec)
}
}
@@ -258,6 +284,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
val now = System.currentTimeMillis()
val date = new Date(now)
val app = new ApplicationInfo(now, newApplicationId(date), desc, date, driver, desc.appUiUrl)
+ applicationMetricsSystem.registerSource(app.appSource)
apps += app
idToApp(app.id) = app
actorToApp(driver) = app
@@ -269,7 +296,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
if (workersAlive.size > 0 && !workersAlive.exists(_.memoryFree >= desc.memoryPerSlave)) {
logWarning("Could not find any workers with enough memory for " + firstApp.get.id)
}
- return app
+ app
}
def finishApplication(app: ApplicationInfo) {
@@ -283,7 +310,14 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
idToApp -= app.id
actorToApp -= app.driver
addressToApp -= app.driver.path.address
- completedApps += app // Remember it in our history
+ if (completedApps.size >= RETAINED_APPLICATIONS) {
+ val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1)
+ completedApps.take(toRemove).foreach( a => {
+ applicationMetricsSystem.removeSource(a.appSource)
+ })
+ completedApps.trimStart(toRemove)
+ }
+ completedApps += app // Remember it in our history
waitingApps -= app
for (exec <- app.executors.values) {
exec.worker.removeExecutor(exec)
@@ -308,12 +342,17 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
/** Check for, and remove, any timed-out workers */
def timeOutDeadWorkers() {
// Copy the workers into an array so we don't modify the hashset while iterating through it
- val expirationTime = System.currentTimeMillis() - WORKER_TIMEOUT
- val toRemove = workers.filter(_.lastHeartbeat < expirationTime).toArray
+ val currentTime = System.currentTimeMillis()
+ val toRemove = workers.filter(_.lastHeartbeat < currentTime - WORKER_TIMEOUT).toArray
for (worker <- toRemove) {
- logWarning("Removing %s because we got no heartbeat in %d seconds".format(
- worker.id, WORKER_TIMEOUT))
- removeWorker(worker)
+ if (worker.state != WorkerState.DEAD) {
+ logWarning("Removing %s because we got no heartbeat in %d seconds".format(
+ worker.id, WORKER_TIMEOUT/1000))
+ removeWorker(worker)
+ } else {
+ if (worker.lastHeartbeat < currentTime - ((REAPER_ITERATIONS + 1) * WORKER_TIMEOUT))
+ workers -= worker // we've seen this DEAD worker in the UI, etc. for long enough; cull it
+ }
}
}
}
diff --git a/core/src/main/scala/spark/deploy/master/MasterArguments.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala
index d0ec3d5ea0..9d89b455fb 100644
--- a/core/src/main/scala/spark/deploy/master/MasterArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala
@@ -15,10 +15,9 @@
* limitations under the License.
*/
-package spark.deploy.master
+package org.apache.spark.deploy.master
-import spark.util.IntParam
-import spark.Utils
+import org.apache.spark.util.{Utils, IntParam}
/**
* Command-line parser for the master.
@@ -38,7 +37,10 @@ private[spark] class MasterArguments(args: Array[String]) {
if (System.getenv("SPARK_MASTER_WEBUI_PORT") != null) {
webUiPort = System.getenv("SPARK_MASTER_WEBUI_PORT").toInt
}
-
+ if (System.getProperty("master.ui.port") != null) {
+ webUiPort = System.getProperty("master.ui.port").toInt
+ }
+
parse(args.toList)
def parse(args: List[String]): Unit = args match {
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala
new file mode 100644
index 0000000000..8dd0a42f71
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala
@@ -0,0 +1,25 @@
+package org.apache.spark.deploy.master
+
+import com.codahale.metrics.{Gauge, MetricRegistry}
+
+import org.apache.spark.metrics.source.Source
+
+private[spark] class MasterSource(val master: Master) extends Source {
+ val metricRegistry = new MetricRegistry()
+ val sourceName = "master"
+
+ // Gauge for worker numbers in cluster
+ metricRegistry.register(MetricRegistry.name("workers","number"), new Gauge[Int] {
+ override def getValue: Int = master.workers.size
+ })
+
+ // Gauge for application numbers in cluster
+ metricRegistry.register(MetricRegistry.name("apps", "number"), new Gauge[Int] {
+ override def getValue: Int = master.apps.size
+ })
+
+ // Gauge for waiting application numbers in cluster
+ metricRegistry.register(MetricRegistry.name("waitingApps", "number"), new Gauge[Int] {
+ override def getValue: Int = master.waitingApps.size
+ })
+}
diff --git a/core/src/main/scala/spark/deploy/master/WorkerInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala
index 4135cfeb28..6219f11f2a 100644
--- a/core/src/main/scala/spark/deploy/master/WorkerInfo.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.deploy.master
+package org.apache.spark.deploy.master
import akka.actor.ActorRef
import scala.collection.mutable
-import spark.Utils
+import org.apache.spark.util.Utils
private[spark] class WorkerInfo(
val id: String,
diff --git a/core/src/main/scala/spark/deploy/master/WorkerState.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala
index 3e50b7748d..b5ee6dca79 100644
--- a/core/src/main/scala/spark/deploy/master/WorkerState.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy.master
+package org.apache.spark.deploy.master
private[spark] object WorkerState extends Enumeration("ALIVE", "DEAD", "DECOMMISSIONED") {
type WorkerState = Value
diff --git a/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
index 32264af393..f4e574d15d 100644
--- a/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
@@ -15,7 +15,9 @@
* limitations under the License.
*/
-package spark.deploy.master.ui
+package org.apache.spark.deploy.master.ui
+
+import scala.xml.Node
import akka.dispatch.Await
import akka.pattern.ask
@@ -25,20 +27,20 @@ import javax.servlet.http.HttpServletRequest
import net.liftweb.json.JsonAST.JValue
-import scala.xml.Node
-
-import spark.deploy.{RequestMasterState, JsonProtocol, MasterState}
-import spark.deploy.master.ExecutorInfo
-import spark.ui.UIUtils
+import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState}
+import org.apache.spark.deploy.JsonProtocol
+import org.apache.spark.deploy.master.ExecutorInfo
+import org.apache.spark.ui.UIUtils
+import org.apache.spark.util.Utils
private[spark] class ApplicationPage(parent: MasterWebUI) {
- val master = parent.master
+ val master = parent.masterActorRef
implicit val timeout = parent.timeout
/** Executor details for a particular application */
def renderJson(request: HttpServletRequest): JValue = {
val appId = request.getParameter("appId")
- val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterState]
+ val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse]
val state = Await.result(stateFuture, 30 seconds)
val app = state.activeApps.find(_.id == appId).getOrElse({
state.completedApps.find(_.id == appId).getOrElse(null)
@@ -49,7 +51,7 @@ private[spark] class ApplicationPage(parent: MasterWebUI) {
/** Executor details for a particular application */
def render(request: HttpServletRequest): Seq[Node] = {
val appId = request.getParameter("appId")
- val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterState]
+ val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse]
val state = Await.result(stateFuture, 30 seconds)
val app = state.activeApps.find(_.id == appId).getOrElse({
state.completedApps.find(_.id == appId).getOrElse(null)
@@ -60,24 +62,26 @@ private[spark] class ApplicationPage(parent: MasterWebUI) {
val executorTable = UIUtils.listingTable(executorHeaders, executorRow, executors)
val content =
- <hr />
- <div class="row">
+ <div class="row-fluid">
<div class="span12">
<ul class="unstyled">
<li><strong>ID:</strong> {app.id}</li>
- <li><strong>Description:</strong> {app.desc.name}</li>
+ <li><strong>Name:</strong> {app.desc.name}</li>
<li><strong>User:</strong> {app.desc.user}</li>
<li><strong>Cores:</strong>
{
if (app.desc.maxCores == Integer.MAX_VALUE) {
- "Unlimited %s granted".format(app.coresGranted)
+ "Unlimited (%s granted)".format(app.coresGranted)
} else {
"%s (%s granted, %s left)".format(
app.desc.maxCores, app.coresGranted, app.coresLeft)
}
}
</li>
- <li><strong>Memory per Slave:</strong> {app.desc.memoryPerSlave}</li>
+ <li>
+ <strong>Executor Memory:</strong>
+ {Utils.megabytesToString(app.desc.memoryPerSlave)}
+ </li>
<li><strong>Submit Date:</strong> {app.submitDate}</li>
<li><strong>State:</strong> {app.state}</li>
<li><strong><a href={app.appUiUrl}>Application Detail UI</a></strong></li>
@@ -85,16 +89,13 @@ private[spark] class ApplicationPage(parent: MasterWebUI) {
</div>
</div>
- <hr/>
-
- <div class="row"> <!-- Executors -->
+ <div class="row-fluid"> <!-- Executors -->
<div class="span12">
- <h3> Executor Summary </h3>
- <br/>
+ <h4> Executor Summary </h4>
{executorTable}
</div>
</div>;
- UIUtils.basicSparkPage(content, "Application Info: " + app.desc.name)
+ UIUtils.basicSparkPage(content, "Application: " + app.desc.name)
}
def executorRow(executor: ExecutorInfo): Seq[Node] = {
diff --git a/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala
index b05197c1b9..d7a57229b0 100644
--- a/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala
@@ -15,35 +15,45 @@
* limitations under the License.
*/
-package spark.deploy.master.ui
+package org.apache.spark.deploy.master.ui
+
+import javax.servlet.http.HttpServletRequest
+
+import scala.xml.Node
import akka.dispatch.Await
import akka.pattern.ask
import akka.util.duration._
-import javax.servlet.http.HttpServletRequest
-
-import scala.xml.Node
+import net.liftweb.json.JsonAST.JValue
-import spark.deploy.{RequestMasterState, DeployWebUI, MasterState}
-import spark.Utils
-import spark.ui.UIUtils
-import spark.deploy.master.{ApplicationInfo, WorkerInfo}
+import org.apache.spark.deploy.DeployWebUI
+import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState}
+import org.apache.spark.deploy.JsonProtocol
+import org.apache.spark.deploy.master.{ApplicationInfo, WorkerInfo}
+import org.apache.spark.ui.UIUtils
+import org.apache.spark.util.Utils
private[spark] class IndexPage(parent: MasterWebUI) {
- val master = parent.master
+ val master = parent.masterActorRef
implicit val timeout = parent.timeout
+ def renderJson(request: HttpServletRequest): JValue = {
+ val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse]
+ val state = Await.result(stateFuture, 30 seconds)
+ JsonProtocol.writeMasterState(state)
+ }
+
/** Index view listing applications and executors */
def render(request: HttpServletRequest): Seq[Node] = {
- val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterState]
+ val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse]
val state = Await.result(stateFuture, 30 seconds)
val workerHeaders = Seq("Id", "Address", "State", "Cores", "Memory")
val workers = state.workers.sortBy(_.id)
val workerTable = UIUtils.listingTable(workerHeaders, workerRow, workers)
- val appHeaders = Seq("ID", "Description", "Cores", "Memory per Node", "Submit Time", "User",
+ val appHeaders = Seq("ID", "Name", "Cores", "Memory per Node", "Submitted Time", "User",
"State", "Duration")
val activeApps = state.activeApps.sortBy(_.startTime).reverse
val activeAppsTable = UIUtils.listingTable(appHeaders, appRow, activeApps)
@@ -51,8 +61,7 @@ private[spark] class IndexPage(parent: MasterWebUI) {
val completedAppsTable = UIUtils.listingTable(appHeaders, appRow, completedApps)
val content =
- <hr />
- <div class="row">
+ <div class="row-fluid">
<div class="span12">
<ul class="unstyled">
<li><strong>URL:</strong> {state.uri}</li>
@@ -60,8 +69,8 @@ private[spark] class IndexPage(parent: MasterWebUI) {
<li><strong>Cores:</strong> {state.workers.map(_.cores).sum} Total,
{state.workers.map(_.coresUsed).sum} Used</li>
<li><strong>Memory:</strong>
- {Utils.memoryMegabytesToString(state.workers.map(_.memory).sum)} Total,
- {Utils.memoryMegabytesToString(state.workers.map(_.memoryUsed).sum)} Used</li>
+ {Utils.megabytesToString(state.workers.map(_.memory).sum)} Total,
+ {Utils.megabytesToString(state.workers.map(_.memoryUsed).sum)} Used</li>
<li><strong>Applications:</strong>
{state.activeApps.size} Running,
{state.completedApps.size} Completed </li>
@@ -69,34 +78,28 @@ private[spark] class IndexPage(parent: MasterWebUI) {
</div>
</div>
- <div class="row">
+ <div class="row-fluid">
<div class="span12">
- <h3> Workers </h3>
- <br/>
+ <h4> Workers </h4>
{workerTable}
</div>
</div>
- <hr/>
-
- <div class="row">
+ <div class="row-fluid">
<div class="span12">
- <h3> Running Applications </h3>
- <br/>
+ <h4> Running Applications </h4>
+
{activeAppsTable}
</div>
</div>
- <hr/>
-
- <div class="row">
+ <div class="row-fluid">
<div class="span12">
- <h3> Completed Applications </h3>
- <br/>
+ <h4> Completed Applications </h4>
{completedAppsTable}
</div>
</div>;
- UIUtils.basicSparkPage(content, "Spark Master: " + state.uri)
+ UIUtils.basicSparkPage(content, "Spark Master at " + state.uri)
}
def workerRow(worker: WorkerInfo): Seq[Node] = {
@@ -108,8 +111,8 @@ private[spark] class IndexPage(parent: MasterWebUI) {
<td>{worker.state}</td>
<td>{worker.cores} ({worker.coresUsed} Used)</td>
<td sorttable_customkey={"%s.%s".format(worker.memory, worker.memoryUsed)}>
- {Utils.memoryMegabytesToString(worker.memory)}
- ({Utils.memoryMegabytesToString(worker.memoryUsed)} Used)
+ {Utils.megabytesToString(worker.memory)}
+ ({Utils.megabytesToString(worker.memoryUsed)} Used)
</td>
</tr>
}
@@ -127,7 +130,7 @@ private[spark] class IndexPage(parent: MasterWebUI) {
{app.coresGranted}
</td>
<td sorttable_customkey={app.desc.memoryPerSlave.toString}>
- {Utils.memoryMegabytesToString(app.desc.memoryPerSlave)}
+ {Utils.megabytesToString(app.desc.memoryPerSlave)}
</td>
<td>{DeployWebUI.formatDate(app.submitDate)}</td>
<td>{app.desc.user}</td>
diff --git a/core/src/main/scala/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala
index 04b32c7968..f4df729e87 100644
--- a/core/src/main/scala/spark/deploy/master/ui/MasterWebUI.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala
@@ -15,29 +15,31 @@
* limitations under the License.
*/
-package spark.deploy.master.ui
+package org.apache.spark.deploy.master.ui
-import akka.actor.ActorRef
import akka.util.Duration
import javax.servlet.http.HttpServletRequest
import org.eclipse.jetty.server.{Handler, Server}
-import spark.{Logging, Utils}
-import spark.ui.JettyUtils
-import spark.ui.JettyUtils._
+import org.apache.spark.{Logging}
+import org.apache.spark.deploy.master.Master
+import org.apache.spark.ui.JettyUtils
+import org.apache.spark.ui.JettyUtils._
+import org.apache.spark.util.Utils
/**
* Web UI server for the standalone master.
*/
private[spark]
-class MasterWebUI(val master: ActorRef, requestedPort: Option[Int] = None) extends Logging {
+class MasterWebUI(val master: Master, requestedPort: Int) extends Logging {
implicit val timeout = Duration.create(
System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds")
val host = Utils.localHostName()
- val port = requestedPort.getOrElse(
- System.getProperty("master.ui.port", MasterWebUI.DEFAULT_PORT).toInt)
+ val port = requestedPort
+
+ val masterActorRef = master.self
var server: Option[Server] = None
var boundPort: Option[Int] = None
@@ -58,10 +60,14 @@ class MasterWebUI(val master: ActorRef, requestedPort: Option[Int] = None) exten
}
}
- val handlers = Array[(String, Handler)](
+ val metricsHandlers = master.masterMetricsSystem.getServletHandlers ++
+ master.applicationMetricsSystem.getServletHandlers
+
+ val handlers = metricsHandlers ++ Array[(String, Handler)](
("/static", createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR)),
("/app/json", (request: HttpServletRequest) => applicationPage.renderJson(request)),
("/app", (request: HttpServletRequest) => applicationPage.render(request)),
+ ("/json", (request: HttpServletRequest) => indexPage.renderJson(request)),
("*", (request: HttpServletRequest) => indexPage.render(request))
)
@@ -71,6 +77,5 @@ class MasterWebUI(val master: ActorRef, requestedPort: Option[Int] = None) exten
}
private[spark] object MasterWebUI {
- val STATIC_RESOURCE_DIR = "spark/ui/static"
- val DEFAULT_PORT = "8080"
+ val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static"
}
diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
index 8b51ff1c3a..e3dc30eefc 100644
--- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
@@ -15,18 +15,20 @@
* limitations under the License.
*/
-package spark.deploy.worker
+package org.apache.spark.deploy.worker
import java.io._
import java.lang.System.getenv
-import spark.deploy.{ExecutorState, ExecutorStateChanged, ApplicationDescription}
+
import akka.actor.ActorRef
-import spark.{Utils, Logging}
-import java.net.{URI, URL}
-import org.apache.hadoop.fs.{Path, FileSystem}
-import org.apache.hadoop.conf.Configuration
-import scala.Some
-import spark.deploy.ExecutorStateChanged
+
+import com.google.common.base.Charsets
+import com.google.common.io.Files
+
+import org.apache.spark.{Logging}
+import org.apache.spark.deploy.{ExecutorState, ApplicationDescription}
+import org.apache.spark.deploy.DeployMessages.ExecutorStateChanged
+import org.apache.spark.util.Utils
/**
* Manages the execution of one executor process.
@@ -39,18 +41,19 @@ private[spark] class ExecutorRunner(
val memory: Int,
val worker: ActorRef,
val workerId: String,
- val hostPort: String,
+ val host: String,
val sparkHome: File,
val workDir: File)
extends Logging {
- Utils.checkHostPort(hostPort, "Expected hostport")
-
val fullId = appId + "/" + execId
var workerThread: Thread = null
var process: Process = null
var shutdownHook: Thread = null
+ private def getAppEnv(key: String): Option[String] =
+ appDesc.command.environment.get(key).orElse(Option(getenv(key)))
+
def start() {
workerThread = new Thread("ExecutorRunner for " + fullId) {
override def run() { fetchAndRunExecutor() }
@@ -88,14 +91,14 @@ private[spark] class ExecutorRunner(
/** Replace variables such as {{EXECUTOR_ID}} and {{CORES}} in a command argument passed to us */
def substituteVariables(argument: String): String = argument match {
case "{{EXECUTOR_ID}}" => execId.toString
- case "{{HOSTNAME}}" => Utils.parseHostPort(hostPort)._1
+ case "{{HOSTNAME}}" => host
case "{{CORES}}" => cores.toString
case other => other
}
def buildCommandSeq(): Seq[String] = {
val command = appDesc.command
- val runner = Option(getenv("JAVA_HOME")).map(_ + "/bin/java").getOrElse("java")
+ val runner = getAppEnv("JAVA_HOME").map(_ + "/bin/java").getOrElse("java")
// SPARK-698: do not call the run.cmd script, as process.destroy()
// fails to kill a process tree on Windows
Seq(runner) ++ buildJavaOpts() ++ Seq(command.mainClass) ++
@@ -107,10 +110,11 @@ private[spark] class ExecutorRunner(
* the way the JAVA_OPTS are assembled there.
*/
def buildJavaOpts(): Seq[String] = {
- val libraryOpts = Option(getenv("SPARK_LIBRARY_PATH"))
+ val libraryOpts = getAppEnv("SPARK_LIBRARY_PATH")
.map(p => List("-Djava.library.path=" + p))
.getOrElse(Nil)
- val userOpts = Option(getenv("SPARK_JAVA_OPTS")).map(Utils.splitCommandString).getOrElse(Nil)
+ val workerLocalOpts = Option(getenv("SPARK_JAVA_OPTS")).map(Utils.splitCommandString).getOrElse(Nil)
+ val userOpts = getAppEnv("SPARK_JAVA_OPTS").map(Utils.splitCommandString).getOrElse(Nil)
val memoryOpts = Seq("-Xms" + memory + "M", "-Xmx" + memory + "M")
// Figure out our classpath with the external compute-classpath script
@@ -119,12 +123,12 @@ private[spark] class ExecutorRunner(
Seq(sparkHome + "/bin/compute-classpath" + ext),
extraEnvironment=appDesc.command.environment)
- Seq("-cp", classPath) ++ libraryOpts ++ userOpts ++ memoryOpts
+ Seq("-cp", classPath) ++ libraryOpts ++ workerLocalOpts ++ userOpts ++ memoryOpts
}
/** Spawn a thread that will redirect a given stream to a file */
def redirectStream(in: InputStream, file: File) {
- val out = new FileOutputStream(file)
+ val out = new FileOutputStream(file, true)
new Thread("redirect output to " + file) {
override def run() {
try {
@@ -150,6 +154,7 @@ private[spark] class ExecutorRunner(
// Launch the process
val command = buildCommandSeq()
+ logInfo("Launch command: " + command.mkString("\"", "\" \"", "\""))
val builder = new ProcessBuilder(command: _*).directory(executorDir)
val env = builder.environment()
for ((key, value) <- appDesc.command.environment) {
@@ -160,9 +165,16 @@ private[spark] class ExecutorRunner(
env.put("SPARK_LAUNCH_WITH_SCALA", "0")
process = builder.start()
+ val header = "Spark Executor Command: %s\n%s\n\n".format(
+ command.mkString("\"", "\" \"", "\""), "=" * 40)
+
// Redirect its stdout and stderr to files
- redirectStream(process.getInputStream, new File(executorDir, "stdout"))
- redirectStream(process.getErrorStream, new File(executorDir, "stderr"))
+ val stdout = new File(executorDir, "stdout")
+ redirectStream(process.getInputStream, stdout)
+
+ val stderr = new File(executorDir, "stderr")
+ Files.write(header, stderr, Charsets.UTF_8)
+ redirectStream(process.getErrorStream, stderr)
// Wait for it to exit; this is actually a bad thing if it happens, because we expect to run
// long-lived processes only. However, in the future, we might restart the executor a few
diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index 0bd88ea253..09530beb3b 100644
--- a/core/src/main/scala/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@ -15,23 +15,26 @@
* limitations under the License.
*/
-package spark.deploy.worker
+package org.apache.spark.deploy.worker
-import scala.collection.mutable.{ArrayBuffer, HashMap}
-import akka.actor.{ActorRef, Props, Actor, ActorSystem, Terminated}
-import akka.util.duration._
-import spark.{Logging, Utils}
-import spark.util.AkkaUtils
-import spark.deploy._
-import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected}
import java.text.SimpleDateFormat
import java.util.Date
-import spark.deploy.RegisterWorker
-import spark.deploy.LaunchExecutor
-import spark.deploy.RegisterWorkerFailed
-import spark.deploy.master.Master
import java.io.File
-import ui.WorkerWebUI
+
+import scala.collection.mutable.HashMap
+
+import akka.actor.{ActorRef, Props, Actor, ActorSystem, Terminated}
+import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected}
+import akka.util.duration._
+
+import org.apache.spark.{Logging}
+import org.apache.spark.deploy.ExecutorState
+import org.apache.spark.deploy.DeployMessages._
+import org.apache.spark.deploy.master.Master
+import org.apache.spark.deploy.worker.ui.WorkerWebUI
+import org.apache.spark.metrics.MetricsSystem
+import org.apache.spark.util.{Utils, AkkaUtils}
+
private[spark] class Worker(
host: String,
@@ -67,6 +70,9 @@ private[spark] class Worker(
var coresUsed = 0
var memoryUsed = 0
+ val metricsSystem = MetricsSystem.createMetricsSystem("worker")
+ val workerSource = new WorkerSource(this)
+
def coresFree: Int = cores - coresUsed
def memoryFree: Int = memory - memoryUsed
@@ -90,13 +96,17 @@ private[spark] class Worker(
override def preStart() {
logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format(
- host, port, cores, Utils.memoryMegabytesToString(memory)))
+ host, port, cores, Utils.megabytesToString(memory)))
sparkHome = new File(Option(System.getenv("SPARK_HOME")).getOrElse("."))
logInfo("Spark home: " + sparkHome)
createWorkDir()
webUi = new WorkerWebUI(this, workDir, Some(webUiPort))
+
webUi.start()
connectToMaster()
+
+ metricsSystem.registerSource(workerSource)
+ metricsSystem.start()
}
def connectToMaster() {
@@ -122,7 +132,7 @@ private[spark] class Worker(
case LaunchExecutor(appId, execId, appDesc, cores_, memory_, execSparkHome_) =>
logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))
val manager = new ExecutorRunner(
- appId, execId, appDesc, cores_, memory_, self, workerId, host + ":" + port, new File(execSparkHome_), workDir)
+ appId, execId, appDesc, cores_, memory_, self, workerId, host, new File(execSparkHome_), workDir)
executors(appId + "/" + execId) = manager
manager.start()
coresUsed += cores_
@@ -155,10 +165,10 @@ private[spark] class Worker(
case Terminated(_) | RemoteClientDisconnected(_, _) | RemoteClientShutdown(_, _) =>
masterDisconnected()
-
+
case RequestWorkerState => {
- sender ! WorkerState(host, port, workerId, executors.values.toList,
- finishedExecutors.values.toList, masterUrl, cores, memory,
+ sender ! WorkerStateResponse(host, port, workerId, executors.values.toList,
+ finishedExecutors.values.toList, masterUrl, cores, memory,
coresUsed, memoryUsed, masterWebUiUrl)
}
}
@@ -178,6 +188,7 @@ private[spark] class Worker(
override def postStop() {
executors.values.foreach(_.kill())
webUi.stop()
+ metricsSystem.stop()
}
}
diff --git a/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
index 9fcd3260ca..0ae89a864f 100644
--- a/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
@@ -15,11 +15,9 @@
* limitations under the License.
*/
-package spark.deploy.worker
+package org.apache.spark.deploy.worker
-import spark.util.IntParam
-import spark.util.MemoryParam
-import spark.Utils
+import org.apache.spark.util.{Utils, IntParam, MemoryParam}
import java.lang.management.ManagementFactory
/**
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala
new file mode 100644
index 0000000000..6427c0178f
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala
@@ -0,0 +1,34 @@
+package org.apache.spark.deploy.worker
+
+import com.codahale.metrics.{Gauge, MetricRegistry}
+
+import org.apache.spark.metrics.source.Source
+
+private[spark] class WorkerSource(val worker: Worker) extends Source {
+ val sourceName = "worker"
+ val metricRegistry = new MetricRegistry()
+
+ metricRegistry.register(MetricRegistry.name("executors", "number"), new Gauge[Int] {
+ override def getValue: Int = worker.executors.size
+ })
+
+ // Gauge for cores used of this worker
+ metricRegistry.register(MetricRegistry.name("coresUsed", "number"), new Gauge[Int] {
+ override def getValue: Int = worker.coresUsed
+ })
+
+ // Gauge for memory used of this worker
+ metricRegistry.register(MetricRegistry.name("memUsed", "MBytes"), new Gauge[Int] {
+ override def getValue: Int = worker.memoryUsed
+ })
+
+ // Gauge for cores free of this worker
+ metricRegistry.register(MetricRegistry.name("coresFree", "number"), new Gauge[Int] {
+ override def getValue: Int = worker.coresFree
+ })
+
+ // Gauge for memory free of this worker
+ metricRegistry.register(MetricRegistry.name("memFree", "MBytes"), new Gauge[Int] {
+ override def getValue: Int = worker.memoryFree
+ })
+}
diff --git a/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala
index 7548a26c2e..d2d3617498 100644
--- a/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala
@@ -15,22 +15,24 @@
* limitations under the License.
*/
-package spark.deploy.worker.ui
+package org.apache.spark.deploy.worker.ui
+
+import javax.servlet.http.HttpServletRequest
+
+import scala.xml.Node
import akka.dispatch.Await
import akka.pattern.ask
import akka.util.duration._
-import javax.servlet.http.HttpServletRequest
-
import net.liftweb.json.JsonAST.JValue
-import scala.xml.Node
+import org.apache.spark.deploy.JsonProtocol
+import org.apache.spark.deploy.DeployMessages.{RequestWorkerState, WorkerStateResponse}
+import org.apache.spark.deploy.worker.ExecutorRunner
+import org.apache.spark.ui.UIUtils
+import org.apache.spark.util.Utils
-import spark.deploy.{RequestWorkerState, JsonProtocol, WorkerState}
-import spark.deploy.worker.ExecutorRunner
-import spark.Utils
-import spark.ui.UIUtils
private[spark] class IndexPage(parent: WorkerWebUI) {
val workerActor = parent.worker.self
@@ -38,13 +40,13 @@ private[spark] class IndexPage(parent: WorkerWebUI) {
val timeout = parent.timeout
def renderJson(request: HttpServletRequest): JValue = {
- val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerState]
+ val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse]
val workerState = Await.result(stateFuture, 30 seconds)
JsonProtocol.writeWorkerState(workerState)
}
def render(request: HttpServletRequest): Seq[Node] = {
- val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerState]
+ val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse]
val workerState = Await.result(stateFuture, 30 seconds)
val executorHeaders = Seq("ExecutorID", "Cores", "Memory", "Job Details", "Logs")
@@ -54,8 +56,7 @@ private[spark] class IndexPage(parent: WorkerWebUI) {
UIUtils.listingTable(executorHeaders, executorRow, workerState.finishedExecutors)
val content =
- <hr />
- <div class="row"> <!-- Worker Details -->
+ <div class="row-fluid"> <!-- Worker Details -->
<div class="span12">
<ul class="unstyled">
<li><strong>ID:</strong> {workerState.workerId}</li>
@@ -63,32 +64,29 @@ private[spark] class IndexPage(parent: WorkerWebUI) {
Master URL:</strong> {workerState.masterUrl}
</li>
<li><strong>Cores:</strong> {workerState.cores} ({workerState.coresUsed} Used)</li>
- <li><strong>Memory:</strong> {Utils.memoryMegabytesToString(workerState.memory)}
- ({Utils.memoryMegabytesToString(workerState.memoryUsed)} Used)</li>
+ <li><strong>Memory:</strong> {Utils.megabytesToString(workerState.memory)}
+ ({Utils.megabytesToString(workerState.memoryUsed)} Used)</li>
</ul>
<p><a href={workerState.masterWebUiUrl}>Back to Master</a></p>
</div>
</div>
- <hr/>
- <div class="row"> <!-- Running Executors -->
+ <div class="row-fluid"> <!-- Running Executors -->
<div class="span12">
- <h3> Running Executors {workerState.executors.size} </h3>
- <br/>
+ <h4> Running Executors {workerState.executors.size} </h4>
{runningExecutorTable}
</div>
</div>
- <hr/>
- <div class="row"> <!-- Finished Executors -->
+ <div class="row-fluid"> <!-- Finished Executors -->
<div class="span12">
- <h3> Finished Executors </h3>
- <br/>
+ <h4> Finished Executors </h4>
{finishedExecutorTable}
</div>
</div>;
- UIUtils.basicSparkPage(content, "Spark Worker on %s:%s".format(workerState.host, workerState.port))
+ UIUtils.basicSparkPage(content, "Spark Worker at %s:%s".format(
+ workerState.host, workerState.port))
}
def executorRow(executor: ExecutorRunner): Seq[Node] = {
@@ -96,7 +94,7 @@ private[spark] class IndexPage(parent: WorkerWebUI) {
<td>{executor.execId}</td>
<td>{executor.cores}</td>
<td sorttable_customkey={executor.memory.toString}>
- {Utils.memoryMegabytesToString(executor.memory)}
+ {Utils.megabytesToString(executor.memory)}
</td>
<td>
<ul class="unstyled">
diff --git a/core/src/main/scala/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala
index 61d4cd6d99..95d6007f3b 100644
--- a/core/src/main/scala/spark/deploy/worker/ui/WorkerWebUI.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala
@@ -15,9 +15,8 @@
* limitations under the License.
*/
-package spark.deploy.worker.ui
+package org.apache.spark.deploy.worker.ui
-import akka.actor.ActorRef
import akka.util.{Duration, Timeout}
import java.io.{FileInputStream, File}
@@ -26,18 +25,19 @@ import javax.servlet.http.HttpServletRequest
import org.eclipse.jetty.server.{Handler, Server}
-import spark.deploy.worker.Worker
-import spark.{Utils, Logging}
-import spark.ui.JettyUtils
-import spark.ui.JettyUtils._
-import spark.ui.UIUtils
+import org.apache.spark.deploy.worker.Worker
+import org.apache.spark.{Logging}
+import org.apache.spark.ui.JettyUtils
+import org.apache.spark.ui.JettyUtils._
+import org.apache.spark.ui.UIUtils
+import org.apache.spark.util.Utils
/**
* Web UI server for the standalone worker.
*/
private[spark]
class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[Int] = None)
- extends Logging {
+ extends Logging {
implicit val timeout = Timeout(
Duration.create(System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds"))
val host = Utils.localHostName()
@@ -49,7 +49,9 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I
val indexPage = new IndexPage(this)
- val handlers = Array[(String, Handler)](
+ val metricsHandlers = worker.metricsSystem.getServletHandlers
+
+ val handlers = metricsHandlers ++ Array[(String, Handler)](
("/static", createStaticHandler(WorkerWebUI.STATIC_RESOURCE_DIR)),
("/log", (request: HttpServletRequest) => log(request)),
("/logPage", (request: HttpServletRequest) => logPage(request)),
@@ -111,30 +113,37 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I
if (startByte > 0) {
<a href={"?appId=%s&executorId=%s&logType=%s&offset=%s&byteLength=%s"
.format(appId, executorId, logType, math.max(startByte-byteLength, 0),
- byteLength)}>
- <button>Previous {Utils.memoryBytesToString(math.min(byteLength, startByte))}</button>
+ byteLength)}>
+ <button type="button" class="btn btn-default">
+ Previous {Utils.bytesToString(math.min(byteLength, startByte))}
+ </button>
</a>
}
else {
- <button disabled="disabled">Previous 0 B</button>
+ <button type="button" class="btn btn-default" disabled="disabled">
+ Previous 0 B
+ </button>
}
val nextButton =
if (endByte < logLength) {
<a href={"?appId=%s&executorId=%s&logType=%s&offset=%s&byteLength=%s".
format(appId, executorId, logType, endByte, byteLength)}>
- <button>Next {Utils.memoryBytesToString(math.min(byteLength, logLength-endByte))}</button>
+ <button type="button" class="btn btn-default">
+ Next {Utils.bytesToString(math.min(byteLength, logLength-endByte))}
+ </button>
</a>
}
else {
- <button disabled="disabled">Next 0 B</button>
+ <button type="button" class="btn btn-default" disabled="disabled">
+ Next 0 B
+ </button>
}
val content =
<html>
<body>
{linkToMaster}
- <hr />
<div>
<div style="float:left;width:40%">{backButton}</div>
<div style="float:left;">{range}</div>
@@ -177,6 +186,6 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I
}
private[spark] object WorkerWebUI {
- val STATIC_RESOURCE_DIR = "spark/ui/static"
+ val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static"
val DEFAULT_PORT="8081"
}
diff --git a/core/src/main/scala/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index 2e81151882..d365804994 100644
--- a/core/src/main/scala/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -15,26 +15,30 @@
* limitations under the License.
*/
-package spark.executor
+package org.apache.spark.executor
-import java.io.{File, FileOutputStream}
-import java.net.{URI, URL, URLClassLoader}
+import java.io.{File}
+import java.lang.management.ManagementFactory
+import java.nio.ByteBuffer
import java.util.concurrent._
-import org.apache.hadoop.fs.FileUtil
+import scala.collection.JavaConversions._
+import scala.collection.mutable.HashMap
-import scala.collection.mutable.{ArrayBuffer, Map, HashMap}
+import org.apache.spark.scheduler._
+import org.apache.spark._
+import org.apache.spark.util.Utils
-import spark.broadcast._
-import spark.scheduler._
-import spark._
-import java.nio.ByteBuffer
/**
* The Mesos executor for Spark.
*/
-private[spark] class Executor(executorId: String, slaveHostname: String, properties: Seq[(String, String)]) extends Logging {
-
+private[spark] class Executor(
+ executorId: String,
+ slaveHostname: String,
+ properties: Seq[(String, String)])
+ extends Logging
+{
// Application dependencies (added through SparkContext) that we've fetched so far on this node.
// Each map holds the master's timestamp for the version of that file or JAR we got.
private val currentFiles: HashMap[String, Long] = new HashMap[String, Long]()
@@ -57,6 +61,13 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert
System.setProperty(key, value)
}
+ // If we are in yarn mode, systems can have different disk layouts so we must set it
+ // to what Yarn on this system said was available. This will be used later when SparkEnv
+ // created.
+ if (java.lang.Boolean.valueOf(System.getenv("SPARK_YARN_MODE"))) {
+ System.setProperty("spark.local.dir", getYarnLocalDirs())
+ }
+
// Create our ClassLoader and set it on this thread
private val urlClassLoader = createClassLoader()
private val replClassLoader = addReplClassLoaderIfNeeded(urlClassLoader)
@@ -69,7 +80,7 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert
override def uncaughtException(thread: Thread, exception: Throwable) {
try {
logError("Uncaught exception in thread " + thread, exception)
-
+
// We may have been called from a shutdown hook. If so, we must not call System.exit().
// (If we do, we will deadlock.)
if (!Utils.inShutdown()) {
@@ -87,9 +98,13 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert
}
)
+ val executorSource = new ExecutorSource(this)
+
// Initialize Spark environment (using system properties read above)
val env = SparkEnv.createFromSystemProperties(executorId, slaveHostname, 0, false, false)
SparkEnv.set(env)
+ env.metricsSystem.registerSource(executorSource)
+
private val akkaFrameSize = env.actorSystem.settings.config.getBytes("akka.remote.netty.message-frame-size")
// Start worker thread pool
@@ -100,6 +115,21 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert
threadPool.execute(new TaskRunner(context, taskId, serializedTask))
}
+ /** Get the Yarn approved local directories. */
+ private def getYarnLocalDirs(): String = {
+ // Hadoop 0.23 and 2.x have different Environment variable names for the
+ // local dirs, so lets check both. We assume one of the 2 is set.
+ // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X
+ val localDirs = Option(System.getenv("YARN_LOCAL_DIRS"))
+ .getOrElse(Option(System.getenv("LOCAL_DIRS"))
+ .getOrElse(""))
+
+ if (localDirs.isEmpty()) {
+ throw new Exception("Yarn Local dirs can't be empty")
+ }
+ return localDirs
+ }
+
class TaskRunner(context: ExecutorBackend, taskId: Long, serializedTask: ByteBuffer)
extends Runnable {
@@ -112,6 +142,9 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert
context.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
var attemptedTask: Option[Task[Any]] = None
var taskStart: Long = 0
+ def getTotalGCTime = ManagementFactory.getGarbageCollectorMXBeans.map(g => g.getCollectionTime).sum
+ val startGCTime = getTotalGCTime
+
try {
SparkEnv.set(env)
Accumulators.clear()
@@ -119,15 +152,16 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert
updateDependencies(taskFiles, taskJars)
val task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader)
attemptedTask = Some(task)
- logInfo("Its generation is " + task.generation)
- env.mapOutputTracker.updateGeneration(task.generation)
+ logInfo("Its epoch is " + task.epoch)
+ env.mapOutputTracker.updateEpoch(task.epoch)
taskStart = System.currentTimeMillis()
val value = task.run(taskId.toInt)
val taskFinish = System.currentTimeMillis()
- task.metrics.foreach{ m =>
+ for (m <- task.metrics) {
m.hostname = Utils.localHostName
m.executorDeserializeTime = (taskStart - startTime).toInt
m.executorRunTime = (taskFinish - taskStart).toInt
+ m.jvmGCTime = getTotalGCTime - startGCTime
}
//TODO I'd also like to track the time it takes to serialize the task results, but that is huge headache, b/c
// we need to serialize the task metrics first. If TaskMetrics had a custom serialized format, we could
@@ -151,7 +185,10 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert
case t: Throwable => {
val serviceTime = (System.currentTimeMillis() - taskStart).toInt
val metrics = attemptedTask.flatMap(t => t.metrics)
- metrics.foreach{m => m.executorRunTime = serviceTime}
+ for (m <- metrics) {
+ m.executorRunTime = serviceTime
+ m.jvmGCTime = getTotalGCTime - startGCTime
+ }
val reason = ExceptionFailure(t.getClass.getName, t.toString, t.getStackTrace, metrics)
context.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))
@@ -189,13 +226,13 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert
if (classUri != null) {
logInfo("Using REPL class URI: " + classUri)
try {
- val klass = Class.forName("spark.repl.ExecutorClassLoader")
+ val klass = Class.forName("org.apache.spark.repl.ExecutorClassLoader")
.asInstanceOf[Class[_ <: ClassLoader]]
val constructor = klass.getConstructor(classOf[String], classOf[ClassLoader])
return constructor.newInstance(classUri, parent)
} catch {
case _: ClassNotFoundException =>
- logError("Could not find spark.repl.ExecutorClassLoader on classpath!")
+ logError("Could not find org.apache.spark.repl.ExecutorClassLoader on classpath!")
System.exit(1)
null
}
diff --git a/core/src/main/scala/spark/executor/ExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorBackend.scala
index 33a6f8a824..ad7dd34c76 100644
--- a/core/src/main/scala/spark/executor/ExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/ExecutorBackend.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.executor
+package org.apache.spark.executor
import java.nio.ByteBuffer
-import spark.TaskState.TaskState
+import org.apache.spark.TaskState.TaskState
/**
* A pluggable interface used by the Executor to send updates to the cluster scheduler.
diff --git a/core/src/main/scala/spark/executor/ExecutorExitCode.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala
index 64b9fb88f8..e5c9bbbe28 100644
--- a/core/src/main/scala/spark/executor/ExecutorExitCode.scala
+++ b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.executor
+package org.apache.spark.executor
/**
* These are exit codes that executors should use to provide the master with information about
diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala
new file mode 100644
index 0000000000..17653cd560
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala
@@ -0,0 +1,55 @@
+package org.apache.spark.executor
+
+import com.codahale.metrics.{Gauge, MetricRegistry}
+
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.hdfs.DistributedFileSystem
+import org.apache.hadoop.fs.LocalFileSystem
+
+import scala.collection.JavaConversions._
+
+import org.apache.spark.metrics.source.Source
+
+class ExecutorSource(val executor: Executor) extends Source {
+ private def fileStats(scheme: String) : Option[FileSystem.Statistics] =
+ FileSystem.getAllStatistics().filter(s => s.getScheme.equals(scheme)).headOption
+
+ private def registerFileSystemStat[T](
+ scheme: String, name: String, f: FileSystem.Statistics => T, defaultValue: T) = {
+ metricRegistry.register(MetricRegistry.name("filesystem", scheme, name), new Gauge[T] {
+ override def getValue: T = fileStats(scheme).map(f).getOrElse(defaultValue)
+ })
+ }
+
+ val metricRegistry = new MetricRegistry()
+ val sourceName = "executor"
+
+ // Gauge for executor thread pool's actively executing task counts
+ metricRegistry.register(MetricRegistry.name("threadpool", "activeTask", "count"), new Gauge[Int] {
+ override def getValue: Int = executor.threadPool.getActiveCount()
+ })
+
+ // Gauge for executor thread pool's approximate total number of tasks that have been completed
+ metricRegistry.register(MetricRegistry.name("threadpool", "completeTask", "count"), new Gauge[Long] {
+ override def getValue: Long = executor.threadPool.getCompletedTaskCount()
+ })
+
+ // Gauge for executor thread pool's current number of threads
+ metricRegistry.register(MetricRegistry.name("threadpool", "currentPool", "size"), new Gauge[Int] {
+ override def getValue: Int = executor.threadPool.getPoolSize()
+ })
+
+ // Gauge got executor thread pool's largest number of threads that have ever simultaneously been in th pool
+ metricRegistry.register(MetricRegistry.name("threadpool", "maxPool", "size"), new Gauge[Int] {
+ override def getValue: Int = executor.threadPool.getMaximumPoolSize()
+ })
+
+ // Gauge for file system stats of this executor
+ for (scheme <- Array("hdfs", "file")) {
+ registerFileSystemStat(scheme, "bytesRead", _.getBytesRead(), 0L)
+ registerFileSystemStat(scheme, "bytesWritten", _.getBytesWritten(), 0L)
+ registerFileSystemStat(scheme, "readOps", _.getReadOps(), 0)
+ registerFileSystemStat(scheme, "largeReadOps", _.getLargeReadOps(), 0)
+ registerFileSystemStat(scheme, "writeOps", _.getWriteOps(), 0)
+ }
+}
diff --git a/core/src/main/scala/spark/executor/ExecutorURLClassLoader.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorURLClassLoader.scala
index 09d12fb65b..f9bfe8ed2f 100644
--- a/core/src/main/scala/spark/executor/ExecutorURLClassLoader.scala
+++ b/core/src/main/scala/org/apache/spark/executor/ExecutorURLClassLoader.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.executor
+package org.apache.spark.executor
import java.net.{URLClassLoader, URL}
diff --git a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index 4961c42fad..da62091980 100644
--- a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -15,15 +15,16 @@
* limitations under the License.
*/
-package spark.executor
+package org.apache.spark.executor
import java.nio.ByteBuffer
import org.apache.mesos.{Executor => MesosExecutor, MesosExecutorDriver, MesosNativeLibrary, ExecutorDriver}
import org.apache.mesos.Protos.{TaskState => MesosTaskState, TaskStatus => MesosTaskStatus, _}
-import spark.TaskState.TaskState
+import org.apache.spark.TaskState.TaskState
import com.google.protobuf.ByteString
-import spark.{Utils, Logging}
-import spark.TaskState
+import org.apache.spark.{Logging}
+import org.apache.spark.TaskState
+import org.apache.spark.util.Utils
private[spark] class MesosExecutorBackend
extends MesosExecutor
diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/StandaloneExecutorBackend.scala
index f4003da732..7839023868 100644
--- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/StandaloneExecutorBackend.scala
@@ -15,22 +15,18 @@
* limitations under the License.
*/
-package spark.executor
+package org.apache.spark.executor
import java.nio.ByteBuffer
-import spark.Logging
-import spark.TaskState.TaskState
-import spark.util.AkkaUtils
+
import akka.actor.{ActorRef, Actor, Props, Terminated}
import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected}
-import java.util.concurrent.{TimeUnit, ThreadPoolExecutor, SynchronousQueue}
-import spark.scheduler.cluster._
-import spark.scheduler.cluster.RegisteredExecutor
-import spark.scheduler.cluster.LaunchTask
-import spark.scheduler.cluster.RegisterExecutorFailed
-import spark.scheduler.cluster.RegisterExecutor
-import spark.Utils
-import spark.deploy.SparkHadoopUtil
+
+import org.apache.spark.{Logging, SparkEnv}
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.scheduler.cluster.StandaloneClusterMessages._
+import org.apache.spark.util.{Utils, AkkaUtils}
+
private[spark] class StandaloneExecutorBackend(
driverUrl: String,
@@ -85,19 +81,6 @@ private[spark] class StandaloneExecutorBackend(
private[spark] object StandaloneExecutorBackend {
def run(driverUrl: String, executorId: String, hostname: String, cores: Int) {
- SparkHadoopUtil.runAsUser(run0, Tuple4[Any, Any, Any, Any] (driverUrl, executorId, hostname, cores))
- }
-
- // This will be run 'as' the user
- def run0(args: Product) {
- assert(4 == args.productArity)
- runImpl(args.productElement(0).asInstanceOf[String],
- args.productElement(1).asInstanceOf[String],
- args.productElement(2).asInstanceOf[String],
- args.productElement(3).asInstanceOf[Int])
- }
-
- private def runImpl(driverUrl: String, executorId: String, hostname: String, cores: Int) {
// Debug code
Utils.checkHost(hostname)
diff --git a/core/src/main/scala/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala
index 3151627839..f311141148 100644
--- a/core/src/main/scala/spark/executor/TaskMetrics.scala
+++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.executor
+package org.apache.spark.executor
class TaskMetrics extends Serializable {
/**
@@ -31,7 +31,7 @@ class TaskMetrics extends Serializable {
/**
* Time the executor spends actually running the task (including fetching shuffle data)
*/
- var executorRunTime:Int = _
+ var executorRunTime: Int = _
/**
* The number of bytes this task transmitted back to the driver as the TaskResult
@@ -39,6 +39,11 @@ class TaskMetrics extends Serializable {
var resultSize: Long = _
/**
+ * Amount of time the JVM spent in garbage collection while executing this task
+ */
+ var jvmGCTime: Long = _
+
+ /**
* If this task reads from shuffle output, metrics on getting shuffle data will be collected here
*/
var shuffleReadMetrics: Option[ShuffleReadMetrics] = None
diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
new file mode 100644
index 0000000000..90a0420caf
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.io
+
+import java.io.{InputStream, OutputStream}
+
+import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream}
+
+import org.xerial.snappy.{SnappyInputStream, SnappyOutputStream}
+
+
+/**
+ * CompressionCodec allows the customization of choosing different compression implementations
+ * to be used in block storage.
+ */
+trait CompressionCodec {
+
+ def compressedOutputStream(s: OutputStream): OutputStream
+
+ def compressedInputStream(s: InputStream): InputStream
+}
+
+
+private[spark] object CompressionCodec {
+
+ def createCodec(): CompressionCodec = {
+ // Set the default codec to Snappy since the LZF implementation initializes a pretty large
+ // buffer for every stream, which results in a lot of memory overhead when the number of
+ // shuffle reduce buckets are large.
+ createCodec(classOf[SnappyCompressionCodec].getName)
+ }
+
+ def createCodec(codecName: String): CompressionCodec = {
+ Class.forName(
+ System.getProperty("spark.io.compression.codec", codecName),
+ true,
+ Thread.currentThread.getContextClassLoader).newInstance().asInstanceOf[CompressionCodec]
+ }
+}
+
+
+/**
+ * LZF implementation of [[org.apache.spark.io.CompressionCodec]].
+ */
+class LZFCompressionCodec extends CompressionCodec {
+
+ override def compressedOutputStream(s: OutputStream): OutputStream = {
+ new LZFOutputStream(s).setFinishBlockOnFlush(true)
+ }
+
+ override def compressedInputStream(s: InputStream): InputStream = new LZFInputStream(s)
+}
+
+
+/**
+ * Snappy implementation of [[org.apache.spark.io.CompressionCodec]].
+ * Block size can be configured by spark.io.compression.snappy.block.size.
+ */
+class SnappyCompressionCodec extends CompressionCodec {
+
+ override def compressedOutputStream(s: OutputStream): OutputStream = {
+ val blockSize = System.getProperty("spark.io.compression.snappy.block.size", "32768").toInt
+ new SnappyOutputStream(s, blockSize)
+ }
+
+ override def compressedInputStream(s: InputStream): InputStream = new SnappyInputStream(s)
+}
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala
new file mode 100644
index 0000000000..0f9c4e00b1
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.metrics
+
+import java.util.Properties
+import java.io.{File, FileInputStream, InputStream, IOException}
+
+import scala.collection.mutable
+import scala.util.matching.Regex
+
+import org.apache.spark.Logging
+
+private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging {
+ initLogging()
+
+ val DEFAULT_PREFIX = "*"
+ val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
+ val METRICS_CONF = "metrics.properties"
+
+ val properties = new Properties()
+ var propertyCategories: mutable.HashMap[String, Properties] = null
+
+ private def setDefaultProperties(prop: Properties) {
+ prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
+ prop.setProperty("*.sink.servlet.uri", "/metrics/json")
+ prop.setProperty("*.sink.servlet.sample", "false")
+ prop.setProperty("master.sink.servlet.uri", "/metrics/master/json")
+ prop.setProperty("applications.sink.servlet.uri", "/metrics/applications/json")
+ }
+
+ def initialize() {
+ //Add default properties in case there's no properties file
+ setDefaultProperties(properties)
+
+ // If spark.metrics.conf is not set, try to get file in class path
+ var is: InputStream = null
+ try {
+ is = configFile match {
+ case Some(f) => new FileInputStream(f)
+ case None => getClass.getClassLoader.getResourceAsStream(METRICS_CONF)
+ }
+
+ if (is != null) {
+ properties.load(is)
+ }
+ } catch {
+ case e: Exception => logError("Error loading configure file", e)
+ } finally {
+ if (is != null) is.close()
+ }
+
+ propertyCategories = subProperties(properties, INSTANCE_REGEX)
+ if (propertyCategories.contains(DEFAULT_PREFIX)) {
+ import scala.collection.JavaConversions._
+
+ val defaultProperty = propertyCategories(DEFAULT_PREFIX)
+ for { (inst, prop) <- propertyCategories
+ if (inst != DEFAULT_PREFIX)
+ (k, v) <- defaultProperty
+ if (prop.getProperty(k) == null) } {
+ prop.setProperty(k, v)
+ }
+ }
+ }
+
+ def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = {
+ val subProperties = new mutable.HashMap[String, Properties]
+ import scala.collection.JavaConversions._
+ prop.foreach { kv =>
+ if (regex.findPrefixOf(kv._1) != None) {
+ val regex(prefix, suffix) = kv._1
+ subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2)
+ }
+ }
+ subProperties
+ }
+
+ def getInstance(inst: String): Properties = {
+ propertyCategories.get(inst) match {
+ case Some(s) => s
+ case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties)
+ }
+ }
+}
+
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
new file mode 100644
index 0000000000..bec0c83be8
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.metrics
+
+import com.codahale.metrics.{Metric, MetricFilter, MetricRegistry}
+
+import java.util.Properties
+import java.util.concurrent.TimeUnit
+
+import scala.collection.mutable
+
+import org.apache.spark.Logging
+import org.apache.spark.metrics.sink.{MetricsServlet, Sink}
+import org.apache.spark.metrics.source.Source
+
+/**
+ * Spark Metrics System, created by specific "instance", combined by source,
+ * sink, periodically poll source metrics data to sink destinations.
+ *
+ * "instance" specify "who" (the role) use metrics system. In spark there are several roles
+ * like master, worker, executor, client driver, these roles will create metrics system
+ * for monitoring. So instance represents these roles. Currently in Spark, several instances
+ * have already implemented: master, worker, executor, driver, applications.
+ *
+ * "source" specify "where" (source) to collect metrics data. In metrics system, there exists
+ * two kinds of source:
+ * 1. Spark internal source, like MasterSource, WorkerSource, etc, which will collect
+ * Spark component's internal state, these sources are related to instance and will be
+ * added after specific metrics system is created.
+ * 2. Common source, like JvmSource, which will collect low level state, is configured by
+ * configuration and loaded through reflection.
+ *
+ * "sink" specify "where" (destination) to output metrics data to. Several sinks can be
+ * coexisted and flush metrics to all these sinks.
+ *
+ * Metrics configuration format is like below:
+ * [instance].[sink|source].[name].[options] = xxxx
+ *
+ * [instance] can be "master", "worker", "executor", "driver", "applications" which means only
+ * the specified instance has this property.
+ * wild card "*" can be used to replace instance name, which means all the instances will have
+ * this property.
+ *
+ * [sink|source] means this property belongs to source or sink. This field can only be source or sink.
+ *
+ * [name] specify the name of sink or source, it is custom defined.
+ *
+ * [options] is the specific property of this source or sink.
+ */
+private[spark] class MetricsSystem private (val instance: String) extends Logging {
+ initLogging()
+
+ val confFile = System.getProperty("spark.metrics.conf")
+ val metricsConfig = new MetricsConfig(Option(confFile))
+
+ val sinks = new mutable.ArrayBuffer[Sink]
+ val sources = new mutable.ArrayBuffer[Source]
+ val registry = new MetricRegistry()
+
+ // Treat MetricsServlet as a special sink as it should be exposed to add handlers to web ui
+ private var metricsServlet: Option[MetricsServlet] = None
+
+ /** Get any UI handlers used by this metrics system. */
+ def getServletHandlers = metricsServlet.map(_.getHandlers).getOrElse(Array())
+
+ metricsConfig.initialize()
+ registerSources()
+ registerSinks()
+
+ def start() {
+ sinks.foreach(_.start)
+ }
+
+ def stop() {
+ sinks.foreach(_.stop)
+ }
+
+ def registerSource(source: Source) {
+ sources += source
+ try {
+ registry.register(source.sourceName, source.metricRegistry)
+ } catch {
+ case e: IllegalArgumentException => logInfo("Metrics already registered", e)
+ }
+ }
+
+ def removeSource(source: Source) {
+ sources -= source
+ registry.removeMatching(new MetricFilter {
+ def matches(name: String, metric: Metric): Boolean = name.startsWith(source.sourceName)
+ })
+ }
+
+ def registerSources() {
+ val instConfig = metricsConfig.getInstance(instance)
+ val sourceConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SOURCE_REGEX)
+
+ // Register all the sources related to instance
+ sourceConfigs.foreach { kv =>
+ val classPath = kv._2.getProperty("class")
+ try {
+ val source = Class.forName(classPath).newInstance()
+ registerSource(source.asInstanceOf[Source])
+ } catch {
+ case e: Exception => logError("Source class " + classPath + " cannot be instantialized", e)
+ }
+ }
+ }
+
+ def registerSinks() {
+ val instConfig = metricsConfig.getInstance(instance)
+ val sinkConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SINK_REGEX)
+
+ sinkConfigs.foreach { kv =>
+ val classPath = kv._2.getProperty("class")
+ try {
+ val sink = Class.forName(classPath)
+ .getConstructor(classOf[Properties], classOf[MetricRegistry])
+ .newInstance(kv._2, registry)
+ if (kv._1 == "servlet") {
+ metricsServlet = Some(sink.asInstanceOf[MetricsServlet])
+ } else {
+ sinks += sink.asInstanceOf[Sink]
+ }
+ } catch {
+ case e: Exception => logError("Sink class " + classPath + " cannot be instantialized", e)
+ }
+ }
+ }
+}
+
+private[spark] object MetricsSystem {
+ val SINK_REGEX = "^sink\\.(.+)\\.(.+)".r
+ val SOURCE_REGEX = "^source\\.(.+)\\.(.+)".r
+
+ val MINIMAL_POLL_UNIT = TimeUnit.SECONDS
+ val MINIMAL_POLL_PERIOD = 1
+
+ def checkMinimalPollingPeriod(pollUnit: TimeUnit, pollPeriod: Int) {
+ val period = MINIMAL_POLL_UNIT.convert(pollPeriod, pollUnit)
+ if (period < MINIMAL_POLL_PERIOD) {
+ throw new IllegalArgumentException("Polling period " + pollPeriod + " " + pollUnit +
+ " below than minimal polling period ")
+ }
+ }
+
+ def createMetricsSystem(instance: String): MetricsSystem = new MetricsSystem(instance)
+}
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala
new file mode 100644
index 0000000000..bce257d6e6
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.metrics.sink
+
+import com.codahale.metrics.{ConsoleReporter, MetricRegistry}
+
+import java.util.Properties
+import java.util.concurrent.TimeUnit
+
+import org.apache.spark.metrics.MetricsSystem
+
+class ConsoleSink(val property: Properties, val registry: MetricRegistry) extends Sink {
+ val CONSOLE_DEFAULT_PERIOD = 10
+ val CONSOLE_DEFAULT_UNIT = "SECONDS"
+
+ val CONSOLE_KEY_PERIOD = "period"
+ val CONSOLE_KEY_UNIT = "unit"
+
+ val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match {
+ case Some(s) => s.toInt
+ case None => CONSOLE_DEFAULT_PERIOD
+ }
+
+ val pollUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match {
+ case Some(s) => TimeUnit.valueOf(s.toUpperCase())
+ case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT)
+ }
+
+ MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)
+
+ val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry)
+ .convertDurationsTo(TimeUnit.MILLISECONDS)
+ .convertRatesTo(TimeUnit.SECONDS)
+ .build()
+
+ override def start() {
+ reporter.start(pollPeriod, pollUnit)
+ }
+
+ override def stop() {
+ reporter.stop()
+ }
+}
+
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala
new file mode 100644
index 0000000000..3d1a06a395
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.metrics.sink
+
+import com.codahale.metrics.{CsvReporter, MetricRegistry}
+
+import java.io.File
+import java.util.{Locale, Properties}
+import java.util.concurrent.TimeUnit
+
+import org.apache.spark.metrics.MetricsSystem
+
+class CsvSink(val property: Properties, val registry: MetricRegistry) extends Sink {
+ val CSV_KEY_PERIOD = "period"
+ val CSV_KEY_UNIT = "unit"
+ val CSV_KEY_DIR = "directory"
+
+ val CSV_DEFAULT_PERIOD = 10
+ val CSV_DEFAULT_UNIT = "SECONDS"
+ val CSV_DEFAULT_DIR = "/tmp/"
+
+ val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match {
+ case Some(s) => s.toInt
+ case None => CSV_DEFAULT_PERIOD
+ }
+
+ val pollUnit = Option(property.getProperty(CSV_KEY_UNIT)) match {
+ case Some(s) => TimeUnit.valueOf(s.toUpperCase())
+ case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT)
+ }
+
+ MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)
+
+ val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match {
+ case Some(s) => s
+ case None => CSV_DEFAULT_DIR
+ }
+
+ val reporter: CsvReporter = CsvReporter.forRegistry(registry)
+ .formatFor(Locale.US)
+ .convertDurationsTo(TimeUnit.MILLISECONDS)
+ .convertRatesTo(TimeUnit.SECONDS)
+ .build(new File(pollDir))
+
+ override def start() {
+ reporter.start(pollPeriod, pollUnit)
+ }
+
+ override def stop() {
+ reporter.stop()
+ }
+}
+
diff --git a/core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala
index 25386b2796..621d086d41 100644
--- a/core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala
@@ -15,13 +15,21 @@
* limitations under the License.
*/
-package org.apache.hadoop.mapred
+package org.apache.spark.metrics.sink
-trait HadoopMapRedUtil {
- def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContext(conf, jobId)
+import com.codahale.metrics.{JmxReporter, MetricRegistry}
- def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContext(conf, attemptId)
+import java.util.Properties
+
+class JmxSink(val property: Properties, val registry: MetricRegistry) extends Sink {
+ val reporter: JmxReporter = JmxReporter.forRegistry(registry).build()
+
+ override def start() {
+ reporter.start()
+ }
+
+ override def stop() {
+ reporter.stop()
+ }
- def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier,
- jobId, isMap, taskId, attemptId)
}
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala
new file mode 100644
index 0000000000..4e90dd4323
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.metrics.sink
+
+import com.codahale.metrics.MetricRegistry
+import com.codahale.metrics.json.MetricsModule
+
+import com.fasterxml.jackson.databind.ObjectMapper
+
+import java.util.Properties
+import java.util.concurrent.TimeUnit
+import javax.servlet.http.HttpServletRequest
+
+import org.eclipse.jetty.server.Handler
+
+import org.apache.spark.ui.JettyUtils
+
+class MetricsServlet(val property: Properties, val registry: MetricRegistry) extends Sink {
+ val SERVLET_KEY_URI = "uri"
+ val SERVLET_KEY_SAMPLE = "sample"
+
+ val servletURI = property.getProperty(SERVLET_KEY_URI)
+
+ val servletShowSample = property.getProperty(SERVLET_KEY_SAMPLE).toBoolean
+
+ val mapper = new ObjectMapper().registerModule(
+ new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample))
+
+ def getHandlers = Array[(String, Handler)](
+ (servletURI, JettyUtils.createHandler(request => getMetricsSnapshot(request), "text/json"))
+ )
+
+ def getMetricsSnapshot(request: HttpServletRequest): String = {
+ mapper.writeValueAsString(registry)
+ }
+
+ override def start() { }
+
+ override def stop() { }
+}
diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulingMode.scala b/core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala
index 4b3e3e50e1..3a739aa563 100644
--- a/core/src/main/scala/spark/scheduler/cluster/SchedulingMode.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala
@@ -15,10 +15,9 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.metrics.sink
-object SchedulingMode extends Enumeration("FAIR","FIFO"){
-
- type SchedulingMode = Value
- val FAIR,FIFO = Value
+trait Sink {
+ def start: Unit
+ def stop: Unit
}
diff --git a/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala
index 4b3d84670c..75cb2b8973 100644
--- a/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala
@@ -15,13 +15,18 @@
* limitations under the License.
*/
-package org.apache.hadoop.mapred
+package org.apache.spark.metrics.source
-trait HadoopMapRedUtil {
- def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContextImpl(conf, jobId)
+import com.codahale.metrics.MetricRegistry
+import com.codahale.metrics.jvm.{GarbageCollectorMetricSet, MemoryUsageGaugeSet}
- def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId)
+class JvmSource extends Source {
+ val sourceName = "jvm"
+ val metricRegistry = new MetricRegistry()
- def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier,
- jobId, isMap, taskId, attemptId)
+ val gcMetricSet = new GarbageCollectorMetricSet
+ val memGaugeSet = new MemoryUsageGaugeSet
+
+ metricRegistry.registerAll(gcMetricSet)
+ metricRegistry.registerAll(memGaugeSet)
}
diff --git a/core/src/main/scala/org/apache/spark/metrics/source/Source.scala b/core/src/main/scala/org/apache/spark/metrics/source/Source.scala
new file mode 100644
index 0000000000..3fee55cc6d
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/metrics/source/Source.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.metrics.source
+
+import com.codahale.metrics.MetricRegistry
+
+trait Source {
+ def sourceName: String
+ def metricRegistry: MetricRegistry
+}
diff --git a/core/src/main/scala/spark/network/BufferMessage.scala b/core/src/main/scala/org/apache/spark/network/BufferMessage.scala
index e566aeac13..f736bb3713 100644
--- a/core/src/main/scala/spark/network/BufferMessage.scala
+++ b/core/src/main/scala/org/apache/spark/network/BufferMessage.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
import java.nio.ByteBuffer
import scala.collection.mutable.ArrayBuffer
-import spark.storage.BlockManager
+import org.apache.spark.storage.BlockManager
private[spark]
diff --git a/core/src/main/scala/spark/network/Connection.scala b/core/src/main/scala/org/apache/spark/network/Connection.scala
index b66c00b58c..95cb0206ac 100644
--- a/core/src/main/scala/spark/network/Connection.scala
+++ b/core/src/main/scala/org/apache/spark/network/Connection.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
-import spark._
+import org.apache.spark._
import scala.collection.mutable.{HashMap, Queue, ArrayBuffer}
@@ -45,12 +45,15 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector,
channel.socket.setKeepAlive(true)
/*channel.socket.setReceiveBufferSize(32768) */
+ @volatile private var closed = false
var onCloseCallback: Connection => Unit = null
var onExceptionCallback: (Connection, Exception) => Unit = null
var onKeyInterestChangeCallback: (Connection, Int) => Unit = null
val remoteAddress = getRemoteAddress()
+ def resetForceReregister(): Boolean
+
// Read channels typically do not register for write and write does not for read
// Now, we do have write registering for read too (temporarily), but this is to detect
// channel close NOT to actually read/consume data on it !
@@ -95,6 +98,7 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector,
}
def close() {
+ closed = true
val k = key()
if (k != null) {
k.cancel()
@@ -103,6 +107,8 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector,
callOnCloseCallback()
}
+ protected def isClosed: Boolean = closed
+
def onClose(callback: Connection => Unit) {
onCloseCallback = callback
}
@@ -168,7 +174,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector,
remoteId_ : ConnectionManagerId)
extends Connection(SocketChannel.open, selector_, remoteId_) {
- class Outbox(fair: Int = 0) {
+ private class Outbox(fair: Int = 0) {
val messages = new Queue[Message]()
val defaultChunkSize = 65536 //32768 //16384
var nextMessageToBeUsed = 0
@@ -245,7 +251,17 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector,
}
}
+ // outbox is used as a lock - ensure that it is always used as a leaf (since methods which
+ // lock it are invoked in context of other locks)
private val outbox = new Outbox(1)
+ /*
+ This is orthogonal to whether we have pending bytes to write or not - and satisfies a slightly
+ different purpose. This flag is to see if we need to force reregister for write even when we
+ do not have any pending bytes to write to socket.
+ This can happen due to a race between adding pending buffers, and checking for existing of
+ data as detailed in https://github.com/mesos/spark/pull/791
+ */
+ private var needForceReregister = false
val currentBuffers = new ArrayBuffer[ByteBuffer]()
/*channel.socket.setSendBufferSize(256 * 1024)*/
@@ -267,9 +283,19 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector,
def send(message: Message) {
outbox.synchronized {
outbox.addMessage(message)
- if (channel.isConnected) {
- registerInterest()
- }
+ needForceReregister = true
+ }
+ if (channel.isConnected) {
+ registerInterest()
+ }
+ }
+
+ // return previous value after resetting it.
+ def resetForceReregister(): Boolean = {
+ outbox.synchronized {
+ val result = needForceReregister
+ needForceReregister = false
+ result
}
}
@@ -322,7 +348,11 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector,
outbox.synchronized {
outbox.getChunk() match {
case Some(chunk) => {
- currentBuffers ++= chunk.buffers
+ val buffers = chunk.buffers
+ // If we have 'seen' pending messages, then reset flag - since we handle that as normal
+ // registering of event (below)
+ if (needForceReregister && buffers.exists(_.remaining() > 0)) resetForceReregister()
+ currentBuffers ++= buffers
}
case None => {
// changeConnectionKeyInterest(0)
@@ -384,7 +414,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector,
override def changeInterestForRead(): Boolean = false
- override def changeInterestForWrite(): Boolean = true
+ override def changeInterestForWrite(): Boolean = ! isClosed
}
@@ -534,6 +564,7 @@ private[spark] class ReceivingConnection(channel_ : SocketChannel, selector_ : S
def onReceive(callback: (Connection, Message) => Unit) {onReceiveCallback = callback}
+ // override def changeInterestForRead(): Boolean = ! isClosed
override def changeInterestForRead(): Boolean = true
override def changeInterestForWrite(): Boolean = {
@@ -549,4 +580,7 @@ private[spark] class ReceivingConnection(channel_ : SocketChannel, selector_ : S
override def unregisterInterest() {
changeConnectionKeyInterest(0)
}
+
+ // For read conn, always false.
+ override def resetForceReregister(): Boolean = false
}
diff --git a/core/src/main/scala/spark/network/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
index 6c4e7dc03e..e15a839c4e 100644
--- a/core/src/main/scala/spark/network/ConnectionManager.scala
+++ b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
-import spark._
+import org.apache.spark._
import java.nio._
import java.nio.channels._
@@ -34,6 +34,7 @@ import scala.collection.mutable.ArrayBuffer
import akka.dispatch.{Await, Promise, ExecutionContext, Future}
import akka.util.Duration
import akka.util.duration._
+import org.apache.spark.util.Utils
private[spark] class ConnectionManager(port: Int) extends Logging {
@@ -123,7 +124,8 @@ private[spark] class ConnectionManager(port: Int) extends Logging {
} finally {
writeRunnableStarted.synchronized {
writeRunnableStarted -= key
- if (register && conn.changeInterestForWrite()) {
+ val needReregister = register || conn.resetForceReregister()
+ if (needReregister && conn.changeInterestForWrite()) {
conn.registerInterest()
}
}
diff --git a/core/src/main/scala/spark/network/ConnectionManagerId.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManagerId.scala
index 9d5c518293..50dd9bc2d1 100644
--- a/core/src/main/scala/spark/network/ConnectionManagerId.scala
+++ b/core/src/main/scala/org/apache/spark/network/ConnectionManagerId.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
import java.net.InetSocketAddress
-import spark.Utils
+import org.apache.spark.util.Utils
private[spark] case class ConnectionManagerId(host: String, port: Int) {
diff --git a/core/src/main/scala/spark/network/ConnectionManagerTest.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala
index 9e3827aaf5..8d9ad9604d 100644
--- a/core/src/main/scala/spark/network/ConnectionManagerTest.scala
+++ b/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
-import spark._
-import spark.SparkContext._
+import org.apache.spark._
+import org.apache.spark.SparkContext._
import scala.io.Source
diff --git a/core/src/main/scala/spark/network/Message.scala b/core/src/main/scala/org/apache/spark/network/Message.scala
index a25457ea35..f2ecc6d439 100644
--- a/core/src/main/scala/spark/network/Message.scala
+++ b/core/src/main/scala/org/apache/spark/network/Message.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
import java.nio.ByteBuffer
import java.net.InetSocketAddress
diff --git a/core/src/main/scala/spark/network/MessageChunk.scala b/core/src/main/scala/org/apache/spark/network/MessageChunk.scala
index 784db5ab62..e0fe57b80d 100644
--- a/core/src/main/scala/spark/network/MessageChunk.scala
+++ b/core/src/main/scala/org/apache/spark/network/MessageChunk.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
import java.nio.ByteBuffer
diff --git a/core/src/main/scala/spark/network/MessageChunkHeader.scala b/core/src/main/scala/org/apache/spark/network/MessageChunkHeader.scala
index 18d0cbcc14..235fbc39b3 100644
--- a/core/src/main/scala/spark/network/MessageChunkHeader.scala
+++ b/core/src/main/scala/org/apache/spark/network/MessageChunkHeader.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
import java.net.InetAddress
import java.net.InetSocketAddress
diff --git a/core/src/main/scala/spark/network/ReceiverTest.scala b/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala
index 2bbc736f40..781715108b 100644
--- a/core/src/main/scala/spark/network/ReceiverTest.scala
+++ b/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
import java.nio.ByteBuffer
import java.net.InetAddress
diff --git a/core/src/main/scala/spark/network/SenderTest.scala b/core/src/main/scala/org/apache/spark/network/SenderTest.scala
index 542c54c36b..777574980f 100644
--- a/core/src/main/scala/spark/network/SenderTest.scala
+++ b/core/src/main/scala/org/apache/spark/network/SenderTest.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network
+package org.apache.spark.network
import java.nio.ByteBuffer
import java.net.InetAddress
diff --git a/core/src/main/scala/spark/network/netty/FileHeader.scala b/core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala
index bf46d32aa3..3c29700920 100644
--- a/core/src/main/scala/spark/network/netty/FileHeader.scala
+++ b/core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.network.netty
+package org.apache.spark.network.netty
import io.netty.buffer._
-import spark.Logging
+import org.apache.spark.Logging
private[spark] class FileHeader (
val fileLen: Int,
diff --git a/core/src/main/scala/spark/network/netty/ShuffleCopier.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala
index b01f6369f6..9493ccffd9 100644
--- a/core/src/main/scala/spark/network/netty/ShuffleCopier.scala
+++ b/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.network.netty
+package org.apache.spark.network.netty
import java.util.concurrent.Executors
@@ -23,8 +23,8 @@ import io.netty.buffer.ByteBuf
import io.netty.channel.ChannelHandlerContext
import io.netty.util.CharsetUtil
-import spark.Logging
-import spark.network.ConnectionManagerId
+import org.apache.spark.Logging
+import org.apache.spark.network.ConnectionManagerId
import scala.collection.JavaConverters._
diff --git a/core/src/main/scala/spark/network/netty/ShuffleSender.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala
index cdf88b03a0..537f225469 100644
--- a/core/src/main/scala/spark/network/netty/ShuffleSender.scala
+++ b/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.network.netty
+package org.apache.spark.network.netty
import java.io.File
-import spark.Logging
+import org.apache.spark.Logging
private[spark] class ShuffleSender(portIn: Int, val pResolver: PathResolver) extends Logging {
diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala
new file mode 100644
index 0000000000..c0ec527339
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/package.scala
@@ -0,0 +1,35 @@
+import org.apache.spark.rdd.{SequenceFileRDDFunctions, DoubleRDDFunctions, PairRDDFunctions}
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Core Spark functionality. [[org.apache.spark.SparkContext]] serves as the main entry point to
+ * Spark, while [[org.apache.spark.rdd.RDD]] is the data type representing a distributed collection,
+ * and provides most parallel operations.
+ *
+ * In addition, [[org.apache.spark.rdd.PairRDDFunctions]] contains operations available only on RDDs
+ * of key-value pairs, such as `groupByKey` and `join`; [[org.apache.spark.rdd.DoubleRDDFunctions]]
+ * contains operations available only on RDDs of Doubles; and
+ * [[org.apache.spark.rdd.SequenceFileRDDFunctions]] contains operations available on RDDs that can
+ * be saved as SequenceFiles. These operations are automatically available on any RDD of the right
+ * type (e.g. RDD[(Int, Int)] through implicit conversions when you
+ * `import org.apache.spark.SparkContext._`.
+ */
+package object spark {
+ // For package docs only
+}
diff --git a/core/src/main/scala/spark/partial/ApproximateActionListener.scala b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala
index 691d939150..d71069444a 100644
--- a/core/src/main/scala/spark/partial/ApproximateActionListener.scala
+++ b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala
@@ -15,10 +15,11 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
-import spark._
-import spark.scheduler.JobListener
+import org.apache.spark._
+import org.apache.spark.scheduler.JobListener
+import org.apache.spark.rdd.RDD
/**
* A JobListener for an approximate single-result action, such as count() or non-parallel reduce().
diff --git a/core/src/main/scala/spark/partial/ApproximateEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/ApproximateEvaluator.scala
index 5eae144dfb..9c2859c8b9 100644
--- a/core/src/main/scala/spark/partial/ApproximateEvaluator.scala
+++ b/core/src/main/scala/org/apache/spark/partial/ApproximateEvaluator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
/**
* An object that computes a function incrementally by merging in results of type U from multiple
diff --git a/core/src/main/scala/spark/partial/BoundedDouble.scala b/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala
index 8bdbe6c012..5f4450859c 100644
--- a/core/src/main/scala/spark/partial/BoundedDouble.scala
+++ b/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
/**
* A Double with error bars on it.
diff --git a/core/src/main/scala/spark/partial/CountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala
index 6aa92094eb..3155dfe165 100644
--- a/core/src/main/scala/spark/partial/CountEvaluator.scala
+++ b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
import cern.jet.stat.Probability
diff --git a/core/src/main/scala/spark/partial/GroupedCountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala
index ebe2e5a1e3..e519e3a548 100644
--- a/core/src/main/scala/spark/partial/GroupedCountEvaluator.scala
+++ b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
import java.util.{HashMap => JHashMap}
import java.util.{Map => JMap}
diff --git a/core/src/main/scala/spark/partial/GroupedMeanEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedMeanEvaluator.scala
index 2dadbbd5fb..cf8a5680b6 100644
--- a/core/src/main/scala/spark/partial/GroupedMeanEvaluator.scala
+++ b/core/src/main/scala/org/apache/spark/partial/GroupedMeanEvaluator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
import java.util.{HashMap => JHashMap}
import java.util.{Map => JMap}
@@ -24,7 +24,7 @@ import scala.collection.mutable.HashMap
import scala.collection.Map
import scala.collection.JavaConversions.mapAsScalaMap
-import spark.util.StatCounter
+import org.apache.spark.util.StatCounter
/**
* An ApproximateEvaluator for means by key. Returns a map of key to confidence interval.
diff --git a/core/src/main/scala/spark/partial/GroupedSumEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedSumEvaluator.scala
index ae2b63f7cb..8225a5d933 100644
--- a/core/src/main/scala/spark/partial/GroupedSumEvaluator.scala
+++ b/core/src/main/scala/org/apache/spark/partial/GroupedSumEvaluator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
import java.util.{HashMap => JHashMap}
import java.util.{Map => JMap}
@@ -24,7 +24,7 @@ import scala.collection.mutable.HashMap
import scala.collection.Map
import scala.collection.JavaConversions.mapAsScalaMap
-import spark.util.StatCounter
+import org.apache.spark.util.StatCounter
/**
* An ApproximateEvaluator for sums by key. Returns a map of key to confidence interval.
diff --git a/core/src/main/scala/spark/partial/MeanEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala
index 5ddcad7075..d24959cba8 100644
--- a/core/src/main/scala/spark/partial/MeanEvaluator.scala
+++ b/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
import cern.jet.stat.Probability
-import spark.util.StatCounter
+import org.apache.spark.util.StatCounter
/**
* An ApproximateEvaluator for means.
diff --git a/core/src/main/scala/spark/partial/PartialResult.scala b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala
index 922a9f9bc6..5ce49b8100 100644
--- a/core/src/main/scala/spark/partial/PartialResult.scala
+++ b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
class PartialResult[R](initialVal: R, isFinal: Boolean) {
private var finalValue: Option[R] = if (isFinal) Some(initialVal) else None
diff --git a/core/src/main/scala/spark/partial/StudentTCacher.scala b/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala
index f3bb987d46..92915ee66d 100644
--- a/core/src/main/scala/spark/partial/StudentTCacher.scala
+++ b/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
import cern.jet.stat.Probability
diff --git a/core/src/main/scala/spark/partial/SumEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala
index 4083abef03..a74f800944 100644
--- a/core/src/main/scala/spark/partial/SumEvaluator.scala
+++ b/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.partial
+package org.apache.spark.partial
import cern.jet.stat.Probability
-import spark.util.StatCounter
+import org.apache.spark.util.StatCounter
/**
* An ApproximateEvaluator for sums. It estimates the mean and the cont and multiplies them
diff --git a/core/src/main/scala/spark/rdd/BlockRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala
index 0ebb722d73..bca6956a18 100644
--- a/core/src/main/scala/spark/rdd/BlockRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{RDD, SparkContext, SparkEnv, Partition, TaskContext}
-import spark.storage.BlockManager
+import org.apache.spark.{SparkContext, SparkEnv, Partition, TaskContext}
+import org.apache.spark.storage.BlockManager
private[spark] class BlockRDDPartition(val blockId: String, idx: Int) extends Partition {
val index = idx
@@ -28,13 +28,12 @@ private[spark]
class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[String])
extends RDD[T](sc, Nil) {
- @transient lazy val locations_ = BlockManager.blockIdsToExecutorLocations(blockIds, SparkEnv.get)
+ @transient lazy val locations_ = BlockManager.blockIdsToHosts(blockIds, SparkEnv.get)
override def getPartitions: Array[Partition] = (0 until blockIds.size).map(i => {
new BlockRDDPartition(blockIds(i), i).asInstanceOf[Partition]
}).toArray
-
override def compute(split: Partition, context: TaskContext): Iterator[T] = {
val blockManager = SparkEnv.get.blockManager
val blockId = split.asInstanceOf[BlockRDDPartition].blockId
@@ -45,8 +44,8 @@ class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[St
}
}
- override def getPreferredLocations(split: Partition): Seq[String] =
+ override def getPreferredLocations(split: Partition): Seq[String] = {
locations_(split.asInstanceOf[BlockRDDPartition].blockId)
-
+ }
}
diff --git a/core/src/main/scala/spark/rdd/CartesianRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala
index 150e5bca29..9b0c882481 100644
--- a/core/src/main/scala/spark/rdd/CartesianRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import java.io.{ObjectOutputStream, IOException}
-import spark._
+import org.apache.spark._
private[spark]
@@ -64,7 +64,7 @@ class CartesianRDD[T: ClassManifest, U:ClassManifest](
override def getPreferredLocations(split: Partition): Seq[String] = {
val currSplit = split.asInstanceOf[CartesianPartition]
- rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)
+ (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct
}
override def compute(split: Partition, context: TaskContext) = {
diff --git a/core/src/main/scala/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala
index 6794e0e201..3311757189 100644
--- a/core/src/main/scala/spark/rdd/CheckpointRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark._
+import org.apache.spark._
import org.apache.hadoop.mapred.{FileInputFormat, SequenceFileInputFormat, JobConf, Reporter}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.{NullWritable, BytesWritable}
@@ -25,7 +25,6 @@ import org.apache.hadoop.util.ReflectionUtils
import org.apache.hadoop.fs.Path
import java.io.{File, IOException, EOFException}
import java.text.NumberFormat
-import spark.deploy.SparkHadoopUtil
private[spark] class CheckpointRDDPartition(val index: Int) extends Partition {}
@@ -82,8 +81,9 @@ private[spark] object CheckpointRDD extends Logging {
}
def writeToFile[T](path: String, blockSize: Int = -1)(ctx: TaskContext, iterator: Iterator[T]) {
+ val env = SparkEnv.get
val outputDir = new Path(path)
- val fs = outputDir.getFileSystem(SparkHadoopUtil.newConfiguration())
+ val fs = outputDir.getFileSystem(env.hadoop.newConfiguration())
val finalOutputName = splitIdToFile(ctx.splitId)
val finalOutputPath = new Path(outputDir, finalOutputName)
@@ -101,7 +101,7 @@ private[spark] object CheckpointRDD extends Logging {
// This is mainly for testing purpose
fs.create(tempOutputPath, false, bufferSize, fs.getDefaultReplication, blockSize)
}
- val serializer = SparkEnv.get.serializer.newInstance()
+ val serializer = env.serializer.newInstance()
val serializeStream = serializer.serializeStream(fileOutputStream)
serializeStream.writeAll(iterator)
serializeStream.close()
@@ -121,10 +121,11 @@ private[spark] object CheckpointRDD extends Logging {
}
def readFromFile[T](path: Path, context: TaskContext): Iterator[T] = {
- val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration())
+ val env = SparkEnv.get
+ val fs = path.getFileSystem(env.hadoop.newConfiguration())
val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt
val fileInputStream = fs.open(path, bufferSize)
- val serializer = SparkEnv.get.serializer.newInstance()
+ val serializer = env.serializer.newInstance()
val deserializeStream = serializer.deserializeStream(fileInputStream)
// Register an on-task-completion callback to close the input stream.
@@ -137,13 +138,14 @@ private[spark] object CheckpointRDD extends Logging {
// each split file having multiple blocks. This needs to be run on a
// cluster (mesos or standalone) using HDFS.
def main(args: Array[String]) {
- import spark._
+ import org.apache.spark._
val Array(cluster, hdfsPath) = args
+ val env = SparkEnv.get
val sc = new SparkContext(cluster, "CheckpointRDD Test")
val rdd = sc.makeRDD(1 to 10, 10).flatMap(x => 1 to 10000)
val path = new Path(hdfsPath, "temp")
- val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration())
+ val fs = path.getFileSystem(env.hadoop.newConfiguration())
sc.runJob(rdd, CheckpointRDD.writeToFile(path.toString, 1024) _)
val cpRDD = new CheckpointRDD[Int](sc, path.toString)
assert(cpRDD.partitions.length == rdd.partitions.length, "Number of partitions is not the same")
diff --git a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
index c540cd36eb..0187256a8e 100644
--- a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import java.io.{ObjectOutputStream, IOException}
import java.util.{HashMap => JHashMap}
@@ -23,8 +23,8 @@ import java.util.{HashMap => JHashMap}
import scala.collection.JavaConversions
import scala.collection.mutable.ArrayBuffer
-import spark.{Aggregator, Partition, Partitioner, RDD, SparkEnv, TaskContext}
-import spark.{Dependency, OneToOneDependency, ShuffleDependency}
+import org.apache.spark.{Partition, Partitioner, SparkEnv, TaskContext}
+import org.apache.spark.{Dependency, OneToOneDependency, ShuffleDependency}
private[spark] sealed trait CoGroupSplitDep extends Serializable
@@ -52,13 +52,6 @@ class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep])
override def hashCode(): Int = idx
}
-private[spark] class CoGroupAggregator
- extends Aggregator[Any, Any, ArrayBuffer[Any]](
- { x => ArrayBuffer(x) },
- { (b, x) => b += x },
- { (b1, b2) => b1 ++ b2 })
- with Serializable
-
/**
* A RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a
@@ -66,34 +59,25 @@ private[spark] class CoGroupAggregator
*
* @param rdds parent RDDs.
* @param part partitioner used to partition the shuffle output.
- * @param mapSideCombine flag indicating whether to merge values before shuffle step. If the flag
- * is on, Spark does an extra pass over the data on the map side to merge
- * all values belonging to the same key together. This can reduce the amount
- * of data shuffled if and only if the number of distinct keys is very small,
- * and the ratio of key size to value size is also very small.
*/
-class CoGroupedRDD[K](
- @transient var rdds: Seq[RDD[(K, _)]],
- part: Partitioner,
- val mapSideCombine: Boolean = false,
- val serializerClass: String = null)
+class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: Partitioner)
extends RDD[(K, Seq[Seq[_]])](rdds.head.context, Nil) {
- private val aggr = new CoGroupAggregator
+ private var serializerClass: String = null
+
+ def setSerializer(cls: String): CoGroupedRDD[K] = {
+ serializerClass = cls
+ this
+ }
override def getDependencies: Seq[Dependency[_]] = {
- rdds.map { rdd =>
+ rdds.map { rdd: RDD[_ <: Product2[K, _]] =>
if (rdd.partitioner == Some(part)) {
- logInfo("Adding one-to-one dependency with " + rdd)
+ logDebug("Adding one-to-one dependency with " + rdd)
new OneToOneDependency(rdd)
} else {
- logInfo("Adding shuffle dependency with " + rdd)
- if (mapSideCombine) {
- val mapSideCombinedRDD = rdd.mapPartitions(aggr.combineValuesByKey(_), true)
- new ShuffleDependency[Any, ArrayBuffer[Any]](mapSideCombinedRDD, part, serializerClass)
- } else {
- new ShuffleDependency[Any, Any](rdd.asInstanceOf[RDD[(Any, Any)]], part, serializerClass)
- }
+ logDebug("Adding shuffle dependency with " + rdd)
+ new ShuffleDependency[Any, Any](rdd, part, serializerClass)
}
}
}
@@ -138,23 +122,15 @@ class CoGroupedRDD[K](
for ((dep, depNum) <- split.deps.zipWithIndex) dep match {
case NarrowCoGroupSplitDep(rdd, _, itsSplit) => {
// Read them from the parent
- for ((k, v) <- rdd.iterator(itsSplit, context)) {
- getSeq(k.asInstanceOf[K])(depNum) += v
+ rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, Any]]].foreach { kv =>
+ getSeq(kv._1)(depNum) += kv._2
}
}
case ShuffleCoGroupSplitDep(shuffleId) => {
// Read map outputs of shuffle
val fetcher = SparkEnv.get.shuffleFetcher
- if (mapSideCombine) {
- // With map side combine on, for each key, the shuffle fetcher returns a list of values.
- fetcher.fetch[K, Seq[Any]](shuffleId, split.index, context.taskMetrics, ser).foreach {
- case (key, values) => getSeq(key)(depNum) ++= values
- }
- } else {
- // With map side combine off, for each key the shuffle fetcher returns a single value.
- fetcher.fetch[K, Any](shuffleId, split.index, context.taskMetrics, ser).foreach {
- case (key, value) => getSeq(key)(depNum) += value
- }
+ fetcher.fetch[Product2[K, Any]](shuffleId, split.index, context.taskMetrics, ser).foreach {
+ kv => getSeq(kv._1)(depNum) += kv._2
}
}
}
diff --git a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala
new file mode 100644
index 0000000000..c5de6362a9
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.rdd
+
+import org.apache.spark._
+import java.io.{ObjectOutputStream, IOException}
+import scala.collection.mutable
+import scala.Some
+import scala.collection.mutable.ArrayBuffer
+
+/**
+ * Class that captures a coalesced RDD by essentially keeping track of parent partitions
+ * @param index of this coalesced partition
+ * @param rdd which it belongs to
+ * @param parentsIndices list of indices in the parent that have been coalesced into this partition
+ * @param preferredLocation the preferred location for this partition
+ */
+case class CoalescedRDDPartition(
+ index: Int,
+ @transient rdd: RDD[_],
+ parentsIndices: Array[Int],
+ @transient preferredLocation: String = ""
+ ) extends Partition {
+ var parents: Seq[Partition] = parentsIndices.map(rdd.partitions(_))
+
+ @throws(classOf[IOException])
+ private def writeObject(oos: ObjectOutputStream) {
+ // Update the reference to parent partition at the time of task serialization
+ parents = parentsIndices.map(rdd.partitions(_))
+ oos.defaultWriteObject()
+ }
+
+ /**
+ * Computes how many of the parents partitions have getPreferredLocation
+ * as one of their preferredLocations
+ * @return locality of this coalesced partition between 0 and 1
+ */
+ def localFraction: Double = {
+ val loc = parents.count(p =>
+ rdd.context.getPreferredLocs(rdd, p.index).map(tl => tl.host).contains(preferredLocation))
+
+ if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble)
+ }
+}
+
+/**
+ * Represents a coalesced RDD that has fewer partitions than its parent RDD
+ * This class uses the PartitionCoalescer class to find a good partitioning of the parent RDD
+ * so that each new partition has roughly the same number of parent partitions and that
+ * the preferred location of each new partition overlaps with as many preferred locations of its
+ * parent partitions
+ * @param prev RDD to be coalesced
+ * @param maxPartitions number of desired partitions in the coalesced RDD
+ * @param balanceSlack used to trade-off balance and locality. 1.0 is all locality, 0 is all balance
+ */
+class CoalescedRDD[T: ClassManifest](
+ @transient var prev: RDD[T],
+ maxPartitions: Int,
+ balanceSlack: Double = 0.10)
+ extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies
+
+ override def getPartitions: Array[Partition] = {
+ val pc = new PartitionCoalescer(maxPartitions, prev, balanceSlack)
+
+ pc.run().zipWithIndex.map {
+ case (pg, i) =>
+ val ids = pg.arr.map(_.index).toArray
+ new CoalescedRDDPartition(i, prev, ids, pg.prefLoc)
+ }
+ }
+
+ override def compute(partition: Partition, context: TaskContext): Iterator[T] = {
+ partition.asInstanceOf[CoalescedRDDPartition].parents.iterator.flatMap { parentPartition =>
+ firstParent[T].iterator(parentPartition, context)
+ }
+ }
+
+ override def getDependencies: Seq[Dependency[_]] = {
+ Seq(new NarrowDependency(prev) {
+ def getParents(id: Int): Seq[Int] =
+ partitions(id).asInstanceOf[CoalescedRDDPartition].parentsIndices
+ })
+ }
+
+ override def clearDependencies() {
+ super.clearDependencies()
+ prev = null
+ }
+
+ /**
+ * Returns the preferred machine for the partition. If split is of type CoalescedRDDPartition,
+ * then the preferred machine will be one which most parent splits prefer too.
+ * @param partition
+ * @return the machine most preferred by split
+ */
+ override def getPreferredLocations(partition: Partition): Seq[String] = {
+ List(partition.asInstanceOf[CoalescedRDDPartition].preferredLocation)
+ }
+}
+
+/**
+ * Coalesce the partitions of a parent RDD (`prev`) into fewer partitions, so that each partition of
+ * this RDD computes one or more of the parent ones. It will produce exactly `maxPartitions` if the
+ * parent had more than maxPartitions, or fewer if the parent had fewer.
+ *
+ * This transformation is useful when an RDD with many partitions gets filtered into a smaller one,
+ * or to avoid having a large number of small tasks when processing a directory with many files.
+ *
+ * If there is no locality information (no preferredLocations) in the parent, then the coalescing
+ * is very simple: chunk parents that are close in the Array in chunks.
+ * If there is locality information, it proceeds to pack them with the following four goals:
+ *
+ * (1) Balance the groups so they roughly have the same number of parent partitions
+ * (2) Achieve locality per partition, i.e. find one machine which most parent partitions prefer
+ * (3) Be efficient, i.e. O(n) algorithm for n parent partitions (problem is likely NP-hard)
+ * (4) Balance preferred machines, i.e. avoid as much as possible picking the same preferred machine
+ *
+ * Furthermore, it is assumed that the parent RDD may have many partitions, e.g. 100 000.
+ * We assume the final number of desired partitions is small, e.g. less than 1000.
+ *
+ * The algorithm tries to assign unique preferred machines to each partition. If the number of
+ * desired partitions is greater than the number of preferred machines (can happen), it needs to
+ * start picking duplicate preferred machines. This is determined using coupon collector estimation
+ * (2n log(n)). The load balancing is done using power-of-two randomized bins-balls with one twist:
+ * it tries to also achieve locality. This is done by allowing a slack (balanceSlack) between two
+ * bins. If two bins are within the slack in terms of balance, the algorithm will assign partitions
+ * according to locality. (contact alig for questions)
+ *
+ */
+
+private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) {
+
+ def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size
+ def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean =
+ if (o1 == None) false else if (o2 == None) true else compare(o1.get, o2.get)
+
+ val rnd = new scala.util.Random(7919) // keep this class deterministic
+
+ // each element of groupArr represents one coalesced partition
+ val groupArr = ArrayBuffer[PartitionGroup]()
+
+ // hash used to check whether some machine is already in groupArr
+ val groupHash = mutable.Map[String, ArrayBuffer[PartitionGroup]]()
+
+ // hash used for the first maxPartitions (to avoid duplicates)
+ val initialHash = mutable.Set[Partition]()
+
+ // determines the tradeoff between load-balancing the partitions sizes and their locality
+ // e.g. balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality
+ val slack = (balanceSlack * prev.partitions.size).toInt
+
+ var noLocality = true // if true if no preferredLocations exists for parent RDD
+
+ // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones)
+ def currPrefLocs(part: Partition): Seq[String] = {
+ prev.context.getPreferredLocs(prev, part.index).map(tl => tl.host)
+ }
+
+ // this class just keeps iterating and rotating infinitely over the partitions of the RDD
+ // next() returns the next preferred machine that a partition is replicated on
+ // the rotator first goes through the first replica copy of each partition, then second, third
+ // the iterators return type is a tuple: (replicaString, partition)
+ class LocationIterator(prev: RDD[_]) extends Iterator[(String, Partition)] {
+
+ var it: Iterator[(String, Partition)] = resetIterator()
+
+ override val isEmpty = !it.hasNext
+
+ // initializes/resets to start iterating from the beginning
+ def resetIterator() = {
+ val iterators = (0 to 2).map( x =>
+ prev.partitions.iterator.flatMap(p => {
+ if (currPrefLocs(p).size > x) Some((currPrefLocs(p)(x), p)) else None
+ } )
+ )
+ iterators.reduceLeft((x, y) => x ++ y)
+ }
+
+ // hasNext() is false iff there are no preferredLocations for any of the partitions of the RDD
+ def hasNext(): Boolean = { !isEmpty }
+
+ // return the next preferredLocation of some partition of the RDD
+ def next(): (String, Partition) = {
+ if (it.hasNext)
+ it.next()
+ else {
+ it = resetIterator() // ran out of preferred locations, reset and rotate to the beginning
+ it.next()
+ }
+ }
+ }
+
+ /**
+ * Sorts and gets the least element of the list associated with key in groupHash
+ * The returned PartitionGroup is the least loaded of all groups that represent the machine "key"
+ * @param key string representing a partitioned group on preferred machine key
+ * @return Option of PartitionGroup that has least elements for key
+ */
+ def getLeastGroupHash(key: String): Option[PartitionGroup] = {
+ groupHash.get(key).map(_.sortWith(compare).head)
+ }
+
+ def addPartToPGroup(part: Partition, pgroup: PartitionGroup): Boolean = {
+ if (!initialHash.contains(part)) {
+ pgroup.arr += part // already assign this element
+ initialHash += part // needed to avoid assigning partitions to multiple buckets
+ true
+ } else { false }
+ }
+
+ /**
+ * Initializes targetLen partition groups and assigns a preferredLocation
+ * This uses coupon collector to estimate how many preferredLocations it must rotate through
+ * until it has seen most of the preferred locations (2 * n log(n))
+ * @param targetLen
+ */
+ def setupGroups(targetLen: Int) {
+ val rotIt = new LocationIterator(prev)
+
+ // deal with empty case, just create targetLen partition groups with no preferred location
+ if (!rotIt.hasNext()) {
+ (1 to targetLen).foreach(x => groupArr += PartitionGroup())
+ return
+ }
+
+ noLocality = false
+
+ // number of iterations needed to be certain that we've seen most preferred locations
+ val expectedCoupons2 = 2 * (math.log(targetLen)*targetLen + targetLen + 0.5).toInt
+ var numCreated = 0
+ var tries = 0
+
+ // rotate through until either targetLen unique/distinct preferred locations have been created
+ // OR we've rotated expectedCoupons2, in which case we have likely seen all preferred locations,
+ // i.e. likely targetLen >> number of preferred locations (more buckets than there are machines)
+ while (numCreated < targetLen && tries < expectedCoupons2) {
+ tries += 1
+ val (nxt_replica, nxt_part) = rotIt.next()
+ if (!groupHash.contains(nxt_replica)) {
+ val pgroup = PartitionGroup(nxt_replica)
+ groupArr += pgroup
+ addPartToPGroup(nxt_part, pgroup)
+ groupHash += (nxt_replica -> (ArrayBuffer(pgroup))) // list in case we have multiple
+ numCreated += 1
+ }
+ }
+
+ while (numCreated < targetLen) { // if we don't have enough partition groups, create duplicates
+ var (nxt_replica, nxt_part) = rotIt.next()
+ val pgroup = PartitionGroup(nxt_replica)
+ groupArr += pgroup
+ groupHash.get(nxt_replica).get += pgroup
+ var tries = 0
+ while (!addPartToPGroup(nxt_part, pgroup) && tries < targetLen) { // ensure at least one part
+ nxt_part = rotIt.next()._2
+ tries += 1
+ }
+ numCreated += 1
+ }
+
+ }
+
+ /**
+ * Takes a parent RDD partition and decides which of the partition groups to put it in
+ * Takes locality into account, but also uses power of 2 choices to load balance
+ * It strikes a balance between the two use the balanceSlack variable
+ * @param p partition (ball to be thrown)
+ * @return partition group (bin to be put in)
+ */
+ def pickBin(p: Partition): PartitionGroup = {
+ val pref = currPrefLocs(p).map(getLeastGroupHash(_)).sortWith(compare) // least loaded pref locs
+ val prefPart = if (pref == Nil) None else pref.head
+
+ val r1 = rnd.nextInt(groupArr.size)
+ val r2 = rnd.nextInt(groupArr.size)
+ val minPowerOfTwo = if (groupArr(r1).size < groupArr(r2).size) groupArr(r1) else groupArr(r2)
+ if (prefPart== None) // if no preferred locations, just use basic power of two
+ return minPowerOfTwo
+
+ val prefPartActual = prefPart.get
+
+ if (minPowerOfTwo.size + slack <= prefPartActual.size) // more imbalance than the slack allows
+ return minPowerOfTwo // prefer balance over locality
+ else {
+ return prefPartActual // prefer locality over balance
+ }
+ }
+
+ def throwBalls() {
+ if (noLocality) { // no preferredLocations in parent RDD, no randomization needed
+ if (maxPartitions > groupArr.size) { // just return prev.partitions
+ for ((p,i) <- prev.partitions.zipWithIndex) {
+ groupArr(i).arr += p
+ }
+ } else { // no locality available, then simply split partitions based on positions in array
+ for(i <- 0 until maxPartitions) {
+ val rangeStart = ((i.toLong * prev.partitions.length) / maxPartitions).toInt
+ val rangeEnd = (((i.toLong + 1) * prev.partitions.length) / maxPartitions).toInt
+ (rangeStart until rangeEnd).foreach{ j => groupArr(i).arr += prev.partitions(j) }
+ }
+ }
+ } else {
+ for (p <- prev.partitions if (!initialHash.contains(p))) { // throw every partition into group
+ pickBin(p).arr += p
+ }
+ }
+ }
+
+ def getPartitions: Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray
+
+ /**
+ * Runs the packing algorithm and returns an array of PartitionGroups that if possible are
+ * load balanced and grouped by locality
+ * @return array of partition groups
+ */
+ def run(): Array[PartitionGroup] = {
+ setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins)
+ throwBalls() // assign partitions (balls) to each group (bins)
+ getPartitions
+ }
+}
+
+private[spark] case class PartitionGroup(prefLoc: String = "") {
+ var arr = mutable.ArrayBuffer[Partition]()
+
+ def size = arr.size
+}
diff --git a/core/src/main/scala/spark/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
index 93ef097702..a4bec41752 100644
--- a/core/src/main/scala/spark/DoubleRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
@@ -15,17 +15,18 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.rdd
-import spark.partial.BoundedDouble
-import spark.partial.MeanEvaluator
-import spark.partial.PartialResult
-import spark.partial.SumEvaluator
-import spark.util.StatCounter
+import org.apache.spark.partial.BoundedDouble
+import org.apache.spark.partial.MeanEvaluator
+import org.apache.spark.partial.PartialResult
+import org.apache.spark.partial.SumEvaluator
+import org.apache.spark.util.StatCounter
+import org.apache.spark.{TaskContext, Logging}
/**
* Extra functions available on RDDs of Doubles through an implicit conversion.
- * Import `spark.SparkContext._` at the top of your program to use these functions.
+ * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions.
*/
class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable {
/** Add up the elements in this RDD. */
@@ -34,7 +35,7 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable {
}
/**
- * Return a [[spark.util.StatCounter]] object that captures the mean, variance and count
+ * Return a [[org.apache.spark.util.StatCounter]] object that captures the mean, variance and count
* of the RDD's elements in one operation.
*/
def stats(): StatCounter = {
@@ -54,7 +55,13 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable {
* Compute the sample standard deviation of this RDD's elements (which corrects for bias in
* estimating the standard deviation by dividing by N-1 instead of N).
*/
- def sampleStdev(): Double = stats().stdev
+ def sampleStdev(): Double = stats().sampleStdev
+
+ /**
+ * Compute the sample variance of this RDD's elements (which corrects for bias in
+ * estimating the variance by dividing by N-1 instead of N).
+ */
+ def sampleVariance(): Double = stats().sampleVariance
/** (Experimental) Approximate operation to return the mean within a timeout. */
def meanApprox(timeout: Long, confidence: Double = 0.95): PartialResult[BoundedDouble] = {
diff --git a/core/src/main/scala/spark/rdd/EmptyRDD.scala b/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala
index d7d4db5d30..c8900d1a93 100644
--- a/core/src/main/scala/spark/rdd/EmptyRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{RDD, SparkContext, SparkEnv, Partition, TaskContext}
+import org.apache.spark.{SparkContext, SparkEnv, Partition, TaskContext}
/**
diff --git a/core/src/main/scala/spark/rdd/FilteredRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala
index 783508cfd1..5312dc0b59 100644
--- a/core/src/main/scala/spark/rdd/FilteredRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{OneToOneDependency, RDD, Partition, TaskContext}
+import org.apache.spark.{OneToOneDependency, Partition, TaskContext}
private[spark] class FilteredRDD[T: ClassManifest](
prev: RDD[T],
diff --git a/core/src/main/scala/spark/rdd/FlatMappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala
index ed75eac3ff..cbdf6d84c0 100644
--- a/core/src/main/scala/spark/rdd/FlatMappedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{RDD, Partition, TaskContext}
+import org.apache.spark.{Partition, TaskContext}
private[spark]
diff --git a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala
index 0f972b7a0b..82000bac09 100644
--- a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala
@@ -1,4 +1,3 @@
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,15 +15,22 @@
* limitations under the License.
*/
-package org.apache.hadoop.mapred
+package org.apache.spark.rdd
+
+import org.apache.spark.{TaskContext, Partition}
+
-import org.apache.hadoop.mapreduce.TaskType
+private[spark]
+class FlatMappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => TraversableOnce[U])
+ extends RDD[(K, U)](prev) {
-trait HadoopMapRedUtil {
- def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContextImpl(conf, jobId)
+ override def getPartitions = firstParent[Product2[K, V]].partitions
- def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId)
+ override val partitioner = firstParent[Product2[K, V]].partitioner
- def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) =
- new TaskAttemptID(jtIdentifier, jobId, if (isMap) TaskType.MAP else TaskType.REDUCE, taskId, attemptId)
+ override def compute(split: Partition, context: TaskContext) = {
+ firstParent[Product2[K, V]].iterator(split, context).flatMap { case Product2(k, v) =>
+ f(v).map(x => (k, x))
+ }
+ }
}
diff --git a/core/src/main/scala/spark/rdd/GlommedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala
index 1573f8a289..829545d7b0 100644
--- a/core/src/main/scala/spark/rdd/GlommedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{RDD, Partition, TaskContext}
+import org.apache.spark.{Partition, TaskContext}
private[spark] class GlommedRDD[T: ClassManifest](prev: RDD[T])
extends RDD[Array[T]](prev) {
diff --git a/core/src/main/scala/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index d0fdeb741e..2cb6734e41 100644
--- a/core/src/main/scala/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -15,27 +15,20 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import java.io.EOFException
-import java.util.NoSuchElementException
-import org.apache.hadoop.io.LongWritable
-import org.apache.hadoop.io.NullWritable
-import org.apache.hadoop.io.Text
-import org.apache.hadoop.mapred.FileInputFormat
import org.apache.hadoop.mapred.InputFormat
import org.apache.hadoop.mapred.InputSplit
import org.apache.hadoop.mapred.JobConf
-import org.apache.hadoop.mapred.TextInputFormat
import org.apache.hadoop.mapred.RecordReader
import org.apache.hadoop.mapred.Reporter
import org.apache.hadoop.util.ReflectionUtils
-import spark.deploy.SparkHadoopUtil
-import spark.{Dependency, Logging, Partition, RDD, SerializableWritable, SparkContext, TaskContext}
-import spark.util.NextIterator
-import org.apache.hadoop.conf.Configurable
+import org.apache.spark.{Logging, Partition, SerializableWritable, SparkContext, SparkEnv, TaskContext}
+import org.apache.spark.util.NextIterator
+import org.apache.hadoop.conf.{Configuration, Configurable}
/**
@@ -68,7 +61,8 @@ class HadoopRDD[K, V](
private val confBroadcast = sc.broadcast(new SerializableWritable(conf))
override def getPartitions: Array[Partition] = {
- SparkHadoopUtil.addCredentials(conf);
+ val env = SparkEnv.get
+ env.hadoop.addCredentials(conf)
val inputFormat = createInputFormat(conf)
if (inputFormat.isInstanceOf[Configurable]) {
inputFormat.asInstanceOf[Configurable].setConf(conf)
@@ -88,6 +82,7 @@ class HadoopRDD[K, V](
override def compute(theSplit: Partition, context: TaskContext) = new NextIterator[(K, V)] {
val split = theSplit.asInstanceOf[HadoopPartition]
+ logInfo("Input split: " + split.inputSplit)
var reader: RecordReader[K, V] = null
val conf = confBroadcast.value.value
@@ -131,4 +126,6 @@ class HadoopRDD[K, V](
override def checkpoint() {
// Do nothing. Hadoop RDD should not be checkpointed.
}
+
+ def getConf: Configuration = confBroadcast.value.value
}
diff --git a/core/src/main/scala/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
index 59132437d2..aca0146884 100644
--- a/core/src/main/scala/spark/rdd/JdbcRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import java.sql.{Connection, ResultSet}
-import spark.{Logging, Partition, RDD, SparkContext, TaskContext}
-import spark.util.NextIterator
+import org.apache.spark.{Logging, Partition, SparkContext, TaskContext}
+import org.apache.spark.util.NextIterator
private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) extends Partition {
override def index = idx
diff --git a/core/src/main/scala/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
index af8f0a112f..203179c4ea 100644
--- a/core/src/main/scala/spark/rdd/MapPartitionsRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{RDD, Partition, TaskContext}
+import org.apache.spark.{Partition, TaskContext}
private[spark]
diff --git a/core/src/main/scala/spark/rdd/MapPartitionsWithIndexRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithIndexRDD.scala
index 3b4e9518fd..3ed8339010 100644
--- a/core/src/main/scala/spark/rdd/MapPartitionsWithIndexRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithIndexRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{RDD, Partition, TaskContext}
+import org.apache.spark.{Partition, TaskContext}
/**
diff --git a/core/src/main/scala/spark/rdd/MappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala
index 8b411dd85d..e8be1c4816 100644
--- a/core/src/main/scala/spark/rdd/MappedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{RDD, Partition, TaskContext}
+import org.apache.spark.{Partition, TaskContext}
private[spark]
class MappedRDD[U: ClassManifest, T: ClassManifest](prev: RDD[T], f: T => U)
diff --git a/core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala
index b1002e0cac..d33c1af581 100644
--- a/core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala
@@ -15,15 +15,20 @@
* limitations under the License.
*/
-package org.apache.hadoop.mapreduce
+package org.apache.spark.rdd
-import org.apache.hadoop.conf.Configuration
-trait HadoopMapReduceUtil {
- def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContext(conf, jobId)
+import org.apache.spark.{TaskContext, Partition}
- def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContext(conf, attemptId)
+private[spark]
+class MappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => U)
+ extends RDD[(K, U)](prev) {
- def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier,
- jobId, isMap, taskId, attemptId)
+ override def getPartitions = firstParent[Product2[K, U]].partitions
+
+ override val partitioner = firstParent[Product2[K, U]].partitioner
+
+ override def compute(split: Partition, context: TaskContext): Iterator[(K, U)] = {
+ firstParent[Product2[K, V]].iterator(split, context).map { case Product2(k ,v) => (k, f(v)) }
+ }
}
diff --git a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
index 17fe805fd4..7b3a89f7e0 100644
--- a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import java.text.SimpleDateFormat
import java.util.Date
@@ -24,7 +24,7 @@ import org.apache.hadoop.conf.{Configurable, Configuration}
import org.apache.hadoop.io.Writable
import org.apache.hadoop.mapreduce._
-import spark.{Dependency, Logging, Partition, RDD, SerializableWritable, SparkContext, TaskContext}
+import org.apache.spark.{Dependency, Logging, Partition, SerializableWritable, SparkContext, TaskContext}
private[spark]
@@ -43,7 +43,7 @@ class NewHadoopRDD[K, V](
valueClass: Class[V],
@transient conf: Configuration)
extends RDD[(K, V)](sc, Nil)
- with HadoopMapReduceUtil
+ with SparkHadoopMapReduceUtil
with Logging {
// A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it
@@ -73,6 +73,7 @@ class NewHadoopRDD[K, V](
override def compute(theSplit: Partition, context: TaskContext) = new Iterator[(K, V)] {
val split = theSplit.asInstanceOf[NewHadoopPartition]
+ logInfo("Input split: " + split.serializableHadoopSplit)
val conf = confBroadcast.value.value
val attemptId = newTaskAttemptID(jobtrackerId, id, true, split.index, 0)
val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId)
@@ -119,4 +120,7 @@ class NewHadoopRDD[K, V](
val theSplit = split.asInstanceOf[NewHadoopPartition]
theSplit.serializableHadoopSplit.value.getLocations.filter(_ != "localhost")
}
+
+ def getConf: Configuration = confBroadcast.value.value
}
+
diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala
new file mode 100644
index 0000000000..697be8b997
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.rdd
+
+import org.apache.spark.{RangePartitioner, Logging}
+
+/**
+ * Extra functions available on RDDs of (key, value) pairs where the key is sortable through
+ * an implicit conversion. Import `org.apache.spark.SparkContext._` at the top of your program to
+ * use these functions. They will work with any key type that has a `scala.math.Ordered`
+ * implementation.
+ */
+class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest,
+ V: ClassManifest,
+ P <: Product2[K, V] : ClassManifest](
+ self: RDD[P])
+ extends Logging with Serializable {
+
+ /**
+ * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling
+ * `collect` or `save` on the resulting RDD will return or output an ordered list of records
+ * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in
+ * order of the keys).
+ */
+ def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size): RDD[P] = {
+ val part = new RangePartitioner(numPartitions, self, ascending)
+ val shuffled = new ShuffledRDD[K, V, P](self, part)
+ shuffled.mapPartitions(iter => {
+ val buf = iter.toArray
+ if (ascending) {
+ buf.sortWith((x, y) => x._1 < y._1).iterator
+ } else {
+ buf.sortWith((x, y) => x._1 > y._1).iterator
+ }
+ }, preservesPartitioning = true)
+ }
+}
diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 6b0cc2fbf1..a47c512275 100644
--- a/core/src/main/scala/spark/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -15,45 +15,44 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.rdd
import java.nio.ByteBuffer
-import java.util.{Date, HashMap => JHashMap}
+import java.util.Date
import java.text.SimpleDateFormat
+import java.util.{HashMap => JHashMap}
-import scala.collection.Map
+import scala.collection.{mutable, Map}
import scala.collection.mutable.ArrayBuffer
-import scala.collection.mutable.HashMap
import scala.collection.JavaConversions._
+import org.apache.hadoop.mapred._
+import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
-import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.io.SequenceFile.CompressionType
-import org.apache.hadoop.mapred.FileOutputCommitter
import org.apache.hadoop.mapred.FileOutputFormat
-import org.apache.hadoop.mapred.HadoopWriter
-import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.OutputFormat
-
+import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat}
import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat => NewFileOutputFormat}
-import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat, RecordWriter => NewRecordWriter, Job => NewAPIHadoopJob, HadoopMapReduceUtil}
-import org.apache.hadoop.security.UserGroupInformation
+import org.apache.hadoop.mapreduce.SparkHadoopMapReduceUtil
+import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob}
+import org.apache.hadoop.mapreduce.{RecordWriter => NewRecordWriter}
-import spark.partial.BoundedDouble
-import spark.partial.PartialResult
-import spark.rdd._
-import spark.SparkContext._
-import spark.Partitioner._
+import org.apache.spark._
+import org.apache.spark.SparkContext._
+import org.apache.spark.partial.{BoundedDouble, PartialResult}
+import org.apache.spark.Aggregator
+import org.apache.spark.Partitioner
+import org.apache.spark.Partitioner.defaultPartitioner
/**
* Extra functions available on RDDs of (key, value) pairs through an implicit conversion.
- * Import `spark.SparkContext._` at the top of your program to use these functions.
+ * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions.
*/
-class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
- self: RDD[(K, V)])
+class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)])
extends Logging
- with HadoopMapReduceUtil
+ with SparkHadoopMapReduceUtil
with Serializable {
/**
@@ -85,17 +84,18 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
}
val aggregator = new Aggregator[K, V, C](createCombiner, mergeValue, mergeCombiners)
if (self.partitioner == Some(partitioner)) {
- self.mapPartitions(aggregator.combineValuesByKey(_), true)
+ self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true)
} else if (mapSideCombine) {
- val mapSideCombined = self.mapPartitions(aggregator.combineValuesByKey(_), true)
- val partitioned = new ShuffledRDD[K, C](mapSideCombined, partitioner, serializerClass)
- partitioned.mapPartitions(aggregator.combineCombinersByKey(_), true)
+ val combined = self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true)
+ val partitioned = new ShuffledRDD[K, C, (K, C)](combined, partitioner)
+ .setSerializer(serializerClass)
+ partitioned.mapPartitions(aggregator.combineCombinersByKey, preservesPartitioning = true)
} else {
// Don't apply map-side combiner.
// A sanity check to make sure mergeCombiners is not defined.
assert(mergeCombiners == null)
- val values = new ShuffledRDD[K, V](self, partitioner, serializerClass)
- values.mapPartitions(aggregator.combineValuesByKey(_), true)
+ val values = new ShuffledRDD[K, V, (K, V)](self, partitioner).setSerializer(serializerClass)
+ values.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true)
}
}
@@ -167,7 +167,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
def reducePartition(iter: Iterator[(K, V)]): Iterator[JHashMap[K, V]] = {
val map = new JHashMap[K, V]
- for ((k, v) <- iter) {
+ iter.foreach { case (k, v) =>
val old = map.get(k)
map.put(k, if (old == null) v else func(old, v))
}
@@ -175,11 +175,11 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
}
def mergeMaps(m1: JHashMap[K, V], m2: JHashMap[K, V]): JHashMap[K, V] = {
- for ((k, v) <- m2) {
+ m2.foreach { case (k, v) =>
val old = m1.get(k)
m1.put(k, if (old == null) v else func(old, v))
}
- return m1
+ m1
}
self.mapPartitions(reducePartition).reduce(mergeMaps)
@@ -233,31 +233,13 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
}
/**
- * Return a copy of the RDD partitioned using the specified partitioner. If `mapSideCombine`
- * is true, Spark will group values of the same key together on the map side before the
- * repartitioning, to only send each key over the network once. If a large number of
- * duplicated keys are expected, and the size of the keys are large, `mapSideCombine` should
- * be set to true.
+ * Return a copy of the RDD partitioned using the specified partitioner.
*/
- def partitionBy(partitioner: Partitioner, mapSideCombine: Boolean = false): RDD[(K, V)] = {
- if (getKeyClass().isArray) {
- if (mapSideCombine) {
- throw new SparkException("Cannot use map-side combining with array keys.")
- }
- if (partitioner.isInstanceOf[HashPartitioner]) {
- throw new SparkException("Default partitioner cannot partition array keys.")
- }
- }
- if (mapSideCombine) {
- def createCombiner(v: V) = ArrayBuffer(v)
- def mergeValue(buf: ArrayBuffer[V], v: V) = buf += v
- def mergeCombiners(b1: ArrayBuffer[V], b2: ArrayBuffer[V]) = b1 ++= b2
- val bufs = combineByKey[ArrayBuffer[V]](
- createCombiner _, mergeValue _, mergeCombiners _, partitioner)
- bufs.flatMapValues(buf => buf)
- } else {
- new ShuffledRDD[K, V](self, partitioner)
+ def partitionBy(partitioner: Partitioner): RDD[(K, V)] = {
+ if (getKeyClass().isArray && partitioner.isInstanceOf[HashPartitioner]) {
+ throw new SparkException("Default partitioner cannot partition array keys.")
}
+ new ShuffledRDD[K, V, (K, V)](self, partitioner)
}
/**
@@ -266,9 +248,8 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
* (k, v2) is in `other`. Uses the given Partitioner to partition the output RDD.
*/
def join[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, W))] = {
- this.cogroup(other, partitioner).flatMapValues {
- case (vs, ws) =>
- for (v <- vs.iterator; w <- ws.iterator) yield (v, w)
+ this.cogroup(other, partitioner).flatMapValues { case (vs, ws) =>
+ for (v <- vs.iterator; w <- ws.iterator) yield (v, w)
}
}
@@ -279,13 +260,12 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
* partition the output RDD.
*/
def leftOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, Option[W]))] = {
- this.cogroup(other, partitioner).flatMapValues {
- case (vs, ws) =>
- if (ws.isEmpty) {
- vs.iterator.map(v => (v, None))
- } else {
- for (v <- vs.iterator; w <- ws.iterator) yield (v, Some(w))
- }
+ this.cogroup(other, partitioner).flatMapValues { case (vs, ws) =>
+ if (ws.isEmpty) {
+ vs.iterator.map(v => (v, None))
+ } else {
+ for (v <- vs.iterator; w <- ws.iterator) yield (v, Some(w))
+ }
}
}
@@ -297,13 +277,12 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
*/
def rightOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner)
: RDD[(K, (Option[V], W))] = {
- this.cogroup(other, partitioner).flatMapValues {
- case (vs, ws) =>
- if (vs.isEmpty) {
- ws.iterator.map(w => (None, w))
- } else {
- for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), w)
- }
+ this.cogroup(other, partitioner).flatMapValues { case (vs, ws) =>
+ if (vs.isEmpty) {
+ ws.iterator.map(w => (None, w))
+ } else {
+ for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), w)
+ }
}
}
@@ -395,7 +374,13 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
/**
* Return the key-value pairs in this RDD to the master as a Map.
*/
- def collectAsMap(): Map[K, V] = HashMap(self.collect(): _*)
+ def collectAsMap(): Map[K, V] = {
+ val data = self.toArray()
+ val map = new mutable.HashMap[K, V]
+ map.sizeHint(data.length)
+ data.foreach { case (k, v) => map.put(k, v) }
+ map
+ }
/**
* Pass each value in the key-value pair RDD through a map function without changing the keys;
@@ -423,13 +408,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) {
throw new SparkException("Default partitioner cannot partition array keys.")
}
- val cg = new CoGroupedRDD[K](
- Seq(self.asInstanceOf[RDD[(K, _)]], other.asInstanceOf[RDD[(K, _)]]),
- partitioner)
+ val cg = new CoGroupedRDD[K](Seq(self, other), partitioner)
val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest)
- prfs.mapValues {
- case Seq(vs, ws) =>
- (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]])
+ prfs.mapValues { case Seq(vs, ws) =>
+ (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]])
}
}
@@ -442,15 +424,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) {
throw new SparkException("Default partitioner cannot partition array keys.")
}
- val cg = new CoGroupedRDD[K](
- Seq(self.asInstanceOf[RDD[(K, _)]],
- other1.asInstanceOf[RDD[(K, _)]],
- other2.asInstanceOf[RDD[(K, _)]]),
- partitioner)
+ val cg = new CoGroupedRDD[K](Seq(self, other1, other2), partitioner)
val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest)
- prfs.mapValues {
- case Seq(vs, w1s, w2s) =>
- (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]])
+ prfs.mapValues { case Seq(vs, w1s, w2s) =>
+ (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]])
}
}
@@ -582,7 +559,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
val formatter = new SimpleDateFormat("yyyyMMddHHmm")
val jobtrackerID = formatter.format(new Date())
val stageId = self.id
- def writeShard(context: spark.TaskContext, iter: Iterator[(K,V)]): Int = {
+ def writeShard(context: TaskContext, iter: Iterator[(K,V)]): Int = {
// Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it
// around by taking a mod. We expect that no task will be attempted 2 billion times.
val attemptNumber = (context.attemptId % Int.MaxValue).toInt
@@ -594,7 +571,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
committer.setupTask(hadoopContext)
val writer = format.getRecordWriter(hadoopContext).asInstanceOf[NewRecordWriter[K,V]]
while (iter.hasNext) {
- val (k, v) = iter.next
+ val (k, v) = iter.next()
writer.write(k, v)
}
writer.close(hadoopContext)
@@ -652,7 +629,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
conf.set("mapred.output.compression.type", CompressionType.BLOCK.toString)
}
conf.setOutputCommitter(classOf[FileOutputCommitter])
- FileOutputFormat.setOutputPath(conf, HadoopWriter.createPathFromString(path, conf))
+ FileOutputFormat.setOutputPath(conf, SparkHadoopWriter.createPathFromString(path, conf))
saveAsHadoopDataset(conf)
}
@@ -678,10 +655,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
logInfo("Saving as hadoop file of type (" + keyClass.getSimpleName+ ", " + valueClass.getSimpleName+ ")")
- val writer = new HadoopWriter(conf)
+ val writer = new SparkHadoopWriter(conf)
writer.preSetup()
- def writeToFile(context: TaskContext, iter: Iterator[(K,V)]) {
+ def writeToFile(context: TaskContext, iter: Iterator[(K, V)]) {
// Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it
// around by taking a mod. We expect that no task will be attempted 2 billion times.
val attemptNumber = (context.attemptId % Int.MaxValue).toInt
@@ -720,55 +697,6 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
private[spark] def getValueClass() = implicitly[ClassManifest[V]].erasure
}
-/**
- * Extra functions available on RDDs of (key, value) pairs where the key is sortable through
- * an implicit conversion. Import `spark.SparkContext._` at the top of your program to use these
- * functions. They will work with any key type that has a `scala.math.Ordered` implementation.
- */
-class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest](
- self: RDD[(K, V)])
- extends Logging
- with Serializable {
-
- /**
- * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling
- * `collect` or `save` on the resulting RDD will return or output an ordered list of records
- * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in
- * order of the keys).
- */
- def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size): RDD[(K,V)] = {
- val shuffled =
- new ShuffledRDD[K, V](self, new RangePartitioner(numPartitions, self, ascending))
- shuffled.mapPartitions(iter => {
- val buf = iter.toArray
- if (ascending) {
- buf.sortWith((x, y) => x._1 < y._1).iterator
- } else {
- buf.sortWith((x, y) => x._1 > y._1).iterator
- }
- }, true)
- }
-}
-
-private[spark]
-class MappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => U) extends RDD[(K, U)](prev) {
- override def getPartitions = firstParent[(K, V)].partitions
- override val partitioner = firstParent[(K, V)].partitioner
- override def compute(split: Partition, context: TaskContext) =
- firstParent[(K, V)].iterator(split, context).map{ case (k, v) => (k, f(v)) }
-}
-
-private[spark]
-class FlatMappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => TraversableOnce[U])
- extends RDD[(K, U)](prev) {
-
- override def getPartitions = firstParent[(K, V)].partitions
- override val partitioner = firstParent[(K, V)].partitioner
- override def compute(split: Partition, context: TaskContext) = {
- firstParent[(K, V)].iterator(split, context).flatMap { case (k, v) => f(v).map(x => (k, x)) }
- }
-}
-
private[spark] object Manifests {
val seqSeqManifest = classManifest[Seq[Seq[_]]]
}
diff --git a/core/src/main/scala/spark/rdd/ParallelCollectionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala
index 16ba0c26f8..6dbd4309aa 100644
--- a/core/src/main/scala/spark/rdd/ParallelCollectionRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala
@@ -15,18 +15,22 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import scala.collection.immutable.NumericRange
import scala.collection.mutable.ArrayBuffer
import scala.collection.Map
-import spark.{RDD, TaskContext, SparkContext, Partition}
+import org.apache.spark._
+import java.io._
+import scala.Serializable
+import org.apache.spark.serializer.JavaSerializer
+import org.apache.spark.util.Utils
private[spark] class ParallelCollectionPartition[T: ClassManifest](
- val rddId: Long,
- val slice: Int,
- values: Seq[T])
- extends Partition with Serializable {
+ var rddId: Long,
+ var slice: Int,
+ var values: Seq[T])
+ extends Partition with Serializable {
def iterator: Iterator[T] = values.iterator
@@ -37,15 +41,49 @@ private[spark] class ParallelCollectionPartition[T: ClassManifest](
case _ => false
}
- override val index: Int = slice
+ override def index: Int = slice
+
+ @throws(classOf[IOException])
+ private def writeObject(out: ObjectOutputStream): Unit = {
+
+ val sfactory = SparkEnv.get.serializer
+
+ // Treat java serializer with default action rather than going thru serialization, to avoid a
+ // separate serialization header.
+
+ sfactory match {
+ case js: JavaSerializer => out.defaultWriteObject()
+ case _ =>
+ out.writeLong(rddId)
+ out.writeInt(slice)
+
+ val ser = sfactory.newInstance()
+ Utils.serializeViaNestedStream(out, ser)(_.writeObject(values))
+ }
+ }
+
+ @throws(classOf[IOException])
+ private def readObject(in: ObjectInputStream): Unit = {
+
+ val sfactory = SparkEnv.get.serializer
+ sfactory match {
+ case js: JavaSerializer => in.defaultReadObject()
+ case _ =>
+ rddId = in.readLong()
+ slice = in.readInt()
+
+ val ser = sfactory.newInstance()
+ Utils.deserializeViaNestedStream(in, ser)(ds => values = ds.readObject())
+ }
+ }
}
private[spark] class ParallelCollectionRDD[T: ClassManifest](
@transient sc: SparkContext,
@transient data: Seq[T],
numSlices: Int,
- locationPrefs: Map[Int,Seq[String]])
- extends RDD[T](sc, Nil) {
+ locationPrefs: Map[Int, Seq[String]])
+ extends RDD[T](sc, Nil) {
// TODO: Right now, each split sends along its full data, even if later down the RDD chain it gets
// cached. It might be worthwhile to write the data to a file in the DFS and read it in the split
// instead.
@@ -82,16 +120,17 @@ private object ParallelCollectionRDD {
1
}
slice(new Range(
- r.start, r.end + sign, r.step).asInstanceOf[Seq[T]], numSlices)
+ r.start, r.end + sign, r.step).asInstanceOf[Seq[T]], numSlices)
}
case r: Range => {
(0 until numSlices).map(i => {
val start = ((i * r.length.toLong) / numSlices).toInt
- val end = (((i+1) * r.length.toLong) / numSlices).toInt
+ val end = (((i + 1) * r.length.toLong) / numSlices).toInt
new Range(r.start + start * r.step, r.start + end * r.step, r.step)
}).asInstanceOf[Seq[Seq[T]]]
}
- case nr: NumericRange[_] => { // For ranges of Long, Double, BigInteger, etc
+ case nr: NumericRange[_] => {
+ // For ranges of Long, Double, BigInteger, etc
val slices = new ArrayBuffer[Seq[T]](numSlices)
val sliceSize = (nr.size + numSlices - 1) / numSlices // Round up to catch everything
var r = nr
@@ -102,10 +141,10 @@ private object ParallelCollectionRDD {
slices
}
case _ => {
- val array = seq.toArray // To prevent O(n^2) operations for List etc
+ val array = seq.toArray // To prevent O(n^2) operations for List etc
(0 until numSlices).map(i => {
val start = ((i * array.length.toLong) / numSlices).toInt
- val end = (((i+1) * array.length.toLong) / numSlices).toInt
+ val end = (((i + 1) * array.length.toLong) / numSlices).toInt
array.slice(start, end).toSeq
})
}
diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala
index 191cfde565..165cd412fc 100644
--- a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{NarrowDependency, RDD, SparkEnv, Partition, TaskContext}
+import org.apache.spark.{NarrowDependency, SparkEnv, Partition, TaskContext}
class PartitionPruningRDDPartition(idx: Int, val parentSplit: Partition) extends Partition {
@@ -33,8 +33,9 @@ class PruneDependency[T](rdd: RDD[T], @transient partitionFilterFunc: Int => Boo
extends NarrowDependency[T](rdd) {
@transient
- val partitions: Array[Partition] = rdd.partitions.filter(s => partitionFilterFunc(s.index))
- .zipWithIndex.map { case(split, idx) => new PartitionPruningRDDPartition(idx, split) : Partition }
+ val partitions: Array[Partition] = rdd.partitions.zipWithIndex
+ .filter(s => partitionFilterFunc(s._2))
+ .map { case(split, idx) => new PartitionPruningRDDPartition(idx, split) : Partition }
override def getParents(partitionId: Int) = List(partitions(partitionId).index)
}
diff --git a/core/src/main/scala/spark/rdd/PipedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala
index 2cefdc78b0..d5304ab0ae 100644
--- a/core/src/main/scala/spark/rdd/PipedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import java.io.PrintWriter
import java.util.StringTokenizer
@@ -25,8 +25,8 @@ import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer
import scala.io.Source
-import spark.{RDD, SparkEnv, Partition, TaskContext}
-import spark.broadcast.Broadcast
+import org.apache.spark.{SparkEnv, Partition, TaskContext}
+import org.apache.spark.broadcast.Broadcast
/**
diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index ca7cdd622a..e143ecd096 100644
--- a/core/src/main/scala/spark/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.rdd
import java.util.Random
@@ -31,43 +31,28 @@ import org.apache.hadoop.mapred.TextOutputFormat
import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap}
-import spark.broadcast.Broadcast
-import spark.Partitioner._
-import spark.partial.BoundedDouble
-import spark.partial.CountEvaluator
-import spark.partial.GroupedCountEvaluator
-import spark.partial.PartialResult
-import spark.rdd.CoalescedRDD
-import spark.rdd.CartesianRDD
-import spark.rdd.FilteredRDD
-import spark.rdd.FlatMappedRDD
-import spark.rdd.GlommedRDD
-import spark.rdd.MappedRDD
-import spark.rdd.MapPartitionsRDD
-import spark.rdd.MapPartitionsWithIndexRDD
-import spark.rdd.PipedRDD
-import spark.rdd.SampledRDD
-import spark.rdd.ShuffledRDD
-import spark.rdd.UnionRDD
-import spark.rdd.ZippedRDD
-import spark.rdd.ZippedPartitionsRDD2
-import spark.rdd.ZippedPartitionsRDD3
-import spark.rdd.ZippedPartitionsRDD4
-import spark.storage.StorageLevel
-import spark.util.BoundedPriorityQueue
-
-import SparkContext._
+import org.apache.spark.Partitioner._
+import org.apache.spark.api.java.JavaRDD
+import org.apache.spark.partial.BoundedDouble
+import org.apache.spark.partial.CountEvaluator
+import org.apache.spark.partial.GroupedCountEvaluator
+import org.apache.spark.partial.PartialResult
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.{Utils, BoundedPriorityQueue}
+
+import org.apache.spark.SparkContext._
+import org.apache.spark._
/**
* A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. Represents an immutable,
* partitioned collection of elements that can be operated on in parallel. This class contains the
* basic operations available on all RDDs, such as `map`, `filter`, and `persist`. In addition,
- * [[spark.PairRDDFunctions]] contains operations available only on RDDs of key-value pairs, such
- * as `groupByKey` and `join`; [[spark.DoubleRDDFunctions]] contains operations available only on
- * RDDs of Doubles; and [[spark.SequenceFileRDDFunctions]] contains operations available on RDDs
- * that can be saved as SequenceFiles. These operations are automatically available on any RDD of
- * the right type (e.g. RDD[(Int, Int)] through implicit conversions when you
- * `import spark.SparkContext._`.
+ * [[org.apache.spark.rdd.PairRDDFunctions]] contains operations available only on RDDs of key-value
+ * pairs, such as `groupByKey` and `join`; [[org.apache.spark.rdd.DoubleRDDFunctions]] contains
+ * operations available only on RDDs of Doubles; and [[org.apache.spark.rdd.SequenceFileRDDFunctions]]
+ * contains operations available on RDDs that can be saved as SequenceFiles. These operations are
+ * automatically available on any RDD of the right type (e.g. RDD[(Int, Int)] through implicit
+ * conversions when you `import org.apache.spark.SparkContext._`.
*
* Internally, each RDD is characterized by five main properties:
*
@@ -220,8 +205,8 @@ abstract class RDD[T: ClassManifest](
}
/**
- * Get the preferred location of a split, taking into account whether the
- * RDD is checkpointed or not.
+ * Get the preferred locations of a partition (as hostnames), taking into account whether the
+ * RDD is checkpointed.
*/
final def preferredLocations(split: Partition): Seq[String] = {
checkpointRDD.map(_.getPreferredLocations(split)).getOrElse {
@@ -286,7 +271,10 @@ abstract class RDD[T: ClassManifest](
def coalesce(numPartitions: Int, shuffle: Boolean = false): RDD[T] = {
if (shuffle) {
// include a shuffle step so that our upstream tasks are still distributed
- new CoalescedRDD(new ShuffledRDD(map(x => (x, null)), new HashPartitioner(numPartitions)), numPartitions).keys
+ new CoalescedRDD(
+ new ShuffledRDD[T, Null, (T, Null)](map(x => (x, null)),
+ new HashPartitioner(numPartitions)),
+ numPartitions).keys
} else {
new CoalescedRDD(this, numPartitions)
}
@@ -301,8 +289,8 @@ abstract class RDD[T: ClassManifest](
def takeSample(withReplacement: Boolean, num: Int, seed: Int): Array[T] = {
var fraction = 0.0
var total = 0
- var multiplier = 3.0
- var initialCount = this.count()
+ val multiplier = 3.0
+ val initialCount = this.count()
var maxSelected = 0
if (num < 0) {
@@ -514,22 +502,19 @@ abstract class RDD[T: ClassManifest](
* *same number of partitions*, but does *not* require them to have the same number
* of elements in each partition.
*/
- def zipPartitions[B: ClassManifest, V: ClassManifest](
- f: (Iterator[T], Iterator[B]) => Iterator[V],
- rdd2: RDD[B]): RDD[V] =
+ def zipPartitions[B: ClassManifest, V: ClassManifest]
+ (rdd2: RDD[B])
+ (f: (Iterator[T], Iterator[B]) => Iterator[V]): RDD[V] =
new ZippedPartitionsRDD2(sc, sc.clean(f), this, rdd2)
- def zipPartitions[B: ClassManifest, C: ClassManifest, V: ClassManifest](
- f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V],
- rdd2: RDD[B],
- rdd3: RDD[C]): RDD[V] =
+ def zipPartitions[B: ClassManifest, C: ClassManifest, V: ClassManifest]
+ (rdd2: RDD[B], rdd3: RDD[C])
+ (f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V]): RDD[V] =
new ZippedPartitionsRDD3(sc, sc.clean(f), this, rdd2, rdd3)
- def zipPartitions[B: ClassManifest, C: ClassManifest, D: ClassManifest, V: ClassManifest](
- f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V],
- rdd2: RDD[B],
- rdd3: RDD[C],
- rdd4: RDD[D]): RDD[V] =
+ def zipPartitions[B: ClassManifest, C: ClassManifest, D: ClassManifest, V: ClassManifest]
+ (rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D])
+ (f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V]): RDD[V] =
new ZippedPartitionsRDD4(sc, sc.clean(f), this, rdd2, rdd3, rdd4)
@@ -893,7 +878,7 @@ abstract class RDD[T: ClassManifest](
dependencies.head.rdd.asInstanceOf[RDD[U]]
}
- /** The [[spark.SparkContext]] that this RDD was created on. */
+ /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */
def context = sc
// Avoid handling doCheckpoint multiple times to prevent excessive recursion
@@ -929,7 +914,7 @@ abstract class RDD[T: ClassManifest](
* Clears the dependencies of this RDD. This method must ensure that all references
* to the original parent RDDs is removed to enable the parent RDDs to be garbage
* collected. Subclasses of RDD may override this method for implementing their own cleaning
- * logic. See [[spark.rdd.UnionRDD]] for an example.
+ * logic. See [[org.apache.spark.rdd.UnionRDD]] for an example.
*/
protected def clearDependencies() {
dependencies_ = null
@@ -950,4 +935,8 @@ abstract class RDD[T: ClassManifest](
id,
origin)
+ def toJavaRDD() : JavaRDD[T] = {
+ new JavaRDD(this)(elementClassManifest)
+ }
+
}
diff --git a/core/src/main/scala/spark/RDDCheckpointData.scala b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala
index b615f820eb..6009a41570 100644
--- a/core/src/main/scala/spark/RDDCheckpointData.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala
@@ -15,12 +15,13 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.rdd
import org.apache.hadoop.fs.Path
import org.apache.hadoop.conf.Configuration
-import rdd.{CheckpointRDD, CoalescedRDD}
-import scheduler.{ResultTask, ShuffleMapTask}
+
+import org.apache.spark.{Partition, SparkException, Logging}
+import org.apache.spark.scheduler.{ResultTask, ShuffleMapTask}
/**
* Enumeration to manage state transitions of an RDD through checkpointing
diff --git a/core/src/main/scala/spark/rdd/SampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala
index 574c9b141d..2c5253ae30 100644
--- a/core/src/main/scala/spark/rdd/SampledRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import java.util.Random
import cern.jet.random.Poisson
import cern.jet.random.engine.DRand
-import spark.{RDD, Partition, TaskContext}
+import org.apache.spark.{Partition, TaskContext}
private[spark]
class SampledRDDPartition(val prev: Partition, val seed: Int) extends Partition with Serializable {
diff --git a/core/src/main/scala/spark/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala
index 9f30b7f22f..5fe4676029 100644
--- a/core/src/main/scala/spark/SequenceFileRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala
@@ -15,40 +15,22 @@
* limitations under the License.
*/
-package spark
-
-import java.io.EOFException
-import java.net.URL
-import java.io.ObjectInputStream
-import java.util.concurrent.atomic.AtomicLong
-import java.util.HashSet
-import java.util.Random
-import java.util.Date
-
-import scala.collection.mutable.ArrayBuffer
-import scala.collection.mutable.Map
-import scala.collection.mutable.HashMap
+package org.apache.spark.rdd
import org.apache.hadoop.mapred.JobConf
-import org.apache.hadoop.mapred.OutputFormat
-import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapred.SequenceFileOutputFormat
-import org.apache.hadoop.mapred.OutputCommitter
-import org.apache.hadoop.mapred.FileOutputCommitter
import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.io.Writable
-import org.apache.hadoop.io.NullWritable
-import org.apache.hadoop.io.BytesWritable
-import org.apache.hadoop.io.Text
-import spark.SparkContext._
+import org.apache.spark.SparkContext._
+import org.apache.spark.Logging
/**
* Extra functions available on RDDs of (key, value) pairs to create a Hadoop SequenceFile,
* through an implicit conversion. Note that this can't be part of PairRDDFunctions because
* we need more implicit parameters to convert our keys and values to Writable.
*
- * Users should import `spark.SparkContext._` at the top of their program to use these functions.
+ * Import `org.apache.spark.SparkContext._` at the top of their program to use these functions.
*/
class SequenceFileRDDFunctions[K <% Writable: ClassManifest, V <% Writable : ClassManifest](
self: RDD[(K, V)])
diff --git a/core/src/main/scala/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
index 0137f80953..9537152335 100644
--- a/core/src/main/scala/spark/rdd/ShuffledRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
@@ -15,10 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{Partitioner, RDD, SparkEnv, ShuffleDependency, Partition, TaskContext}
-import spark.SparkContext._
+import org.apache.spark.{Dependency, Partitioner, SparkEnv, ShuffleDependency, Partition, TaskContext}
private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition {
@@ -30,15 +29,24 @@ private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition {
* The resulting RDD from a shuffle (e.g. repartitioning of data).
* @param prev the parent RDD.
* @param part the partitioner used to partition the RDD
- * @param serializerClass class name of the serializer to use.
* @tparam K the key class.
* @tparam V the value class.
*/
-class ShuffledRDD[K, V](
- @transient prev: RDD[(K, V)],
- part: Partitioner,
- serializerClass: String = null)
- extends RDD[(K, V)](prev.context, List(new ShuffleDependency(prev, part, serializerClass))) {
+class ShuffledRDD[K, V, P <: Product2[K, V] : ClassManifest](
+ @transient var prev: RDD[P],
+ part: Partitioner)
+ extends RDD[P](prev.context, Nil) {
+
+ private var serializerClass: String = null
+
+ def setSerializer(cls: String): ShuffledRDD[K, V, P] = {
+ serializerClass = cls
+ this
+ }
+
+ override def getDependencies: Seq[Dependency[_]] = {
+ List(new ShuffleDependency(prev, part, serializerClass))
+ }
override val partitioner = Some(part)
@@ -46,9 +54,14 @@ class ShuffledRDD[K, V](
Array.tabulate[Partition](part.numPartitions)(i => new ShuffledRDDPartition(i))
}
- override def compute(split: Partition, context: TaskContext): Iterator[(K, V)] = {
+ override def compute(split: Partition, context: TaskContext): Iterator[P] = {
val shuffledId = dependencies.head.asInstanceOf[ShuffleDependency[K, V]].shuffleId
- SparkEnv.get.shuffleFetcher.fetch[K, V](shuffledId, split.index, context.taskMetrics,
+ SparkEnv.get.shuffleFetcher.fetch[P](shuffledId, split.index, context.taskMetrics,
SparkEnv.get.serializerManager.get(serializerClass))
}
+
+ override def clearDependencies() {
+ super.clearDependencies()
+ prev = null
+ }
}
diff --git a/core/src/main/scala/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala
index 0402b9f250..8c1a29dfff 100644
--- a/core/src/main/scala/spark/rdd/SubtractedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala
@@ -15,19 +15,18 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import java.util.{HashMap => JHashMap}
import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer
-import spark.RDD
-import spark.Partitioner
-import spark.Dependency
-import spark.TaskContext
-import spark.Partition
-import spark.SparkEnv
-import spark.ShuffleDependency
-import spark.OneToOneDependency
+import org.apache.spark.Partitioner
+import org.apache.spark.Dependency
+import org.apache.spark.TaskContext
+import org.apache.spark.Partition
+import org.apache.spark.SparkEnv
+import org.apache.spark.ShuffleDependency
+import org.apache.spark.OneToOneDependency
/**
@@ -47,20 +46,26 @@ import spark.OneToOneDependency
* out of memory because of the size of `rdd2`.
*/
private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassManifest](
- @transient var rdd1: RDD[(K, V)],
- @transient var rdd2: RDD[(K, W)],
- part: Partitioner,
- val serializerClass: String = null)
+ @transient var rdd1: RDD[_ <: Product2[K, V]],
+ @transient var rdd2: RDD[_ <: Product2[K, W]],
+ part: Partitioner)
extends RDD[(K, V)](rdd1.context, Nil) {
+ private var serializerClass: String = null
+
+ def setSerializer(cls: String): SubtractedRDD[K, V, W] = {
+ serializerClass = cls
+ this
+ }
+
override def getDependencies: Seq[Dependency[_]] = {
Seq(rdd1, rdd2).map { rdd =>
if (rdd.partitioner == Some(part)) {
- logInfo("Adding one-to-one dependency with " + rdd)
+ logDebug("Adding one-to-one dependency with " + rdd)
new OneToOneDependency(rdd)
} else {
- logInfo("Adding shuffle dependency with " + rdd)
- new ShuffleDependency(rdd.asInstanceOf[RDD[(K, Any)]], part, serializerClass)
+ logDebug("Adding shuffle dependency with " + rdd)
+ new ShuffleDependency(rdd, part, serializerClass)
}
}
}
@@ -97,16 +102,14 @@ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassM
seq
}
}
- def integrate(dep: CoGroupSplitDep, op: ((K, V)) => Unit) = dep match {
+ def integrate(dep: CoGroupSplitDep, op: Product2[K, V] => Unit) = dep match {
case NarrowCoGroupSplitDep(rdd, _, itsSplit) => {
- for (t <- rdd.iterator(itsSplit, context))
- op(t.asInstanceOf[(K, V)])
+ rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, V]]].foreach(op)
}
case ShuffleCoGroupSplitDep(shuffleId) => {
- val iter = SparkEnv.get.shuffleFetcher.fetch(shuffleId, partition.index,
+ val iter = SparkEnv.get.shuffleFetcher.fetch[Product2[K, V]](shuffleId, partition.index,
context.taskMetrics, serializer)
- for (t <- iter)
- op(t.asInstanceOf[(K, V)])
+ iter.foreach(op)
}
}
// the first dep is rdd1; add all values to the map
diff --git a/core/src/main/scala/spark/rdd/UnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala
index 2776826f18..ae8a9f36a6 100644
--- a/core/src/main/scala/spark/rdd/UnionRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import scala.collection.mutable.ArrayBuffer
-import spark.{Dependency, RangeDependency, RDD, SparkContext, Partition, TaskContext}
+import org.apache.spark.{Dependency, RangeDependency, SparkContext, Partition, TaskContext}
import java.io.{ObjectOutputStream, IOException}
private[spark] class UnionPartition[T: ClassManifest](idx: Int, rdd: RDD[T], splitIndex: Int)
diff --git a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala
index 6a4fa13ad6..31e6fd519d 100644
--- a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{Utils, OneToOneDependency, RDD, SparkContext, Partition, TaskContext}
+import org.apache.spark.{OneToOneDependency, SparkContext, Partition, TaskContext}
import java.io.{ObjectOutputStream, IOException}
private[spark] class ZippedPartitionsPartition(
@@ -55,27 +55,15 @@ abstract class ZippedPartitionsBaseRDD[V: ClassManifest](
}
override def getPreferredLocations(s: Partition): Seq[String] = {
- // Note that as number of rdd's increase and/or number of slaves in cluster increase, the computed preferredLocations below
- // become diminishingly small : so we might need to look at alternate strategies to alleviate this.
- // If there are no (or very small number of preferred locations), we will end up transferred the blocks to 'any' node in the
- // cluster - paying with n/w and cache cost.
- // Maybe pick a node which figures max amount of time ?
- // Choose node which is hosting 'larger' of some subset of blocks ?
- // Look at rack locality to ensure chosen host is atleast rack local to both hosting node ?, etc (would be good to defer this if possible)
- val splits = s.asInstanceOf[ZippedPartitionsPartition].partitions
- val rddSplitZip = rdds.zip(splits)
-
- // exact match.
- val exactMatchPreferredLocations = rddSplitZip.map(x => x._1.preferredLocations(x._2))
- val exactMatchLocations = exactMatchPreferredLocations.reduce((x, y) => x.intersect(y))
-
- // Remove exact match and then do host local match.
- val exactMatchHosts = exactMatchLocations.map(Utils.parseHostPort(_)._1)
- val matchPreferredHosts = exactMatchPreferredLocations.map(locs => locs.map(Utils.parseHostPort(_)._1))
- .reduce((x, y) => x.intersect(y))
- val otherNodeLocalLocations = matchPreferredHosts.filter { s => !exactMatchHosts.contains(s) }
-
- otherNodeLocalLocations ++ exactMatchLocations
+ val parts = s.asInstanceOf[ZippedPartitionsPartition].partitions
+ val prefs = rdds.zip(parts).map { case (rdd, p) => rdd.preferredLocations(p) }
+ // Check whether there are any hosts that match all RDDs; otherwise return the union
+ val exactMatchLocations = prefs.reduce((x, y) => x.intersect(y))
+ if (!exactMatchLocations.isEmpty) {
+ exactMatchLocations
+ } else {
+ prefs.flatten.distinct
+ }
}
override def clearDependencies() {
diff --git a/core/src/main/scala/spark/rdd/ZippedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala
index b1c43b3195..567b67dfee 100644
--- a/core/src/main/scala/spark/rdd/ZippedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
-import spark.{Utils, OneToOneDependency, RDD, SparkContext, Partition, TaskContext}
+import org.apache.spark.{OneToOneDependency, SparkContext, Partition, TaskContext}
import java.io.{ObjectOutputStream, IOException}
@@ -65,27 +65,16 @@ class ZippedRDD[T: ClassManifest, U: ClassManifest](
}
override def getPreferredLocations(s: Partition): Seq[String] = {
- // Note that as number of slaves in cluster increase, the computed preferredLocations can become small : so we might need
- // to look at alternate strategies to alleviate this. (If there are no (or very small number of preferred locations), we
- // will end up transferred the blocks to 'any' node in the cluster - paying with n/w and cache cost.
- // Maybe pick one or the other ? (so that atleast one block is local ?).
- // Choose node which is hosting 'larger' of the blocks ?
- // Look at rack locality to ensure chosen host is atleast rack local to both hosting node ?, etc (would be good to defer this if possible)
val (partition1, partition2) = s.asInstanceOf[ZippedPartition[T, U]].partitions
val pref1 = rdd1.preferredLocations(partition1)
val pref2 = rdd2.preferredLocations(partition2)
-
- // exact match - instance local and host local.
+ // Check whether there are any hosts that match both RDDs; otherwise return the union
val exactMatchLocations = pref1.intersect(pref2)
-
- // remove locations which are already handled via exactMatchLocations, and intersect where both partitions are node local.
- val otherNodeLocalPref1 = pref1.filter(loc => ! exactMatchLocations.contains(loc)).map(loc => Utils.parseHostPort(loc)._1)
- val otherNodeLocalPref2 = pref2.filter(loc => ! exactMatchLocations.contains(loc)).map(loc => Utils.parseHostPort(loc)._1)
- val otherNodeLocalLocations = otherNodeLocalPref1.intersect(otherNodeLocalPref2)
-
-
- // Can have mix of instance local (hostPort) and node local (host) locations as preference !
- exactMatchLocations ++ otherNodeLocalLocations
+ if (!exactMatchLocations.isEmpty) {
+ exactMatchLocations
+ } else {
+ (pref1 ++ pref2).distinct
+ }
}
override def clearDependencies() {
diff --git a/core/src/main/scala/spark/scheduler/ActiveJob.scala b/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala
index 71cc94edb6..0b04607d01 100644
--- a/core/src/main/scala/spark/scheduler/ActiveJob.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
-import spark.TaskContext
+import org.apache.spark.TaskContext
import java.util.Properties
@@ -25,7 +25,7 @@ import java.util.Properties
* Tracks information about an active job in the DAGScheduler.
*/
private[spark] class ActiveJob(
- val runId: Int,
+ val jobId: Int,
val finalStage: Stage,
val func: (TaskContext, Iterator[_]) => _,
val partitions: Array[Int],
diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index 29e879aa42..92add5b073 100644
--- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -15,29 +15,40 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
-import cluster.TaskInfo
-import java.util.concurrent.atomic.AtomicInteger
-import java.util.concurrent.LinkedBlockingQueue
-import java.util.concurrent.TimeUnit
+import java.io.NotSerializableException
import java.util.Properties
+import java.util.concurrent.{LinkedBlockingQueue, TimeUnit}
+import java.util.concurrent.atomic.AtomicInteger
import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map}
-import spark._
-import spark.executor.TaskMetrics
-import spark.partial.ApproximateActionListener
-import spark.partial.ApproximateEvaluator
-import spark.partial.PartialResult
-import spark.storage.{BlockManager, BlockManagerMaster}
-import spark.util.{MetadataCleaner, TimeStampedHashMap}
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.partial.{ApproximateActionListener, ApproximateEvaluator, PartialResult}
+import org.apache.spark.scheduler.cluster.TaskInfo
+import org.apache.spark.storage.{BlockManager, BlockManagerMaster}
+import org.apache.spark.util.{MetadataCleaner, TimeStampedHashMap}
/**
- * A Scheduler subclass that implements stage-oriented scheduling. It computes a DAG of stages for
- * each job, keeps track of which RDDs and stage outputs are materialized, and computes a minimal
- * schedule to run the job. Subclasses only need to implement the code to send a task to the cluster
- * and to report fetch failures (the submitTasks method, and code to add CompletionEvents).
+ * The high-level scheduling layer that implements stage-oriented scheduling. It computes a DAG of
+ * stages for each job, keeps track of which RDDs and stage outputs are materialized, and finds a
+ * minimal schedule to run the job. It then submits stages as TaskSets to an underlying
+ * TaskScheduler implementation that runs them on the cluster.
+ *
+ * In addition to coming up with a DAG of stages, this class also determines the preferred
+ * locations to run each task on, based on the current cache status, and passes these to the
+ * low-level TaskScheduler. Furthermore, it handles failures due to shuffle output files being
+ * lost, in which case old stages may need to be resubmitted. Failures *within* a stage that are
+ * not caused by shuffie file loss are handled by the TaskScheduler, which will retry each task
+ * a small number of times before cancelling the whole stage.
+ *
+ * THREADING: This class runs all its logic in a single thread executing the run() method, to which
+ * events are submitted using a synchonized queue (eventQueue). The public API methods, such as
+ * runJob, taskEnded and executorLost, post events asynchronously to this queue. All other methods
+ * should be private.
*/
private[spark]
class DAGScheduler(
@@ -52,6 +63,11 @@ class DAGScheduler(
}
taskSched.setListener(this)
+ // Called by TaskScheduler to report task's starting.
+ override def taskStarted(task: Task[_], taskInfo: TaskInfo) {
+ eventQueue.put(BeginEvent(task, taskInfo))
+ }
+
// Called by TaskScheduler to report task completions or failures.
override def taskEnded(
task: Task[_],
@@ -69,8 +85,8 @@ class DAGScheduler(
}
// Called by TaskScheduler when a host is added
- override def executorGained(execId: String, hostPort: String) {
- eventQueue.put(ExecutorGained(execId, hostPort))
+ override def executorGained(execId: String, host: String) {
+ eventQueue.put(ExecutorGained(execId, host))
}
// Called by TaskScheduler to cancel an entire TaskSet due to repeated failures.
@@ -89,27 +105,28 @@ class DAGScheduler(
private val eventQueue = new LinkedBlockingQueue[DAGSchedulerEvent]
- val nextRunId = new AtomicInteger(0)
+ val nextJobId = new AtomicInteger(0)
val nextStageId = new AtomicInteger(0)
- val idToStage = new TimeStampedHashMap[Int, Stage]
+ val stageIdToStage = new TimeStampedHashMap[Int, Stage]
val shuffleToMapStage = new TimeStampedHashMap[Int, Stage]
private[spark] val stageToInfos = new TimeStampedHashMap[Stage, StageInfo]
- private[spark] val sparkListeners = ArrayBuffer[SparkListener]()
+ private val listenerBus = new SparkListenerBus()
- var cacheLocs = new HashMap[Int, Array[List[String]]]
+ // Contains the locations that each RDD's partitions are cached on
+ private val cacheLocs = new HashMap[Int, Array[Seq[TaskLocation]]]
- // For tracking failed nodes, we use the MapOutputTracker's generation number, which is
- // sent with every task. When we detect a node failing, we note the current generation number
- // and failed executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask
- // results.
- // TODO: Garbage collect information about failure generations when we know there are no more
+ // For tracking failed nodes, we use the MapOutputTracker's epoch number, which is sent with
+ // every task. When we detect a node failing, we note the current epoch number and failed
+ // executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask results.
+ //
+ // TODO: Garbage collect information about failure epochs when we know there are no more
// stray messages to detect.
- val failedGeneration = new HashMap[String, Long]
+ val failedEpoch = new HashMap[String, Long]
val idToActiveJob = new HashMap[Int, ActiveJob]
@@ -134,11 +151,17 @@ class DAGScheduler(
}.start()
}
- private def getCacheLocs(rdd: RDD[_]): Array[List[String]] = {
+ def addSparkListener(listener: SparkListener) {
+ listenerBus.addListener(listener)
+ }
+
+ private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = {
if (!cacheLocs.contains(rdd.id)) {
val blockIds = rdd.partitions.indices.map(index=> "rdd_%d_%d".format(rdd.id, index)).toArray
- val locs = BlockManager.blockIdsToExecutorLocations(blockIds, env, blockManagerMaster)
- cacheLocs(rdd.id) = blockIds.map(locs.getOrElse(_, Nil))
+ val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster)
+ cacheLocs(rdd.id) = blockIds.map { id =>
+ locs.getOrElse(id, Nil).map(bm => TaskLocation(bm.host, bm.executorId))
+ }
}
cacheLocs(rdd.id)
}
@@ -149,14 +172,14 @@ class DAGScheduler(
/**
* Get or create a shuffle map stage for the given shuffle dependency's map side.
- * The priority value passed in will be used if the stage doesn't already exist with
- * a lower priority (we assume that priorities always increase across jobs for now).
+ * The jobId value passed in will be used if the stage doesn't already exist with
+ * a lower jobId (jobId always increases across jobs.)
*/
- private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], priority: Int): Stage = {
+ private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], jobId: Int): Stage = {
shuffleToMapStage.get(shuffleDep.shuffleId) match {
case Some(stage) => stage
case None =>
- val stage = newStage(shuffleDep.rdd, Some(shuffleDep), priority)
+ val stage = newStage(shuffleDep.rdd, Some(shuffleDep), jobId)
shuffleToMapStage(shuffleDep.shuffleId) = stage
stage
}
@@ -164,13 +187,13 @@ class DAGScheduler(
/**
* Create a Stage for the given RDD, either as a shuffle map stage (for a ShuffleDependency) or
- * as a result stage for the final RDD used directly in an action. The stage will also be given
- * the provided priority.
+ * as a result stage for the final RDD used directly in an action. The stage will also be
+ * associated with the provided jobId.
*/
private def newStage(
rdd: RDD[_],
shuffleDep: Option[ShuffleDependency[_,_]],
- priority: Int,
+ jobId: Int,
callSite: Option[String] = None)
: Stage =
{
@@ -181,17 +204,17 @@ class DAGScheduler(
mapOutputTracker.registerShuffle(shuffleDep.get.shuffleId, rdd.partitions.size)
}
val id = nextStageId.getAndIncrement()
- val stage = new Stage(id, rdd, shuffleDep, getParentStages(rdd, priority), priority, callSite)
- idToStage(id) = stage
+ val stage = new Stage(id, rdd, shuffleDep, getParentStages(rdd, jobId), jobId, callSite)
+ stageIdToStage(id) = stage
stageToInfos(stage) = StageInfo(stage)
stage
}
/**
* Get or create the list of parent stages for a given RDD. The stages will be assigned the
- * provided priority if they haven't already been created with a lower priority.
+ * provided jobId if they haven't already been created with a lower jobId.
*/
- private def getParentStages(rdd: RDD[_], priority: Int): List[Stage] = {
+ private def getParentStages(rdd: RDD[_], jobId: Int): List[Stage] = {
val parents = new HashSet[Stage]
val visited = new HashSet[RDD[_]]
def visit(r: RDD[_]) {
@@ -202,7 +225,7 @@ class DAGScheduler(
for (dep <- r.dependencies) {
dep match {
case shufDep: ShuffleDependency[_,_] =>
- parents += getShuffleMapStage(shufDep, priority)
+ parents += getShuffleMapStage(shufDep, jobId)
case _ =>
visit(dep.rdd)
}
@@ -223,7 +246,7 @@ class DAGScheduler(
for (dep <- rdd.dependencies) {
dep match {
case shufDep: ShuffleDependency[_,_] =>
- val mapStage = getShuffleMapStage(shufDep, stage.priority)
+ val mapStage = getShuffleMapStage(shufDep, stage.jobId)
if (!mapStage.isAvailable) {
missing += mapStage
}
@@ -258,8 +281,9 @@ class DAGScheduler(
assert(partitions.size > 0)
val waiter = new JobWaiter(partitions.size, resultHandler)
val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _]
- val toSubmit = JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter, properties)
- return (toSubmit, waiter)
+ val toSubmit = JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter,
+ properties)
+ (toSubmit, waiter)
}
def runJob[T, U: ClassManifest](
@@ -283,7 +307,7 @@ class DAGScheduler(
"Total number of partitions: " + maxPartitions)
}
- val (toSubmit, waiter) = prepareJob(
+ val (toSubmit: JobSubmitted, waiter: JobWaiter[_]) = prepareJob(
finalRdd, func, partitions, callSite, allowLocal, resultHandler, properties)
eventQueue.put(toSubmit)
waiter.awaitResult() match {
@@ -306,8 +330,8 @@ class DAGScheduler(
val listener = new ApproximateActionListener(rdd, func, evaluator, timeout)
val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _]
val partitions = (0 until rdd.partitions.size).toArray
- eventQueue.put(JobSubmitted(rdd, func2, partitions, false, callSite, listener, properties))
- return listener.awaitResult() // Will throw an exception if the job fails
+ eventQueue.put(JobSubmitted(rdd, func2, partitions, allowLocal = false, callSite, listener, properties))
+ listener.awaitResult() // Will throw an exception if the job fails
}
/**
@@ -317,11 +341,11 @@ class DAGScheduler(
private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = {
event match {
case JobSubmitted(finalRDD, func, partitions, allowLocal, callSite, listener, properties) =>
- val runId = nextRunId.getAndIncrement()
- val finalStage = newStage(finalRDD, None, runId, Some(callSite))
- val job = new ActiveJob(runId, finalStage, func, partitions, callSite, listener, properties)
+ val jobId = nextJobId.getAndIncrement()
+ val finalStage = newStage(finalRDD, None, jobId, Some(callSite))
+ val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties)
clearCacheLocs()
- logInfo("Got job " + job.runId + " (" + callSite + ") with " + partitions.length +
+ logInfo("Got job " + job.jobId + " (" + callSite + ") with " + partitions.length +
" output partitions (allowLocal=" + allowLocal + ")")
logInfo("Final stage: " + finalStage + " (" + finalStage.name + ")")
logInfo("Parents of final stage: " + finalStage.parents)
@@ -330,37 +354,40 @@ class DAGScheduler(
// Compute very short actions like first() or take() with no parent stages locally.
runLocally(job)
} else {
- sparkListeners.foreach(_.onJobStart(SparkListenerJobStart(job, properties)))
- idToActiveJob(runId) = job
+ listenerBus.post(SparkListenerJobStart(job, properties))
+ idToActiveJob(jobId) = job
activeJobs += job
resultStageToJob(finalStage) = job
submitStage(finalStage)
}
- case ExecutorGained(execId, hostPort) =>
- handleExecutorGained(execId, hostPort)
+ case ExecutorGained(execId, host) =>
+ handleExecutorGained(execId, host)
case ExecutorLost(execId) =>
handleExecutorLost(execId)
+ case begin: BeginEvent =>
+ listenerBus.post(SparkListenerTaskStart(begin.task, begin.taskInfo))
+
case completion: CompletionEvent =>
- sparkListeners.foreach(_.onTaskEnd(SparkListenerTaskEnd(completion.task,
- completion.reason, completion.taskInfo, completion.taskMetrics)))
+ listenerBus.post(SparkListenerTaskEnd(
+ completion.task, completion.reason, completion.taskInfo, completion.taskMetrics))
handleTaskCompletion(completion)
case TaskSetFailed(taskSet, reason) =>
- abortStage(idToStage(taskSet.stageId), reason)
+ abortStage(stageIdToStage(taskSet.stageId), reason)
case StopDAGScheduler =>
// Cancel any active jobs
for (job <- activeJobs) {
val error = new SparkException("Job cancelled because SparkContext was shut down")
job.listener.jobFailed(error)
- sparkListeners.foreach(_.onJobEnd(SparkListenerJobEnd(job, JobFailed(error, None))))
+ listenerBus.post(SparkListenerJobEnd(job, JobFailed(error, None)))
}
return true
}
- return false
+ false
}
/**
@@ -372,7 +399,7 @@ class DAGScheduler(
clearCacheLocs()
val failed2 = failed.toArray
failed.clear()
- for (stage <- failed2.sortBy(_.priority)) {
+ for (stage <- failed2.sortBy(_.jobId)) {
submitStage(stage)
}
}
@@ -390,7 +417,7 @@ class DAGScheduler(
logTrace("failed: " + failed)
val waiting2 = waiting.toArray
waiting.clear()
- for (stage <- waiting2.sortBy(_.priority)) {
+ for (stage <- waiting2.sortBy(_.jobId)) {
submitStage(stage)
}
}
@@ -409,23 +436,24 @@ class DAGScheduler(
if (event != null) {
logDebug("Got event of type " + event.getClass.getName)
}
-
- if (event != null) {
- if (processEvent(event)) {
- return
+ this.synchronized { // needed in case other threads makes calls into methods of this class
+ if (event != null) {
+ if (processEvent(event)) {
+ return
+ }
}
- }
- val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability
- // Periodically resubmit failed stages if some map output fetches have failed and we have
- // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails,
- // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at
- // the same time, so we want to make sure we've identified all the reduce tasks that depend
- // on the failed node.
- if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) {
- resubmitFailedStages()
- } else {
- submitWaitingStages()
+ val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability
+ // Periodically resubmit failed stages if some map output fetches have failed and we have
+ // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails,
+ // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at
+ // the same time, so we want to make sure we've identified all the reduce tasks that depend
+ // on the failed node.
+ if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) {
+ resubmitFailedStages()
+ } else {
+ submitWaitingStages()
+ }
}
}
}
@@ -437,7 +465,7 @@ class DAGScheduler(
*/
protected def runLocally(job: ActiveJob) {
logInfo("Computing the requested partition locally")
- new Thread("Local computation of job " + job.runId) {
+ new Thread("Local computation of job " + job.jobId) {
override def run() {
runLocallyWithinThread(job)
}
@@ -497,20 +525,36 @@ class DAGScheduler(
} else {
// This is a final stage; figure out its job's missing partitions
val job = resultStageToJob(stage)
- for (id <- 0 until job.numPartitions if (!job.finished(id))) {
+ for (id <- 0 until job.numPartitions if !job.finished(id)) {
val partition = job.partitions(id)
val locs = getPreferredLocs(stage.rdd, partition)
tasks += new ResultTask(stage.id, stage.rdd, job.func, partition, locs, id)
}
}
+ // must be run listener before possible NotSerializableException
+ // should be "StageSubmitted" first and then "JobEnded"
+ val properties = idToActiveJob(stage.jobId).properties
+ listenerBus.post(SparkListenerStageSubmitted(stage, tasks.size, properties))
+
if (tasks.size > 0) {
- sparkListeners.foreach(_.onStageSubmitted(SparkListenerStageSubmitted(stage, tasks.size)))
+ // Preemptively serialize a task to make sure it can be serialized. We are catching this
+ // exception here because it would be fairly hard to catch the non-serializable exception
+ // down the road, where we have several different implementations for local scheduler and
+ // cluster schedulers.
+ try {
+ SparkEnv.get.closureSerializer.newInstance().serialize(tasks.head)
+ } catch {
+ case e: NotSerializableException =>
+ abortStage(stage, e.toString)
+ running -= stage
+ return
+ }
+
logInfo("Submitting " + tasks.size + " missing tasks from " + stage + " (" + stage.rdd + ")")
myPending ++= tasks
logDebug("New pending tasks: " + myPending)
- val properties = idToActiveJob(stage.priority).properties
taskSched.submitTasks(
- new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.priority, properties))
+ new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.jobId, properties))
if (!stage.submissionTime.isDefined) {
stage.submissionTime = Some(System.currentTimeMillis())
}
@@ -527,7 +571,7 @@ class DAGScheduler(
*/
private def handleTaskCompletion(event: CompletionEvent) {
val task = event.task
- val stage = idToStage(task.stageId)
+ val stage = stageIdToStage(task.stageId)
def markStageAsFinished(stage: Stage) = {
val serviceTime = stage.submissionTime match {
@@ -536,8 +580,7 @@ class DAGScheduler(
}
logInfo("%s (%s) finished in %s s".format(stage, stage.name, serviceTime))
stage.completionTime = Some(System.currentTimeMillis)
- val stageComp = StageCompleted(stageToInfos(stage))
- sparkListeners.foreach{_.onStageCompleted(stageComp)}
+ listenerBus.post(StageCompleted(stageToInfos(stage)))
running -= stage
}
event.reason match {
@@ -557,11 +600,11 @@ class DAGScheduler(
job.numFinished += 1
// If the whole job has finished, remove it
if (job.numFinished == job.numPartitions) {
- idToActiveJob -= stage.priority
+ idToActiveJob -= stage.jobId
activeJobs -= job
resultStageToJob -= stage
markStageAsFinished(stage)
- sparkListeners.foreach(_.onJobEnd(SparkListenerJobEnd(job, JobSucceeded)))
+ listenerBus.post(SparkListenerJobEnd(job, JobSucceeded))
}
job.listener.taskSucceeded(rt.outputId, event.result)
}
@@ -573,7 +616,7 @@ class DAGScheduler(
val status = event.result.asInstanceOf[MapStatus]
val execId = status.location.executorId
logDebug("ShuffleMapTask finished on " + execId)
- if (failedGeneration.contains(execId) && smt.generation <= failedGeneration(execId)) {
+ if (failedEpoch.contains(execId) && smt.epoch <= failedEpoch(execId)) {
logInfo("Ignoring possibly bogus ShuffleMapTask completion from " + execId)
} else {
stage.addOutputLoc(smt.partition, status)
@@ -585,16 +628,16 @@ class DAGScheduler(
logInfo("waiting: " + waiting)
logInfo("failed: " + failed)
if (stage.shuffleDep != None) {
- // We supply true to increment the generation number here in case this is a
+ // We supply true to increment the epoch number here in case this is a
// recomputation of the map outputs. In that case, some nodes may have cached
// locations with holes (from when we detected the error) and will need the
- // generation incremented to refetch them.
- // TODO: Only increment the generation number if this is not the first time
+ // epoch incremented to refetch them.
+ // TODO: Only increment the epoch number if this is not the first time
// we registered these map outputs.
mapOutputTracker.registerMapOutputs(
stage.shuffleDep.get.shuffleId,
stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray,
- true)
+ changeEpoch = true)
}
clearCacheLocs()
if (stage.outputLocs.count(_ == Nil) != 0) {
@@ -628,7 +671,7 @@ class DAGScheduler(
case FetchFailed(bmAddress, shuffleId, mapId, reduceId) =>
// Mark the stage that the reducer was in as unrunnable
- val failedStage = idToStage(task.stageId)
+ val failedStage = stageIdToStage(task.stageId)
running -= failedStage
failed += failedStage
// TODO: Cancel running tasks in the stage
@@ -648,7 +691,7 @@ class DAGScheduler(
lastFetchFailureTime = System.currentTimeMillis() // TODO: Use pluggable clock
// TODO: mark the executor as failed only if there were lots of fetch failures on it
if (bmAddress != null) {
- handleExecutorLost(bmAddress.executorId, Some(task.generation))
+ handleExecutorLost(bmAddress.executorId, Some(task.epoch))
}
case ExceptionFailure(className, description, stackTrace, metrics) =>
@@ -656,7 +699,7 @@ class DAGScheduler(
case other =>
// Unrecognized failure - abort all jobs depending on this stage
- abortStage(idToStage(task.stageId), task + " failed: " + other)
+ abortStage(stageIdToStage(task.stageId), task + " failed: " + other)
}
}
@@ -664,36 +707,36 @@ class DAGScheduler(
* Responds to an executor being lost. This is called inside the event loop, so it assumes it can
* modify the scheduler's internal state. Use executorLost() to post a loss event from outside.
*
- * Optionally the generation during which the failure was caught can be passed to avoid allowing
+ * Optionally the epoch during which the failure was caught can be passed to avoid allowing
* stray fetch failures from possibly retriggering the detection of a node as lost.
*/
- private def handleExecutorLost(execId: String, maybeGeneration: Option[Long] = None) {
- val currentGeneration = maybeGeneration.getOrElse(mapOutputTracker.getGeneration)
- if (!failedGeneration.contains(execId) || failedGeneration(execId) < currentGeneration) {
- failedGeneration(execId) = currentGeneration
- logInfo("Executor lost: %s (generation %d)".format(execId, currentGeneration))
+ private def handleExecutorLost(execId: String, maybeEpoch: Option[Long] = None) {
+ val currentEpoch = maybeEpoch.getOrElse(mapOutputTracker.getEpoch)
+ if (!failedEpoch.contains(execId) || failedEpoch(execId) < currentEpoch) {
+ failedEpoch(execId) = currentEpoch
+ logInfo("Executor lost: %s (epoch %d)".format(execId, currentEpoch))
blockManagerMaster.removeExecutor(execId)
// TODO: This will be really slow if we keep accumulating shuffle map stages
for ((shuffleId, stage) <- shuffleToMapStage) {
stage.removeOutputsOnExecutor(execId)
val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray
- mapOutputTracker.registerMapOutputs(shuffleId, locs, true)
+ mapOutputTracker.registerMapOutputs(shuffleId, locs, changeEpoch = true)
}
if (shuffleToMapStage.isEmpty) {
- mapOutputTracker.incrementGeneration()
+ mapOutputTracker.incrementEpoch()
}
clearCacheLocs()
} else {
logDebug("Additional executor lost message for " + execId +
- "(generation " + currentGeneration + ")")
+ "(epoch " + currentEpoch + ")")
}
}
- private def handleExecutorGained(execId: String, hostPort: String) {
- // remove from failedGeneration(execId) ?
- if (failedGeneration.contains(execId)) {
- logInfo("Host gained which was in lost list earlier: " + hostPort)
- failedGeneration -= execId
+ private def handleExecutorGained(execId: String, host: String) {
+ // remove from failedEpoch(execId) ?
+ if (failedEpoch.contains(execId)) {
+ logInfo("Host gained which was in lost list earlier: " + host)
+ failedEpoch -= execId
}
}
@@ -708,8 +751,8 @@ class DAGScheduler(
val job = resultStageToJob(resultStage)
val error = new SparkException("Job failed: " + reason)
job.listener.jobFailed(error)
- sparkListeners.foreach(_.onJobEnd(SparkListenerJobEnd(job, JobFailed(error, Some(failedStage)))))
- idToActiveJob -= resultStage.priority
+ listenerBus.post(SparkListenerJobEnd(job, JobFailed(error, Some(failedStage))))
+ idToActiveJob -= resultStage.jobId
activeJobs -= job
resultStageToJob -= resultStage
}
@@ -733,7 +776,7 @@ class DAGScheduler(
for (dep <- rdd.dependencies) {
dep match {
case shufDep: ShuffleDependency[_,_] =>
- val mapStage = getShuffleMapStage(shufDep, stage.priority)
+ val mapStage = getShuffleMapStage(shufDep, stage.jobId)
if (!mapStage.isAvailable) {
visitedStages += mapStage
visit(mapStage.rdd)
@@ -748,16 +791,23 @@ class DAGScheduler(
visitedRdds.contains(target.rdd)
}
- private def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = {
+ /**
+ * Synchronized method that might be called from other threads.
+ * @param rdd whose partitions are to be looked at
+ * @param partition to lookup locality information for
+ * @return list of machines that are preferred by the partition
+ */
+ private[spark]
+ def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = synchronized {
// If the partition is cached, return the cache locations
val cached = getCacheLocs(rdd)(partition)
- if (cached != Nil) {
+ if (!cached.isEmpty) {
return cached
}
// If the RDD has some placement preferences (as is the case for input RDDs), get those
val rddPrefs = rdd.preferredLocations(rdd.partitions(partition)).toList
- if (rddPrefs != Nil) {
- return rddPrefs
+ if (!rddPrefs.isEmpty) {
+ return rddPrefs.map(host => TaskLocation(host))
}
// If the RDD has narrow dependencies, pick the first partition of the first narrow dep
// that has any placement preferences. Ideally we would choose based on transfer sizes,
@@ -771,13 +821,13 @@ class DAGScheduler(
}
case _ =>
})
- return Nil
+ Nil
}
private def cleanup(cleanupTime: Long) {
- var sizeBefore = idToStage.size
- idToStage.clearOldValues(cleanupTime)
- logInfo("idToStage " + sizeBefore + " --> " + idToStage.size)
+ var sizeBefore = stageIdToStage.size
+ stageIdToStage.clearOldValues(cleanupTime)
+ logInfo("stageIdToStage " + sizeBefore + " --> " + stageIdToStage.size)
sizeBefore = shuffleToMapStage.size
shuffleToMapStage.clearOldValues(cleanupTime)
diff --git a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala
index 506c87f65b..0d99670648 100644
--- a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala
@@ -15,15 +15,16 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import java.util.Properties
-import spark.scheduler.cluster.TaskInfo
+import org.apache.spark.scheduler.cluster.TaskInfo
import scala.collection.mutable.Map
-import spark._
-import spark.executor.TaskMetrics
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.executor.TaskMetrics
/**
* Types of events that can be handled by the DAGScheduler. The DAGScheduler uses an event queue
@@ -43,6 +44,8 @@ private[spark] case class JobSubmitted(
properties: Properties = null)
extends DAGSchedulerEvent
+private[spark] case class BeginEvent(task: Task[_], taskInfo: TaskInfo) extends DAGSchedulerEvent
+
private[spark] case class CompletionEvent(
task: Task[_],
reason: TaskEndReason,
@@ -52,9 +55,7 @@ private[spark] case class CompletionEvent(
taskMetrics: TaskMetrics)
extends DAGSchedulerEvent
-private[spark] case class ExecutorGained(execId: String, hostPort: String) extends DAGSchedulerEvent {
- Utils.checkHostPort(hostPort, "Required hostport")
-}
+private[spark] case class ExecutorGained(execId: String, host: String) extends DAGSchedulerEvent
private[spark] case class ExecutorLost(execId: String) extends DAGSchedulerEvent
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala
new file mode 100644
index 0000000000..ce0dc9093d
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala
@@ -0,0 +1,30 @@
+package org.apache.spark.scheduler
+
+import com.codahale.metrics.{Gauge,MetricRegistry}
+
+import org.apache.spark.metrics.source.Source
+
+private[spark] class DAGSchedulerSource(val dagScheduler: DAGScheduler) extends Source {
+ val metricRegistry = new MetricRegistry()
+ val sourceName = "DAGScheduler"
+
+ metricRegistry.register(MetricRegistry.name("stage", "failedStages", "number"), new Gauge[Int] {
+ override def getValue: Int = dagScheduler.failed.size
+ })
+
+ metricRegistry.register(MetricRegistry.name("stage", "runningStages", "number"), new Gauge[Int] {
+ override def getValue: Int = dagScheduler.running.size
+ })
+
+ metricRegistry.register(MetricRegistry.name("stage", "waitingStages", "number"), new Gauge[Int] {
+ override def getValue: Int = dagScheduler.waiting.size
+ })
+
+ metricRegistry.register(MetricRegistry.name("job", "allJobs", "number"), new Gauge[Int] {
+ override def getValue: Int = dagScheduler.nextJobId.get()
+ })
+
+ metricRegistry.register(MetricRegistry.name("job", "activeJobs", "number"), new Gauge[Int] {
+ override def getValue: Int = dagScheduler.activeJobs.size
+ })
+}
diff --git a/core/src/main/scala/spark/scheduler/InputFormatInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala
index 65f8c3200e..370ccd183c 100644
--- a/core/src/main/scala/spark/scheduler/InputFormatInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
-import spark.Logging
+import org.apache.spark.{Logging, SparkEnv}
import scala.collection.immutable.Set
import org.apache.hadoop.mapred.{FileInputFormat, JobConf}
import org.apache.hadoop.security.UserGroupInformation
@@ -26,7 +26,6 @@ import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.conf.Configuration
import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
import scala.collection.JavaConversions._
-import spark.deploy.SparkHadoopUtil
/**
@@ -88,8 +87,9 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl
// This method does not expect failures, since validate has already passed ...
private def prefLocsFromMapreduceInputFormat(): Set[SplitInfo] = {
+ val env = SparkEnv.get
val conf = new JobConf(configuration)
- SparkHadoopUtil.addCredentials(conf);
+ env.hadoop.addCredentials(conf)
FileInputFormat.setInputPaths(conf, path)
val instance: org.apache.hadoop.mapreduce.InputFormat[_, _] =
@@ -108,8 +108,9 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl
// This method does not expect failures, since validate has already passed ...
private def prefLocsFromMapredInputFormat(): Set[SplitInfo] = {
+ val env = SparkEnv.get
val jobConf = new JobConf(configuration)
- SparkHadoopUtil.addCredentials(jobConf);
+ env.hadoop.addCredentials(jobConf)
FileInputFormat.setInputPaths(jobConf, path)
val instance: org.apache.hadoop.mapred.InputFormat[_, _] =
diff --git a/core/src/main/scala/spark/scheduler/JobListener.scala b/core/src/main/scala/org/apache/spark/scheduler/JobListener.scala
index af108b8fec..50c2b9acd6 100644
--- a/core/src/main/scala/spark/scheduler/JobListener.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/JobListener.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
/**
* Interface used to listen for job completion or failure events after submitting a job to the
diff --git a/core/src/main/scala/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
index 85b5ddd4a8..c8b78bf00a 100644
--- a/core/src/main/scala/spark/scheduler/JobLogger.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import java.io.PrintWriter
import java.io.File
@@ -23,11 +23,14 @@ import java.io.FileNotFoundException
import java.text.SimpleDateFormat
import java.util.{Date, Properties}
import java.util.concurrent.LinkedBlockingQueue
+
import scala.collection.mutable.{Map, HashMap, ListBuffer}
import scala.io.Source
-import spark._
-import spark.executor.TaskMetrics
-import spark.scheduler.cluster.TaskInfo
+
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.scheduler.cluster.TaskInfo
// Used to record runtime information for each job, including RDD graph
// tasks' start/stop shuffle information and information from outside
@@ -53,29 +56,6 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
def getJobIDToStages = jobIDToStages
def getEventQueue = eventQueue
- new Thread("JobLogger") {
- setDaemon(true)
- override def run() {
- while (true) {
- val event = eventQueue.take
- logDebug("Got event of type " + event.getClass.getName)
- event match {
- case SparkListenerJobStart(job, properties) =>
- processJobStartEvent(job, properties)
- case SparkListenerStageSubmitted(stage, taskSize) =>
- processStageSubmittedEvent(stage, taskSize)
- case StageCompleted(stageInfo) =>
- processStageCompletedEvent(stageInfo)
- case SparkListenerJobEnd(job, result) =>
- processJobEndEvent(job, result)
- case SparkListenerTaskEnd(task, reason, taskInfo, taskMetrics) =>
- processTaskEndEvent(task, reason, taskInfo, taskMetrics)
- case _ =>
- }
- }
- }
- }.start()
-
// Create a folder for log files, the folder's name is the creation time of the jobLogger
protected def createLogDir() {
val dir = new File(logDir + "/" + logDirName + "/")
@@ -123,7 +103,7 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
stageIDToJobID.get(stageID).foreach(jobID => jobLogInfo(jobID, info, withTime))
protected def buildJobDep(jobID: Int, stage: Stage) {
- if (stage.priority == jobID) {
+ if (stage.jobId == jobID) {
jobIDToStages.get(jobID) match {
case Some(stageList) => stageList += stage
case None => val stageList = new ListBuffer[Stage]
@@ -199,12 +179,12 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
}else{
stageInfo = "STAGE_ID=" + stage.id + " RESULT_STAGE"
}
- if (stage.priority == jobID) {
+ if (stage.jobId == jobID) {
jobLogInfo(jobID, indentString(indent) + stageInfo, false)
recordRddInStageGraph(jobID, stage.rdd, indent)
stage.parents.foreach(recordStageDepGraph(jobID, _, indent + 2))
} else
- jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.priority, false)
+ jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.jobId, false)
}
// Record task metrics into job log files
@@ -236,37 +216,32 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
}
override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) {
- eventQueue.put(stageSubmitted)
- }
-
- protected def processStageSubmittedEvent(stage: Stage, taskSize: Int) {
- stageLogInfo(stage.id, "STAGE_ID=" + stage.id + " STATUS=SUBMITTED" + " TASK_SIZE=" + taskSize)
+ stageLogInfo(
+ stageSubmitted.stage.id,
+ "STAGE_ID=%d STATUS=SUBMITTED TASK_SIZE=%d".format(
+ stageSubmitted.stage.id, stageSubmitted.taskSize))
}
override def onStageCompleted(stageCompleted: StageCompleted) {
- eventQueue.put(stageCompleted)
- }
-
- protected def processStageCompletedEvent(stageInfo: StageInfo) {
- stageLogInfo(stageInfo.stage.id, "STAGE_ID=" +
- stageInfo.stage.id + " STATUS=COMPLETED")
+ stageLogInfo(
+ stageCompleted.stageInfo.stage.id,
+ "STAGE_ID=%d STATUS=COMPLETED".format(stageCompleted.stageInfo.stage.id))
}
-
- override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
- eventQueue.put(taskEnd)
- }
- protected def processTaskEndEvent(task: Task[_], reason: TaskEndReason,
- taskInfo: TaskInfo, taskMetrics: TaskMetrics) {
+ override def onTaskStart(taskStart: SparkListenerTaskStart) { }
+
+ override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
+ val task = taskEnd.task
+ val taskInfo = taskEnd.taskInfo
var taskStatus = ""
task match {
case resultTask: ResultTask[_, _] => taskStatus = "TASK_TYPE=RESULT_TASK"
case shuffleMapTask: ShuffleMapTask => taskStatus = "TASK_TYPE=SHUFFLE_MAP_TASK"
}
- reason match {
+ taskEnd.reason match {
case Success => taskStatus += " STATUS=SUCCESS"
- recordTaskMetrics(task.stageId, taskStatus, taskInfo, taskMetrics)
+ recordTaskMetrics(task.stageId, taskStatus, taskInfo, taskEnd.taskMetrics)
case Resubmitted =>
taskStatus += " STATUS=RESUBMITTED TID=" + taskInfo.taskId +
" STAGE_ID=" + task.stageId
@@ -285,39 +260,34 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
}
override def onJobEnd(jobEnd: SparkListenerJobEnd) {
- eventQueue.put(jobEnd)
- }
-
- protected def processJobEndEvent(job: ActiveJob, reason: JobResult) {
- var info = "JOB_ID=" + job.runId
- reason match {
+ val job = jobEnd.job
+ var info = "JOB_ID=" + job.jobId
+ jobEnd.jobResult match {
case JobSucceeded => info += " STATUS=SUCCESS"
case JobFailed(exception, _) =>
info += " STATUS=FAILED REASON="
exception.getMessage.split("\\s+").foreach(info += _ + "_")
case _ =>
}
- jobLogInfo(job.runId, info.substring(0, info.length - 1).toUpperCase)
- closeLogWriter(job.runId)
+ jobLogInfo(job.jobId, info.substring(0, info.length - 1).toUpperCase)
+ closeLogWriter(job.jobId)
}
protected def recordJobProperties(jobID: Int, properties: Properties) {
if(properties != null) {
- val annotation = properties.getProperty("spark.job.annotation", "")
- jobLogInfo(jobID, annotation, false)
+ val description = properties.getProperty(SparkContext.SPARK_JOB_DESCRIPTION, "")
+ jobLogInfo(jobID, description, false)
}
}
override def onJobStart(jobStart: SparkListenerJobStart) {
- eventQueue.put(jobStart)
- }
-
- protected def processJobStartEvent(job: ActiveJob, properties: Properties) {
- createLogWriter(job.runId)
- recordJobProperties(job.runId, properties)
- buildJobDep(job.runId, job.finalStage)
- recordStageDep(job.runId)
- recordStageDepGraph(job.runId, job.finalStage)
- jobLogInfo(job.runId, "JOB_ID=" + job.runId + " STATUS=STARTED")
+ val job = jobStart.job
+ val properties = jobStart.properties
+ createLogWriter(job.jobId)
+ recordJobProperties(job.jobId, properties)
+ buildJobDep(job.jobId, job.finalStage)
+ recordStageDep(job.jobId)
+ recordStageDepGraph(job.jobId, job.finalStage)
+ jobLogInfo(job.jobId, "JOB_ID=" + job.jobId + " STATUS=STARTED")
}
}
diff --git a/core/src/main/scala/spark/scheduler/JobResult.scala b/core/src/main/scala/org/apache/spark/scheduler/JobResult.scala
index a61b335152..c381348a8d 100644
--- a/core/src/main/scala/spark/scheduler/JobResult.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/JobResult.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
/**
* A result of a job in the DAGScheduler.
diff --git a/core/src/main/scala/spark/scheduler/JobWaiter.scala b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
index 69cd161c1f..200d881799 100644
--- a/core/src/main/scala/spark/scheduler/JobWaiter.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import scala.collection.mutable.ArrayBuffer
diff --git a/core/src/main/scala/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala
index 2f6a68ee85..1c61687f28 100644
--- a/core/src/main/scala/spark/scheduler/MapStatus.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
-import spark.storage.BlockManagerId
+import org.apache.spark.storage.BlockManagerId
import java.io.{ObjectOutput, ObjectInput, Externalizable}
/**
diff --git a/core/src/main/scala/spark/scheduler/ResultTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
index 361b1e6b91..2b007cbe82 100644
--- a/core/src/main/scala/spark/scheduler/ResultTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
@@ -15,13 +15,16 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
-import spark._
import java.io._
-import util.{MetadataCleaner, TimeStampedHashMap}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.RDDCheckpointData
+import org.apache.spark.util.{MetadataCleaner, TimeStampedHashMap}
+
private[spark] object ResultTask {
// A simple map between the stage id to the serialized byte array of a task.
@@ -51,15 +54,13 @@ private[spark] object ResultTask {
}
def deserializeInfo(stageId: Int, bytes: Array[Byte]): (RDD[_], (TaskContext, Iterator[_]) => _) = {
- synchronized {
- val loader = Thread.currentThread.getContextClassLoader
- val in = new GZIPInputStream(new ByteArrayInputStream(bytes))
- val ser = SparkEnv.get.closureSerializer.newInstance
- val objIn = ser.deserializeStream(in)
- val rdd = objIn.readObject().asInstanceOf[RDD[_]]
- val func = objIn.readObject().asInstanceOf[(TaskContext, Iterator[_]) => _]
- return (rdd, func)
- }
+ val loader = Thread.currentThread.getContextClassLoader
+ val in = new GZIPInputStream(new ByteArrayInputStream(bytes))
+ val ser = SparkEnv.get.closureSerializer.newInstance
+ val objIn = ser.deserializeStream(in)
+ val rdd = objIn.readObject().asInstanceOf[RDD[_]]
+ val func = objIn.readObject().asInstanceOf[(TaskContext, Iterator[_]) => _]
+ return (rdd, func)
}
def clearCache() {
@@ -75,7 +76,7 @@ private[spark] class ResultTask[T, U](
var rdd: RDD[T],
var func: (TaskContext, Iterator[T]) => U,
var partition: Int,
- @transient locs: Seq[String],
+ @transient locs: Seq[TaskLocation],
val outputId: Int)
extends Task[U](stageId) with Externalizable {
@@ -87,11 +88,8 @@ private[spark] class ResultTask[T, U](
rdd.partitions(partition)
}
- private val preferredLocs: Seq[String] = if (locs == null) Nil else locs.toSet.toSeq
-
- {
- // DEBUG code
- preferredLocs.foreach (hostPort => Utils.checkHost(Utils.parseHostPort(hostPort)._1, "preferredLocs : " + preferredLocs))
+ @transient private val preferredLocs: Seq[TaskLocation] = {
+ if (locs == null) Nil else locs.toSet.toSeq
}
override def run(attemptId: Long): U = {
@@ -104,7 +102,7 @@ private[spark] class ResultTask[T, U](
}
}
- override def preferredLocations: Seq[String] = preferredLocs
+ override def preferredLocations: Seq[TaskLocation] = preferredLocs
override def toString = "ResultTask(" + stageId + ", " + partition + ")"
@@ -118,6 +116,7 @@ private[spark] class ResultTask[T, U](
out.write(bytes)
out.writeInt(partition)
out.writeInt(outputId)
+ out.writeLong(epoch)
out.writeObject(split)
}
}
@@ -132,6 +131,7 @@ private[spark] class ResultTask[T, U](
func = func_.asInstanceOf[(TaskContext, Iterator[T]) => U]
partition = in.readInt()
val outputId = in.readInt()
+ epoch = in.readLong()
split = in.readObject().asInstanceOf[Partition]
}
}
diff --git a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index 1c25605f75..764775fede 100644
--- a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -15,24 +15,19 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import java.io._
-import java.util.{HashMap => JHashMap}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}
-import scala.collection.mutable.{ArrayBuffer, HashMap}
-import scala.collection.JavaConversions._
+import scala.collection.mutable.HashMap
-import it.unimi.dsi.fastutil.io.FastBufferedOutputStream
-
-import com.ning.compress.lzf.LZFInputStream
-import com.ning.compress.lzf.LZFOutputStream
-
-import spark._
-import spark.executor.ShuffleWriteMetrics
-import spark.storage._
-import spark.util.{TimeStampedHashMap, MetadataCleaner}
+import org.apache.spark._
+import org.apache.spark.executor.ShuffleWriteMetrics
+import org.apache.spark.storage._
+import org.apache.spark.util.{TimeStampedHashMap, MetadataCleaner}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.RDDCheckpointData
private[spark] object ShuffleMapTask {
@@ -95,25 +90,18 @@ private[spark] class ShuffleMapTask(
var rdd: RDD[_],
var dep: ShuffleDependency[_,_],
var partition: Int,
- @transient private var locs: Seq[String])
+ @transient private var locs: Seq[TaskLocation])
extends Task[MapStatus](stageId)
with Externalizable
with Logging {
protected def this() = this(0, null, null, 0, null)
- @transient private val preferredLocs: Seq[String] = if (locs == null) Nil else locs.toSet.toSeq
-
- {
- // DEBUG code
- preferredLocs.foreach (hostPort => Utils.checkHost(Utils.parseHostPort(hostPort)._1, "preferredLocs : " + preferredLocs))
+ @transient private val preferredLocs: Seq[TaskLocation] = {
+ if (locs == null) Nil else locs.toSet.toSeq
}
- var split = if (rdd == null) {
- null
- } else {
- rdd.partitions(partition)
- }
+ var split = if (rdd == null) null else rdd.partitions(partition)
override def writeExternal(out: ObjectOutput) {
RDDCheckpointData.synchronized {
@@ -123,7 +111,7 @@ private[spark] class ShuffleMapTask(
out.writeInt(bytes.length)
out.write(bytes)
out.writeInt(partition)
- out.writeLong(generation)
+ out.writeLong(epoch)
out.writeObject(split)
}
}
@@ -137,7 +125,7 @@ private[spark] class ShuffleMapTask(
rdd = rdd_
dep = dep_
partition = in.readInt()
- generation = in.readLong()
+ epoch = in.readLong()
split = in.readObject().asInstanceOf[Partition]
}
@@ -159,7 +147,7 @@ private[spark] class ShuffleMapTask(
// Write the map output to its associated buckets.
for (elem <- rdd.iterator(split, taskContext)) {
- val pair = elem.asInstanceOf[(Any, Any)]
+ val pair = elem.asInstanceOf[Product2[Any, Any]]
val bucketId = dep.partitioner.getPartition(pair._1)
buckets.writers(bucketId).write(pair)
}
@@ -197,7 +185,7 @@ private[spark] class ShuffleMapTask(
}
}
- override def preferredLocations: Seq[String] = preferredLocs
+ override def preferredLocations: Seq[TaskLocation] = preferredLocs
override def toString = "ShuffleMapTask(%d, %d)".format(stageId, partition)
}
diff --git a/core/src/main/scala/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
index 4fb1c5d42d..c3cf4b8907 100644
--- a/core/src/main/scala/spark/scheduler/SparkListener.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
@@ -15,27 +15,30 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import java.util.Properties
-import spark.scheduler.cluster.TaskInfo
-import spark.util.Distribution
-import spark.{Logging, SparkContext, TaskEndReason, Utils}
-import spark.executor.TaskMetrics
+import org.apache.spark.scheduler.cluster.TaskInfo
+import org.apache.spark.util.{Utils, Distribution}
+import org.apache.spark.{Logging, SparkContext, TaskEndReason}
+import org.apache.spark.executor.TaskMetrics
sealed trait SparkListenerEvents
-case class SparkListenerStageSubmitted(stage: Stage, taskSize: Int) extends SparkListenerEvents
+case class SparkListenerStageSubmitted(stage: Stage, taskSize: Int, properties: Properties)
+ extends SparkListenerEvents
case class StageCompleted(val stageInfo: StageInfo) extends SparkListenerEvents
+case class SparkListenerTaskStart(task: Task[_], taskInfo: TaskInfo) extends SparkListenerEvents
+
case class SparkListenerTaskEnd(task: Task[_], reason: TaskEndReason, taskInfo: TaskInfo,
taskMetrics: TaskMetrics) extends SparkListenerEvents
-case class SparkListenerJobStart(job: ActiveJob, properties: Properties = null)
+case class SparkListenerJobStart(job: ActiveJob, properties: Properties = null)
extends SparkListenerEvents
-case class SparkListenerJobEnd(job: ActiveJob, jobResult: JobResult)
+case class SparkListenerJobEnd(job: ActiveJob, jobResult: JobResult)
extends SparkListenerEvents
trait SparkListener {
@@ -43,12 +46,17 @@ trait SparkListener {
* Called when a stage is completed, with information on the completed stage
*/
def onStageCompleted(stageCompleted: StageCompleted) { }
-
+
/**
* Called when a stage is submitted
*/
def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) { }
-
+
+ /**
+ * Called when a task starts
+ */
+ def onTaskStart(taskEnd: SparkListenerTaskStart) { }
+
/**
* Called when a task ends
*/
@@ -58,12 +66,12 @@ trait SparkListener {
* Called when a job starts
*/
def onJobStart(jobStart: SparkListenerJobStart) { }
-
+
/**
* Called when a job ends
*/
def onJobEnd(jobEnd: SparkListenerJobEnd) { }
-
+
}
/**
@@ -71,7 +79,7 @@ trait SparkListener {
*/
class StatsReportListener extends SparkListener with Logging {
override def onStageCompleted(stageCompleted: StageCompleted) {
- import spark.scheduler.StatsReportListener._
+ import org.apache.spark.scheduler.StatsReportListener._
implicit val sc = stageCompleted
this.logInfo("Finished stage: " + stageCompleted.stageInfo)
showMillisDistribution("task runtime:", (info, _) => Some(info.duration))
@@ -145,7 +153,7 @@ object StatsReportListener extends Logging {
}
def showBytesDistribution(heading: String, dist: Distribution) {
- showDistribution(heading, dist, (d => Utils.memoryBytesToString(d.toLong)): Double => String)
+ showDistribution(heading, dist, (d => Utils.bytesToString(d.toLong)): Double => String)
}
def showMillisDistribution(heading: String, dOpt: Option[Distribution]) {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala
new file mode 100644
index 0000000000..a65e1ecd6d
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler
+
+import java.util.concurrent.LinkedBlockingQueue
+
+import scala.collection.mutable.{ArrayBuffer, SynchronizedBuffer}
+
+import org.apache.spark.Logging
+
+/** Asynchronously passes SparkListenerEvents to registered SparkListeners. */
+private[spark] class SparkListenerBus() extends Logging {
+ private val sparkListeners = new ArrayBuffer[SparkListener]() with SynchronizedBuffer[SparkListener]
+
+ /* Cap the capacity of the SparkListenerEvent queue so we get an explicit error (rather than
+ * an OOM exception) if it's perpetually being added to more quickly than it's being drained. */
+ private val EVENT_QUEUE_CAPACITY = 10000
+ private val eventQueue = new LinkedBlockingQueue[SparkListenerEvents](EVENT_QUEUE_CAPACITY)
+ private var queueFullErrorMessageLogged = false
+
+ new Thread("SparkListenerBus") {
+ setDaemon(true)
+ override def run() {
+ while (true) {
+ val event = eventQueue.take
+ event match {
+ case stageSubmitted: SparkListenerStageSubmitted =>
+ sparkListeners.foreach(_.onStageSubmitted(stageSubmitted))
+ case stageCompleted: StageCompleted =>
+ sparkListeners.foreach(_.onStageCompleted(stageCompleted))
+ case jobStart: SparkListenerJobStart =>
+ sparkListeners.foreach(_.onJobStart(jobStart))
+ case jobEnd: SparkListenerJobEnd =>
+ sparkListeners.foreach(_.onJobEnd(jobEnd))
+ case taskStart: SparkListenerTaskStart =>
+ sparkListeners.foreach(_.onTaskStart(taskStart))
+ case taskEnd: SparkListenerTaskEnd =>
+ sparkListeners.foreach(_.onTaskEnd(taskEnd))
+ case _ =>
+ }
+ }
+ }
+ }.start()
+
+ def addListener(listener: SparkListener) {
+ sparkListeners += listener
+ }
+
+ def post(event: SparkListenerEvents) {
+ val eventAdded = eventQueue.offer(event)
+ if (!eventAdded && !queueFullErrorMessageLogged) {
+ logError("Dropping SparkListenerEvent because no remaining room in event queue. " +
+ "This likely means one of the SparkListeners is too slow and cannot keep up with the " +
+ "rate at which tasks are being started by the scheduler.")
+ queueFullErrorMessageLogged = true
+ }
+ }
+}
+
diff --git a/core/src/main/scala/spark/scheduler/SplitInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
index 4e3661ec5d..5b40a3eb29 100644
--- a/core/src/main/scala/spark/scheduler/SplitInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import collection.mutable.ArrayBuffer
diff --git a/core/src/main/scala/spark/scheduler/Stage.scala b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala
index 5428daeb94..aa293dc6b3 100644
--- a/core/src/main/scala/spark/scheduler/Stage.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala
@@ -15,12 +15,11 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
-import java.net.URI
-
-import spark._
-import spark.storage.BlockManagerId
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.storage.BlockManagerId
/**
* A stage is a set of independent tasks all computing the same function that need to run as part
@@ -33,15 +32,16 @@ import spark.storage.BlockManagerId
* initiated a job (e.g. count(), save(), etc). For shuffle map stages, we also track the nodes
* that each output partition is on.
*
- * Each Stage also has a priority, which is (by default) based on the job it was submitted in.
- * This allows Stages from earlier jobs to be computed first or recovered faster on failure.
+ * Each Stage also has a jobId, identifying the job that first submitted the stage. When FIFO
+ * scheduling is used, this allows Stages from earlier jobs to be computed first or recovered
+ * faster on failure.
*/
private[spark] class Stage(
val id: Int,
val rdd: RDD[_],
val shuffleDep: Option[ShuffleDependency[_,_]], // Output shuffle if stage is a map stage
val parents: List[Stage],
- val priority: Int,
+ val jobId: Int,
callSite: Option[String])
extends Logging {
diff --git a/core/src/main/scala/spark/scheduler/StageInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala
index c4026f995a..72cb1c9ce8 100644
--- a/core/src/main/scala/spark/scheduler/StageInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
-import spark.scheduler.cluster.TaskInfo
+import org.apache.spark.scheduler.cluster.TaskInfo
import scala.collection._
-import spark.executor.TaskMetrics
+import org.apache.spark.executor.TaskMetrics
case class StageInfo(
val stage: Stage,
diff --git a/core/src/main/scala/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
index 50768d43e0..598d91752a 100644
--- a/core/src/main/scala/spark/scheduler/Task.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
@@ -15,24 +15,24 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
-import spark.serializer.SerializerInstance
+import org.apache.spark.serializer.SerializerInstance
import java.io.{DataInputStream, DataOutputStream}
import java.nio.ByteBuffer
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream
-import spark.util.ByteBufferInputStream
+import org.apache.spark.util.ByteBufferInputStream
import scala.collection.mutable.HashMap
-import spark.executor.TaskMetrics
+import org.apache.spark.executor.TaskMetrics
/**
* A task to execute on a worker node.
*/
private[spark] abstract class Task[T](val stageId: Int) extends Serializable {
def run(attemptId: Long): T
- def preferredLocations: Seq[String] = Nil
+ def preferredLocations: Seq[TaskLocation] = Nil
- var generation: Long = -1 // Map output tracker generation. Will be set by TaskScheduler.
+ var epoch: Long = -1 // Map output tracker epoch. Will be set by TaskScheduler.
var metrics: Option[TaskMetrics] = None
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala
new file mode 100644
index 0000000000..67c9a6760b
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler
+
+/**
+ * A location where a task should run. This can either be a host or a (host, executorID) pair.
+ * In the latter case, we will prefer to launch the task on that executorID, but our next level
+ * of preference will be executors on the same host if this is not possible.
+ */
+private[spark]
+class TaskLocation private (val host: String, val executorId: Option[String]) extends Serializable {
+ override def toString: String = "TaskLocation(" + host + ", " + executorId + ")"
+}
+
+private[spark] object TaskLocation {
+ def apply(host: String, executorId: String) = new TaskLocation(host, Some(executorId))
+
+ def apply(host: String) = new TaskLocation(host, None)
+}
diff --git a/core/src/main/scala/spark/scheduler/TaskResult.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala
index dc0621ea7b..5c7e5bb977 100644
--- a/core/src/main/scala/spark/scheduler/TaskResult.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala
@@ -15,22 +15,33 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import java.io._
import scala.collection.mutable.Map
-import spark.executor.TaskMetrics
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.{SparkEnv}
+import java.nio.ByteBuffer
+import org.apache.spark.util.Utils
// Task result. Also contains updates to accumulator variables.
// TODO: Use of distributed cache to return result is a hack to get around
// what seems to be a bug with messages over 60KB in libprocess; fix it
private[spark]
-class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics) extends Externalizable {
+class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics)
+ extends Externalizable
+{
def this() = this(null.asInstanceOf[T], null, null)
override def writeExternal(out: ObjectOutput) {
- out.writeObject(value)
+
+ val objectSer = SparkEnv.get.serializer.newInstance()
+ val bb = objectSer.serialize(value)
+
+ out.writeInt(bb.remaining())
+ Utils.writeByteBuffer(bb, out)
+
out.writeInt(accumUpdates.size)
for ((key, value) <- accumUpdates) {
out.writeLong(key)
@@ -40,7 +51,14 @@ class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics:
}
override def readExternal(in: ObjectInput) {
- value = in.readObject().asInstanceOf[T]
+
+ val objectSer = SparkEnv.get.serializer.newInstance()
+
+ val blen = in.readInt()
+ val byteVal = new Array[Byte](blen)
+ in.readFully(byteVal)
+ value = objectSer.deserialize(ByteBuffer.wrap(byteVal))
+
val numUpdates = in.readInt
if (numUpdates == 0) {
accumUpdates = null
diff --git a/core/src/main/scala/spark/scheduler/TaskScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
index 5188308006..63be8ba3f5 100644
--- a/core/src/main/scala/spark/scheduler/TaskScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
@@ -15,8 +15,10 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
+import org.apache.spark.scheduler.cluster.Pool
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
/**
* Low-level task scheduler interface, implemented by both ClusterScheduler and LocalScheduler.
* These schedulers get sets of tasks submitted to them from the DAGScheduler for each stage,
@@ -25,6 +27,11 @@ package spark.scheduler
* the TaskSchedulerListener interface.
*/
private[spark] trait TaskScheduler {
+
+ def rootPool: Pool
+
+ def schedulingMode: SchedulingMode
+
def start(): Unit
// Invoked after system has successfully initialized (typically in spark context).
diff --git a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerListener.scala
index 245e7ccb52..83be051c1a 100644
--- a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerListener.scala
@@ -15,24 +15,27 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
-import spark.scheduler.cluster.TaskInfo
+import org.apache.spark.scheduler.cluster.TaskInfo
import scala.collection.mutable.Map
-import spark.TaskEndReason
-import spark.executor.TaskMetrics
+import org.apache.spark.TaskEndReason
+import org.apache.spark.executor.TaskMetrics
/**
* Interface for getting events back from the TaskScheduler.
*/
private[spark] trait TaskSchedulerListener {
+ // A task has started.
+ def taskStarted(task: Task[_], taskInfo: TaskInfo)
+
// A task has finished or failed.
def taskEnded(task: Task[_], reason: TaskEndReason, result: Any, accumUpdates: Map[Long, Any],
taskInfo: TaskInfo, taskMetrics: TaskMetrics): Unit
// A node was added to the cluster.
- def executorGained(execId: String, hostPort: String): Unit
+ def executorGained(execId: String, host: String): Unit
// A node was lost from the cluster.
def executorLost(execId: String): Unit
diff --git a/core/src/main/scala/spark/scheduler/TaskSet.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
index dc3550dd0b..c3ad325156 100644
--- a/core/src/main/scala/spark/scheduler/TaskSet.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import java.util.Properties
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
new file mode 100644
index 0000000000..3196ab5022
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler.cluster
+
+import java.lang.{Boolean => JBoolean}
+
+import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.HashMap
+import scala.collection.mutable.HashSet
+
+import org.apache.spark._
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.scheduler._
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
+import java.nio.ByteBuffer
+import java.util.concurrent.atomic.AtomicLong
+import java.util.{TimerTask, Timer}
+
+/**
+ * The main TaskScheduler implementation, for running tasks on a cluster. Clients should first call
+ * initialize() and start(), then submit task sets through the runTasks method.
+ *
+ * This class can work with multiple types of clusters by acting through a SchedulerBackend.
+ * It handles common logic, like determining a scheduling order across jobs, waking up to launch
+ * speculative tasks, etc.
+ *
+ * THREADING: SchedulerBackends and task-submitting clients can call this class from multiple
+ * threads, so it needs locks in public API methods to maintain its state. In addition, some
+ * SchedulerBackends sycnchronize on themselves when they want to send events here, and then
+ * acquire a lock on us, so we need to make sure that we don't try to lock the backend while
+ * we are holding a lock on ourselves.
+ */
+private[spark] class ClusterScheduler(val sc: SparkContext)
+ extends TaskScheduler
+ with Logging
+{
+ // How often to check for speculative tasks
+ val SPECULATION_INTERVAL = System.getProperty("spark.speculation.interval", "100").toLong
+
+ // Threshold above which we warn user initial TaskSet may be starved
+ val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong
+
+ val activeTaskSets = new HashMap[String, TaskSetManager]
+
+ val taskIdToTaskSetId = new HashMap[Long, String]
+ val taskIdToExecutorId = new HashMap[Long, String]
+ val taskSetTaskIds = new HashMap[String, HashSet[Long]]
+
+ @volatile private var hasReceivedTask = false
+ @volatile private var hasLaunchedTask = false
+ private val starvationTimer = new Timer(true)
+
+ // Incrementing Mesos task IDs
+ val nextTaskId = new AtomicLong(0)
+
+ // Which executor IDs we have executors on
+ val activeExecutorIds = new HashSet[String]
+
+ // The set of executors we have on each host; this is used to compute hostsAlive, which
+ // in turn is used to decide when we can attain data locality on a given host
+ private val executorsByHost = new HashMap[String, HashSet[String]]
+
+ private val executorIdToHost = new HashMap[String, String]
+
+ // JAR server, if any JARs were added by the user to the SparkContext
+ var jarServer: HttpServer = null
+
+ // URIs of JARs to pass to executor
+ var jarUris: String = ""
+
+ // Listener object to pass upcalls into
+ var listener: TaskSchedulerListener = null
+
+ var backend: SchedulerBackend = null
+
+ val mapOutputTracker = SparkEnv.get.mapOutputTracker
+
+ var schedulableBuilder: SchedulableBuilder = null
+ var rootPool: Pool = null
+ // default scheduler is FIFO
+ val schedulingMode: SchedulingMode = SchedulingMode.withName(
+ System.getProperty("spark.cluster.schedulingmode", "FIFO"))
+
+ override def setListener(listener: TaskSchedulerListener) {
+ this.listener = listener
+ }
+
+ def initialize(context: SchedulerBackend) {
+ backend = context
+ // temporarily set rootPool name to empty
+ rootPool = new Pool("", schedulingMode, 0, 0)
+ schedulableBuilder = {
+ schedulingMode match {
+ case SchedulingMode.FIFO =>
+ new FIFOSchedulableBuilder(rootPool)
+ case SchedulingMode.FAIR =>
+ new FairSchedulableBuilder(rootPool)
+ }
+ }
+ schedulableBuilder.buildPools()
+ }
+
+ def newTaskId(): Long = nextTaskId.getAndIncrement()
+
+ override def start() {
+ backend.start()
+
+ if (System.getProperty("spark.speculation", "false").toBoolean) {
+ new Thread("ClusterScheduler speculation check") {
+ setDaemon(true)
+
+ override def run() {
+ logInfo("Starting speculative execution thread")
+ while (true) {
+ try {
+ Thread.sleep(SPECULATION_INTERVAL)
+ } catch {
+ case e: InterruptedException => {}
+ }
+ checkSpeculatableTasks()
+ }
+ }
+ }.start()
+ }
+ }
+
+ override def submitTasks(taskSet: TaskSet) {
+ val tasks = taskSet.tasks
+ logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks")
+ this.synchronized {
+ val manager = new ClusterTaskSetManager(this, taskSet)
+ activeTaskSets(taskSet.id) = manager
+ schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)
+ taskSetTaskIds(taskSet.id) = new HashSet[Long]()
+
+ if (!hasReceivedTask) {
+ starvationTimer.scheduleAtFixedRate(new TimerTask() {
+ override def run() {
+ if (!hasLaunchedTask) {
+ logWarning("Initial job has not accepted any resources; " +
+ "check your cluster UI to ensure that workers are registered " +
+ "and have sufficient memory")
+ } else {
+ this.cancel()
+ }
+ }
+ }, STARVATION_TIMEOUT, STARVATION_TIMEOUT)
+ }
+ hasReceivedTask = true
+ }
+ backend.reviveOffers()
+ }
+
+ def taskSetFinished(manager: TaskSetManager) {
+ this.synchronized {
+ activeTaskSets -= manager.taskSet.id
+ manager.parent.removeSchedulable(manager)
+ logInfo("Remove TaskSet %s from pool %s".format(manager.taskSet.id, manager.parent.name))
+ taskIdToTaskSetId --= taskSetTaskIds(manager.taskSet.id)
+ taskIdToExecutorId --= taskSetTaskIds(manager.taskSet.id)
+ taskSetTaskIds.remove(manager.taskSet.id)
+ }
+ }
+
+ /**
+ * Called by cluster manager to offer resources on slaves. We respond by asking our active task
+ * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so
+ * that tasks are balanced across the cluster.
+ */
+ def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = synchronized {
+ SparkEnv.set(sc.env)
+
+ // Mark each slave as alive and remember its hostname
+ for (o <- offers) {
+ executorIdToHost(o.executorId) = o.host
+ if (!executorsByHost.contains(o.host)) {
+ executorsByHost(o.host) = new HashSet[String]()
+ executorGained(o.executorId, o.host)
+ }
+ }
+
+ // Build a list of tasks to assign to each worker
+ val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores))
+ val availableCpus = offers.map(o => o.cores).toArray
+ val sortedTaskSets = rootPool.getSortedTaskSetQueue()
+ for (taskSet <- sortedTaskSets) {
+ logDebug("parentName: %s, name: %s, runningTasks: %s".format(
+ taskSet.parent.name, taskSet.name, taskSet.runningTasks))
+ }
+
+ // Take each TaskSet in our scheduling order, and then offer it each node in increasing order
+ // of locality levels so that it gets a chance to launch local tasks on all of them.
+ var launchedTask = false
+ for (taskSet <- sortedTaskSets; maxLocality <- TaskLocality.values) {
+ do {
+ launchedTask = false
+ for (i <- 0 until offers.size) {
+ val execId = offers(i).executorId
+ val host = offers(i).host
+ for (task <- taskSet.resourceOffer(execId, host, availableCpus(i), maxLocality)) {
+ tasks(i) += task
+ val tid = task.taskId
+ taskIdToTaskSetId(tid) = taskSet.taskSet.id
+ taskSetTaskIds(taskSet.taskSet.id) += tid
+ taskIdToExecutorId(tid) = execId
+ activeExecutorIds += execId
+ executorsByHost(host) += execId
+ availableCpus(i) -= 1
+ launchedTask = true
+ }
+ }
+ } while (launchedTask)
+ }
+
+ if (tasks.size > 0) {
+ hasLaunchedTask = true
+ }
+ return tasks
+ }
+
+ def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) {
+ var taskSetToUpdate: Option[TaskSetManager] = None
+ var failedExecutor: Option[String] = None
+ var taskFailed = false
+ synchronized {
+ try {
+ if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) {
+ // We lost this entire executor, so remember that it's gone
+ val execId = taskIdToExecutorId(tid)
+ if (activeExecutorIds.contains(execId)) {
+ removeExecutor(execId)
+ failedExecutor = Some(execId)
+ }
+ }
+ taskIdToTaskSetId.get(tid) match {
+ case Some(taskSetId) =>
+ if (activeTaskSets.contains(taskSetId)) {
+ taskSetToUpdate = Some(activeTaskSets(taskSetId))
+ }
+ if (TaskState.isFinished(state)) {
+ taskIdToTaskSetId.remove(tid)
+ if (taskSetTaskIds.contains(taskSetId)) {
+ taskSetTaskIds(taskSetId) -= tid
+ }
+ taskIdToExecutorId.remove(tid)
+ }
+ if (state == TaskState.FAILED) {
+ taskFailed = true
+ }
+ case None =>
+ logInfo("Ignoring update from TID " + tid + " because its task set is gone")
+ }
+ } catch {
+ case e: Exception => logError("Exception in statusUpdate", e)
+ }
+ }
+ // Update the task set and DAGScheduler without holding a lock on this, since that can deadlock
+ if (taskSetToUpdate != None) {
+ taskSetToUpdate.get.statusUpdate(tid, state, serializedData)
+ }
+ if (failedExecutor != None) {
+ listener.executorLost(failedExecutor.get)
+ backend.reviveOffers()
+ }
+ if (taskFailed) {
+ // Also revive offers if a task had failed for some reason other than host lost
+ backend.reviveOffers()
+ }
+ }
+
+ def error(message: String) {
+ synchronized {
+ if (activeTaskSets.size > 0) {
+ // Have each task set throw a SparkException with the error
+ for ((taskSetId, manager) <- activeTaskSets) {
+ try {
+ manager.error(message)
+ } catch {
+ case e: Exception => logError("Exception in error callback", e)
+ }
+ }
+ } else {
+ // No task sets are active but we still got an error. Just exit since this
+ // must mean the error is during registration.
+ // It might be good to do something smarter here in the future.
+ logError("Exiting due to error from cluster scheduler: " + message)
+ System.exit(1)
+ }
+ }
+ }
+
+ override def stop() {
+ if (backend != null) {
+ backend.stop()
+ }
+ if (jarServer != null) {
+ jarServer.stop()
+ }
+
+ // sleeping for an arbitrary 5 seconds : to ensure that messages are sent out.
+ // TODO: Do something better !
+ Thread.sleep(5000L)
+ }
+
+ override def defaultParallelism() = backend.defaultParallelism()
+
+
+ // Check for speculatable tasks in all our active jobs.
+ def checkSpeculatableTasks() {
+ var shouldRevive = false
+ synchronized {
+ shouldRevive = rootPool.checkSpeculatableTasks()
+ }
+ if (shouldRevive) {
+ backend.reviveOffers()
+ }
+ }
+
+ // Check for pending tasks in all our active jobs.
+ def hasPendingTasks: Boolean = {
+ synchronized {
+ rootPool.hasPendingTasks()
+ }
+ }
+
+ def executorLost(executorId: String, reason: ExecutorLossReason) {
+ var failedExecutor: Option[String] = None
+
+ synchronized {
+ if (activeExecutorIds.contains(executorId)) {
+ val hostPort = executorIdToHost(executorId)
+ logError("Lost executor %s on %s: %s".format(executorId, hostPort, reason))
+ removeExecutor(executorId)
+ failedExecutor = Some(executorId)
+ } else {
+ // We may get multiple executorLost() calls with different loss reasons. For example, one
+ // may be triggered by a dropped connection from the slave while another may be a report
+ // of executor termination from Mesos. We produce log messages for both so we eventually
+ // report the termination reason.
+ logError("Lost an executor " + executorId + " (already removed): " + reason)
+ }
+ }
+ // Call listener.executorLost without holding the lock on this to prevent deadlock
+ if (failedExecutor != None) {
+ listener.executorLost(failedExecutor.get)
+ backend.reviveOffers()
+ }
+ }
+
+ /** Remove an executor from all our data structures and mark it as lost */
+ private def removeExecutor(executorId: String) {
+ activeExecutorIds -= executorId
+ val host = executorIdToHost(executorId)
+ val execs = executorsByHost.getOrElse(host, new HashSet)
+ execs -= executorId
+ if (execs.isEmpty) {
+ executorsByHost -= host
+ }
+ executorIdToHost -= executorId
+ rootPool.executorLost(executorId, host)
+ }
+
+ def executorGained(execId: String, host: String) {
+ listener.executorGained(execId, host)
+ }
+
+ def getExecutorsAliveOnHost(host: String): Option[Set[String]] = synchronized {
+ executorsByHost.get(host).map(_.toSet)
+ }
+
+ def hasExecutorsAliveOnHost(host: String): Boolean = synchronized {
+ executorsByHost.contains(host)
+ }
+
+ def isExecutorAlive(execId: String): Boolean = synchronized {
+ activeExecutorIds.contains(execId)
+ }
+
+ // By default, rack is unknown
+ def getRackForHost(value: String): Option[String] = None
+}
+
+
+object ClusterScheduler {
+ /**
+ * Used to balance containers across hosts.
+ *
+ * Accepts a map of hosts to resource offers for that host, and returns a prioritized list of
+ * resource offers representing the order in which the offers should be used. The resource
+ * offers are ordered such that we'll allocate one container on each host before allocating a
+ * second container on any host, and so on, in order to reduce the damage if a host fails.
+ *
+ * For example, given <h1, [o1, o2, o3]>, <h2, [o4]>, <h1, [o5, o6]>, returns
+ * [o1, o5, o4, 02, o6, o3]
+ */
+ def prioritizeContainers[K, T] (map: HashMap[K, ArrayBuffer[T]]): List[T] = {
+ val _keyList = new ArrayBuffer[K](map.size)
+ _keyList ++= map.keys
+
+ // order keyList based on population of value in map
+ val keyList = _keyList.sortWith(
+ (left, right) => map(left).size > map(right).size
+ )
+
+ val retval = new ArrayBuffer[T](keyList.size * 2)
+ var index = 0
+ var found = true
+
+ while (found) {
+ found = false
+ for (key <- keyList) {
+ val containerList: ArrayBuffer[T] = map.get(key).getOrElse(null)
+ assert(containerList != null)
+ // Get the index'th entry for this host - if present
+ if (index < containerList.size){
+ retval += containerList.apply(index)
+ found = true
+ }
+ }
+ index += 1
+ }
+
+ retval.toList
+ }
+}
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala
new file mode 100644
index 0000000000..1b31c8c57e
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala
@@ -0,0 +1,712 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler.cluster
+
+import java.nio.ByteBuffer
+import java.util.{Arrays, NoSuchElementException}
+
+import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.HashMap
+import scala.collection.mutable.HashSet
+import scala.math.max
+import scala.math.min
+
+import org.apache.spark.{FetchFailed, Logging, Resubmitted, SparkEnv, Success, TaskEndReason, TaskState}
+import org.apache.spark.{ExceptionFailure, SparkException, TaskResultTooBigFailure}
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.scheduler._
+import scala.Some
+import org.apache.spark.FetchFailed
+import org.apache.spark.ExceptionFailure
+import org.apache.spark.TaskResultTooBigFailure
+import org.apache.spark.util.{SystemClock, Clock}
+
+
+/**
+ * Schedules the tasks within a single TaskSet in the ClusterScheduler. This class keeps track of
+ * the status of each task, retries tasks if they fail (up to a limited number of times), and
+ * handles locality-aware scheduling for this TaskSet via delay scheduling. The main interfaces
+ * to it are resourceOffer, which asks the TaskSet whether it wants to run a task on one node,
+ * and statusUpdate, which tells it that one of its tasks changed state (e.g. finished).
+ *
+ * THREADING: This class is designed to only be called from code with a lock on the
+ * ClusterScheduler (e.g. its event handlers). It should not be called from other threads.
+ */
+private[spark] class ClusterTaskSetManager(
+ sched: ClusterScheduler,
+ val taskSet: TaskSet,
+ clock: Clock = SystemClock)
+ extends TaskSetManager
+ with Logging
+{
+ // CPUs to request per task
+ val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toInt
+
+ // Maximum times a task is allowed to fail before failing the job
+ val MAX_TASK_FAILURES = System.getProperty("spark.task.maxFailures", "4").toInt
+
+ // Quantile of tasks at which to start speculation
+ val SPECULATION_QUANTILE = System.getProperty("spark.speculation.quantile", "0.75").toDouble
+ val SPECULATION_MULTIPLIER = System.getProperty("spark.speculation.multiplier", "1.5").toDouble
+
+ // Serializer for closures and tasks.
+ val env = SparkEnv.get
+ val ser = env.closureSerializer.newInstance()
+
+ val tasks = taskSet.tasks
+ val numTasks = tasks.length
+ val copiesRunning = new Array[Int](numTasks)
+ val finished = new Array[Boolean](numTasks)
+ val numFailures = new Array[Int](numTasks)
+ val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil)
+ var tasksFinished = 0
+
+ var weight = 1
+ var minShare = 0
+ var runningTasks = 0
+ var priority = taskSet.priority
+ var stageId = taskSet.stageId
+ var name = "TaskSet_"+taskSet.stageId.toString
+ var parent: Schedulable = null
+
+ // Set of pending tasks for each executor. These collections are actually
+ // treated as stacks, in which new tasks are added to the end of the
+ // ArrayBuffer and removed from the end. This makes it faster to detect
+ // tasks that repeatedly fail because whenever a task failed, it is put
+ // back at the head of the stack. They are also only cleaned up lazily;
+ // when a task is launched, it remains in all the pending lists except
+ // the one that it was launched from, but gets removed from them later.
+ private val pendingTasksForExecutor = new HashMap[String, ArrayBuffer[Int]]
+
+ // Set of pending tasks for each host. Similar to pendingTasksForExecutor,
+ // but at host level.
+ private val pendingTasksForHost = new HashMap[String, ArrayBuffer[Int]]
+
+ // Set of pending tasks for each rack -- similar to the above.
+ private val pendingTasksForRack = new HashMap[String, ArrayBuffer[Int]]
+
+ // Set containing pending tasks with no locality preferences.
+ val pendingTasksWithNoPrefs = new ArrayBuffer[Int]
+
+ // Set containing all pending tasks (also used as a stack, as above).
+ val allPendingTasks = new ArrayBuffer[Int]
+
+ // Tasks that can be speculated. Since these will be a small fraction of total
+ // tasks, we'll just hold them in a HashSet.
+ val speculatableTasks = new HashSet[Int]
+
+ // Task index, start and finish time for each task attempt (indexed by task ID)
+ val taskInfos = new HashMap[Long, TaskInfo]
+
+ // Did the TaskSet fail?
+ var failed = false
+ var causeOfFailure = ""
+
+ // How frequently to reprint duplicate exceptions in full, in milliseconds
+ val EXCEPTION_PRINT_INTERVAL =
+ System.getProperty("spark.logging.exceptionPrintInterval", "10000").toLong
+
+ // Map of recent exceptions (identified by string representation and top stack frame) to
+ // duplicate count (how many times the same exception has appeared) and time the full exception
+ // was printed. This should ideally be an LRU map that can drop old exceptions automatically.
+ val recentExceptions = HashMap[String, (Int, Long)]()
+
+ // Figure out the current map output tracker epoch and set it on all tasks
+ val epoch = sched.mapOutputTracker.getEpoch
+ logDebug("Epoch for " + taskSet + ": " + epoch)
+ for (t <- tasks) {
+ t.epoch = epoch
+ }
+
+ // Add all our tasks to the pending lists. We do this in reverse order
+ // of task index so that tasks with low indices get launched first.
+ for (i <- (0 until numTasks).reverse) {
+ addPendingTask(i)
+ }
+
+ // Figure out which locality levels we have in our TaskSet, so we can do delay scheduling
+ val myLocalityLevels = computeValidLocalityLevels()
+ val localityWaits = myLocalityLevels.map(getLocalityWait) // Time to wait at each level
+
+ // Delay scheduling variables: we keep track of our current locality level and the time we
+ // last launched a task at that level, and move up a level when localityWaits[curLevel] expires.
+ // We then move down if we manage to launch a "more local" task.
+ var currentLocalityIndex = 0 // Index of our current locality level in validLocalityLevels
+ var lastLaunchTime = clock.getTime() // Time we last launched a task at this level
+
+ /**
+ * Add a task to all the pending-task lists that it should be on. If readding is set, we are
+ * re-adding the task so only include it in each list if it's not already there.
+ */
+ private def addPendingTask(index: Int, readding: Boolean = false) {
+ // Utility method that adds `index` to a list only if readding=false or it's not already there
+ def addTo(list: ArrayBuffer[Int]) {
+ if (!readding || !list.contains(index)) {
+ list += index
+ }
+ }
+
+ var hadAliveLocations = false
+ for (loc <- tasks(index).preferredLocations) {
+ for (execId <- loc.executorId) {
+ if (sched.isExecutorAlive(execId)) {
+ addTo(pendingTasksForExecutor.getOrElseUpdate(execId, new ArrayBuffer))
+ hadAliveLocations = true
+ }
+ }
+ if (sched.hasExecutorsAliveOnHost(loc.host)) {
+ addTo(pendingTasksForHost.getOrElseUpdate(loc.host, new ArrayBuffer))
+ for (rack <- sched.getRackForHost(loc.host)) {
+ addTo(pendingTasksForRack.getOrElseUpdate(rack, new ArrayBuffer))
+ }
+ hadAliveLocations = true
+ }
+ }
+
+ if (!hadAliveLocations) {
+ // Even though the task might've had preferred locations, all of those hosts or executors
+ // are dead; put it in the no-prefs list so we can schedule it elsewhere right away.
+ addTo(pendingTasksWithNoPrefs)
+ }
+
+ if (!readding) {
+ allPendingTasks += index // No point scanning this whole list to find the old task there
+ }
+ }
+
+ /**
+ * Return the pending tasks list for a given executor ID, or an empty list if
+ * there is no map entry for that host
+ */
+ private def getPendingTasksForExecutor(executorId: String): ArrayBuffer[Int] = {
+ pendingTasksForExecutor.getOrElse(executorId, ArrayBuffer())
+ }
+
+ /**
+ * Return the pending tasks list for a given host, or an empty list if
+ * there is no map entry for that host
+ */
+ private def getPendingTasksForHost(host: String): ArrayBuffer[Int] = {
+ pendingTasksForHost.getOrElse(host, ArrayBuffer())
+ }
+
+ /**
+ * Return the pending rack-local task list for a given rack, or an empty list if
+ * there is no map entry for that rack
+ */
+ private def getPendingTasksForRack(rack: String): ArrayBuffer[Int] = {
+ pendingTasksForRack.getOrElse(rack, ArrayBuffer())
+ }
+
+ /**
+ * Dequeue a pending task from the given list and return its index.
+ * Return None if the list is empty.
+ * This method also cleans up any tasks in the list that have already
+ * been launched, since we want that to happen lazily.
+ */
+ private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = {
+ while (!list.isEmpty) {
+ val index = list.last
+ list.trimEnd(1)
+ if (copiesRunning(index) == 0 && !finished(index)) {
+ return Some(index)
+ }
+ }
+ return None
+ }
+
+ /** Check whether a task is currently running an attempt on a given host */
+ private def hasAttemptOnHost(taskIndex: Int, host: String): Boolean = {
+ !taskAttempts(taskIndex).exists(_.host == host)
+ }
+
+ /**
+ * Return a speculative task for a given executor if any are available. The task should not have
+ * an attempt running on this host, in case the host is slow. In addition, the task should meet
+ * the given locality constraint.
+ */
+ private def findSpeculativeTask(execId: String, host: String, locality: TaskLocality.Value)
+ : Option[(Int, TaskLocality.Value)] =
+ {
+ speculatableTasks.retain(index => !finished(index)) // Remove finished tasks from set
+
+ if (!speculatableTasks.isEmpty) {
+ // Check for process-local or preference-less tasks; note that tasks can be process-local
+ // on multiple nodes when we replicate cached blocks, as in Spark Streaming
+ for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+ val prefs = tasks(index).preferredLocations
+ val executors = prefs.flatMap(_.executorId)
+ if (prefs.size == 0 || executors.contains(execId)) {
+ speculatableTasks -= index
+ return Some((index, TaskLocality.PROCESS_LOCAL))
+ }
+ }
+
+ // Check for node-local tasks
+ if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) {
+ for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+ val locations = tasks(index).preferredLocations.map(_.host)
+ if (locations.contains(host)) {
+ speculatableTasks -= index
+ return Some((index, TaskLocality.NODE_LOCAL))
+ }
+ }
+ }
+
+ // Check for rack-local tasks
+ if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
+ for (rack <- sched.getRackForHost(host)) {
+ for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+ val racks = tasks(index).preferredLocations.map(_.host).map(sched.getRackForHost)
+ if (racks.contains(rack)) {
+ speculatableTasks -= index
+ return Some((index, TaskLocality.RACK_LOCAL))
+ }
+ }
+ }
+ }
+
+ // Check for non-local tasks
+ if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) {
+ for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+ speculatableTasks -= index
+ return Some((index, TaskLocality.ANY))
+ }
+ }
+ }
+
+ return None
+ }
+
+ /**
+ * Dequeue a pending task for a given node and return its index and locality level.
+ * Only search for tasks matching the given locality constraint.
+ */
+ private def findTask(execId: String, host: String, locality: TaskLocality.Value)
+ : Option[(Int, TaskLocality.Value)] =
+ {
+ for (index <- findTaskFromList(getPendingTasksForExecutor(execId))) {
+ return Some((index, TaskLocality.PROCESS_LOCAL))
+ }
+
+ if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) {
+ for (index <- findTaskFromList(getPendingTasksForHost(host))) {
+ return Some((index, TaskLocality.NODE_LOCAL))
+ }
+ }
+
+ if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
+ for {
+ rack <- sched.getRackForHost(host)
+ index <- findTaskFromList(getPendingTasksForRack(rack))
+ } {
+ return Some((index, TaskLocality.RACK_LOCAL))
+ }
+ }
+
+ // Look for no-pref tasks after rack-local tasks since they can run anywhere.
+ for (index <- findTaskFromList(pendingTasksWithNoPrefs)) {
+ return Some((index, TaskLocality.PROCESS_LOCAL))
+ }
+
+ if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) {
+ for (index <- findTaskFromList(allPendingTasks)) {
+ return Some((index, TaskLocality.ANY))
+ }
+ }
+
+ // Finally, if all else has failed, find a speculative task
+ return findSpeculativeTask(execId, host, locality)
+ }
+
+ /**
+ * Respond to an offer of a single slave from the scheduler by finding a task
+ */
+ override def resourceOffer(
+ execId: String,
+ host: String,
+ availableCpus: Int,
+ maxLocality: TaskLocality.TaskLocality)
+ : Option[TaskDescription] =
+ {
+ if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) {
+ val curTime = clock.getTime()
+
+ var allowedLocality = getAllowedLocalityLevel(curTime)
+ if (allowedLocality > maxLocality) {
+ allowedLocality = maxLocality // We're not allowed to search for farther-away tasks
+ }
+
+ findTask(execId, host, allowedLocality) match {
+ case Some((index, taskLocality)) => {
+ // Found a task; do some bookkeeping and return a task description
+ val task = tasks(index)
+ val taskId = sched.newTaskId()
+ // Figure out whether this should count as a preferred launch
+ logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format(
+ taskSet.id, index, taskId, execId, host, taskLocality))
+ // Do various bookkeeping
+ copiesRunning(index) += 1
+ val info = new TaskInfo(taskId, index, curTime, execId, host, taskLocality)
+ taskInfos(taskId) = info
+ taskAttempts(index) = info :: taskAttempts(index)
+ // Update our locality level for delay scheduling
+ currentLocalityIndex = getLocalityIndex(taskLocality)
+ lastLaunchTime = curTime
+ // Serialize and return the task
+ val startTime = clock.getTime()
+ // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here
+ // we assume the task can be serialized without exceptions.
+ val serializedTask = Task.serializeWithDependencies(
+ task, sched.sc.addedFiles, sched.sc.addedJars, ser)
+ val timeTaken = clock.getTime() - startTime
+ increaseRunningTasks(1)
+ logInfo("Serialized task %s:%d as %d bytes in %d ms".format(
+ taskSet.id, index, serializedTask.limit, timeTaken))
+ val taskName = "task %s:%d".format(taskSet.id, index)
+ if (taskAttempts(index).size == 1)
+ taskStarted(task,info)
+ return Some(new TaskDescription(taskId, execId, taskName, index, serializedTask))
+ }
+ case _ =>
+ }
+ }
+ return None
+ }
+
+ /**
+ * Get the level we can launch tasks according to delay scheduling, based on current wait time.
+ */
+ private def getAllowedLocalityLevel(curTime: Long): TaskLocality.TaskLocality = {
+ while (curTime - lastLaunchTime >= localityWaits(currentLocalityIndex) &&
+ currentLocalityIndex < myLocalityLevels.length - 1)
+ {
+ // Jump to the next locality level, and remove our waiting time for the current one since
+ // we don't want to count it again on the next one
+ lastLaunchTime += localityWaits(currentLocalityIndex)
+ currentLocalityIndex += 1
+ }
+ myLocalityLevels(currentLocalityIndex)
+ }
+
+ /**
+ * Find the index in myLocalityLevels for a given locality. This is also designed to work with
+ * localities that are not in myLocalityLevels (in case we somehow get those) by returning the
+ * next-biggest level we have. Uses the fact that the last value in myLocalityLevels is ANY.
+ */
+ def getLocalityIndex(locality: TaskLocality.TaskLocality): Int = {
+ var index = 0
+ while (locality > myLocalityLevels(index)) {
+ index += 1
+ }
+ index
+ }
+
+ /** Called by cluster scheduler when one of our tasks changes state */
+ override def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) {
+ SparkEnv.set(env)
+ state match {
+ case TaskState.FINISHED =>
+ taskFinished(tid, state, serializedData)
+ case TaskState.LOST =>
+ taskLost(tid, state, serializedData)
+ case TaskState.FAILED =>
+ taskLost(tid, state, serializedData)
+ case TaskState.KILLED =>
+ taskLost(tid, state, serializedData)
+ case _ =>
+ }
+ }
+
+ def taskStarted(task: Task[_], info: TaskInfo) {
+ sched.listener.taskStarted(task, info)
+ }
+
+ def taskFinished(tid: Long, state: TaskState, serializedData: ByteBuffer) {
+ val info = taskInfos(tid)
+ if (info.failed) {
+ // We might get two task-lost messages for the same task in coarse-grained Mesos mode,
+ // or even from Mesos itself when acks get delayed.
+ return
+ }
+ val index = info.index
+ info.markSuccessful()
+ decreaseRunningTasks(1)
+ if (!finished(index)) {
+ tasksFinished += 1
+ logInfo("Finished TID %s in %d ms on %s (progress: %d/%d)".format(
+ tid, info.duration, info.host, tasksFinished, numTasks))
+ // Deserialize task result and pass it to the scheduler
+ try {
+ val result = ser.deserialize[TaskResult[_]](serializedData)
+ result.metrics.resultSize = serializedData.limit()
+ sched.listener.taskEnded(
+ tasks(index), Success, result.value, result.accumUpdates, info, result.metrics)
+ } catch {
+ case cnf: ClassNotFoundException =>
+ val loader = Thread.currentThread().getContextClassLoader
+ throw new SparkException("ClassNotFound with classloader: " + loader, cnf)
+ case ex => throw ex
+ }
+ // Mark finished and stop if we've finished all the tasks
+ finished(index) = true
+ if (tasksFinished == numTasks) {
+ sched.taskSetFinished(this)
+ }
+ } else {
+ logInfo("Ignoring task-finished event for TID " + tid +
+ " because task " + index + " is already finished")
+ }
+ }
+
+ def taskLost(tid: Long, state: TaskState, serializedData: ByteBuffer) {
+ val info = taskInfos(tid)
+ if (info.failed) {
+ // We might get two task-lost messages for the same task in coarse-grained Mesos mode,
+ // or even from Mesos itself when acks get delayed.
+ return
+ }
+ val index = info.index
+ info.markFailed()
+ decreaseRunningTasks(1)
+ if (!finished(index)) {
+ logInfo("Lost TID %s (task %s:%d)".format(tid, taskSet.id, index))
+ copiesRunning(index) -= 1
+ // Check if the problem is a map output fetch failure. In that case, this
+ // task will never succeed on any node, so tell the scheduler about it.
+ if (serializedData != null && serializedData.limit() > 0) {
+ val reason = ser.deserialize[TaskEndReason](serializedData, getClass.getClassLoader)
+ reason match {
+ case fetchFailed: FetchFailed =>
+ logInfo("Loss was due to fetch failure from " + fetchFailed.bmAddress)
+ sched.listener.taskEnded(tasks(index), fetchFailed, null, null, info, null)
+ finished(index) = true
+ tasksFinished += 1
+ sched.taskSetFinished(this)
+ decreaseRunningTasks(runningTasks)
+ return
+
+ case taskResultTooBig: TaskResultTooBigFailure =>
+ logInfo("Loss was due to task %s result exceeding Akka frame size; aborting job".format(
+ tid))
+ abort("Task %s result exceeded Akka frame size".format(tid))
+ return
+
+ case ef: ExceptionFailure =>
+ sched.listener.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null))
+ val key = ef.description
+ val now = clock.getTime()
+ val (printFull, dupCount) = {
+ if (recentExceptions.contains(key)) {
+ val (dupCount, printTime) = recentExceptions(key)
+ if (now - printTime > EXCEPTION_PRINT_INTERVAL) {
+ recentExceptions(key) = (0, now)
+ (true, 0)
+ } else {
+ recentExceptions(key) = (dupCount + 1, printTime)
+ (false, dupCount + 1)
+ }
+ } else {
+ recentExceptions(key) = (0, now)
+ (true, 0)
+ }
+ }
+ if (printFull) {
+ val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString))
+ logInfo("Loss was due to %s\n%s\n%s".format(
+ ef.className, ef.description, locs.mkString("\n")))
+ } else {
+ logInfo("Loss was due to %s [duplicate %d]".format(ef.description, dupCount))
+ }
+
+ case _ => {}
+ }
+ }
+ // On non-fetch failures, re-enqueue the task as pending for a max number of retries
+ addPendingTask(index)
+ // Count failed attempts only on FAILED and LOST state (not on KILLED)
+ if (state == TaskState.FAILED || state == TaskState.LOST) {
+ numFailures(index) += 1
+ if (numFailures(index) > MAX_TASK_FAILURES) {
+ logError("Task %s:%d failed more than %d times; aborting job".format(
+ taskSet.id, index, MAX_TASK_FAILURES))
+ abort("Task %s:%d failed more than %d times".format(taskSet.id, index, MAX_TASK_FAILURES))
+ }
+ }
+ } else {
+ logInfo("Ignoring task-lost event for TID " + tid +
+ " because task " + index + " is already finished")
+ }
+ }
+
+ override def error(message: String) {
+ // Save the error message
+ abort("Error: " + message)
+ }
+
+ def abort(message: String) {
+ failed = true
+ causeOfFailure = message
+ // TODO: Kill running tasks if we were not terminated due to a Mesos error
+ sched.listener.taskSetFailed(taskSet, message)
+ decreaseRunningTasks(runningTasks)
+ sched.taskSetFinished(this)
+ }
+
+ override def increaseRunningTasks(taskNum: Int) {
+ runningTasks += taskNum
+ if (parent != null) {
+ parent.increaseRunningTasks(taskNum)
+ }
+ }
+
+ override def decreaseRunningTasks(taskNum: Int) {
+ runningTasks -= taskNum
+ if (parent != null) {
+ parent.decreaseRunningTasks(taskNum)
+ }
+ }
+
+ override def getSchedulableByName(name: String): Schedulable = {
+ return null
+ }
+
+ override def addSchedulable(schedulable: Schedulable) {}
+
+ override def removeSchedulable(schedulable: Schedulable) {}
+
+ override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = {
+ var sortedTaskSetQueue = ArrayBuffer[TaskSetManager](this)
+ sortedTaskSetQueue += this
+ return sortedTaskSetQueue
+ }
+
+ /** Called by cluster scheduler when an executor is lost so we can re-enqueue our tasks */
+ override def executorLost(execId: String, host: String) {
+ logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id)
+
+ // Re-enqueue pending tasks for this host based on the status of the cluster -- for example, a
+ // task that used to have locations on only this host might now go to the no-prefs list. Note
+ // that it's okay if we add a task to the same queue twice (if it had multiple preferred
+ // locations), because findTaskFromList will skip already-running tasks.
+ for (index <- getPendingTasksForExecutor(execId)) {
+ addPendingTask(index, readding=true)
+ }
+ for (index <- getPendingTasksForHost(host)) {
+ addPendingTask(index, readding=true)
+ }
+
+ // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage
+ if (tasks(0).isInstanceOf[ShuffleMapTask]) {
+ for ((tid, info) <- taskInfos if info.executorId == execId) {
+ val index = taskInfos(tid).index
+ if (finished(index)) {
+ finished(index) = false
+ copiesRunning(index) -= 1
+ tasksFinished -= 1
+ addPendingTask(index)
+ // Tell the DAGScheduler that this task was resubmitted so that it doesn't think our
+ // stage finishes when a total of tasks.size tasks finish.
+ sched.listener.taskEnded(tasks(index), Resubmitted, null, null, info, null)
+ }
+ }
+ }
+ // Also re-enqueue any tasks that were running on the node
+ for ((tid, info) <- taskInfos if info.running && info.executorId == execId) {
+ taskLost(tid, TaskState.KILLED, null)
+ }
+ }
+
+ /**
+ * Check for tasks to be speculated and return true if there are any. This is called periodically
+ * by the ClusterScheduler.
+ *
+ * TODO: To make this scale to large jobs, we need to maintain a list of running tasks, so that
+ * we don't scan the whole task set. It might also help to make this sorted by launch time.
+ */
+ override def checkSpeculatableTasks(): Boolean = {
+ // Can't speculate if we only have one task, or if all tasks have finished.
+ if (numTasks == 1 || tasksFinished == numTasks) {
+ return false
+ }
+ var foundTasks = false
+ val minFinishedForSpeculation = (SPECULATION_QUANTILE * numTasks).floor.toInt
+ logDebug("Checking for speculative tasks: minFinished = " + minFinishedForSpeculation)
+ if (tasksFinished >= minFinishedForSpeculation) {
+ val time = clock.getTime()
+ val durations = taskInfos.values.filter(_.successful).map(_.duration).toArray
+ Arrays.sort(durations)
+ val medianDuration = durations(min((0.5 * numTasks).round.toInt, durations.size - 1))
+ val threshold = max(SPECULATION_MULTIPLIER * medianDuration, 100)
+ // TODO: Threshold should also look at standard deviation of task durations and have a lower
+ // bound based on that.
+ logDebug("Task length threshold for speculation: " + threshold)
+ for ((tid, info) <- taskInfos) {
+ val index = info.index
+ if (!finished(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold &&
+ !speculatableTasks.contains(index)) {
+ logInfo(
+ "Marking task %s:%d (on %s) as speculatable because it ran more than %.0f ms".format(
+ taskSet.id, index, info.host, threshold))
+ speculatableTasks += index
+ foundTasks = true
+ }
+ }
+ }
+ return foundTasks
+ }
+
+ override def hasPendingTasks(): Boolean = {
+ numTasks > 0 && tasksFinished < numTasks
+ }
+
+ private def getLocalityWait(level: TaskLocality.TaskLocality): Long = {
+ val defaultWait = System.getProperty("spark.locality.wait", "3000")
+ level match {
+ case TaskLocality.PROCESS_LOCAL =>
+ System.getProperty("spark.locality.wait.process", defaultWait).toLong
+ case TaskLocality.NODE_LOCAL =>
+ System.getProperty("spark.locality.wait.node", defaultWait).toLong
+ case TaskLocality.RACK_LOCAL =>
+ System.getProperty("spark.locality.wait.rack", defaultWait).toLong
+ case TaskLocality.ANY =>
+ 0L
+ }
+ }
+
+ /**
+ * Compute the locality levels used in this TaskSet. Assumes that all tasks have already been
+ * added to queues using addPendingTask.
+ */
+ private def computeValidLocalityLevels(): Array[TaskLocality.TaskLocality] = {
+ import TaskLocality.{PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY}
+ val levels = new ArrayBuffer[TaskLocality.TaskLocality]
+ if (!pendingTasksForExecutor.isEmpty && getLocalityWait(PROCESS_LOCAL) != 0) {
+ levels += PROCESS_LOCAL
+ }
+ if (!pendingTasksForHost.isEmpty && getLocalityWait(NODE_LOCAL) != 0) {
+ levels += NODE_LOCAL
+ }
+ if (!pendingTasksForRack.isEmpty && getLocalityWait(RACK_LOCAL) != 0) {
+ levels += RACK_LOCAL
+ }
+ levels += ANY
+ logDebug("Valid locality levels for " + taskSet + ": " + levels.mkString(", "))
+ levels.toArray
+ }
+}
diff --git a/core/src/main/scala/spark/scheduler/cluster/ExecutorLossReason.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala
index 8825f2dd24..5077b2b48b 100644
--- a/core/src/main/scala/spark/scheduler/cluster/ExecutorLossReason.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
-import spark.executor.ExecutorExitCode
+import org.apache.spark.executor.ExecutorExitCode
/**
* Represents an explanation for a executor or whole slave failing or exiting.
diff --git a/core/src/main/scala/spark/scheduler/cluster/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/Pool.scala
index 83708f07e1..35b32600da 100644
--- a/core/src/main/scala/spark/scheduler/cluster/Pool.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/Pool.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
-import spark.Logging
-import spark.scheduler.cluster.SchedulingMode.SchedulingMode
+import org.apache.spark.Logging
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
/**
* An Schedulable entity that represent collection of Pools or TaskSetManagers
diff --git a/core/src/main/scala/spark/scheduler/cluster/Schedulable.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/Schedulable.scala
index f557b142c4..f4726450ec 100644
--- a/core/src/main/scala/spark/scheduler/cluster/Schedulable.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/Schedulable.scala
@@ -15,16 +15,20 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
-import scala.collection.mutable.ArrayBuffer
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
+import scala.collection.mutable.ArrayBuffer
/**
* An interface for schedulable entities.
* there are two type of Schedulable entities(Pools and TaskSetManagers)
*/
private[spark] trait Schedulable {
var parent: Schedulable
+ // child queues
+ def schedulableQueue: ArrayBuffer[Schedulable]
+ def schedulingMode: SchedulingMode
def weight: Int
def minShare: Int
def runningTasks: Int
diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulableBuilder.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulableBuilder.scala
index 95554023c0..d04eeb6b98 100644
--- a/core/src/main/scala/spark/scheduler/cluster/SchedulableBuilder.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulableBuilder.scala
@@ -15,21 +15,16 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
-import java.io.{File, FileInputStream, FileOutputStream}
+import java.io.{File, FileInputStream, FileOutputStream, FileNotFoundException}
+import java.util.Properties
-import scala.collection.mutable.ArrayBuffer
-import scala.collection.mutable.ArrayBuffer
-import scala.collection.mutable.HashMap
-import scala.collection.mutable.HashSet
-import scala.util.control.Breaks._
-import scala.xml._
+import scala.xml.XML
-import spark.Logging
-import spark.scheduler.cluster.SchedulingMode.SchedulingMode
+import org.apache.spark.Logging
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
-import java.util.Properties
/**
* An interface to build Schedulable tree
@@ -41,10 +36,11 @@ private[spark] trait SchedulableBuilder {
def addTaskSetManager(manager: Schedulable, properties: Properties)
}
-private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) extends SchedulableBuilder with Logging {
+private[spark] class FIFOSchedulableBuilder(val rootPool: Pool)
+ extends SchedulableBuilder with Logging {
override def buildPools() {
- //nothing
+ // nothing
}
override def addTaskSetManager(manager: Schedulable, properties: Properties) {
@@ -52,9 +48,10 @@ private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) extends Schedula
}
}
-private[spark] class FairSchedulableBuilder(val rootPool: Pool) extends SchedulableBuilder with Logging {
+private[spark] class FairSchedulableBuilder(val rootPool: Pool)
+ extends SchedulableBuilder with Logging {
- val schedulerAllocFile = System.getProperty("spark.fairscheduler.allocation.file","unspecified")
+ val schedulerAllocFile = System.getProperty("spark.fairscheduler.allocation.file")
val FAIR_SCHEDULER_PROPERTIES = "spark.scheduler.cluster.fair.pool"
val DEFAULT_POOL_NAME = "default"
val MINIMUM_SHARES_PROPERTY = "minShare"
@@ -67,47 +64,53 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool) extends Schedula
val DEFAULT_WEIGHT = 1
override def buildPools() {
+ if (schedulerAllocFile != null) {
val file = new File(schedulerAllocFile)
- if (file.exists()) {
- val xml = XML.loadFile(file)
- for (poolNode <- (xml \\ POOLS_PROPERTY)) {
-
- val poolName = (poolNode \ POOL_NAME_PROPERTY).text
- var schedulingMode = DEFAULT_SCHEDULING_MODE
- var minShare = DEFAULT_MINIMUM_SHARE
- var weight = DEFAULT_WEIGHT
-
- val xmlSchedulingMode = (poolNode \ SCHEDULING_MODE_PROPERTY).text
- if (xmlSchedulingMode != "") {
- try {
- schedulingMode = SchedulingMode.withName(xmlSchedulingMode)
- } catch {
- case e: Exception => logInfo("Error xml schedulingMode, using default schedulingMode")
+ if (file.exists()) {
+ val xml = XML.loadFile(file)
+ for (poolNode <- (xml \\ POOLS_PROPERTY)) {
+
+ val poolName = (poolNode \ POOL_NAME_PROPERTY).text
+ var schedulingMode = DEFAULT_SCHEDULING_MODE
+ var minShare = DEFAULT_MINIMUM_SHARE
+ var weight = DEFAULT_WEIGHT
+
+ val xmlSchedulingMode = (poolNode \ SCHEDULING_MODE_PROPERTY).text
+ if (xmlSchedulingMode != "") {
+ try {
+ schedulingMode = SchedulingMode.withName(xmlSchedulingMode)
+ } catch {
+ case e: Exception => logInfo("Error xml schedulingMode, using default schedulingMode")
+ }
}
- }
- val xmlMinShare = (poolNode \ MINIMUM_SHARES_PROPERTY).text
- if (xmlMinShare != "") {
- minShare = xmlMinShare.toInt
- }
+ val xmlMinShare = (poolNode \ MINIMUM_SHARES_PROPERTY).text
+ if (xmlMinShare != "") {
+ minShare = xmlMinShare.toInt
+ }
- val xmlWeight = (poolNode \ WEIGHT_PROPERTY).text
- if (xmlWeight != "") {
- weight = xmlWeight.toInt
- }
+ val xmlWeight = (poolNode \ WEIGHT_PROPERTY).text
+ if (xmlWeight != "") {
+ weight = xmlWeight.toInt
+ }
- val pool = new Pool(poolName, schedulingMode, minShare, weight)
- rootPool.addSchedulable(pool)
- logInfo("Create new pool with name:%s,schedulingMode:%s,minShare:%d,weight:%d".format(
- poolName, schedulingMode, minShare, weight))
+ val pool = new Pool(poolName, schedulingMode, minShare, weight)
+ rootPool.addSchedulable(pool)
+ logInfo("Created pool %s, schedulingMode: %s, minShare: %d, weight: %d".format(
+ poolName, schedulingMode, minShare, weight))
+ }
+ } else {
+ throw new java.io.FileNotFoundException(
+ "Fair scheduler allocation file not found: " + schedulerAllocFile)
}
}
- //finally create "default" pool
+ // finally create "default" pool
if (rootPool.getSchedulableByName(DEFAULT_POOL_NAME) == null) {
- val pool = new Pool(DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)
+ val pool = new Pool(DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE,
+ DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)
rootPool.addSchedulable(pool)
- logInfo("Create default pool with name:%s,schedulingMode:%s,minShare:%d,weight:%d".format(
+ logInfo("Created default pool %s, schedulingMode: %s, minShare: %d, weight: %d".format(
DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT))
}
}
@@ -119,10 +122,12 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool) extends Schedula
poolName = properties.getProperty(FAIR_SCHEDULER_PROPERTIES, DEFAULT_POOL_NAME)
parentPool = rootPool.getSchedulableByName(poolName)
if (parentPool == null) {
- //we will create a new pool that user has configured in app instead of being defined in xml file
- parentPool = new Pool(poolName,DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)
+ // we will create a new pool that user has configured in app
+ // instead of being defined in xml file
+ parentPool = new Pool(poolName, DEFAULT_SCHEDULING_MODE,
+ DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)
rootPool.addSchedulable(parentPool)
- logInfo("Create pool with name:%s,schedulingMode:%s,minShare:%d,weight:%d".format(
+ logInfo("Created pool %s, schedulingMode: %s, minShare: %d, weight: %d".format(
poolName, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT))
}
}
diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala
index 4431744ec3..d57eb3276f 100644
--- a/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
-import spark.{SparkContext, Utils}
+import org.apache.spark.{SparkContext}
/**
* A backend interface for cluster scheduling systems that allows plugging in different ones under
diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulingAlgorithm.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingAlgorithm.scala
index 69e0ac2a6b..cbeed4731a 100644
--- a/core/src/main/scala/spark/scheduler/cluster/SchedulingAlgorithm.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingAlgorithm.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
/**
* An interface for sort algorithm
diff --git a/core/src/main/scala/spark/SoftReferenceCache.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingMode.scala
index f41a379582..34811389a0 100644
--- a/core/src/main/scala/spark/SoftReferenceCache.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingMode.scala
@@ -15,21 +15,15 @@
* limitations under the License.
*/
-package spark
-
-import com.google.common.collect.MapMaker
+package org.apache.spark.scheduler.cluster
/**
- * An implementation of Cache that uses soft references.
+ * "FAIR" and "FIFO" determines which policy is used
+ * to order tasks amongst a Schedulable's sub-queues
+ * "NONE" is used when the a Schedulable has no sub-queues.
*/
-private[spark] class SoftReferenceCache extends Cache {
- val map = new MapMaker().softValues().makeMap[Any, Any]()
-
- override def get(datasetId: Any, partition: Int): Any =
- map.get((datasetId, partition))
+object SchedulingMode extends Enumeration("FAIR", "FIFO", "NONE") {
- override def put(datasetId: Any, partition: Int, value: Any): CachePutResponse = {
- map.put((datasetId, partition), value)
- return CachePutSuccess(0)
- }
+ type SchedulingMode = Value
+ val FAIR,FIFO,NONE = Value
}
diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
index 55d6c0a47e..d003bf1bba 100644
--- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@ -15,12 +15,13 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
-import spark.{Utils, Logging, SparkContext}
-import spark.deploy.client.{Client, ClientListener}
-import spark.deploy.{Command, ApplicationDescription}
+import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.deploy.client.{Client, ClientListener}
+import org.apache.spark.deploy.{Command, ApplicationDescription}
import scala.collection.mutable.HashMap
+import org.apache.spark.util.Utils
private[spark] class SparkDeploySchedulerBackend(
scheduler: ClusterScheduler,
@@ -45,9 +46,9 @@ private[spark] class SparkDeploySchedulerBackend(
System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"),
StandaloneSchedulerBackend.ACTOR_NAME)
val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}")
- val command = Command("spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs)
- val sparkHome = sc.getSparkHome().getOrElse(
- throw new IllegalArgumentException("must supply spark home for spark standalone"))
+ val command = Command(
+ "org.apache.spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs)
+ val sparkHome = sc.getSparkHome().getOrElse(null)
val appDesc = new ApplicationDescription(appName, maxCores, executorMemory, command, sparkHome,
sc.ui.appUIAddress)
@@ -77,7 +78,7 @@ private[spark] class SparkDeploySchedulerBackend(
override def executorAdded(executorId: String, workerId: String, hostPort: String, cores: Int, memory: Int) {
logInfo("Granted executor ID %s on hostPort %s with %d cores, %s RAM".format(
- executorId, hostPort, cores, Utils.memoryMegabytesToString(memory)))
+ executorId, hostPort, cores, Utils.megabytesToString(memory)))
}
override def executorRemoved(executorId: String, message: String, exitStatus: Option[Int]) {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala
new file mode 100644
index 0000000000..9c36d221f6
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler.cluster
+
+import java.nio.ByteBuffer
+
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.util.{Utils, SerializableBuffer}
+
+
+private[spark] sealed trait StandaloneClusterMessage extends Serializable
+
+private[spark] object StandaloneClusterMessages {
+
+ // Driver to executors
+ case class LaunchTask(task: TaskDescription) extends StandaloneClusterMessage
+
+ case class RegisteredExecutor(sparkProperties: Seq[(String, String)])
+ extends StandaloneClusterMessage
+
+ case class RegisterExecutorFailed(message: String) extends StandaloneClusterMessage
+
+ // Executors to driver
+ case class RegisterExecutor(executorId: String, hostPort: String, cores: Int)
+ extends StandaloneClusterMessage {
+ Utils.checkHostPort(hostPort, "Expected host port")
+ }
+
+ case class StatusUpdate(executorId: String, taskId: Long, state: TaskState,
+ data: SerializableBuffer) extends StandaloneClusterMessage
+
+ object StatusUpdate {
+ /** Alternate factory method that takes a ByteBuffer directly for the data field */
+ def apply(executorId: String, taskId: Long, state: TaskState, data: ByteBuffer)
+ : StatusUpdate = {
+ StatusUpdate(executorId, taskId, state, new SerializableBuffer(data))
+ }
+ }
+
+ // Internal messages in driver
+ case object ReviveOffers extends StandaloneClusterMessage
+
+ case object StopDriver extends StandaloneClusterMessage
+
+ case class RemoveExecutor(executorId: String, reason: String) extends StandaloneClusterMessage
+
+}
diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala
index 03a64e0192..b4ea0be415 100644
--- a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala
@@ -15,19 +15,22 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
+
+import java.util.concurrent.atomic.AtomicInteger
import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
import akka.actor._
-import akka.util.duration._
+import akka.dispatch.Await
import akka.pattern.ask
+import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent}
import akka.util.Duration
+import akka.util.duration._
-import spark.{Utils, SparkException, Logging, TaskState}
-import akka.dispatch.Await
-import java.util.concurrent.atomic.AtomicInteger
-import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent}
+import org.apache.spark.{SparkException, Logging, TaskState}
+import org.apache.spark.scheduler.cluster.StandaloneClusterMessages._
+import org.apache.spark.util.Utils
/**
* A standalone scheduler backend, which waits for standalone executors to connect to it through
@@ -36,15 +39,15 @@ import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClient
*/
private[spark]
class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: ActorSystem)
- extends SchedulerBackend with Logging {
-
+ extends SchedulerBackend with Logging
+{
// Use an atomic variable to track total number of cores in the cluster for simplicity and speed
var totalCoreCount = new AtomicInteger(0)
class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor {
private val executorActor = new HashMap[String, ActorRef]
private val executorAddress = new HashMap[String, Address]
- private val executorHostPort = new HashMap[String, String]
+ private val executorHost = new HashMap[String, String]
private val freeCores = new HashMap[String, Int]
private val actorToExecutorId = new HashMap[ActorRef, String]
private val addressToExecutorId = new HashMap[Address, String]
@@ -52,6 +55,10 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor
override def preStart() {
// Listen for remote client disconnection events, since they don't go through Akka's watch()
context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent])
+
+ // Periodically revive offers to allow delay scheduling to work
+ val reviveInterval = System.getProperty("spark.scheduler.revive.interval", "1000").toLong
+ context.system.scheduler.schedule(0.millis, reviveInterval.millis, self, ReviveOffers)
}
def receive = {
@@ -64,7 +71,7 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor
sender ! RegisteredExecutor(sparkProperties)
context.watch(sender)
executorActor(executorId) = sender
- executorHostPort(executorId) = hostPort
+ executorHost(executorId) = Utils.parseHostPort(hostPort)._1
freeCores(executorId) = cores
executorAddress(executorId) = sender.path.address
actorToExecutorId(sender) = executorId
@@ -104,13 +111,13 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor
// Make fake resource offers on all executors
def makeOffers() {
launchTasks(scheduler.resourceOffers(
- executorHostPort.toArray.map {case (id, hostPort) => new WorkerOffer(id, hostPort, freeCores(id))}))
+ executorHost.toArray.map {case (id, host) => new WorkerOffer(id, host, freeCores(id))}))
}
// Make fake resource offers on just one executor
def makeOffers(executorId: String) {
launchTasks(scheduler.resourceOffers(
- Seq(new WorkerOffer(executorId, executorHostPort(executorId), freeCores(executorId)))))
+ Seq(new WorkerOffer(executorId, executorHost(executorId), freeCores(executorId)))))
}
// Launch tasks returned by a set of resource offers
@@ -129,9 +136,8 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor
actorToExecutorId -= executorActor(executorId)
addressToExecutorId -= executorAddress(executorId)
executorActor -= executorId
- executorHostPort -= executorId
+ executorHost -= executorId
freeCores -= executorId
- executorHostPort -= executorId
totalCoreCount.addAndGet(-numCores)
scheduler.executorLost(executorId, SlaveLost(reason))
}
diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskDescription.scala
index 761fdf6919..309ac2f6c9 100644
--- a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskDescription.scala
@@ -15,15 +15,16 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
import java.nio.ByteBuffer
-import spark.util.SerializableBuffer
+import org.apache.spark.util.SerializableBuffer
private[spark] class TaskDescription(
val taskId: Long,
val executorId: String,
val name: String,
+ val index: Int, // Index within this task's TaskSet
_serializedTask: ByteBuffer)
extends Serializable {
@@ -31,4 +32,6 @@ private[spark] class TaskDescription(
private val buffer = new SerializableBuffer(_serializedTask)
def serializedTask: ByteBuffer = buffer.value
+
+ override def toString: String = "TaskDescription(TID=%d, index=%d)".format(taskId, index)
}
diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskInfo.scala
index a1ebd48b01..9685fb1a67 100644
--- a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskInfo.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
-import spark.Utils
+import org.apache.spark.util.Utils
/**
* Information about a running task attempt inside a TaskSet.
@@ -28,11 +28,9 @@ class TaskInfo(
val index: Int,
val launchTime: Long,
val executorId: String,
- val hostPort: String,
+ val host: String,
val taskLocality: TaskLocality.TaskLocality) {
- Utils.checkHostPort(hostPort, "Expected hostport")
-
var finishTime: Long = 0
var failed = false
@@ -51,6 +49,17 @@ class TaskInfo(
def running: Boolean = !finished
+ def status: String = {
+ if (running)
+ "RUNNING"
+ else if (failed)
+ "FAILED"
+ else if (successful)
+ "SUCCESS"
+ else
+ "UNKNOWN"
+ }
+
def duration: Long = {
if (!finished) {
throw new UnsupportedOperationException("duration() called on unfinished tasks")
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala
new file mode 100644
index 0000000000..5d4130e14a
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler.cluster
+
+
+private[spark] object TaskLocality
+ extends Enumeration("PROCESS_LOCAL", "NODE_LOCAL", "RACK_LOCAL", "ANY")
+{
+ // process local is expected to be used ONLY within tasksetmanager for now.
+ val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value
+
+ type TaskLocality = Value
+
+ def isAllowed(constraint: TaskLocality, condition: TaskLocality): Boolean = {
+ condition <= constraint
+ }
+}
diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala
index 07c3ddcc7e..648a3ef922 100644
--- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala
@@ -15,20 +15,37 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
-import scala.collection.mutable.ArrayBuffer
-import spark.scheduler._
-import spark.TaskState.TaskState
import java.nio.ByteBuffer
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.scheduler.TaskSet
+
+/**
+ * Tracks and schedules the tasks within a single TaskSet. This class keeps track of the status of
+ * each task and is responsible for retries on failure and locality. The main interfaces to it
+ * are resourceOffer, which asks the TaskSet whether it wants to run a task on one node, and
+ * statusUpdate, which tells it that one of its tasks changed state (e.g. finished).
+ *
+ * THREADING: This class is designed to only be called from code with a lock on the TaskScheduler
+ * (e.g. its event handlers). It should not be called from other threads.
+ */
private[spark] trait TaskSetManager extends Schedulable {
+ def schedulableQueue = null
+
+ def schedulingMode = SchedulingMode.NONE
+
def taskSet: TaskSet
- def slaveOffer(execId: String, hostPort: String, availableCpus: Double,
- overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription]
- def numPendingTasksForHostPort(hostPort: String): Int
- def numRackLocalPendingTasksForHost(hostPort :String): Int
- def numPendingTasksForHost(hostPort: String): Int
+
+ def resourceOffer(
+ execId: String,
+ host: String,
+ availableCpus: Int,
+ maxLocality: TaskLocality.TaskLocality)
+ : Option[TaskDescription]
+
def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer)
+
def error(message: String)
}
diff --git a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala
index 06d1203f70..938f62883a 100644
--- a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala
@@ -15,11 +15,10 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
/**
* Represents free resources available on an executor.
*/
private[spark]
-class WorkerOffer(val executorId: String, val hostPort: String, val cores: Int) {
-}
+class WorkerOffer(val executorId: String, val host: String, val cores: Int)
diff --git a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala
index 1f73cb99a7..e8fa5e2f17 100644
--- a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala
@@ -15,21 +15,26 @@
* limitations under the License.
*/
-package spark.scheduler.local
+package org.apache.spark.scheduler.local
import java.io.File
+import java.lang.management.ManagementFactory
import java.util.concurrent.atomic.AtomicInteger
import java.nio.ByteBuffer
+
+import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet
-import spark._
-import spark.TaskState.TaskState
-import spark.executor.ExecutorURLClassLoader
-import spark.scheduler._
-import spark.scheduler.cluster._
+import org.apache.spark._
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.executor.ExecutorURLClassLoader
+import org.apache.spark.scheduler._
+import org.apache.spark.scheduler.cluster._
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
import akka.actor._
+import org.apache.spark.util.Utils
/**
* A FIFO or Fair TaskScheduler implementation that runs tasks locally in a thread pool. Optionally
@@ -37,10 +42,15 @@ import akka.actor._
* testing fault recovery.
*/
-private[spark] case class LocalReviveOffers()
-private[spark] case class LocalStatusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer)
+private[spark]
+case class LocalReviveOffers()
+
+private[spark]
+case class LocalStatusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer)
+
+private[spark]
+class LocalActor(localScheduler: LocalScheduler, var freeCores: Int) extends Actor with Logging {
-private[spark] class LocalActor(localScheduler: LocalScheduler, var freeCores: Int) extends Actor with Logging {
def receive = {
case LocalReviveOffers =>
launchTask(localScheduler.resourceOffer(freeCores))
@@ -55,7 +65,7 @@ private[spark] class LocalActor(localScheduler: LocalScheduler, var freeCores: I
freeCores -= 1
localScheduler.threadPool.submit(new Runnable {
def run() {
- localScheduler.runTask(task.taskId,task.serializedTask)
+ localScheduler.runTask(task.taskId, task.serializedTask)
}
})
}
@@ -80,6 +90,8 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
var schedulableBuilder: SchedulableBuilder = null
var rootPool: Pool = null
+ val schedulingMode: SchedulingMode = SchedulingMode.withName(
+ System.getProperty("spark.cluster.schedulingmode", "FIFO"))
val activeTaskSets = new HashMap[String, TaskSetManager]
val taskIdToTaskSetId = new HashMap[Long, String]
val taskSetTaskIds = new HashMap[String, HashSet[Long]]
@@ -87,15 +99,13 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
var localActor: ActorRef = null
override def start() {
- //default scheduler is FIFO
- val schedulingMode = System.getProperty("spark.cluster.schedulingmode", "FIFO")
- //temporarily set rootPool name to empty
- rootPool = new Pool("", SchedulingMode.withName(schedulingMode), 0, 0)
+ // temporarily set rootPool name to empty
+ rootPool = new Pool("", schedulingMode, 0, 0)
schedulableBuilder = {
schedulingMode match {
- case "FIFO" =>
+ case SchedulingMode.FIFO =>
new FIFOSchedulableBuilder(rootPool)
- case "FAIR" =>
+ case SchedulingMode.FAIR =>
new FairSchedulableBuilder(rootPool)
}
}
@@ -110,7 +120,7 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
override def submitTasks(taskSet: TaskSet) {
synchronized {
- var manager = new LocalTaskSetManager(this, taskSet)
+ val manager = new LocalTaskSetManager(this, taskSet)
schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)
activeTaskSets(taskSet.id) = manager
taskSetTaskIds(taskSet.id) = new HashSet[Long]()
@@ -124,14 +134,15 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
val tasks = new ArrayBuffer[TaskDescription](freeCores)
val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue()
for (manager <- sortedTaskSetQueue) {
- logDebug("parentName:%s,name:%s,runningTasks:%s".format(manager.parent.name, manager.name, manager.runningTasks))
+ logDebug("parentName:%s,name:%s,runningTasks:%s".format(
+ manager.parent.name, manager.name, manager.runningTasks))
}
var launchTask = false
for (manager <- sortedTaskSetQueue) {
do {
launchTask = false
- manager.slaveOffer(null,null,freeCpuCores) match {
+ manager.resourceOffer(null, null, freeCpuCores, null) match {
case Some(task) =>
tasks += task
taskIdToTaskSetId(task.taskId) = manager.taskSet.id
@@ -139,7 +150,7 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
freeCpuCores -= 1
launchTask = true
case None => {}
- }
+ }
} while(launchTask)
}
return tasks
@@ -162,9 +173,13 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
// Set the Spark execution environment for the worker thread
SparkEnv.set(env)
val ser = SparkEnv.get.closureSerializer.newInstance()
- var attemptedTask: Option[Task[_]] = None
+ val objectSer = SparkEnv.get.serializer.newInstance()
+ var attemptedTask: Option[Task[_]] = None
val start = System.currentTimeMillis()
var taskStart: Long = 0
+ def getTotalGCTime = ManagementFactory.getGarbageCollectorMXBeans.map(g => g.getCollectionTime).sum
+ val startGCTime = getTotalGCTime
+
try {
Accumulators.clear()
Thread.currentThread().setContextClassLoader(classLoader)
@@ -186,14 +201,15 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
// executor does. This is useful to catch serialization errors early
// on in development (so when users move their local Spark programs
// to the cluster, they don't get surprised by serialization errors).
- val serResult = ser.serialize(result)
+ val serResult = objectSer.serialize(result)
deserializedTask.metrics.get.resultSize = serResult.limit()
- val resultToReturn = ser.deserialize[Any](serResult)
+ val resultToReturn = objectSer.deserialize[Any](serResult)
val accumUpdates = ser.deserialize[collection.mutable.Map[Long, Any]](
ser.serialize(Accumulators.values))
val serviceTime = System.currentTimeMillis() - taskStart
logInfo("Finished " + taskId)
deserializedTask.metrics.get.executorRunTime = serviceTime.toInt
+ deserializedTask.metrics.get.jvmGCTime = getTotalGCTime - startGCTime
deserializedTask.metrics.get.executorDeserializeTime = deserTime.toInt
val taskResult = new TaskResult(result, accumUpdates, deserializedTask.metrics.getOrElse(null))
val serializedResult = ser.serialize(taskResult)
@@ -202,7 +218,10 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
case t: Throwable => {
val serviceTime = System.currentTimeMillis() - taskStart
val metrics = attemptedTask.flatMap(t => t.metrics)
- metrics.foreach{m => m.executorRunTime = serviceTime.toInt}
+ for (m <- metrics) {
+ m.executorRunTime = serviceTime.toInt
+ m.jvmGCTime = getTotalGCTime - startGCTime
+ }
val failure = new ExceptionFailure(t.getClass.getName, t.toString, t.getStackTrace, metrics)
localActor ! LocalStatusUpdate(taskId, TaskState.FAILED, ser.serialize(failure))
}
diff --git a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala
index e662ad6709..e52cb998bd 100644
--- a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala
@@ -15,83 +15,79 @@
* limitations under the License.
*/
-package spark.scheduler.local
+package org.apache.spark.scheduler.local
-import java.io.File
-import java.util.concurrent.atomic.AtomicInteger
import java.nio.ByteBuffer
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
-import scala.collection.mutable.HashSet
-import spark._
-import spark.TaskState.TaskState
-import spark.scheduler._
-import spark.scheduler.cluster._
+import org.apache.spark.{ExceptionFailure, Logging, SparkEnv, Success, TaskState}
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.scheduler.{Task, TaskResult, TaskSet}
+import org.apache.spark.scheduler.cluster.{Schedulable, TaskDescription, TaskInfo, TaskLocality, TaskSetManager}
+
+
+private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: TaskSet)
+ extends TaskSetManager with Logging {
-private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: TaskSet) extends TaskSetManager with Logging {
var parent: Schedulable = null
var weight: Int = 1
var minShare: Int = 0
var runningTasks: Int = 0
var priority: Int = taskSet.priority
var stageId: Int = taskSet.stageId
- var name: String = "TaskSet_"+taskSet.stageId.toString
-
+ var name: String = "TaskSet_" + taskSet.stageId.toString
var failCount = new Array[Int](taskSet.tasks.size)
val taskInfos = new HashMap[Long, TaskInfo]
val numTasks = taskSet.tasks.size
var numFinished = 0
- val ser = SparkEnv.get.closureSerializer.newInstance()
+ val env = SparkEnv.get
+ val ser = env.closureSerializer.newInstance()
val copiesRunning = new Array[Int](numTasks)
val finished = new Array[Boolean](numTasks)
val numFailures = new Array[Int](numTasks)
val MAX_TASK_FAILURES = sched.maxFailures
- def increaseRunningTasks(taskNum: Int): Unit = {
- runningTasks += taskNum
- if (parent != null) {
- parent.increaseRunningTasks(taskNum)
- }
+ override def increaseRunningTasks(taskNum: Int): Unit = {
+ runningTasks += taskNum
+ if (parent != null) {
+ parent.increaseRunningTasks(taskNum)
+ }
}
- def decreaseRunningTasks(taskNum: Int): Unit = {
+ override def decreaseRunningTasks(taskNum: Int): Unit = {
runningTasks -= taskNum
if (parent != null) {
parent.decreaseRunningTasks(taskNum)
}
}
- def addSchedulable(schedulable: Schedulable): Unit = {
- //nothing
+ override def addSchedulable(schedulable: Schedulable): Unit = {
+ // nothing
}
- def removeSchedulable(schedulable: Schedulable): Unit = {
- //nothing
+ override def removeSchedulable(schedulable: Schedulable): Unit = {
+ // nothing
}
- def getSchedulableByName(name: String): Schedulable = {
+ override def getSchedulableByName(name: String): Schedulable = {
return null
}
- def executorLost(executorId: String, host: String): Unit = {
- //nothing
+ override def executorLost(executorId: String, host: String): Unit = {
+ // nothing
}
- def checkSpeculatableTasks(): Boolean = {
- return true
- }
+ override def checkSpeculatableTasks() = true
- def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = {
+ override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = {
var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager]
sortedTaskSetQueue += this
return sortedTaskSetQueue
}
- def hasPendingTasks(): Boolean = {
- return true
- }
+ override def hasPendingTasks() = true
def findTask(): Option[Int] = {
for (i <- 0 to numTasks-1) {
@@ -102,41 +98,42 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas
return None
}
- def slaveOffer(execId: String, hostPort: String, availableCpus: Double, overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] = {
+ override def resourceOffer(
+ execId: String,
+ host: String,
+ availableCpus: Int,
+ maxLocality: TaskLocality.TaskLocality)
+ : Option[TaskDescription] =
+ {
SparkEnv.set(sched.env)
- logDebug("availableCpus:%d,numFinished:%d,numTasks:%d".format(availableCpus.toInt, numFinished, numTasks))
+ logDebug("availableCpus:%d, numFinished:%d, numTasks:%d".format(
+ availableCpus.toInt, numFinished, numTasks))
if (availableCpus > 0 && numFinished < numTasks) {
findTask() match {
case Some(index) =>
val taskId = sched.attemptId.getAndIncrement()
val task = taskSet.tasks(index)
- val info = new TaskInfo(taskId, index, System.currentTimeMillis(), "local", "local:1", TaskLocality.NODE_LOCAL)
+ val info = new TaskInfo(taskId, index, System.currentTimeMillis(), "local", "local:1",
+ TaskLocality.NODE_LOCAL)
taskInfos(taskId) = info
- val bytes = Task.serializeWithDependencies(task, sched.sc.addedFiles, sched.sc.addedJars, ser)
+ // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here
+ // we assume the task can be serialized without exceptions.
+ val bytes = Task.serializeWithDependencies(
+ task, sched.sc.addedFiles, sched.sc.addedJars, ser)
logInfo("Size of task " + taskId + " is " + bytes.limit + " bytes")
val taskName = "task %s:%d".format(taskSet.id, index)
copiesRunning(index) += 1
increaseRunningTasks(1)
- return Some(new TaskDescription(taskId, null, taskName, bytes))
+ taskStarted(task, info)
+ return Some(new TaskDescription(taskId, null, taskName, index, bytes))
case None => {}
}
}
return None
}
- def numPendingTasksForHostPort(hostPort: String): Int = {
- return 0
- }
-
- def numRackLocalPendingTasksForHost(hostPort :String): Int = {
- return 0
- }
-
- def numPendingTasksForHost(hostPort: String): Int = {
- return 0
- }
-
- def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) {
+ override def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) {
+ SparkEnv.set(env)
state match {
case TaskState.FINISHED =>
taskEnded(tid, state, serializedData)
@@ -146,6 +143,10 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas
}
}
+ def taskStarted(task: Task[_], info: TaskInfo) {
+ sched.listener.taskStarted(task, info)
+ }
+
def taskEnded(tid: Long, state: TaskState, serializedData: ByteBuffer) {
val info = taskInfos(tid)
val index = info.index
@@ -168,15 +169,18 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas
val task = taskSet.tasks(index)
info.markFailed()
decreaseRunningTasks(1)
- val reason: ExceptionFailure = ser.deserialize[ExceptionFailure](serializedData, getClass.getClassLoader)
+ val reason: ExceptionFailure = ser.deserialize[ExceptionFailure](
+ serializedData, getClass.getClassLoader)
sched.listener.taskEnded(task, reason, null, null, info, reason.metrics.getOrElse(null))
if (!finished(index)) {
copiesRunning(index) -= 1
numFailures(index) += 1
val locs = reason.stackTrace.map(loc => "\tat %s".format(loc.toString))
- logInfo("Loss was due to %s\n%s\n%s".format(reason.className, reason.description, locs.mkString("\n")))
+ logInfo("Loss was due to %s\n%s\n%s".format(
+ reason.className, reason.description, locs.mkString("\n")))
if (numFailures(index) > MAX_TASK_FAILURES) {
- val errorMessage = "Task %s:%d failed more than %d times; aborting job %s".format(taskSet.id, index, 4, reason.description)
+ val errorMessage = "Task %s:%d failed more than %d times; aborting job %s".format(
+ taskSet.id, index, 4, reason.description)
decreaseRunningTasks(runningTasks)
sched.listener.taskSetFailed(taskSet, errorMessage)
// need to delete failed Taskset from schedule queue
@@ -185,6 +189,6 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas
}
}
- def error(message: String) {
+ override def error(message: String) {
}
}
diff --git a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala
index 7bc6040544..3dbe61d706 100644
--- a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.scheduler.mesos
+package org.apache.spark.scheduler.mesos
import com.google.protobuf.ByteString
@@ -23,14 +23,14 @@ import org.apache.mesos.{Scheduler => MScheduler}
import org.apache.mesos._
import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _}
-import spark.{SparkException, Utils, Logging, SparkContext}
+import org.apache.spark.{SparkException, Logging, SparkContext}
import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
import scala.collection.JavaConversions._
import java.io.File
-import spark.scheduler.cluster._
+import org.apache.spark.scheduler.cluster._
import java.util.{ArrayList => JArrayList, List => JList}
import java.util.Collections
-import spark.TaskState
+import org.apache.spark.TaskState
/**
* A SchedulerBackend that runs tasks on Mesos, but uses "coarse-grained" tasks, where it holds
@@ -110,12 +110,6 @@ private[spark] class CoarseMesosSchedulerBackend(
}
def createCommand(offer: Offer, numCores: Int): CommandInfo = {
- val runScript = new File(sparkHome, "run").getCanonicalPath
- val driverUrl = "akka://spark@%s:%s/user/%s".format(
- System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"),
- StandaloneSchedulerBackend.ACTOR_NAME)
- val command = "\"%s\" spark.executor.StandaloneExecutorBackend %s %s %s %d".format(
- runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)
val environment = Environment.newBuilder()
sc.executorEnvs.foreach { case (key, value) =>
environment.addVariables(Environment.Variable.newBuilder()
@@ -123,7 +117,28 @@ private[spark] class CoarseMesosSchedulerBackend(
.setValue(value)
.build())
}
- return CommandInfo.newBuilder().setValue(command).setEnvironment(environment).build()
+ val command = CommandInfo.newBuilder()
+ .setEnvironment(environment)
+ val driverUrl = "akka://spark@%s:%s/user/%s".format(
+ System.getProperty("spark.driver.host"),
+ System.getProperty("spark.driver.port"),
+ StandaloneSchedulerBackend.ACTOR_NAME)
+ val uri = System.getProperty("spark.executor.uri")
+ if (uri == null) {
+ val runScript = new File(sparkHome, "spark-class").getCanonicalPath
+ command.setValue(
+ "\"%s\" org.apache.spark.executor.StandaloneExecutorBackend %s %s %s %d".format(
+ runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores))
+ } else {
+ // Grab everything to the first '.'. We'll use that and '*' to
+ // glob the directory "correctly".
+ val basename = uri.split('/').last.split('.').head
+ command.setValue(
+ "cd %s*; ./spark-class org.apache.spark.executor.StandaloneExecutorBackend %s %s %s %d".format(
+ basename, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores))
+ command.addUris(CommandInfo.URI.newBuilder().setValue(uri))
+ }
+ return command.build()
}
override def offerRescinded(d: SchedulerDriver, o: OfferID) {}
diff --git a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackend.scala
index 75b8268b55..541f86e338 100644
--- a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackend.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.scheduler.mesos
+package org.apache.spark.scheduler.mesos
import com.google.protobuf.ByteString
@@ -23,14 +23,15 @@ import org.apache.mesos.{Scheduler => MScheduler}
import org.apache.mesos._
import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _}
-import spark.{SparkException, Utils, Logging, SparkContext}
+import org.apache.spark.{SparkException, Logging, SparkContext}
import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
import scala.collection.JavaConversions._
import java.io.File
-import spark.scheduler.cluster._
+import org.apache.spark.scheduler.cluster._
import java.util.{ArrayList => JArrayList, List => JList}
import java.util.Collections
-import spark.TaskState
+import org.apache.spark.TaskState
+import org.apache.spark.util.Utils
/**
* A SchedulerBackend for running fine-grained tasks on Mesos. Each Spark task is mapped to a
@@ -89,7 +90,6 @@ private[spark] class MesosSchedulerBackend(
val sparkHome = sc.getSparkHome().getOrElse(throw new SparkException(
"Spark home is not set; set it through the spark.home system " +
"property, the SPARK_HOME environment variable or the SparkContext constructor"))
- val execScript = new File(sparkHome, "spark-executor").getCanonicalPath
val environment = Environment.newBuilder()
sc.executorEnvs.foreach { case (key, value) =>
environment.addVariables(Environment.Variable.newBuilder()
@@ -97,15 +97,23 @@ private[spark] class MesosSchedulerBackend(
.setValue(value)
.build())
}
+ val command = CommandInfo.newBuilder()
+ .setEnvironment(environment)
+ val uri = System.getProperty("spark.executor.uri")
+ if (uri == null) {
+ command.setValue(new File(sparkHome, "spark-executor").getCanonicalPath)
+ } else {
+ // Grab everything to the first '.'. We'll use that and '*' to
+ // glob the directory "correctly".
+ val basename = uri.split('/').last.split('.').head
+ command.setValue("cd %s*; ./spark-executor".format(basename))
+ command.addUris(CommandInfo.URI.newBuilder().setValue(uri))
+ }
val memory = Resource.newBuilder()
.setName("mem")
.setType(Value.Type.SCALAR)
.setScalar(Value.Scalar.newBuilder().setValue(executorMemory).build())
.build()
- val command = CommandInfo.newBuilder()
- .setValue(execScript)
- .setEnvironment(environment)
- .build()
ExecutorInfo.newBuilder()
.setExecutorId(ExecutorID.newBuilder().setValue(execId).build())
.setCommand(command)
diff --git a/core/src/main/scala/spark/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala
index 04c5f44e6b..4de81617b1 100644
--- a/core/src/main/scala/spark/JavaSerializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala
@@ -15,13 +15,12 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.serializer
import java.io._
import java.nio.ByteBuffer
-import serializer.{Serializer, SerializerInstance, DeserializationStream, SerializationStream}
-import spark.util.ByteBufferInputStream
+import org.apache.spark.util.ByteBufferInputStream
private[spark] class JavaSerializationStream(out: OutputStream) extends SerializationStream {
val objOut = new ObjectOutputStream(out)
diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
new file mode 100644
index 0000000000..24ef204aa1
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.serializer
+
+import java.nio.ByteBuffer
+import java.io.{EOFException, InputStream, OutputStream}
+
+import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializer}
+import com.esotericsoftware.kryo.{KryoException, Kryo}
+import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput}
+import com.twitter.chill.ScalaKryoInstantiator
+
+import org.apache.spark.{SerializableWritable, Logging}
+import org.apache.spark.storage.{GetBlock, GotBlock, PutBlock, StorageLevel}
+
+import org.apache.spark.broadcast.HttpBroadcast
+
+/**
+ * A Spark serializer that uses the [[http://code.google.com/p/kryo/wiki/V1Documentation Kryo 1.x library]].
+ */
+class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging {
+ private val bufferSize = System.getProperty("spark.kryoserializer.buffer.mb", "2").toInt * 1024 * 1024
+
+ def newKryoOutput() = new KryoOutput(bufferSize)
+
+ def newKryoInput() = new KryoInput(bufferSize)
+
+ def newKryo(): Kryo = {
+ val instantiator = new ScalaKryoInstantiator
+ val kryo = instantiator.newKryo()
+ val classLoader = Thread.currentThread.getContextClassLoader
+
+ // Register some commonly used classes
+ val toRegister: Seq[AnyRef] = Seq(
+ ByteBuffer.allocate(1),
+ StorageLevel.MEMORY_ONLY,
+ PutBlock("1", ByteBuffer.allocate(1), StorageLevel.MEMORY_ONLY),
+ GotBlock("1", ByteBuffer.allocate(1)),
+ GetBlock("1")
+ )
+
+ for (obj <- toRegister) kryo.register(obj.getClass)
+
+ // Allow sending SerializableWritable
+ kryo.register(classOf[SerializableWritable[_]], new KryoJavaSerializer())
+ kryo.register(classOf[HttpBroadcast[_]], new KryoJavaSerializer())
+
+ // Allow the user to register their own classes by setting spark.kryo.registrator
+ try {
+ Option(System.getProperty("spark.kryo.registrator")).foreach { regCls =>
+ logDebug("Running user registrator: " + regCls)
+ val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator]
+ reg.registerClasses(kryo)
+ }
+ } catch {
+ case _: Exception => println("Failed to register spark.kryo.registrator")
+ }
+
+ kryo.setClassLoader(classLoader)
+
+ // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops
+ kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean)
+
+ kryo
+ }
+
+ def newInstance(): SerializerInstance = {
+ new KryoSerializerInstance(this)
+ }
+}
+
+private[spark]
+class KryoSerializationStream(kryo: Kryo, outStream: OutputStream) extends SerializationStream {
+ val output = new KryoOutput(outStream)
+
+ def writeObject[T](t: T): SerializationStream = {
+ kryo.writeClassAndObject(output, t)
+ this
+ }
+
+ def flush() { output.flush() }
+ def close() { output.close() }
+}
+
+private[spark]
+class KryoDeserializationStream(kryo: Kryo, inStream: InputStream) extends DeserializationStream {
+ val input = new KryoInput(inStream)
+
+ def readObject[T](): T = {
+ try {
+ kryo.readClassAndObject(input).asInstanceOf[T]
+ } catch {
+ // DeserializationStream uses the EOF exception to indicate stopping condition.
+ case _: KryoException => throw new EOFException
+ }
+ }
+
+ def close() {
+ // Kryo's Input automatically closes the input stream it is using.
+ input.close()
+ }
+}
+
+private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends SerializerInstance {
+ val kryo = ks.newKryo()
+ val output = ks.newKryoOutput()
+ val input = ks.newKryoInput()
+
+ def serialize[T](t: T): ByteBuffer = {
+ output.clear()
+ kryo.writeClassAndObject(output, t)
+ ByteBuffer.wrap(output.toBytes)
+ }
+
+ def deserialize[T](bytes: ByteBuffer): T = {
+ input.setBuffer(bytes.array)
+ kryo.readClassAndObject(input).asInstanceOf[T]
+ }
+
+ def deserialize[T](bytes: ByteBuffer, loader: ClassLoader): T = {
+ val oldClassLoader = kryo.getClassLoader
+ kryo.setClassLoader(loader)
+ input.setBuffer(bytes.array)
+ val obj = kryo.readClassAndObject(input).asInstanceOf[T]
+ kryo.setClassLoader(oldClassLoader)
+ obj
+ }
+
+ def serializeStream(s: OutputStream): SerializationStream = {
+ new KryoSerializationStream(kryo, s)
+ }
+
+ def deserializeStream(s: InputStream): DeserializationStream = {
+ new KryoDeserializationStream(kryo, s)
+ }
+}
+
+/**
+ * Interface implemented by clients to register their classes with Kryo when using Kryo
+ * serialization.
+ */
+trait KryoRegistrator {
+ def registerClasses(kryo: Kryo)
+}
diff --git a/core/src/main/scala/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
index dc94d42bb6..160cca4d6c 100644
--- a/core/src/main/scala/spark/serializer/Serializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
@@ -15,19 +15,19 @@
* limitations under the License.
*/
-package spark.serializer
+package org.apache.spark.serializer
import java.io.{EOFException, InputStream, OutputStream}
import java.nio.ByteBuffer
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream
-import spark.util.ByteBufferInputStream
+import org.apache.spark.util.{NextIterator, ByteBufferInputStream}
/**
* A serializer. Because some serialization libraries are not thread safe, this class is used to
- * create [[spark.serializer.SerializerInstance]] objects that do the actual serialization and are
+ * create [[org.apache.spark.serializer.SerializerInstance]] objects that do the actual serialization and are
* guaranteed to only be called from one thread at a time.
*/
trait Serializer {
@@ -95,7 +95,7 @@ trait DeserializationStream {
* Read the elements of this stream through an iterator. This can only be called once, as
* reading each element will consume data from the input source.
*/
- def asIterator: Iterator[Any] = new spark.util.NextIterator[Any] {
+ def asIterator: Iterator[Any] = new NextIterator[Any] {
override protected def getNext() = {
try {
readObject[Any]()
diff --git a/core/src/main/scala/spark/serializer/SerializerManager.scala b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
index b7b24705a2..2955986fec 100644
--- a/core/src/main/scala/spark/serializer/SerializerManager.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.serializer
+package org.apache.spark.serializer
import java.util.concurrent.ConcurrentHashMap
diff --git a/core/src/main/scala/spark/storage/BlockException.scala b/core/src/main/scala/org/apache/spark/storage/BlockException.scala
index 8ebfaf3cbf..290dbce4f5 100644
--- a/core/src/main/scala/spark/storage/BlockException.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockException.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
private[spark]
case class BlockException(blockId: String, message: String) extends Exception(message)
diff --git a/core/src/main/scala/spark/storage/BlockFetchTracker.scala b/core/src/main/scala/org/apache/spark/storage/BlockFetchTracker.scala
index 265e554ad8..2e0b0e6eda 100644
--- a/core/src/main/scala/spark/storage/BlockFetchTracker.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockFetchTracker.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
private[spark] trait BlockFetchTracker {
def totalBlocks : Int
diff --git a/core/src/main/scala/spark/storage/BlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala
index 1965c5bc19..3aeda3879d 100644
--- a/core/src/main/scala/spark/storage/BlockFetcherIterator.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.nio.ByteBuffer
import java.util.concurrent.LinkedBlockingQueue
@@ -26,13 +26,13 @@ import scala.collection.mutable.Queue
import io.netty.buffer.ByteBuf
-import spark.Logging
-import spark.Utils
-import spark.SparkException
-import spark.network.BufferMessage
-import spark.network.ConnectionManagerId
-import spark.network.netty.ShuffleCopier
-import spark.serializer.Serializer
+import org.apache.spark.Logging
+import org.apache.spark.SparkException
+import org.apache.spark.network.BufferMessage
+import org.apache.spark.network.ConnectionManagerId
+import org.apache.spark.network.netty.ShuffleCopier
+import org.apache.spark.serializer.Serializer
+import org.apache.spark.util.Utils
/**
@@ -111,7 +111,7 @@ object BlockFetcherIterator {
protected def sendRequest(req: FetchRequest) {
logDebug("Sending request for %d blocks (%s) from %s".format(
- req.blocks.size, Utils.memoryBytesToString(req.size), req.address.hostPort))
+ req.blocks.size, Utils.bytesToString(req.size), req.address.hostPort))
val cmId = new ConnectionManagerId(req.address.host, req.address.port)
val blockMessageArray = new BlockMessageArray(req.blocks.map {
case (blockId, size) => BlockMessage.fromGetBlock(GetBlock(blockId))
@@ -132,9 +132,10 @@ object BlockFetcherIterator {
"Unexpected message " + blockMessage.getType + " received from " + cmId)
}
val blockId = blockMessage.getId
+ val networkSize = blockMessage.getData.limit()
results.put(new FetchResult(blockId, sizeMap(blockId),
() => dataDeserialize(blockId, blockMessage.getData, serializer)))
- _remoteBytesRead += req.size
+ _remoteBytesRead += networkSize
logDebug("Got remote block " + blockId + " after " + Utils.getUsedTimeMs(startTime))
}
}
@@ -309,7 +310,7 @@ object BlockFetcherIterator {
}
logDebug("Sending request for %d blocks (%s) from %s".format(
- req.blocks.size, Utils.memoryBytesToString(req.size), req.address.host))
+ req.blocks.size, Utils.bytesToString(req.size), req.address.host))
val cmId = new ConnectionManagerId(req.address.host, req.address.nettyPort)
val cpier = new ShuffleCopier
cpier.getBlocks(cmId, req.blocks, putResult)
diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
index e4ffa57ad2..60fdc5f2ee 100644
--- a/core/src/main/scala/spark/storage/BlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.io.{InputStream, OutputStream}
import java.nio.{ByteBuffer, MappedByteBuffer}
@@ -27,14 +27,13 @@ import akka.dispatch.{Await, Future}
import akka.util.Duration
import akka.util.duration._
-import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream}
-
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream
-import spark.{Logging, SparkEnv, SparkException, Utils}
-import spark.network._
-import spark.serializer.Serializer
-import spark.util.{ByteBufferInputStream, IdGenerator, MetadataCleaner, TimeStampedHashMap}
+import org.apache.spark.{Logging, SparkEnv, SparkException}
+import org.apache.spark.io.CompressionCodec
+import org.apache.spark.network._
+import org.apache.spark.serializer.Serializer
+import org.apache.spark.util._
import sun.nio.ch.DirectBuffer
@@ -158,6 +157,13 @@ private[spark] class BlockManager(
val metadataCleaner = new MetadataCleaner("BlockManager", this.dropOldBlocks)
initialize()
+ // The compression codec to use. Note that the "lazy" val is necessary because we want to delay
+ // the initialization of the compression codec until it is first used. The reason is that a Spark
+ // program could be using a user-defined codec in a third party jar, which is loaded in
+ // Executor.updateDependencies. When the BlockManager is initialized, user level jars hasn't been
+ // loaded yet.
+ private lazy val compressionCodec: CompressionCodec = CompressionCodec.createCodec()
+
/**
* Construct a BlockManager with a memory limit set based on system properties.
*/
@@ -919,18 +925,14 @@ private[spark] class BlockManager(
* Wrap an output stream for compression if block compression is enabled for its block type
*/
def wrapForCompression(blockId: String, s: OutputStream): OutputStream = {
- if (shouldCompress(blockId)) {
- (new LZFOutputStream(s)).setFinishBlockOnFlush(true)
- } else {
- s
- }
+ if (shouldCompress(blockId)) compressionCodec.compressedOutputStream(s) else s
}
/**
* Wrap an input stream for compression if block compression is enabled for its block type
*/
def wrapForCompression(blockId: String, s: InputStream): InputStream = {
- if (shouldCompress(blockId)) new LZFInputStream(s) else s
+ if (shouldCompress(blockId)) compressionCodec.compressedInputStream(s) else s
}
def dataSerialize(
@@ -1002,43 +1004,43 @@ private[spark] object BlockManager extends Logging {
}
}
- def blockIdsToExecutorLocations(blockIds: Array[String], env: SparkEnv, blockManagerMaster: BlockManagerMaster = null): HashMap[String, List[String]] = {
+ def blockIdsToBlockManagers(
+ blockIds: Array[String],
+ env: SparkEnv,
+ blockManagerMaster: BlockManagerMaster = null)
+ : Map[String, Seq[BlockManagerId]] =
+ {
// env == null and blockManagerMaster != null is used in tests
assert (env != null || blockManagerMaster != null)
- val locationBlockIds: Seq[Seq[BlockManagerId]] =
- if (env != null) {
- env.blockManager.getLocationBlockIds(blockIds)
- } else {
- blockManagerMaster.getLocations(blockIds)
- }
+ val blockLocations: Seq[Seq[BlockManagerId]] = if (env != null) {
+ env.blockManager.getLocationBlockIds(blockIds)
+ } else {
+ blockManagerMaster.getLocations(blockIds)
+ }
- // Convert from block master locations to executor locations (we need that for task scheduling)
- val executorLocations = new HashMap[String, List[String]]()
+ val blockManagers = new HashMap[String, Seq[BlockManagerId]]
for (i <- 0 until blockIds.length) {
- val blockId = blockIds(i)
- val blockLocations = locationBlockIds(i)
-
- val executors = new HashSet[String]()
-
- if (env != null) {
- for (bkLocation <- blockLocations) {
- val executorHostPort = env.resolveExecutorIdToHostPort(bkLocation.executorId, bkLocation.host)
- executors += executorHostPort
- // logInfo("bkLocation = " + bkLocation + ", executorHostPort = " + executorHostPort)
- }
- } else {
- // Typically while testing, etc - revert to simply using host.
- for (bkLocation <- blockLocations) {
- executors += bkLocation.host
- // logInfo("bkLocation = " + bkLocation + ", executorHostPort = " + executorHostPort)
- }
- }
-
- executorLocations.put(blockId, executors.toSeq.toList)
+ blockManagers(blockIds(i)) = blockLocations(i)
}
+ blockManagers.toMap
+ }
- executorLocations
+ def blockIdsToExecutorIds(
+ blockIds: Array[String],
+ env: SparkEnv,
+ blockManagerMaster: BlockManagerMaster = null)
+ : Map[String, Seq[String]] =
+ {
+ blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.executorId))
}
+ def blockIdsToHosts(
+ blockIds: Array[String],
+ env: SparkEnv,
+ blockManagerMaster: BlockManagerMaster = null)
+ : Map[String, Seq[String]] =
+ {
+ blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.host))
+ }
}
diff --git a/core/src/main/scala/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
index b36a6176c0..74207f59af 100644
--- a/core/src/main/scala/spark/storage/BlockManagerId.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput}
import java.util.concurrent.ConcurrentHashMap
-import spark.Utils
+import org.apache.spark.util.Utils
/**
* This class represent an unique identifier for a BlockManager.
@@ -92,13 +92,13 @@ private[spark] class BlockManagerId private (
private[spark] object BlockManagerId {
/**
- * Returns a [[spark.storage.BlockManagerId]] for the given configuraiton.
+ * Returns a [[org.apache.spark.storage.BlockManagerId]] for the given configuraiton.
*
* @param execId ID of the executor.
* @param host Host name of the block manager.
* @param port Port of the block manager.
* @param nettyPort Optional port for the Netty-based shuffle sender.
- * @return A new [[spark.storage.BlockManagerId]].
+ * @return A new [[org.apache.spark.storage.BlockManagerId]].
*/
def apply(execId: String, host: String, port: Int, nettyPort: Int) =
getCachedBlockManagerId(new BlockManagerId(execId, host, port, nettyPort))
diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala
index 3186f7c85b..cf463d6ffc 100644
--- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala
@@ -15,14 +15,15 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import akka.actor.ActorRef
import akka.dispatch.{Await, Future}
import akka.pattern.ask
import akka.util.Duration
-import spark.{Logging, SparkException}
+import org.apache.spark.{Logging, SparkException}
+import org.apache.spark.storage.BlockManagerMessages._
private[spark] class BlockManagerMaster(var driverActor: ActorRef) extends Logging {
diff --git a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala
index 244000d952..c7b23ab094 100644
--- a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.util.{HashMap => JHashMap}
@@ -28,7 +28,10 @@ import akka.pattern.ask
import akka.util.Duration
import akka.util.duration._
-import spark.{Logging, Utils, SparkException}
+import org.apache.spark.{Logging, SparkException}
+import org.apache.spark.storage.BlockManagerMessages._
+import org.apache.spark.util.Utils
+
/**
* BlockManagerMasterActor is an actor on the master node to track statuses of
@@ -330,7 +333,7 @@ object BlockManagerMasterActor {
private val _blocks = new JHashMap[String, BlockStatus]
logInfo("Registering block manager %s with %s RAM".format(
- blockManagerId.hostPort, Utils.memoryBytesToString(maxMem)))
+ blockManagerId.hostPort, Utils.bytesToString(maxMem)))
def updateLastSeenMs() {
_lastSeenMs = System.currentTimeMillis()
@@ -356,12 +359,12 @@ object BlockManagerMasterActor {
if (storageLevel.useMemory) {
_remainingMem -= memSize
logInfo("Added %s in memory on %s (size: %s, free: %s)".format(
- blockId, blockManagerId.hostPort, Utils.memoryBytesToString(memSize),
- Utils.memoryBytesToString(_remainingMem)))
+ blockId, blockManagerId.hostPort, Utils.bytesToString(memSize),
+ Utils.bytesToString(_remainingMem)))
}
if (storageLevel.useDisk) {
logInfo("Added %s on disk on %s (size: %s)".format(
- blockId, blockManagerId.hostPort, Utils.memoryBytesToString(diskSize)))
+ blockId, blockManagerId.hostPort, Utils.bytesToString(diskSize)))
}
} else if (_blocks.containsKey(blockId)) {
// If isValid is not true, drop the block.
@@ -370,12 +373,12 @@ object BlockManagerMasterActor {
if (blockStatus.storageLevel.useMemory) {
_remainingMem += blockStatus.memSize
logInfo("Removed %s on %s in memory (size: %s, free: %s)".format(
- blockId, blockManagerId.hostPort, Utils.memoryBytesToString(memSize),
- Utils.memoryBytesToString(_remainingMem)))
+ blockId, blockManagerId.hostPort, Utils.bytesToString(blockStatus.memSize),
+ Utils.bytesToString(_remainingMem)))
}
if (blockStatus.storageLevel.useDisk) {
logInfo("Removed %s on %s on disk (size: %s)".format(
- blockId, blockManagerId.hostPort, Utils.memoryBytesToString(diskSize)))
+ blockId, blockManagerId.hostPort, Utils.bytesToString(blockStatus.diskSize)))
}
}
}
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala
new file mode 100644
index 0000000000..24333a179c
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.storage
+
+import java.io.{Externalizable, ObjectInput, ObjectOutput}
+
+import akka.actor.ActorRef
+
+
+private[storage] object BlockManagerMessages {
+ //////////////////////////////////////////////////////////////////////////////////
+ // Messages from the master to slaves.
+ //////////////////////////////////////////////////////////////////////////////////
+ sealed trait ToBlockManagerSlave
+
+ // Remove a block from the slaves that have it. This can only be used to remove
+ // blocks that the master knows about.
+ case class RemoveBlock(blockId: String) extends ToBlockManagerSlave
+
+ // Remove all blocks belonging to a specific RDD.
+ case class RemoveRdd(rddId: Int) extends ToBlockManagerSlave
+
+
+ //////////////////////////////////////////////////////////////////////////////////
+ // Messages from slaves to the master.
+ //////////////////////////////////////////////////////////////////////////////////
+ sealed trait ToBlockManagerMaster
+
+ case class RegisterBlockManager(
+ blockManagerId: BlockManagerId,
+ maxMemSize: Long,
+ sender: ActorRef)
+ extends ToBlockManagerMaster
+
+ case class HeartBeat(blockManagerId: BlockManagerId) extends ToBlockManagerMaster
+
+ class UpdateBlockInfo(
+ var blockManagerId: BlockManagerId,
+ var blockId: String,
+ var storageLevel: StorageLevel,
+ var memSize: Long,
+ var diskSize: Long)
+ extends ToBlockManagerMaster
+ with Externalizable {
+
+ def this() = this(null, null, null, 0, 0) // For deserialization only
+
+ override def writeExternal(out: ObjectOutput) {
+ blockManagerId.writeExternal(out)
+ out.writeUTF(blockId)
+ storageLevel.writeExternal(out)
+ out.writeLong(memSize)
+ out.writeLong(diskSize)
+ }
+
+ override def readExternal(in: ObjectInput) {
+ blockManagerId = BlockManagerId(in)
+ blockId = in.readUTF()
+ storageLevel = StorageLevel(in)
+ memSize = in.readLong()
+ diskSize = in.readLong()
+ }
+ }
+
+ object UpdateBlockInfo {
+ def apply(blockManagerId: BlockManagerId,
+ blockId: String,
+ storageLevel: StorageLevel,
+ memSize: Long,
+ diskSize: Long): UpdateBlockInfo = {
+ new UpdateBlockInfo(blockManagerId, blockId, storageLevel, memSize, diskSize)
+ }
+
+ // For pattern-matching
+ def unapply(h: UpdateBlockInfo): Option[(BlockManagerId, String, StorageLevel, Long, Long)] = {
+ Some((h.blockManagerId, h.blockId, h.storageLevel, h.memSize, h.diskSize))
+ }
+ }
+
+ case class GetLocations(blockId: String) extends ToBlockManagerMaster
+
+ case class GetLocationsMultipleBlockIds(blockIds: Array[String]) extends ToBlockManagerMaster
+
+ case class GetPeers(blockManagerId: BlockManagerId, size: Int) extends ToBlockManagerMaster
+
+ case class RemoveExecutor(execId: String) extends ToBlockManagerMaster
+
+ case object StopBlockManagerMaster extends ToBlockManagerMaster
+
+ case object GetMemoryStatus extends ToBlockManagerMaster
+
+ case object ExpireDeadHosts extends ToBlockManagerMaster
+
+ case object GetStorageStatus extends ToBlockManagerMaster
+}
diff --git a/core/src/main/scala/spark/storage/BlockManagerSlaveActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveActor.scala
index 45cffad810..951503019f 100644
--- a/core/src/main/scala/spark/storage/BlockManagerSlaveActor.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveActor.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import akka.actor.Actor
-import spark.{Logging, SparkException, Utils}
+import org.apache.spark.storage.BlockManagerMessages._
/**
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala
new file mode 100644
index 0000000000..24190cdd67
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala
@@ -0,0 +1,48 @@
+package org.apache.spark.storage
+
+import com.codahale.metrics.{Gauge,MetricRegistry}
+
+import org.apache.spark.metrics.source.Source
+
+
+private[spark] class BlockManagerSource(val blockManager: BlockManager) extends Source {
+ val metricRegistry = new MetricRegistry()
+ val sourceName = "BlockManager"
+
+ metricRegistry.register(MetricRegistry.name("memory", "maxMem", "MBytes"), new Gauge[Long] {
+ override def getValue: Long = {
+ val storageStatusList = blockManager.master.getStorageStatus
+ val maxMem = storageStatusList.map(_.maxMem).reduce(_ + _)
+ maxMem / 1024 / 1024
+ }
+ })
+
+ metricRegistry.register(MetricRegistry.name("memory", "remainingMem", "MBytes"), new Gauge[Long] {
+ override def getValue: Long = {
+ val storageStatusList = blockManager.master.getStorageStatus
+ val remainingMem = storageStatusList.map(_.memRemaining).reduce(_ + _)
+ remainingMem / 1024 / 1024
+ }
+ })
+
+ metricRegistry.register(MetricRegistry.name("memory", "memUsed", "MBytes"), new Gauge[Long] {
+ override def getValue: Long = {
+ val storageStatusList = blockManager.master.getStorageStatus
+ val maxMem = storageStatusList.map(_.maxMem).reduce(_ + _)
+ val remainingMem = storageStatusList.map(_.memRemaining).reduce(_ + _)
+ (maxMem - remainingMem) / 1024 / 1024
+ }
+ })
+
+ metricRegistry.register(MetricRegistry.name("disk", "diskSpaceUsed", "MBytes"), new Gauge[Long] {
+ override def getValue: Long = {
+ val storageStatusList = blockManager.master.getStorageStatus
+ val diskSpaceUsed = storageStatusList
+ .flatMap(_.blocks.values.map(_.diskSize))
+ .reduceOption(_ + _)
+ .getOrElse(0L)
+
+ diskSpaceUsed / 1024 / 1024
+ }
+ })
+}
diff --git a/core/src/main/scala/spark/storage/BlockManagerWorker.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala
index 39064bce92..678c38203c 100644
--- a/core/src/main/scala/spark/storage/BlockManagerWorker.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala
@@ -15,12 +15,13 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.nio.ByteBuffer
-import spark.{Logging, Utils}
-import spark.network._
+import org.apache.spark.{Logging}
+import org.apache.spark.network._
+import org.apache.spark.util.Utils
/**
* A network interface for BlockManager. Each slave should have one
diff --git a/core/src/main/scala/spark/storage/BlockMessage.scala b/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala
index ab72dbb62b..d8fa6a91d1 100644
--- a/core/src/main/scala/spark/storage/BlockMessage.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala
@@ -15,15 +15,14 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.nio.ByteBuffer
import scala.collection.mutable.StringBuilder
import scala.collection.mutable.ArrayBuffer
-import spark._
-import spark.network._
+import org.apache.spark.network._
private[spark] case class GetBlock(id: String)
private[spark] case class GotBlock(id: String, data: ByteBuffer)
diff --git a/core/src/main/scala/spark/storage/BlockMessageArray.scala b/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala
index b0229d6124..0aaf846b5b 100644
--- a/core/src/main/scala/spark/storage/BlockMessageArray.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala
@@ -15,15 +15,14 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.nio.ByteBuffer
-import scala.collection.mutable.StringBuilder
import scala.collection.mutable.ArrayBuffer
-import spark._
-import spark.network._
+import org.apache.spark._
+import org.apache.spark.network._
private[spark]
class BlockMessageArray(var blockMessages: Seq[BlockMessage]) extends Seq[BlockMessage] with Logging {
@@ -113,7 +112,7 @@ private[spark] object BlockMessageArray {
def main(args: Array[String]) {
val blockMessages =
- (0 until 10).map(i => {
+ (0 until 10).map { i =>
if (i % 2 == 0) {
val buffer = ByteBuffer.allocate(100)
buffer.clear
@@ -121,7 +120,7 @@ private[spark] object BlockMessageArray {
} else {
BlockMessage.fromGetBlock(GetBlock(i.toString))
}
- })
+ }
val blockMessageArray = new BlockMessageArray(blockMessages)
println("Block message array created")
diff --git a/core/src/main/scala/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
index 01ed6e8c1f..39f103297f 100644
--- a/core/src/main/scala/spark/storage/BlockObjectWriter.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
@@ -15,9 +15,7 @@
* limitations under the License.
*/
-package spark.storage
-
-import java.nio.ByteBuffer
+package org.apache.spark.storage
/**
diff --git a/core/src/main/scala/spark/storage/BlockStore.scala b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala
index c8db0022b0..fa834371f4 100644
--- a/core/src/main/scala/spark/storage/BlockStore.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.nio.ByteBuffer
import scala.collection.mutable.ArrayBuffer
-import spark.Logging
+import org.apache.spark.Logging
/**
* Abstract class to store blocks
diff --git a/core/src/main/scala/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala
index 3495d653bd..fc25ef0fae 100644
--- a/core/src/main/scala/spark/storage/DiskStore.scala
+++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.io.{File, FileOutputStream, OutputStream, RandomAccessFile}
import java.nio.ByteBuffer
@@ -28,12 +28,12 @@ import scala.collection.mutable.ArrayBuffer
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream
-import spark.Utils
-import spark.executor.ExecutorExitCode
-import spark.serializer.{Serializer, SerializationStream}
-import spark.Logging
-import spark.network.netty.ShuffleSender
-import spark.network.netty.PathResolver
+import org.apache.spark.executor.ExecutorExitCode
+import org.apache.spark.serializer.{Serializer, SerializationStream}
+import org.apache.spark.Logging
+import org.apache.spark.network.netty.ShuffleSender
+import org.apache.spark.network.netty.PathResolver
+import org.apache.spark.util.Utils
/**
@@ -66,7 +66,6 @@ private class DiskStore(blockManager: BlockManager, rootDirs: String)
override def close() {
if (initialized) {
objOut.close()
- bs.close()
channel = null
bs = null
objOut = null
@@ -148,7 +147,7 @@ private class DiskStore(blockManager: BlockManager, rootDirs: String)
channel.close()
val finishTime = System.currentTimeMillis
logDebug("Block %s stored as %s file on disk in %d ms".format(
- blockId, Utils.memoryBytesToString(bytes.limit), (finishTime - startTime)))
+ blockId, Utils.bytesToString(bytes.limit), (finishTime - startTime)))
}
private def getFileBytes(file: File): ByteBuffer = {
@@ -182,7 +181,7 @@ private class DiskStore(blockManager: BlockManager, rootDirs: String)
val timeTaken = System.currentTimeMillis - startTime
logDebug("Block %s stored as %s file on disk in %d ms".format(
- blockId, Utils.memoryBytesToString(length), timeTaken))
+ blockId, Utils.bytesToString(length), timeTaken))
if (returnValues) {
// Return a byte buffer for the contents of the file
diff --git a/core/src/main/scala/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala
index b5a86b85a7..3b3b2342fa 100644
--- a/core/src/main/scala/spark/storage/MemoryStore.scala
+++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.util.LinkedHashMap
import java.util.concurrent.ArrayBlockingQueue
-import spark.{SizeEstimator, Utils}
import java.nio.ByteBuffer
import collection.mutable.ArrayBuffer
+import org.apache.spark.util.{SizeEstimator, Utils}
/**
* Stores blocks in memory, either as ArrayBuffers of deserialized Java objects or as
@@ -38,7 +38,7 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long)
// blocks from the memory store.
private val putLock = new Object()
- logInfo("MemoryStore started with capacity %s.".format(Utils.memoryBytesToString(maxMemory)))
+ logInfo("MemoryStore started with capacity %s.".format(Utils.bytesToString(maxMemory)))
def freeMemory: Long = maxMemory - currentMemory
@@ -164,10 +164,10 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long)
currentMemory += size
if (deserialized) {
logInfo("Block %s stored as values to memory (estimated size %s, free %s)".format(
- blockId, Utils.memoryBytesToString(size), Utils.memoryBytesToString(freeMemory)))
+ blockId, Utils.bytesToString(size), Utils.bytesToString(freeMemory)))
} else {
logInfo("Block %s stored as bytes to memory (size %s, free %s)".format(
- blockId, Utils.memoryBytesToString(size), Utils.memoryBytesToString(freeMemory)))
+ blockId, Utils.bytesToString(size), Utils.bytesToString(freeMemory)))
}
true
} else {
diff --git a/core/src/main/scala/spark/storage/PutResult.scala b/core/src/main/scala/org/apache/spark/storage/PutResult.scala
index 3a0974fe15..2eba2f06b5 100644
--- a/core/src/main/scala/spark/storage/PutResult.scala
+++ b/core/src/main/scala/org/apache/spark/storage/PutResult.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.nio.ByteBuffer
diff --git a/core/src/main/scala/spark/storage/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
index 8a7a6f9ed3..9da11efb57 100644
--- a/core/src/main/scala/spark/storage/ShuffleBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
-import spark.serializer.Serializer
+import org.apache.spark.serializer.Serializer
private[spark]
diff --git a/core/src/main/scala/spark/storage/StorageLevel.scala b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala
index f52650988c..755f1a760e 100644
--- a/core/src/main/scala/spark/storage/StorageLevel.scala
+++ b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput}
@@ -23,7 +23,7 @@ import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput}
* Flags for controlling the storage of an RDD. Each StorageLevel records whether to use memory,
* whether to drop the RDD to disk if it falls out of memory, whether to keep the data in memory
* in a serialized format, and whether to replicate the RDD partitions on multiple nodes.
- * The [[spark.storage.StorageLevel$]] singleton object contains some static constants for
+ * The [[org.apache.spark.storage.StorageLevel$]] singleton object contains some static constants for
* commonly useful storage levels. To create your own storage level object, use the factor method
* of the singleton object (`StorageLevel(...)`).
*/
diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala
index 2aeed4ea3c..2bb7715696 100644
--- a/core/src/main/scala/spark/storage/StorageUtils.scala
+++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala
@@ -15,10 +15,11 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
-import spark.{Utils, SparkContext}
+import org.apache.spark.{SparkContext}
import BlockManagerMasterActor.BlockStatus
+import org.apache.spark.util.Utils
private[spark]
case class StorageStatus(blockManagerId: BlockManagerId, maxMem: Long,
@@ -42,9 +43,9 @@ case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel,
numCachedPartitions: Int, numPartitions: Int, memSize: Long, diskSize: Long)
extends Ordered[RDDInfo] {
override def toString = {
- import Utils.memoryBytesToString
+ import Utils.bytesToString
"RDD \"%s\" (%d) Storage: %s; CachedPartitions: %d; TotalPartitions: %d; MemorySize: %s; DiskSize: %s".format(name, id,
- storageLevel.toString, numCachedPartitions, numPartitions, memoryBytesToString(memSize), memoryBytesToString(diskSize))
+ storageLevel.toString, numCachedPartitions, numPartitions, bytesToString(memSize), bytesToString(diskSize))
}
override def compare(that: RDDInfo) = {
diff --git a/core/src/main/scala/spark/storage/ThreadingTest.scala b/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala
index b3ab1ff4b4..f2ae8dd97d 100644
--- a/core/src/main/scala/spark/storage/ThreadingTest.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import akka.actor._
-import spark.KryoSerializer
import java.util.concurrent.ArrayBlockingQueue
import util.Random
+import org.apache.spark.serializer.KryoSerializer
/**
* This class tests the BlockManager and MemoryStore for thread safety and
diff --git a/core/src/main/scala/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
index ca6088ad93..7211dbc7c6 100644
--- a/core/src/main/scala/spark/ui/JettyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
@@ -15,22 +15,22 @@
* limitations under the License.
*/
-package spark.ui
-
-import annotation.tailrec
+package org.apache.spark.ui
import javax.servlet.http.{HttpServletResponse, HttpServletRequest}
+import scala.annotation.tailrec
+import scala.util.{Try, Success, Failure}
+import scala.xml.Node
+
import net.liftweb.json.{JValue, pretty, render}
import org.eclipse.jetty.server.{Server, Request, Handler}
import org.eclipse.jetty.server.handler.{ResourceHandler, HandlerList, ContextHandler, AbstractHandler}
import org.eclipse.jetty.util.thread.QueuedThreadPool
-import scala.util.{Try, Success, Failure}
-import scala.xml.Node
+import org.apache.spark.Logging
-import spark.Logging
/** Utilities for launching a web server using Jetty's HTTP Server class */
private[spark] object JettyUtils extends Logging {
@@ -48,7 +48,7 @@ private[spark] object JettyUtils extends Logging {
implicit def textResponderToHandler(responder: Responder[String]): Handler =
createHandler(responder, "text/plain")
- private def createHandler[T <% AnyRef](responder: Responder[T], contentType: String,
+ def createHandler[T <% AnyRef](responder: Responder[T], contentType: String,
extractFn: T => String = (in: Any) => in.toString): Handler = {
new AbstractHandler {
def handle(target: String,
@@ -117,7 +117,6 @@ private[spark] object JettyUtils extends Logging {
Try { server.start() } match {
case s: Success[_] =>
- sys.addShutdownHook(server.stop()) // Be kind, un-bind
(server, server.getConnectors.head.getLocalPort)
case f: Failure[_] =>
server.stop()
diff --git a/core/src/main/scala/spark/ui/Page.scala b/core/src/main/scala/org/apache/spark/ui/Page.scala
index a31e750d06..b2a069a375 100644
--- a/core/src/main/scala/spark/ui/Page.scala
+++ b/core/src/main/scala/org/apache/spark/ui/Page.scala
@@ -15,6 +15,8 @@
* limitations under the License.
*/
-package spark.ui
+package org.apache.spark.ui
-private[spark] object Page extends Enumeration { val Storage, Jobs, Environment = Value }
+private[spark] object Page extends Enumeration {
+ val Stages, Storage, Environment, Executors = Value
+}
diff --git a/core/src/main/scala/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
index 9396f22063..ad456ea565 100644
--- a/core/src/main/scala/spark/ui/SparkUI.scala
+++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
@@ -15,21 +15,23 @@
* limitations under the License.
*/
-package spark.ui
+package org.apache.spark.ui
import javax.servlet.http.HttpServletRequest
import org.eclipse.jetty.server.{Handler, Server}
-import spark.{Logging, SparkContext, Utils}
-import spark.ui.env.EnvironmentUI
-import spark.ui.storage.BlockManagerUI
-import spark.ui.jobs.JobProgressUI
-import spark.ui.JettyUtils._
+import org.apache.spark.{Logging, SparkContext, SparkEnv}
+import org.apache.spark.ui.env.EnvironmentUI
+import org.apache.spark.ui.exec.ExecutorsUI
+import org.apache.spark.ui.storage.BlockManagerUI
+import org.apache.spark.ui.jobs.JobProgressUI
+import org.apache.spark.ui.JettyUtils._
+import org.apache.spark.util.Utils
/** Top level user interface for Spark */
private[spark] class SparkUI(sc: SparkContext) extends Logging {
- val host = Utils.localHostName()
+ val host = Option(System.getenv("SPARK_PUBLIC_DNS")).getOrElse(Utils.localHostName())
val port = Option(System.getProperty("spark.ui.port")).getOrElse(SparkUI.DEFAULT_PORT).toInt
var boundPort: Option[Int] = None
var server: Option[Server] = None
@@ -41,7 +43,13 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging {
val storage = new BlockManagerUI(sc)
val jobs = new JobProgressUI(sc)
val env = new EnvironmentUI(sc)
- val allHandlers = storage.getHandlers ++ jobs.getHandlers ++ env.getHandlers ++ handlers
+ val exec = new ExecutorsUI(sc)
+
+ // Add MetricsServlet handlers by default
+ val metricsServletHandlers = SparkEnv.get.metricsSystem.getServletHandlers
+
+ val allHandlers = storage.getHandlers ++ jobs.getHandlers ++ env.getHandlers ++
+ exec.getHandlers ++ metricsServletHandlers ++ handlers
/** Bind the HTTP server which backs this web interface */
def bind() {
@@ -51,9 +59,9 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging {
server = Some(srv)
boundPort = Some(usedPort)
} catch {
- case e: Exception =>
- logError("Failed to create Spark JettyUtils", e)
- System.exit(1)
+ case e: Exception =>
+ logError("Failed to create Spark JettyUtils", e)
+ System.exit(1)
}
}
@@ -64,6 +72,7 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging {
// This server must register all handlers, including JobProgressUI, before binding
// JobProgressUI registers a listener with SparkContext, which requires sc to initialize
jobs.start()
+ exec.start()
}
def stop() {
@@ -74,6 +83,6 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging {
}
private[spark] object SparkUI {
- val DEFAULT_PORT = "33000"
- val STATIC_RESOURCE_DIR = "spark/ui/static"
+ val DEFAULT_PORT = "3030"
+ val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static"
}
diff --git a/core/src/main/scala/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index b1d11954dd..ce1acf564c 100644
--- a/core/src/main/scala/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.ui
+package org.apache.spark.ui
import scala.xml.Node
-import spark.SparkContext
+import org.apache.spark.SparkContext
/** Utility functions for generating XML pages with spark content. */
private[spark] object UIUtils {
@@ -28,64 +28,53 @@ private[spark] object UIUtils {
/** Returns a spark page with correctly formatted headers */
def headerSparkPage(content: => Seq[Node], sc: SparkContext, title: String, page: Page.Value)
: Seq[Node] = {
+ val jobs = page match {
+ case Stages => <li class="active"><a href="/stages">Stages</a></li>
+ case _ => <li><a href="/stages">Stages</a></li>
+ }
val storage = page match {
case Storage => <li class="active"><a href="/storage">Storage</a></li>
case _ => <li><a href="/storage">Storage</a></li>
}
- val jobs = page match {
- case Jobs => <li class="active"><a href="/stages">Jobs</a></li>
- case _ => <li><a href="/stages">Jobs</a></li>
- }
val environment = page match {
case Environment => <li class="active"><a href="/environment">Environment</a></li>
case _ => <li><a href="/environment">Environment</a></li>
}
+ val executors = page match {
+ case Executors => <li class="active"><a href="/executors">Executors</a></li>
+ case _ => <li><a href="/executors">Executors</a></li>
+ }
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
<link rel="stylesheet" href="/static/bootstrap.min.css" type="text/css" />
<link rel="stylesheet" href="/static/webui.css" type="text/css" />
- <link rel="stylesheet" href="/static/bootstrap-responsive.min.css" type="text/css" />
<script src="/static/sorttable.js"></script>
<title>{sc.appName} - {title}</title>
- <style type="text/css">
- table.sortable thead {{ cursor: pointer; }}
- </style>
</head>
<body>
- <div class="container">
-
- <div class="row">
- <div class="span12">
- <div class="navbar">
- <div class="navbar-inner">
- <div class="container">
- <div class="brand"><img src="/static/spark-logo-77x50px-hd.png" /></div>
- <ul class="nav">
- {storage}
- {jobs}
- {environment}
- </ul>
- <ul id="infolist">
- <li>Application: <strong>{sc.appName}</strong></li>
- <li>Master: <strong>{sc.master}</strong></li>
- <li>Executors: <strong>{sc.getExecutorStorageStatus.size}</strong></li>
- </ul>
- </div>
- </div>
- </div>
- </div>
+ <div class="navbar navbar-static-top">
+ <div class="navbar-inner">
+ <a href="/" class="brand"><img src="/static/spark-logo-77x50px-hd.png" /></a>
+ <ul class="nav">
+ {jobs}
+ {storage}
+ {environment}
+ {executors}
+ </ul>
+ <p class="navbar-text pull-right"><strong>{sc.appName}</strong> application UI</p>
</div>
+ </div>
- <div class="row" style="padding-top: 5px;">
+ <div class="container-fluid">
+ <div class="row-fluid">
<div class="span12">
- <h1 style="vertical-align: bottom; display: inline-block;">
+ <h3 style="vertical-align: bottom; display: inline-block;">
{title}
- </h1>
+ </h3>
</div>
</div>
- <hr/>
{content}
</div>
</body>
@@ -98,23 +87,18 @@ private[spark] object UIUtils {
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
<link rel="stylesheet" href="/static/bootstrap.min.css" type="text/css" />
- <link rel="stylesheet" href="/static/bootstrap-responsive.min.css" type="text/css" />
+ <link rel="stylesheet" href="/static/webui.css" type="text/css" />
<script src="/static/sorttable.js"></script>
<title>{title}</title>
- <style type="text/css">
- table.sortable thead {{ cursor: pointer; }}
- </style>
</head>
<body>
- <div class="container">
- <div class="row">
- <div class="span2">
- <img src="/static/spark_logo.png" />
- </div>
- <div class="span10">
- <h1 style="vertical-align: bottom; margin-top: 40px; display: inline-block;">
+ <div class="container-fluid">
+ <div class="row-fluid">
+ <div class="span12">
+ <h3 style="vertical-align: middle; display: inline-block;">
+ <img src="/static/spark-logo-77x50px-hd.png" style="margin-right: 15px;" />
{title}
- </h1>
+ </h3>
</div>
</div>
{content}
@@ -124,9 +108,21 @@ private[spark] object UIUtils {
}
/** Returns an HTML table constructed by generating a row for each object in a sequence. */
- def listingTable[T](headers: Seq[String], makeRow: T => Seq[Node], rows: Seq[T]): Seq[Node] = {
- <table class="table table-bordered table-striped table-condensed sortable">
- <thead>{headers.map(h => <th>{h}</th>)}</thead>
+ def listingTable[T](
+ headers: Seq[String],
+ makeRow: T => Seq[Node],
+ rows: Seq[T],
+ fixedWidth: Boolean = false): Seq[Node] = {
+
+ val colWidth = 100.toDouble / headers.size
+ val colWidthAttr = if (fixedWidth) colWidth + "%" else ""
+ var tableClass = "table table-bordered table-striped table-condensed sortable"
+ if (fixedWidth) {
+ tableClass += " table-fixed"
+ }
+
+ <table class={tableClass}>
+ <thead>{headers.map(h => <th width={colWidthAttr}>{h}</th>)}</thead>
<tbody>
{rows.map(r => makeRow(r))}
</tbody>
diff --git a/core/src/main/scala/spark/ui/UIWorkloadGenerator.scala b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala
index a80e2d7002..0ecb22d2f9 100644
--- a/core/src/main/scala/spark/ui/UIWorkloadGenerator.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala
@@ -15,12 +15,14 @@
* limitations under the License.
*/
-package spark.ui
+package org.apache.spark.ui
import scala.util.Random
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
+import org.apache.spark.scheduler.cluster.SchedulingMode
+
/**
* Continuously generates jobs that expose various features of the WebUI (internal testing tool).
@@ -29,18 +31,29 @@ import spark.SparkContext._
*/
private[spark] object UIWorkloadGenerator {
val NUM_PARTITIONS = 100
- val INTER_JOB_WAIT_MS = 500
+ val INTER_JOB_WAIT_MS = 5000
def main(args: Array[String]) {
+ if (args.length < 2) {
+ println("usage: ./spark-class spark.ui.UIWorkloadGenerator [master] [FIFO|FAIR]")
+ System.exit(1)
+ }
val master = args(0)
+ val schedulingMode = SchedulingMode.withName(args(1))
val appName = "Spark UI Tester"
+
+ if (schedulingMode == SchedulingMode.FAIR) {
+ System.setProperty("spark.cluster.schedulingmode", "FAIR")
+ }
val sc = new SparkContext(master, appName)
- // NOTE: Right now there is no easy way for us to show spark.job.annotation for a given phase,
- // but we pass it here anyways since it will be useful once we do.
- def setName(s: String) = {
- sc.addLocalProperties("spark.job.annotation", s)
+ def setProperties(s: String) = {
+ if(schedulingMode == SchedulingMode.FAIR) {
+ sc.setLocalProperty("spark.scheduler.cluster.fair.pool", s)
+ }
+ sc.setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, s)
}
+
val baseData = sc.makeRDD(1 to NUM_PARTITIONS * 10, NUM_PARTITIONS)
def nextFloat() = (new Random()).nextFloat()
@@ -73,14 +86,18 @@ private[spark] object UIWorkloadGenerator {
while (true) {
for ((desc, job) <- jobs) {
- try {
- setName(desc)
- job()
- println("Job funished: " + desc)
- } catch {
- case e: Exception =>
- println("Job Failed: " + desc)
- }
+ new Thread {
+ override def run() {
+ try {
+ setProperties(desc)
+ job()
+ println("Job funished: " + desc)
+ } catch {
+ case e: Exception =>
+ println("Job Failed: " + desc)
+ }
+ }
+ }.start
Thread.sleep(INTER_JOB_WAIT_MS)
}
}
diff --git a/core/src/main/scala/spark/ui/env/EnvironmentUI.scala b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala
index 5ae7935ed4..c5bf2acc9e 100644
--- a/core/src/main/scala/spark/ui/env/EnvironmentUI.scala
+++ b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala
@@ -15,22 +15,21 @@
* limitations under the License.
*/
-package spark.ui.env
+package org.apache.spark.ui.env
import javax.servlet.http.HttpServletRequest
-import org.eclipse.jetty.server.Handler
-
import scala.collection.JavaConversions._
import scala.util.Properties
+import scala.xml.Node
-import spark.ui.JettyUtils._
-import spark.ui.UIUtils.headerSparkPage
-import spark.ui.Page.Environment
-import spark.SparkContext
-import spark.ui.UIUtils
+import org.eclipse.jetty.server.Handler
+
+import org.apache.spark.ui.JettyUtils._
+import org.apache.spark.ui.UIUtils
+import org.apache.spark.ui.Page.Environment
+import org.apache.spark.SparkContext
-import scala.xml.Node
private[spark] class EnvironmentUI(sc: SparkContext) {
@@ -44,22 +43,24 @@ private[spark] class EnvironmentUI(sc: SparkContext) {
("Java Home", Properties.javaHome),
("Scala Version", Properties.versionString),
("Scala Home", Properties.scalaHome)
- )
+ ).sorted
def jvmRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr>
- def jvmTable = UIUtils.listingTable(Seq("Name", "Value"), jvmRow, jvmInformation)
+ def jvmTable =
+ UIUtils.listingTable(Seq("Name", "Value"), jvmRow, jvmInformation, fixedWidth = true)
val properties = System.getProperties.iterator.toSeq
- val classPathProperty = properties
- .filter{case (k, v) => k.contains("java.class.path")}
- .headOption
- .getOrElse("", "")
- val sparkProperties = properties.filter(_._1.startsWith("spark"))
- val otherProperties = properties.diff(sparkProperties :+ classPathProperty)
+ val classPathProperty = properties.find { case (k, v) =>
+ k.contains("java.class.path")
+ }.getOrElse(("", ""))
+ val sparkProperties = properties.filter(_._1.startsWith("spark")).sorted
+ val otherProperties = properties.diff(sparkProperties :+ classPathProperty).sorted
val propertyHeaders = Seq("Name", "Value")
def propertyRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr>
- val sparkPropertyTable = UIUtils.listingTable(propertyHeaders, propertyRow, sparkProperties)
- val otherPropertyTable = UIUtils.listingTable(propertyHeaders, propertyRow, otherProperties)
+ val sparkPropertyTable =
+ UIUtils.listingTable(propertyHeaders, propertyRow, sparkProperties, fixedWidth = true)
+ val otherPropertyTable =
+ UIUtils.listingTable(propertyHeaders, propertyRow, otherProperties, fixedWidth = true)
val classPathEntries = classPathProperty._2
.split(System.getProperty("path.separator", ":"))
@@ -67,20 +68,24 @@ private[spark] class EnvironmentUI(sc: SparkContext) {
.map(e => (e, "System Classpath"))
val addedJars = sc.addedJars.iterator.toSeq.map{case (path, time) => (path, "Added By User")}
val addedFiles = sc.addedFiles.iterator.toSeq.map{case (path, time) => (path, "Added By User")}
- val classPath = addedJars ++ addedFiles ++ classPathEntries
+ val classPath = (addedJars ++ addedFiles ++ classPathEntries).sorted
val classPathHeaders = Seq("Resource", "Source")
def classPathRow(data: (String, String)) = <tr><td>{data._1}</td><td>{data._2}</td></tr>
- val classPathTable = UIUtils.listingTable(classPathHeaders, classPathRow, classPath)
+ val classPathTable =
+ UIUtils.listingTable(classPathHeaders, classPathRow, classPath, fixedWidth = true)
val content =
<span>
- <h2>Runtime Information</h2> {jvmTable}
- <h2>Spark Properties</h2> {sparkPropertyTable}
- <h2>System Properties</h2> {otherPropertyTable}
- <h2>Classpath Entries</h2> {classPathTable}
+ <h4>Runtime Information</h4> {jvmTable}
+ <h4>Spark Properties</h4>
+ {sparkPropertyTable}
+ <h4>System Properties</h4>
+ {otherPropertyTable}
+ <h4>Classpath Entries</h4>
+ {classPathTable}
</span>
- headerSparkPage(content, sc, "Environment", Environment)
+ UIUtils.headerSparkPage(content, sc, "Environment", Environment)
}
}
diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala
new file mode 100644
index 0000000000..6e56c22d04
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala
@@ -0,0 +1,137 @@
+package org.apache.spark.ui.exec
+
+import javax.servlet.http.HttpServletRequest
+
+import scala.collection.mutable.{HashMap, HashSet}
+import scala.xml.Node
+
+import org.eclipse.jetty.server.Handler
+
+import org.apache.spark.{ExceptionFailure, Logging, SparkContext}
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.scheduler.cluster.TaskInfo
+import org.apache.spark.scheduler.{SparkListenerTaskStart, SparkListenerTaskEnd, SparkListener}
+import org.apache.spark.ui.JettyUtils._
+import org.apache.spark.ui.Page.Executors
+import org.apache.spark.ui.UIUtils
+import org.apache.spark.util.Utils
+
+
+private[spark] class ExecutorsUI(val sc: SparkContext) {
+
+ private var _listener: Option[ExecutorsListener] = None
+ def listener = _listener.get
+
+ def start() {
+ _listener = Some(new ExecutorsListener)
+ sc.addSparkListener(listener)
+ }
+
+ def getHandlers = Seq[(String, Handler)](
+ ("/executors", (request: HttpServletRequest) => render(request))
+ )
+
+ def render(request: HttpServletRequest): Seq[Node] = {
+ val storageStatusList = sc.getExecutorStorageStatus
+
+ val maxMem = storageStatusList.map(_.maxMem).fold(0L)(_+_)
+ val memUsed = storageStatusList.map(_.memUsed()).fold(0L)(_+_)
+ val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)).fold(0L)(_+_)
+
+ val execHead = Seq("Executor ID", "Address", "RDD blocks", "Memory used", "Disk used",
+ "Active tasks", "Failed tasks", "Complete tasks", "Total tasks")
+
+ def execRow(kv: Seq[String]) = {
+ <tr>
+ <td>{kv(0)}</td>
+ <td>{kv(1)}</td>
+ <td>{kv(2)}</td>
+ <td sorttable_customkey={kv(3)}>
+ {Utils.bytesToString(kv(3).toLong)} / {Utils.bytesToString(kv(4).toLong)}
+ </td>
+ <td sorttable_customkey={kv(5)}>
+ {Utils.bytesToString(kv(5).toLong)}
+ </td>
+ <td>{kv(6)}</td>
+ <td>{kv(7)}</td>
+ <td>{kv(8)}</td>
+ <td>{kv(9)}</td>
+ </tr>
+ }
+
+ val execInfo = for (b <- 0 until storageStatusList.size) yield getExecInfo(b)
+ val execTable = UIUtils.listingTable(execHead, execRow, execInfo)
+
+ val content =
+ <div class="row-fluid">
+ <div class="span12">
+ <ul class="unstyled">
+ <li><strong>Memory:</strong>
+ {Utils.bytesToString(memUsed)} Used
+ ({Utils.bytesToString(maxMem)} Total) </li>
+ <li><strong>Disk:</strong> {Utils.bytesToString(diskSpaceUsed)} Used </li>
+ </ul>
+ </div>
+ </div>
+ <div class = "row">
+ <div class="span12">
+ {execTable}
+ </div>
+ </div>;
+
+ UIUtils.headerSparkPage(content, sc, "Executors (" + execInfo.size + ")", Executors)
+ }
+
+ def getExecInfo(a: Int): Seq[String] = {
+ val execId = sc.getExecutorStorageStatus(a).blockManagerId.executorId
+ val hostPort = sc.getExecutorStorageStatus(a).blockManagerId.hostPort
+ val rddBlocks = sc.getExecutorStorageStatus(a).blocks.size.toString
+ val memUsed = sc.getExecutorStorageStatus(a).memUsed().toString
+ val maxMem = sc.getExecutorStorageStatus(a).maxMem.toString
+ val diskUsed = sc.getExecutorStorageStatus(a).diskUsed().toString
+ val activeTasks = listener.executorToTasksActive.get(a.toString).map(l => l.size).getOrElse(0)
+ val failedTasks = listener.executorToTasksFailed.getOrElse(a.toString, 0)
+ val completedTasks = listener.executorToTasksComplete.getOrElse(a.toString, 0)
+ val totalTasks = activeTasks + failedTasks + completedTasks
+
+ Seq(
+ execId,
+ hostPort,
+ rddBlocks,
+ memUsed,
+ maxMem,
+ diskUsed,
+ activeTasks.toString,
+ failedTasks.toString,
+ completedTasks.toString,
+ totalTasks.toString
+ )
+ }
+
+ private[spark] class ExecutorsListener extends SparkListener with Logging {
+ val executorToTasksActive = HashMap[String, HashSet[TaskInfo]]()
+ val executorToTasksComplete = HashMap[String, Int]()
+ val executorToTasksFailed = HashMap[String, Int]()
+
+ override def onTaskStart(taskStart: SparkListenerTaskStart) {
+ val eid = taskStart.taskInfo.executorId
+ val activeTasks = executorToTasksActive.getOrElseUpdate(eid, new HashSet[TaskInfo]())
+ activeTasks += taskStart.taskInfo
+ }
+
+ override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
+ val eid = taskEnd.taskInfo.executorId
+ val activeTasks = executorToTasksActive.getOrElseUpdate(eid, new HashSet[TaskInfo]())
+ activeTasks -= taskEnd.taskInfo
+ val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) =
+ taskEnd.reason match {
+ case e: ExceptionFailure =>
+ executorToTasksFailed(eid) = executorToTasksFailed.getOrElse(eid, 0) + 1
+ (Some(e), e.metrics)
+ case _ =>
+ executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1
+ (None, Option(taskEnd.taskMetrics))
+ }
+ }
+ }
+}
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala
new file mode 100644
index 0000000000..3b428effaf
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ui.jobs
+
+import javax.servlet.http.HttpServletRequest
+
+import scala.xml.{NodeSeq, Node}
+
+import org.apache.spark.scheduler.cluster.SchedulingMode
+import org.apache.spark.ui.Page._
+import org.apache.spark.ui.UIUtils._
+
+
+/** Page showing list of all ongoing and recently finished stages and pools*/
+private[spark] class IndexPage(parent: JobProgressUI) {
+ def listener = parent.listener
+
+ def render(request: HttpServletRequest): Seq[Node] = {
+ listener.synchronized {
+ val activeStages = listener.activeStages.toSeq
+ val completedStages = listener.completedStages.reverse.toSeq
+ val failedStages = listener.failedStages.reverse.toSeq
+ val now = System.currentTimeMillis()
+
+ var activeTime = 0L
+ for (tasks <- listener.stageToTasksActive.values; t <- tasks) {
+ activeTime += t.timeRunning(now)
+ }
+
+ val activeStagesTable = new StageTable(activeStages.sortBy(_.submissionTime).reverse, parent)
+ val completedStagesTable = new StageTable(completedStages.sortBy(_.submissionTime).reverse, parent)
+ val failedStagesTable = new StageTable(failedStages.sortBy(_.submissionTime).reverse, parent)
+
+ val pools = listener.sc.getAllPools
+ val poolTable = new PoolTable(pools, listener)
+ val summary: NodeSeq =
+ <div>
+ <ul class="unstyled">
+ <li>
+ <strong>Total Duration: </strong>
+ {parent.formatDuration(now - listener.sc.startTime)}
+ </li>
+ <li><strong>Scheduling Mode:</strong> {parent.sc.getSchedulingMode}</li>
+ <li>
+ <a href="#active"><strong>Active Stages:</strong></a>
+ {activeStages.size}
+ </li>
+ <li>
+ <a href="#completed"><strong>Completed Stages:</strong></a>
+ {completedStages.size}
+ </li>
+ <li>
+ <a href="#failed"><strong>Failed Stages:</strong></a>
+ {failedStages.size}
+ </li>
+ </ul>
+ </div>
+
+ val content = summary ++
+ {if (listener.sc.getSchedulingMode == SchedulingMode.FAIR) {
+ <h4>{pools.size} Fair Scheduler Pools</h4> ++ poolTable.toNodeSeq
+ } else {
+ Seq()
+ }} ++
+ <h4 id="active">Active Stages ({activeStages.size})</h4> ++
+ activeStagesTable.toNodeSeq++
+ <h4 id="completed">Completed Stages ({completedStages.size})</h4> ++
+ completedStagesTable.toNodeSeq++
+ <h4 id ="failed">Failed Stages ({failedStages.size})</h4> ++
+ failedStagesTable.toNodeSeq
+
+ headerSparkPage(content, parent.sc, "Spark Stages", Stages)
+ }
+ }
+}
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
new file mode 100644
index 0000000000..86e0af0399
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
@@ -0,0 +1,156 @@
+package org.apache.spark.ui.jobs
+
+import scala.Seq
+import scala.collection.mutable.{ListBuffer, HashMap, HashSet}
+
+import org.apache.spark.{ExceptionFailure, SparkContext, Success}
+import org.apache.spark.scheduler._
+import org.apache.spark.scheduler.cluster.TaskInfo
+import org.apache.spark.executor.TaskMetrics
+import collection.mutable
+
+/**
+ * Tracks task-level information to be displayed in the UI.
+ *
+ * All access to the data structures in this class must be synchronized on the
+ * class, since the UI thread and the DAGScheduler event loop may otherwise
+ * be reading/updating the internal data structures concurrently.
+ */
+private[spark] class JobProgressListener(val sc: SparkContext) extends SparkListener {
+ // How many stages to remember
+ val RETAINED_STAGES = System.getProperty("spark.ui.retained_stages", "1000").toInt
+ val DEFAULT_POOL_NAME = "default"
+
+ val stageToPool = new HashMap[Stage, String]()
+ val stageToDescription = new HashMap[Stage, String]()
+ val poolToActiveStages = new HashMap[String, HashSet[Stage]]()
+
+ val activeStages = HashSet[Stage]()
+ val completedStages = ListBuffer[Stage]()
+ val failedStages = ListBuffer[Stage]()
+
+ // Total metrics reflect metrics only for completed tasks
+ var totalTime = 0L
+ var totalShuffleRead = 0L
+ var totalShuffleWrite = 0L
+
+ val stageToTime = HashMap[Int, Long]()
+ val stageToShuffleRead = HashMap[Int, Long]()
+ val stageToShuffleWrite = HashMap[Int, Long]()
+ val stageToTasksActive = HashMap[Int, HashSet[TaskInfo]]()
+ val stageToTasksComplete = HashMap[Int, Int]()
+ val stageToTasksFailed = HashMap[Int, Int]()
+ val stageToTaskInfos =
+ HashMap[Int, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]]()
+
+ override def onJobStart(jobStart: SparkListenerJobStart) {}
+
+ override def onStageCompleted(stageCompleted: StageCompleted) = synchronized {
+ val stage = stageCompleted.stageInfo.stage
+ poolToActiveStages(stageToPool(stage)) -= stage
+ activeStages -= stage
+ completedStages += stage
+ trimIfNecessary(completedStages)
+ }
+
+ /** If stages is too large, remove and garbage collect old stages */
+ def trimIfNecessary(stages: ListBuffer[Stage]) = synchronized {
+ if (stages.size > RETAINED_STAGES) {
+ val toRemove = RETAINED_STAGES / 10
+ stages.takeRight(toRemove).foreach( s => {
+ stageToTaskInfos.remove(s.id)
+ stageToTime.remove(s.id)
+ stageToShuffleRead.remove(s.id)
+ stageToShuffleWrite.remove(s.id)
+ stageToTasksActive.remove(s.id)
+ stageToTasksComplete.remove(s.id)
+ stageToTasksFailed.remove(s.id)
+ stageToPool.remove(s)
+ if (stageToDescription.contains(s)) {stageToDescription.remove(s)}
+ })
+ stages.trimEnd(toRemove)
+ }
+ }
+
+ /** For FIFO, all stages are contained by "default" pool but "default" pool here is meaningless */
+ override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) = synchronized {
+ val stage = stageSubmitted.stage
+ activeStages += stage
+
+ val poolName = Option(stageSubmitted.properties).map {
+ p => p.getProperty("spark.scheduler.cluster.fair.pool", DEFAULT_POOL_NAME)
+ }.getOrElse(DEFAULT_POOL_NAME)
+ stageToPool(stage) = poolName
+
+ val description = Option(stageSubmitted.properties).flatMap {
+ p => Option(p.getProperty(SparkContext.SPARK_JOB_DESCRIPTION))
+ }
+ description.map(d => stageToDescription(stage) = d)
+
+ val stages = poolToActiveStages.getOrElseUpdate(poolName, new HashSet[Stage]())
+ stages += stage
+ }
+
+ override def onTaskStart(taskStart: SparkListenerTaskStart) = synchronized {
+ val sid = taskStart.task.stageId
+ val tasksActive = stageToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]())
+ tasksActive += taskStart.taskInfo
+ val taskList = stageToTaskInfos.getOrElse(
+ sid, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]())
+ taskList += ((taskStart.taskInfo, None, None))
+ stageToTaskInfos(sid) = taskList
+ }
+
+ override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = synchronized {
+ val sid = taskEnd.task.stageId
+ val tasksActive = stageToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]())
+ tasksActive -= taskEnd.taskInfo
+ val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) =
+ taskEnd.reason match {
+ case e: ExceptionFailure =>
+ stageToTasksFailed(sid) = stageToTasksFailed.getOrElse(sid, 0) + 1
+ (Some(e), e.metrics)
+ case _ =>
+ stageToTasksComplete(sid) = stageToTasksComplete.getOrElse(sid, 0) + 1
+ (None, Option(taskEnd.taskMetrics))
+ }
+
+ stageToTime.getOrElseUpdate(sid, 0L)
+ val time = metrics.map(m => m.executorRunTime).getOrElse(0)
+ stageToTime(sid) += time
+ totalTime += time
+
+ stageToShuffleRead.getOrElseUpdate(sid, 0L)
+ val shuffleRead = metrics.flatMap(m => m.shuffleReadMetrics).map(s =>
+ s.remoteBytesRead).getOrElse(0L)
+ stageToShuffleRead(sid) += shuffleRead
+ totalShuffleRead += shuffleRead
+
+ stageToShuffleWrite.getOrElseUpdate(sid, 0L)
+ val shuffleWrite = metrics.flatMap(m => m.shuffleWriteMetrics).map(s =>
+ s.shuffleBytesWritten).getOrElse(0L)
+ stageToShuffleWrite(sid) += shuffleWrite
+ totalShuffleWrite += shuffleWrite
+
+ val taskList = stageToTaskInfos.getOrElse(
+ sid, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]())
+ taskList -= ((taskEnd.taskInfo, None, None))
+ taskList += ((taskEnd.taskInfo, metrics, failureInfo))
+ stageToTaskInfos(sid) = taskList
+ }
+
+ override def onJobEnd(jobEnd: SparkListenerJobEnd) = synchronized {
+ jobEnd match {
+ case end: SparkListenerJobEnd =>
+ end.jobResult match {
+ case JobFailed(ex, Some(stage)) =>
+ activeStages -= stage
+ poolToActiveStages(stageToPool(stage)) -= stage
+ failedStages += stage
+ trimIfNecessary(failedStages)
+ case _ =>
+ }
+ case _ =>
+ }
+ }
+}
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala
new file mode 100644
index 0000000000..6aecef5120
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ui.jobs
+
+import akka.util.Duration
+
+import java.text.SimpleDateFormat
+
+import javax.servlet.http.HttpServletRequest
+
+import org.eclipse.jetty.server.Handler
+
+import scala.Seq
+import scala.collection.mutable.{HashSet, ListBuffer, HashMap, ArrayBuffer}
+
+import org.apache.spark.ui.JettyUtils._
+import org.apache.spark.{ExceptionFailure, SparkContext, Success}
+import org.apache.spark.scheduler._
+import collection.mutable
+import org.apache.spark.scheduler.cluster.SchedulingMode
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
+import org.apache.spark.util.Utils
+
+/** Web UI showing progress status of all jobs in the given SparkContext. */
+private[spark] class JobProgressUI(val sc: SparkContext) {
+ private var _listener: Option[JobProgressListener] = None
+ def listener = _listener.get
+ val dateFmt = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
+
+ private val indexPage = new IndexPage(this)
+ private val stagePage = new StagePage(this)
+ private val poolPage = new PoolPage(this)
+
+ def start() {
+ _listener = Some(new JobProgressListener(sc))
+ sc.addSparkListener(listener)
+ }
+
+ def formatDuration(ms: Long) = Utils.msDurationToString(ms)
+
+ def getHandlers = Seq[(String, Handler)](
+ ("/stages/stage", (request: HttpServletRequest) => stagePage.render(request)),
+ ("/stages/pool", (request: HttpServletRequest) => poolPage.render(request)),
+ ("/stages", (request: HttpServletRequest) => indexPage.render(request))
+ )
+}
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
new file mode 100644
index 0000000000..ce92b6932b
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
@@ -0,0 +1,32 @@
+package org.apache.spark.ui.jobs
+
+import javax.servlet.http.HttpServletRequest
+
+import scala.xml.{NodeSeq, Node}
+import scala.collection.mutable.HashSet
+
+import org.apache.spark.scheduler.Stage
+import org.apache.spark.ui.UIUtils._
+import org.apache.spark.ui.Page._
+
+/** Page showing specific pool details */
+private[spark] class PoolPage(parent: JobProgressUI) {
+ def listener = parent.listener
+
+ def render(request: HttpServletRequest): Seq[Node] = {
+ listener.synchronized {
+ val poolName = request.getParameter("poolname")
+ val poolToActiveStages = listener.poolToActiveStages
+ val activeStages = poolToActiveStages.get(poolName).toSeq.flatten
+ val activeStagesTable = new StageTable(activeStages.sortBy(_.submissionTime).reverse, parent)
+
+ val pool = listener.sc.getPoolForName(poolName).get
+ val poolTable = new PoolTable(Seq(pool), listener)
+
+ val content = <h4>Summary </h4> ++ poolTable.toNodeSeq() ++
+ <h4>{activeStages.size} Active Stages</h4> ++ activeStagesTable.toNodeSeq()
+
+ headerSparkPage(content, parent.sc, "Fair Scheduler Pool: " + poolName, Stages)
+ }
+ }
+}
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala
new file mode 100644
index 0000000000..f31465e59d
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala
@@ -0,0 +1,55 @@
+package org.apache.spark.ui.jobs
+
+import scala.collection.mutable.HashMap
+import scala.collection.mutable.HashSet
+import scala.xml.Node
+
+import org.apache.spark.scheduler.Stage
+import org.apache.spark.scheduler.cluster.Schedulable
+
+/** Table showing list of pools */
+private[spark] class PoolTable(pools: Seq[Schedulable], listener: JobProgressListener) {
+
+ var poolToActiveStages: HashMap[String, HashSet[Stage]] = listener.poolToActiveStages
+
+ def toNodeSeq(): Seq[Node] = {
+ listener.synchronized {
+ poolTable(poolRow, pools)
+ }
+ }
+
+ private def poolTable(makeRow: (Schedulable, HashMap[String, HashSet[Stage]]) => Seq[Node],
+ rows: Seq[Schedulable]
+ ): Seq[Node] = {
+ <table class="table table-bordered table-striped table-condensed sortable table-fixed">
+ <thead>
+ <th>Pool Name</th>
+ <th>Minimum Share</th>
+ <th>Pool Weight</th>
+ <th>Active Stages</th>
+ <th>Running Tasks</th>
+ <th>SchedulingMode</th>
+ </thead>
+ <tbody>
+ {rows.map(r => makeRow(r, poolToActiveStages))}
+ </tbody>
+ </table>
+ }
+
+ private def poolRow(p: Schedulable, poolToActiveStages: HashMap[String, HashSet[Stage]])
+ : Seq[Node] = {
+ val activeStages = poolToActiveStages.get(p.name) match {
+ case Some(stages) => stages.size
+ case None => 0
+ }
+ <tr>
+ <td><a href={"/stages/pool?poolname=%s".format(p.name)}>{p.name}</a></td>
+ <td>{p.minShare}</td>
+ <td>{p.weight}</td>
+ <td>{activeStages}</td>
+ <td>{p.runningTasks}</td>
+ <td>{p.schedulingMode}</td>
+ </tr>
+ }
+}
+
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
new file mode 100644
index 0000000000..a9969ab1c0
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ui.jobs
+
+import java.util.Date
+
+import javax.servlet.http.HttpServletRequest
+
+import scala.xml.Node
+
+import org.apache.spark.ui.UIUtils._
+import org.apache.spark.ui.Page._
+import org.apache.spark.util.{Utils, Distribution}
+import org.apache.spark.{ExceptionFailure}
+import org.apache.spark.scheduler.cluster.TaskInfo
+import org.apache.spark.executor.TaskMetrics
+
+/** Page showing statistics and task list for a given stage */
+private[spark] class StagePage(parent: JobProgressUI) {
+ def listener = parent.listener
+ val dateFmt = parent.dateFmt
+
+ def render(request: HttpServletRequest): Seq[Node] = {
+ listener.synchronized {
+ val stageId = request.getParameter("id").toInt
+ val now = System.currentTimeMillis()
+
+ if (!listener.stageToTaskInfos.contains(stageId)) {
+ val content =
+ <div>
+ <h4>Summary Metrics</h4> No tasks have started yet
+ <h4>Tasks</h4> No tasks have started yet
+ </div>
+ return headerSparkPage(content, parent.sc, "Details for Stage %s".format(stageId), Stages)
+ }
+
+ val tasks = listener.stageToTaskInfos(stageId).toSeq.sortBy(_._1.launchTime)
+
+ val numCompleted = tasks.count(_._1.finished)
+ val shuffleReadBytes = listener.stageToShuffleRead.getOrElse(stageId, 0L)
+ val hasShuffleRead = shuffleReadBytes > 0
+ val shuffleWriteBytes = listener.stageToShuffleWrite.getOrElse(stageId, 0L)
+ val hasShuffleWrite = shuffleWriteBytes > 0
+
+ var activeTime = 0L
+ listener.stageToTasksActive(stageId).foreach(activeTime += _.timeRunning(now))
+
+ val summary =
+ <div>
+ <ul class="unstyled">
+ <li>
+ <strong>CPU time: </strong>
+ {parent.formatDuration(listener.stageToTime.getOrElse(stageId, 0L) + activeTime)}
+ </li>
+ {if (hasShuffleRead)
+ <li>
+ <strong>Shuffle read: </strong>
+ {Utils.bytesToString(shuffleReadBytes)}
+ </li>
+ }
+ {if (hasShuffleWrite)
+ <li>
+ <strong>Shuffle write: </strong>
+ {Utils.bytesToString(shuffleWriteBytes)}
+ </li>
+ }
+ </ul>
+ </div>
+
+ val taskHeaders: Seq[String] =
+ Seq("Task ID", "Status", "Locality Level", "Executor", "Launch Time", "Duration") ++
+ Seq("GC Time") ++
+ {if (hasShuffleRead) Seq("Shuffle Read") else Nil} ++
+ {if (hasShuffleWrite) Seq("Shuffle Write") else Nil} ++
+ Seq("Errors")
+
+ val taskTable = listingTable(taskHeaders, taskRow(hasShuffleRead, hasShuffleWrite), tasks)
+
+ // Excludes tasks which failed and have incomplete metrics
+ val validTasks = tasks.filter(t => t._1.status == "SUCCESS" && (t._2.isDefined))
+
+ val summaryTable: Option[Seq[Node]] =
+ if (validTasks.size == 0) {
+ None
+ }
+ else {
+ val serviceTimes = validTasks.map{case (info, metrics, exception) =>
+ metrics.get.executorRunTime.toDouble}
+ val serviceQuantiles = "Duration" +: Distribution(serviceTimes).get.getQuantiles().map(
+ ms => parent.formatDuration(ms.toLong))
+
+ def getQuantileCols(data: Seq[Double]) =
+ Distribution(data).get.getQuantiles().map(d => Utils.bytesToString(d.toLong))
+
+ val shuffleReadSizes = validTasks.map {
+ case(info, metrics, exception) =>
+ metrics.get.shuffleReadMetrics.map(_.remoteBytesRead).getOrElse(0L).toDouble
+ }
+ val shuffleReadQuantiles = "Shuffle Read (Remote)" +: getQuantileCols(shuffleReadSizes)
+
+ val shuffleWriteSizes = validTasks.map {
+ case(info, metrics, exception) =>
+ metrics.get.shuffleWriteMetrics.map(_.shuffleBytesWritten).getOrElse(0L).toDouble
+ }
+ val shuffleWriteQuantiles = "Shuffle Write" +: getQuantileCols(shuffleWriteSizes)
+
+ val listings: Seq[Seq[String]] = Seq(serviceQuantiles,
+ if (hasShuffleRead) shuffleReadQuantiles else Nil,
+ if (hasShuffleWrite) shuffleWriteQuantiles else Nil)
+
+ val quantileHeaders = Seq("Metric", "Min", "25th percentile",
+ "Median", "75th percentile", "Max")
+ def quantileRow(data: Seq[String]): Seq[Node] = <tr> {data.map(d => <td>{d}</td>)} </tr>
+ Some(listingTable(quantileHeaders, quantileRow, listings, fixedWidth = true))
+ }
+
+ val content =
+ summary ++
+ <h4>Summary Metrics for {numCompleted} Completed Tasks</h4> ++
+ <div>{summaryTable.getOrElse("No tasks have reported metrics yet.")}</div> ++
+ <h4>Tasks</h4> ++ taskTable;
+
+ headerSparkPage(content, parent.sc, "Details for Stage %d".format(stageId), Stages)
+ }
+ }
+
+
+ def taskRow(shuffleRead: Boolean, shuffleWrite: Boolean)
+ (taskData: (TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])): Seq[Node] = {
+ def fmtStackTrace(trace: Seq[StackTraceElement]): Seq[Node] =
+ trace.map(e => <span style="display:block;">{e.toString}</span>)
+ val (info, metrics, exception) = taskData
+
+ val duration = if (info.status == "RUNNING") info.timeRunning(System.currentTimeMillis())
+ else metrics.map(m => m.executorRunTime).getOrElse(1)
+ val formatDuration = if (info.status == "RUNNING") parent.formatDuration(duration)
+ else metrics.map(m => parent.formatDuration(m.executorRunTime)).getOrElse("")
+ val gcTime = metrics.map(m => m.jvmGCTime).getOrElse(0L)
+
+ <tr>
+ <td>{info.taskId}</td>
+ <td>{info.status}</td>
+ <td>{info.taskLocality}</td>
+ <td>{info.host}</td>
+ <td>{dateFmt.format(new Date(info.launchTime))}</td>
+ <td sorttable_customkey={duration.toString}>
+ {formatDuration}
+ </td>
+ <td sorttable_customkey={gcTime.toString}>
+ {if (gcTime > 0) parent.formatDuration(gcTime) else ""}
+ </td>
+ {if (shuffleRead) {
+ <td>{metrics.flatMap{m => m.shuffleReadMetrics}.map{s =>
+ Utils.bytesToString(s.remoteBytesRead)}.getOrElse("")}</td>
+ }}
+ {if (shuffleWrite) {
+ <td>{metrics.flatMap{m => m.shuffleWriteMetrics}.map{s =>
+ Utils.bytesToString(s.shuffleBytesWritten)}.getOrElse("")}</td>
+ }}
+ <td>{exception.map(e =>
+ <span>
+ {e.className} ({e.description})<br/>
+ {fmtStackTrace(e.stackTrace)}
+ </span>).getOrElse("")}
+ </td>
+ </tr>
+ }
+}
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
new file mode 100644
index 0000000000..71e58a977e
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
@@ -0,0 +1,107 @@
+package org.apache.spark.ui.jobs
+
+import java.util.Date
+
+import scala.xml.Node
+import scala.collection.mutable.HashSet
+
+import org.apache.spark.scheduler.cluster.{SchedulingMode, TaskInfo}
+import org.apache.spark.scheduler.Stage
+import org.apache.spark.util.Utils
+
+
+/** Page showing list of all ongoing and recently finished stages */
+private[spark] class StageTable(val stages: Seq[Stage], val parent: JobProgressUI) {
+
+ val listener = parent.listener
+ val dateFmt = parent.dateFmt
+ val isFairScheduler = listener.sc.getSchedulingMode == SchedulingMode.FAIR
+
+ def toNodeSeq(): Seq[Node] = {
+ listener.synchronized {
+ stageTable(stageRow, stages)
+ }
+ }
+
+ /** Special table which merges two header cells. */
+ private def stageTable[T](makeRow: T => Seq[Node], rows: Seq[T]): Seq[Node] = {
+ <table class="table table-bordered table-striped table-condensed sortable">
+ <thead>
+ <th>Stage Id</th>
+ {if (isFairScheduler) {<th>Pool Name</th>} else {}}
+ <th>Description</th>
+ <th>Submitted</th>
+ <th>Duration</th>
+ <th>Tasks: Succeeded/Total</th>
+ <th>Shuffle Read</th>
+ <th>Shuffle Write</th>
+ </thead>
+ <tbody>
+ {rows.map(r => makeRow(r))}
+ </tbody>
+ </table>
+ }
+
+ private def makeProgressBar(started: Int, completed: Int, failed: String, total: Int): Seq[Node] = {
+ val completeWidth = "width: %s%%".format((completed.toDouble/total)*100)
+ val startWidth = "width: %s%%".format((started.toDouble/total)*100)
+
+ <div class="progress">
+ <span style="text-align:center; position:absolute; width:100%;">
+ {completed}/{total} {failed}
+ </span>
+ <div class="bar bar-completed" style={completeWidth}></div>
+ <div class="bar bar-running" style={startWidth}></div>
+ </div>
+ }
+
+
+ private def stageRow(s: Stage): Seq[Node] = {
+ val submissionTime = s.submissionTime match {
+ case Some(t) => dateFmt.format(new Date(t))
+ case None => "Unknown"
+ }
+
+ val shuffleRead = listener.stageToShuffleRead.getOrElse(s.id, 0L) match {
+ case 0 => ""
+ case b => Utils.bytesToString(b)
+ }
+ val shuffleWrite = listener.stageToShuffleWrite.getOrElse(s.id, 0L) match {
+ case 0 => ""
+ case b => Utils.bytesToString(b)
+ }
+
+ val startedTasks = listener.stageToTasksActive.getOrElse(s.id, HashSet[TaskInfo]()).size
+ val completedTasks = listener.stageToTasksComplete.getOrElse(s.id, 0)
+ val failedTasks = listener.stageToTasksFailed.getOrElse(s.id, 0) match {
+ case f if f > 0 => "(%s failed)".format(f)
+ case _ => ""
+ }
+ val totalTasks = s.numPartitions
+
+ val poolName = listener.stageToPool.get(s)
+
+ val nameLink = <a href={"/stages/stage?id=%s".format(s.id)}>{s.name}</a>
+ val description = listener.stageToDescription.get(s)
+ .map(d => <div><em>{d}</em></div><div>{nameLink}</div>).getOrElse(nameLink)
+ val finishTime = s.completionTime.getOrElse(System.currentTimeMillis())
+ val duration = s.submissionTime.map(t => finishTime - t)
+
+ <tr>
+ <td>{s.id}</td>
+ {if (isFairScheduler) {
+ <td><a href={"/stages/pool?poolname=%s".format(poolName.get)}>{poolName.get}</a></td>}
+ }
+ <td>{description}</td>
+ <td valign="middle">{submissionTime}</td>
+ <td sorttable_customkey={duration.getOrElse(-1).toString}>
+ {duration.map(d => parent.formatDuration(d)).getOrElse("Unknown")}
+ </td>
+ <td class="progress-cell">
+ {makeProgressBar(startedTasks, completedTasks, failedTasks, totalTasks)}
+ </td>
+ <td>{shuffleRead}</td>
+ <td>{shuffleWrite}</td>
+ </tr>
+ }
+}
diff --git a/core/src/main/scala/spark/ui/storage/BlockManagerUI.scala b/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala
index 49ed069c75..1d633d374a 100644
--- a/core/src/main/scala/spark/ui/storage/BlockManagerUI.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.ui.storage
+package org.apache.spark.ui.storage
import akka.util.Duration
@@ -23,8 +23,8 @@ import javax.servlet.http.HttpServletRequest
import org.eclipse.jetty.server.Handler
-import spark.{Logging, SparkContext}
-import spark.ui.JettyUtils._
+import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.ui.JettyUtils._
/** Web UI showing storage status of all RDD's in the given SparkContext. */
private[spark] class BlockManagerUI(val sc: SparkContext) extends Logging {
diff --git a/core/src/main/scala/spark/ui/storage/IndexPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/IndexPage.scala
index 4e0360d19a..c3ec907370 100644
--- a/core/src/main/scala/spark/ui/storage/IndexPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/IndexPage.scala
@@ -15,16 +15,16 @@
* limitations under the License.
*/
-package spark.ui.storage
+package org.apache.spark.ui.storage
import javax.servlet.http.HttpServletRequest
import scala.xml.Node
-import spark.storage.{RDDInfo, StorageUtils}
-import spark.Utils
-import spark.ui.UIUtils._
-import spark.ui.Page._
+import org.apache.spark.storage.{RDDInfo, StorageUtils}
+import org.apache.spark.ui.UIUtils._
+import org.apache.spark.ui.Page._
+import org.apache.spark.util.Utils
/** Page showing list of RDD's currently stored in the cluster */
private[spark] class IndexPage(parent: BlockManagerUI) {
@@ -33,34 +33,18 @@ private[spark] class IndexPage(parent: BlockManagerUI) {
def render(request: HttpServletRequest): Seq[Node] = {
val storageStatusList = sc.getExecutorStorageStatus
// Calculate macro-level statistics
- val maxMem = storageStatusList.map(_.maxMem).reduce(_+_)
- val remainingMem = storageStatusList.map(_.memRemaining).reduce(_+_)
- val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize))
- .reduceOption(_+_).getOrElse(0L)
val rddHeaders = Seq(
"RDD Name",
"Storage Level",
"Cached Partitions",
- "Fraction Partitions Cached",
+ "Fraction Cached",
"Size in Memory",
"Size on Disk")
val rdds = StorageUtils.rddInfoFromStorageStatus(storageStatusList, sc)
- val rddTable = listingTable(rddHeaders, rddRow, rdds)
+ val content = listingTable(rddHeaders, rddRow, rdds)
- val content =
- <div class="row">
- <div class="span12">
- <ul class="unstyled">
- <li><strong>Memory:</strong>
- {Utils.memoryBytesToString(maxMem - remainingMem)} Used
- ({Utils.memoryBytesToString(remainingMem)} Available) </li>
- <li><strong>Disk:</strong> {Utils.memoryBytesToString(diskSpaceUsed)} Used </li>
- </ul>
- </div>
- </div> ++ {rddTable};
-
- headerSparkPage(content, parent.sc, "Spark Storage ", Storage)
+ headerSparkPage(content, parent.sc, "Storage ", Storage)
}
def rddRow(rdd: RDDInfo): Seq[Node] = {
@@ -73,9 +57,9 @@ private[spark] class IndexPage(parent: BlockManagerUI) {
<td>{rdd.storageLevel.description}
</td>
<td>{rdd.numCachedPartitions}</td>
- <td>{rdd.numCachedPartitions / rdd.numPartitions.toDouble}</td>
- <td>{Utils.memoryBytesToString(rdd.memSize)}</td>
- <td>{Utils.memoryBytesToString(rdd.diskSize)}</td>
+ <td>{"%.0f%%".format(rdd.numCachedPartitions * 100.0 / rdd.numPartitions)}</td>
+ <td>{Utils.bytesToString(rdd.memSize)}</td>
+ <td>{Utils.bytesToString(rdd.diskSize)}</td>
</tr>
}
}
diff --git a/core/src/main/scala/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
index 003be54ad8..43c1257677 100644
--- a/core/src/main/scala/spark/ui/storage/RDDPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
@@ -15,17 +15,18 @@
* limitations under the License.
*/
-package spark.ui.storage
+package org.apache.spark.ui.storage
import javax.servlet.http.HttpServletRequest
import scala.xml.Node
-import spark.storage.{StorageStatus, StorageUtils}
-import spark.ui.UIUtils._
-import spark.Utils
-import spark.storage.BlockManagerMasterActor.BlockStatus
-import spark.ui.Page._
+import org.apache.spark.storage.{StorageStatus, StorageUtils}
+import org.apache.spark.storage.BlockManagerMasterActor.BlockStatus
+import org.apache.spark.ui.UIUtils._
+import org.apache.spark.ui.Page._
+import org.apache.spark.util.Utils
+
/** Page showing storage details for a given RDD */
private[spark] class RDDPage(parent: BlockManagerUI) {
@@ -44,7 +45,7 @@ private[spark] class RDDPage(parent: BlockManagerUI) {
val workerTable = listingTable(workerHeaders, workerRow, workers)
val blockHeaders = Seq("Block Name", "Storage Level", "Size in Memory", "Size on Disk",
- "Locations")
+ "Executors")
val blockStatuses = filteredStorageStatusList.flatMap(_.blocks).toArray.sortWith(_._1 < _._1)
val blockLocations = StorageUtils.blockLocationsFromStorageStatus(filteredStorageStatusList)
@@ -54,7 +55,7 @@ private[spark] class RDDPage(parent: BlockManagerUI) {
val blockTable = listingTable(blockHeaders, blockRow, blocks)
val content =
- <div class="row">
+ <div class="row-fluid">
<div class="span12">
<ul class="unstyled">
<li>
@@ -71,30 +72,31 @@ private[spark] class RDDPage(parent: BlockManagerUI) {
</li>
<li>
<strong>Memory Size:</strong>
- {Utils.memoryBytesToString(rddInfo.memSize)}
+ {Utils.bytesToString(rddInfo.memSize)}
</li>
<li>
<strong>Disk Size:</strong>
- {Utils.memoryBytesToString(rddInfo.diskSize)}
+ {Utils.bytesToString(rddInfo.diskSize)}
</li>
</ul>
</div>
</div>
- <hr/>
- <div class="row">
+
+ <div class="row-fluid">
<div class="span12">
+ <h4> Data Distribution on {workers.size} Executors </h4>
{workerTable}
</div>
</div>
- <hr/>
- <div class="row">
+
+ <div class="row-fluid">
<div class="span12">
- <h3> RDD Summary </h3>
+ <h4> {blocks.size} Partitions </h4>
{blockTable}
</div>
</div>;
- headerSparkPage(content, parent.sc, "RDD Info: " + rddInfo.name, Jobs)
+ headerSparkPage(content, parent.sc, "RDD Storage Info for " + rddInfo.name, Storage)
}
def blockRow(row: (String, BlockStatus, Seq[String])): Seq[Node] = {
@@ -105,10 +107,10 @@ private[spark] class RDDPage(parent: BlockManagerUI) {
{block.storageLevel.description}
</td>
<td sorttable_customkey={block.memSize.toString}>
- {Utils.memoryBytesToString(block.memSize)}
+ {Utils.bytesToString(block.memSize)}
</td>
<td sorttable_customkey={block.diskSize.toString}>
- {Utils.memoryBytesToString(block.diskSize)}
+ {Utils.bytesToString(block.diskSize)}
</td>
<td>
{locations.map(l => <span>{l}<br/></span>)}
@@ -121,10 +123,10 @@ private[spark] class RDDPage(parent: BlockManagerUI) {
<tr>
<td>{status.blockManagerId.host + ":" + status.blockManagerId.port}</td>
<td>
- {Utils.memoryBytesToString(status.memUsed(prefix))}
- ({Utils.memoryBytesToString(status.memRemaining)} Total Available)
+ {Utils.bytesToString(status.memUsed(prefix))}
+ ({Utils.bytesToString(status.memRemaining)} Remaining)
</td>
- <td>{Utils.memoryBytesToString(status.diskUsed(prefix))}</td>
+ <td>{Utils.bytesToString(status.diskUsed(prefix))}</td>
</tr>
}
}
diff --git a/core/src/main/scala/spark/util/AkkaUtils.scala b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
index 9233277bdb..d4c5065c3f 100644
--- a/core/src/main/scala/spark/util/AkkaUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import akka.actor.{ActorSystem, ExtendedActorSystem}
import com.typesafe.config.ConfigFactory
diff --git a/core/src/main/scala/spark/util/BoundedPriorityQueue.scala b/core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala
index 0575497f5d..0b51c23f7b 100644
--- a/core/src/main/scala/spark/util/BoundedPriorityQueue.scala
+++ b/core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import java.io.Serializable
import java.util.{PriorityQueue => JPriorityQueue}
diff --git a/core/src/main/scala/spark/util/ByteBufferInputStream.scala b/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala
index 47a28e2f76..e214d2a519 100644
--- a/core/src/main/scala/spark/util/ByteBufferInputStream.scala
+++ b/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import java.io.InputStream
import java.nio.ByteBuffer
-import spark.storage.BlockManager
+import org.apache.spark.storage.BlockManager
/**
* Reads data from a ByteBuffer, and optionally cleans it up using BlockManager.dispose()
diff --git a/core/src/main/scala/org/apache/spark/util/Clock.scala b/core/src/main/scala/org/apache/spark/util/Clock.scala
new file mode 100644
index 0000000000..97c2b45aab
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/util/Clock.scala
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util
+
+/**
+ * An interface to represent clocks, so that they can be mocked out in unit tests.
+ */
+private[spark] trait Clock {
+ def getTime(): Long
+}
+
+private[spark] object SystemClock extends Clock {
+ def getTime(): Long = System.currentTimeMillis()
+}
diff --git a/core/src/main/scala/spark/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
index 8b39241095..7108595e3e 100644
--- a/core/src/main/scala/spark/ClosureCleaner.scala
+++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.util
import java.lang.reflect.Field
@@ -25,6 +25,7 @@ import scala.collection.mutable.Set
import org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor, Type}
import org.objectweb.asm.Opcodes._
import java.io.{InputStream, IOException, ByteArrayOutputStream, ByteArrayInputStream, BufferedInputStream}
+import org.apache.spark.Logging
private[spark] object ClosureCleaner extends Logging {
// Get an ASM class reader for a given class from the JAR that loaded it
diff --git a/core/src/main/scala/spark/util/CompletionIterator.scala b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala
index 210450892b..dc15a38b29 100644
--- a/core/src/main/scala/spark/util/CompletionIterator.scala
+++ b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
/**
* Wrapper around an iterator which calls a completion method after it successfully iterates through all the elements
diff --git a/core/src/main/scala/spark/util/Distribution.scala b/core/src/main/scala/org/apache/spark/util/Distribution.scala
index 5d4d7a6c50..33bf3562fe 100644
--- a/core/src/main/scala/spark/util/Distribution.scala
+++ b/core/src/main/scala/org/apache/spark/util/Distribution.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import java.io.PrintStream
diff --git a/core/src/main/scala/spark/util/IdGenerator.scala b/core/src/main/scala/org/apache/spark/util/IdGenerator.scala
index 3422280559..17e55f7996 100644
--- a/core/src/main/scala/spark/util/IdGenerator.scala
+++ b/core/src/main/scala/org/apache/spark/util/IdGenerator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import java.util.concurrent.atomic.AtomicInteger
diff --git a/core/src/main/scala/spark/util/IntParam.scala b/core/src/main/scala/org/apache/spark/util/IntParam.scala
index daf0d58fa2..626bb49eea 100644
--- a/core/src/main/scala/spark/util/IntParam.scala
+++ b/core/src/main/scala/org/apache/spark/util/IntParam.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
/**
* An extractor object for parsing strings into integers.
diff --git a/core/src/main/scala/spark/util/MemoryParam.scala b/core/src/main/scala/org/apache/spark/util/MemoryParam.scala
index 298562323a..4869c9897a 100644
--- a/core/src/main/scala/spark/util/MemoryParam.scala
+++ b/core/src/main/scala/org/apache/spark/util/MemoryParam.scala
@@ -15,9 +15,7 @@
* limitations under the License.
*/
-package spark.util
-
-import spark.Utils
+package org.apache.spark.util
/**
* An extractor object for parsing JVM memory strings, such as "10g", into an Int representing
diff --git a/core/src/main/scala/spark/util/MetadataCleaner.scala b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala
index 92909e0959..a430a75451 100644
--- a/core/src/main/scala/spark/util/MetadataCleaner.scala
+++ b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import java.util.concurrent.{TimeUnit, ScheduledFuture, Executors}
import java.util.{TimerTask, Timer}
-import spark.Logging
+import org.apache.spark.Logging
/**
diff --git a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/util/MutablePair.scala
index 1a7cdf4788..34f1f6606f 100644
--- a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala
+++ b/core/src/main/scala/org/apache/spark/util/MutablePair.scala
@@ -15,16 +15,22 @@
* limitations under the License.
*/
-package org.apache.hadoop.mapreduce
+package org.apache.spark.util
-import org.apache.hadoop.conf.Configuration
-import task.{TaskAttemptContextImpl, JobContextImpl}
-trait HadoopMapReduceUtil {
- def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContextImpl(conf, jobId)
-
- def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId)
+/**
+ * A tuple of 2 elements. This can be used as an alternative to Scala's Tuple2 when we want to
+ * minimize object allocation.
+ *
+ * @param _1 Element 1 of this MutablePair
+ * @param _2 Element 2 of this MutablePair
+ */
+case class MutablePair[@specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T1,
+ @specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T2]
+ (var _1: T1, var _2: T2)
+ extends Product2[T1, T2]
+{
+ override def toString = "(" + _1 + "," + _2 + ")"
- def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) =
- new TaskAttemptID(jtIdentifier, jobId, if (isMap) TaskType.MAP else TaskType.REDUCE, taskId, attemptId)
+ override def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[_,_]]
}
diff --git a/core/src/main/scala/spark/util/NextIterator.scala b/core/src/main/scala/org/apache/spark/util/NextIterator.scala
index 22163ece8d..8266e5e495 100644
--- a/core/src/main/scala/spark/util/NextIterator.scala
+++ b/core/src/main/scala/org/apache/spark/util/NextIterator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
/** Provides a basic/boilerplate Iterator implementation. */
private[spark] abstract class NextIterator[U] extends Iterator[U] {
diff --git a/core/src/main/scala/spark/util/RateLimitedOutputStream.scala b/core/src/main/scala/org/apache/spark/util/RateLimitedOutputStream.scala
index 00f782bbe7..47e1b45004 100644
--- a/core/src/main/scala/spark/util/RateLimitedOutputStream.scala
+++ b/core/src/main/scala/org/apache/spark/util/RateLimitedOutputStream.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import scala.annotation.tailrec
diff --git a/core/src/main/scala/spark/util/SerializableBuffer.scala b/core/src/main/scala/org/apache/spark/util/SerializableBuffer.scala
index 7e6842628a..f2b1ad7d0e 100644
--- a/core/src/main/scala/spark/util/SerializableBuffer.scala
+++ b/core/src/main/scala/org/apache/spark/util/SerializableBuffer.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import java.nio.ByteBuffer
import java.io.{IOException, ObjectOutputStream, EOFException, ObjectInputStream}
diff --git a/core/src/main/scala/spark/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
index 6cc57566d7..a25b37a2a9 100644
--- a/core/src/main/scala/spark/SizeEstimator.scala
+++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.util
import java.lang.reflect.Field
import java.lang.reflect.Modifier
@@ -30,6 +30,7 @@ import java.lang.management.ManagementFactory
import scala.collection.mutable.ArrayBuffer
import it.unimi.dsi.fastutil.ints.IntOpenHashSet
+import org.apache.spark.Logging
/**
* Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in
diff --git a/core/src/main/scala/spark/util/StatCounter.scala b/core/src/main/scala/org/apache/spark/util/StatCounter.scala
index 76358d4151..020d5edba9 100644
--- a/core/src/main/scala/spark/util/StatCounter.scala
+++ b/core/src/main/scala/org/apache/spark/util/StatCounter.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
/**
* A class for tracking the statistics of a set of numbers (count, mean and variance) in a
diff --git a/core/src/main/scala/spark/util/TimeStampedHashMap.scala b/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala
index cc7909194a..277de2f8a6 100644
--- a/core/src/main/scala/spark/util/TimeStampedHashMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala
@@ -15,12 +15,14 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import java.util.concurrent.ConcurrentHashMap
import scala.collection.JavaConversions
import scala.collection.mutable.Map
-import spark.scheduler.MapStatus
+import scala.collection.immutable
+import org.apache.spark.scheduler.MapStatus
+import org.apache.spark.Logging
/**
* This is a custom implementation of scala.collection.mutable.Map which stores the insertion
@@ -28,7 +30,7 @@ import spark.scheduler.MapStatus
* threshold time can them be removed using the clearOldValues method. This is intended to be a drop-in
* replacement of scala.collection.mutable.HashMap.
*/
-class TimeStampedHashMap[A, B] extends Map[A, B]() with spark.Logging {
+class TimeStampedHashMap[A, B] extends Map[A, B]() with Logging {
val internalMap = new ConcurrentHashMap[A, (B, Long)]()
def get(key: A): Option[B] = {
@@ -99,6 +101,8 @@ class TimeStampedHashMap[A, B] extends Map[A, B]() with spark.Logging {
}
}
+ def toMap: immutable.Map[A, B] = iterator.toMap
+
/**
* Removes old key-value pairs that have timestamp earlier than `threshTime`
*/
diff --git a/core/src/main/scala/spark/util/TimeStampedHashSet.scala b/core/src/main/scala/org/apache/spark/util/TimeStampedHashSet.scala
index 41e3fd8cba..26983138ff 100644
--- a/core/src/main/scala/spark/util/TimeStampedHashSet.scala
+++ b/core/src/main/scala/org/apache/spark/util/TimeStampedHashSet.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import scala.collection.mutable.Set
import scala.collection.JavaConversions
diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index e6a96a5ec1..bb47fc0a2c 100644
--- a/core/src/main/scala/spark/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.util
import java.io._
import java.net.{InetAddress, URL, URI, NetworkInterface, Inet4Address, ServerSocket}
@@ -33,14 +33,16 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder
import org.apache.hadoop.fs.{Path, FileSystem, FileUtil}
-import spark.serializer.SerializerInstance
-import spark.deploy.SparkHadoopUtil
+import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance}
+import org.apache.spark.deploy.SparkHadoopUtil
+import java.nio.ByteBuffer
+import org.apache.spark.{SparkEnv, SparkException, Logging}
/**
* Various utility methods used by Spark.
*/
-private object Utils extends Logging {
+private[spark] object Utils extends Logging {
/** Serialize an object using Java serialization */
def serialize[T](o: T): Array[Byte] = {
@@ -68,6 +70,47 @@ private object Utils extends Logging {
return ois.readObject.asInstanceOf[T]
}
+ /** Serialize via nested stream using specific serializer */
+ def serializeViaNestedStream(os: OutputStream, ser: SerializerInstance)(f: SerializationStream => Unit) = {
+ val osWrapper = ser.serializeStream(new OutputStream {
+ def write(b: Int) = os.write(b)
+
+ override def write(b: Array[Byte], off: Int, len: Int) = os.write(b, off, len)
+ })
+ try {
+ f(osWrapper)
+ } finally {
+ osWrapper.close()
+ }
+ }
+
+ /** Deserialize via nested stream using specific serializer */
+ def deserializeViaNestedStream(is: InputStream, ser: SerializerInstance)(f: DeserializationStream => Unit) = {
+ val isWrapper = ser.deserializeStream(new InputStream {
+ def read(): Int = is.read()
+
+ override def read(b: Array[Byte], off: Int, len: Int): Int = is.read(b, off, len)
+ })
+ try {
+ f(isWrapper)
+ } finally {
+ isWrapper.close()
+ }
+ }
+
+ /**
+ * Primitive often used when writing {@link java.nio.ByteBuffer} to {@link java.io.DataOutput}.
+ */
+ def writeByteBuffer(bb: ByteBuffer, out: ObjectOutput) = {
+ if (bb.hasArray) {
+ out.write(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())
+ } else {
+ val bbval = new Array[Byte](bb.remaining())
+ bb.get(bbval)
+ out.write(bbval)
+ }
+ }
+
def isAlpha(c: Char): Boolean = {
(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
}
@@ -224,8 +267,9 @@ private object Utils extends Logging {
}
case _ =>
// Use the Hadoop filesystem library, which supports file://, hdfs://, s3://, and others
+ val env = SparkEnv.get
val uri = new URI(url)
- val conf = SparkHadoopUtil.newConfiguration()
+ val conf = env.hadoop.newConfiguration()
val fs = FileSystem.get(uri, conf)
val in = fs.open(new Path(uri))
val out = new FileOutputStream(tempFile)
@@ -351,48 +395,17 @@ private object Utils extends Logging {
retval
}
-/*
- // Used by DEBUG code : remove when all testing done
- private val ipPattern = Pattern.compile("^[0-9]+(\\.[0-9]+)*$")
def checkHost(host: String, message: String = "") {
- // Currently catches only ipv4 pattern, this is just a debugging tool - not rigourous !
- // if (host.matches("^[0-9]+(\\.[0-9]+)*$")) {
- if (ipPattern.matcher(host).matches()) {
- Utils.logErrorWithStack("Unexpected to have host " + host + " which matches IP pattern. Message " + message)
- }
- if (Utils.parseHostPort(host)._2 != 0){
- Utils.logErrorWithStack("Unexpected to have host " + host + " which has port in it. Message " + message)
- }
+ assert(host.indexOf(':') == -1, message)
}
- // Used by DEBUG code : remove when all testing done
def checkHostPort(hostPort: String, message: String = "") {
- val (host, port) = Utils.parseHostPort(hostPort)
- checkHost(host)
- if (port <= 0){
- Utils.logErrorWithStack("Unexpected to have port " + port + " which is not valid in " + hostPort + ". Message " + message)
- }
+ assert(hostPort.indexOf(':') != -1, message)
}
// Used by DEBUG code : remove when all testing done
def logErrorWithStack(msg: String) {
try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } }
- // temp code for debug
- System.exit(-1)
- }
-*/
-
- // Once testing is complete in various modes, replace with this ?
- def checkHost(host: String, message: String = "") {}
- def checkHostPort(hostPort: String, message: String = "") {}
-
- // Used by DEBUG code : remove when all testing done
- def logErrorWithStack(msg: String) {
- try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } }
- }
-
- def getUserNameFromEnvironment(): String = {
- SparkHadoopUtil.getUserNameFromEnvironment
}
// Typically, this will be of order of number of nodes in cluster
@@ -479,9 +492,9 @@ private object Utils extends Logging {
}
/**
- * Convert a memory quantity in bytes to a human-readable string such as "4.0 MB".
+ * Convert a quantity in bytes to a human-readable string such as "4.0 MB".
*/
- def memoryBytesToString(size: Long): String = {
+ def bytesToString(size: Long): String = {
val TB = 1L << 40
val GB = 1L << 30
val MB = 1L << 20
@@ -524,10 +537,10 @@ private object Utils extends Logging {
}
/**
- * Convert a memory quantity in megabytes to a human-readable string such as "4.0 MB".
+ * Convert a quantity in megabytes to a human-readable string such as "4.0 MB".
*/
- def memoryMegabytesToString(megabytes: Long): String = {
- memoryBytesToString(megabytes * 1024L * 1024L)
+ def megabytesToString(megabytes: Long): String = {
+ bytesToString(megabytes * 1024L * 1024L)
}
/**
@@ -596,7 +609,7 @@ private object Utils extends Logging {
output.toString
}
- /**
+ /**
* A regular expression to match classes of the "core" Spark API that we want to skip when
* finding the call site of a method.
*/
@@ -756,4 +769,13 @@ private object Utils extends Logging {
}
return buf
}
+
+ /* Calculates 'x' modulo 'mod', takes to consideration sign of x,
+ * i.e. if 'x' is negative, than 'x' % 'mod' is negative too
+ * so function return (x % mod) + mod in that case.
+ */
+ def nonNegativeMod(x: Int, mod: Int): Int = {
+ val rawMod = x % mod
+ rawMod + (if (rawMod < 0) mod else 0)
+ }
}
diff --git a/core/src/main/scala/spark/util/Vector.scala b/core/src/main/scala/org/apache/spark/util/Vector.scala
index ed49386f18..fe710c58ac 100644
--- a/core/src/main/scala/spark/util/Vector.scala
+++ b/core/src/main/scala/org/apache/spark/util/Vector.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
class Vector(val elements: Array[Double]) extends Serializable {
def length = elements.length
@@ -73,7 +73,6 @@ class Vector(val elements: Array[Double]) extends Serializable {
def += (other: Vector): Vector = {
if (length != other.length)
throw new IllegalArgumentException("Vectors of different length")
- var ans = 0.0
var i = 0
while (i < length) {
elements(i) += other(i)
@@ -117,9 +116,7 @@ object Vector {
def apply(elements: Double*) = new Vector(elements.toArray)
def apply(length: Int, initializer: Int => Double): Vector = {
- val elements = new Array[Double](length)
- for (i <- 0 until length)
- elements(i) = initializer(i)
+ val elements: Array[Double] = Array.tabulate(length)(initializer)
return new Vector(elements)
}
@@ -133,7 +130,7 @@ object Vector {
implicit def doubleToMultiplier(num: Double) = new Multiplier(num)
- implicit object VectorAccumParam extends spark.AccumulatorParam[Vector] {
+ implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] {
def addInPlace(t1: Vector, t2: Vector) = t1 + t2
def zero(initialValue: Vector) = Vector.zeros(initialValue.length)
diff --git a/core/src/main/scala/spark/Cache.scala b/core/src/main/scala/spark/Cache.scala
deleted file mode 100644
index b0c83ce59d..0000000000
--- a/core/src/main/scala/spark/Cache.scala
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark
-
-import java.util.concurrent.atomic.AtomicInteger
-
-private[spark] sealed trait CachePutResponse
-private[spark] case class CachePutSuccess(size: Long) extends CachePutResponse
-private[spark] case class CachePutFailure() extends CachePutResponse
-
-/**
- * An interface for caches in Spark, to allow for multiple implementations. Caches are used to store
- * both partitions of cached RDDs and broadcast variables on Spark executors. Caches are also aware
- * of which entries are part of the same dataset (for example, partitions in the same RDD). The key
- * for each value in a cache is a (datasetID, partition) pair.
- *
- * A single Cache instance gets created on each machine and is shared by all caches (i.e. both the
- * RDD split cache and the broadcast variable cache), to enable global replacement policies.
- * However, because these several independent modules all perform caching, it is important to give
- * them separate key namespaces, so that an RDD and a broadcast variable (for example) do not use
- * the same key. For this purpose, Cache has the notion of KeySpaces. Each client module must first
- * ask for a KeySpace, and then call get() and put() on that space using its own keys.
- *
- * This abstract class handles the creation of key spaces, so that subclasses need only deal with
- * keys that are unique across modules.
- */
-private[spark] abstract class Cache {
- private val nextKeySpaceId = new AtomicInteger(0)
- private def newKeySpaceId() = nextKeySpaceId.getAndIncrement()
-
- def newKeySpace() = new KeySpace(this, newKeySpaceId())
-
- /**
- * Get the value for a given (datasetId, partition), or null if it is not
- * found.
- */
- def get(datasetId: Any, partition: Int): Any
-
- /**
- * Attempt to put a value in the cache; returns CachePutFailure if this was
- * not successful (e.g. because the cache replacement policy forbids it), and
- * CachePutSuccess if successful. If size estimation is available, the cache
- * implementation should set the size field in CachePutSuccess.
- */
- def put(datasetId: Any, partition: Int, value: Any): CachePutResponse
-
- /**
- * Report the capacity of the cache partition. By default this just reports
- * zero. Specific implementations can choose to provide the capacity number.
- */
- def getCapacity: Long = 0L
-}
-
-/**
- * A key namespace in a Cache.
- */
-private[spark] class KeySpace(cache: Cache, val keySpaceId: Int) {
- def get(datasetId: Any, partition: Int): Any =
- cache.get((keySpaceId, datasetId), partition)
-
- def put(datasetId: Any, partition: Int, value: Any): CachePutResponse =
- cache.put((keySpaceId, datasetId), partition, value)
-
- def getCapacity: Long = cache.getCapacity
-}
diff --git a/core/src/main/scala/spark/KryoSerializer.scala b/core/src/main/scala/spark/KryoSerializer.scala
deleted file mode 100644
index ee37da7948..0000000000
--- a/core/src/main/scala/spark/KryoSerializer.scala
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark
-
-import java.io._
-import java.nio.ByteBuffer
-import java.nio.channels.Channels
-
-import scala.collection.immutable
-import scala.collection.mutable
-
-import com.esotericsoftware.kryo._
-import com.esotericsoftware.kryo.{Serializer => KSerializer}
-import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput}
-import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializer}
-import de.javakaffee.kryoserializers.KryoReflectionFactorySupport
-
-import serializer.{SerializerInstance, DeserializationStream, SerializationStream}
-import spark.broadcast._
-import spark.storage._
-
-private[spark]
-class KryoSerializationStream(kryo: Kryo, outStream: OutputStream) extends SerializationStream {
-
- val output = new KryoOutput(outStream)
-
- def writeObject[T](t: T): SerializationStream = {
- kryo.writeClassAndObject(output, t)
- this
- }
-
- def flush() { output.flush() }
- def close() { output.close() }
-}
-
-private[spark]
-class KryoDeserializationStream(kryo: Kryo, inStream: InputStream) extends DeserializationStream {
-
- val input = new KryoInput(inStream)
-
- def readObject[T](): T = {
- try {
- kryo.readClassAndObject(input).asInstanceOf[T]
- } catch {
- // DeserializationStream uses the EOF exception to indicate stopping condition.
- case e: com.esotericsoftware.kryo.KryoException => throw new java.io.EOFException
- }
- }
-
- def close() {
- // Kryo's Input automatically closes the input stream it is using.
- input.close()
- }
-}
-
-private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends SerializerInstance {
-
- val kryo = ks.kryo.get()
- val output = ks.output.get()
- val input = ks.input.get()
-
- def serialize[T](t: T): ByteBuffer = {
- output.clear()
- kryo.writeClassAndObject(output, t)
- ByteBuffer.wrap(output.toBytes)
- }
-
- def deserialize[T](bytes: ByteBuffer): T = {
- input.setBuffer(bytes.array)
- kryo.readClassAndObject(input).asInstanceOf[T]
- }
-
- def deserialize[T](bytes: ByteBuffer, loader: ClassLoader): T = {
- val oldClassLoader = kryo.getClassLoader
- kryo.setClassLoader(loader)
- input.setBuffer(bytes.array)
- val obj = kryo.readClassAndObject(input).asInstanceOf[T]
- kryo.setClassLoader(oldClassLoader)
- obj
- }
-
- def serializeStream(s: OutputStream): SerializationStream = {
- new KryoSerializationStream(kryo, s)
- }
-
- def deserializeStream(s: InputStream): DeserializationStream = {
- new KryoDeserializationStream(kryo, s)
- }
-}
-
-/**
- * Interface implemented by clients to register their classes with Kryo when using Kryo
- * serialization.
- */
-trait KryoRegistrator {
- def registerClasses(kryo: Kryo): Unit
-}
-
-/**
- * A Spark serializer that uses the [[http://code.google.com/p/kryo/wiki/V1Documentation Kryo 1.x library]].
- */
-class KryoSerializer extends spark.serializer.Serializer with Logging {
-
- val bufferSize = System.getProperty("spark.kryoserializer.buffer.mb", "2").toInt * 1024 * 1024
-
- val kryo = new ThreadLocal[Kryo] {
- override def initialValue = createKryo()
- }
-
- val output = new ThreadLocal[KryoOutput] {
- override def initialValue = new KryoOutput(bufferSize)
- }
-
- val input = new ThreadLocal[KryoInput] {
- override def initialValue = new KryoInput(bufferSize)
- }
-
- def createKryo(): Kryo = {
- val kryo = new KryoReflectionFactorySupport()
-
- // Register some commonly used classes
- val toRegister: Seq[AnyRef] = Seq(
- // Arrays
- Array(1), Array(1.0), Array(1.0f), Array(1L), Array(""), Array(("", "")),
- Array(new java.lang.Object), Array(1.toByte), Array(true), Array('c'),
- // Specialized Tuple2s
- ("", ""), ("", 1), (1, 1), (1.0, 1.0), (1L, 1L),
- (1, 1.0), (1.0, 1), (1L, 1.0), (1.0, 1L), (1, 1L), (1L, 1),
- // Scala collections
- List(1), mutable.ArrayBuffer(1),
- // Options and Either
- Some(1), Left(1), Right(1),
- // Higher-dimensional tuples
- (1, 1, 1), (1, 1, 1, 1), (1, 1, 1, 1, 1),
- None,
- ByteBuffer.allocate(1),
- StorageLevel.MEMORY_ONLY,
- PutBlock("1", ByteBuffer.allocate(1), StorageLevel.MEMORY_ONLY),
- GotBlock("1", ByteBuffer.allocate(1)),
- GetBlock("1")
- )
- for (obj <- toRegister) {
- kryo.register(obj.getClass)
- }
-
- // Allow sending SerializableWritable
- kryo.register(classOf[SerializableWritable[_]], new KryoJavaSerializer())
- kryo.register(classOf[HttpBroadcast[_]], new KryoJavaSerializer())
-
- // Register some commonly used Scala singleton objects. Because these
- // are singletons, we must return the exact same local object when we
- // deserialize rather than returning a clone as FieldSerializer would.
- class SingletonSerializer[T](obj: T) extends KSerializer[T] {
- override def write(kryo: Kryo, output: KryoOutput, obj: T) {}
- override def read(kryo: Kryo, input: KryoInput, cls: java.lang.Class[T]): T = obj
- }
- kryo.register(None.getClass, new SingletonSerializer[AnyRef](None))
- kryo.register(Nil.getClass, new SingletonSerializer[AnyRef](Nil))
-
- // Register maps with a special serializer since they have complex internal structure
- class ScalaMapSerializer(buildMap: Array[(Any, Any)] => scala.collection.Map[Any, Any])
- extends KSerializer[Array[(Any, Any)] => scala.collection.Map[Any, Any]] {
-
- //hack, look at https://groups.google.com/forum/#!msg/kryo-users/Eu5V4bxCfws/k-8UQ22y59AJ
- private final val FAKE_REFERENCE = new Object()
- override def write(
- kryo: Kryo,
- output: KryoOutput,
- obj: Array[(Any, Any)] => scala.collection.Map[Any, Any]) {
- val map = obj.asInstanceOf[scala.collection.Map[Any, Any]]
- output.writeInt(map.size)
- for ((k, v) <- map) {
- kryo.writeClassAndObject(output, k)
- kryo.writeClassAndObject(output, v)
- }
- }
- override def read (
- kryo: Kryo,
- input: KryoInput,
- cls: Class[Array[(Any, Any)] => scala.collection.Map[Any, Any]])
- : Array[(Any, Any)] => scala.collection.Map[Any, Any] = {
- kryo.reference(FAKE_REFERENCE)
- val size = input.readInt()
- val elems = new Array[(Any, Any)](size)
- for (i <- 0 until size) {
- val k = kryo.readClassAndObject(input)
- val v = kryo.readClassAndObject(input)
- elems(i)=(k,v)
- }
- buildMap(elems).asInstanceOf[Array[(Any, Any)] => scala.collection.Map[Any, Any]]
- }
- }
- kryo.register(mutable.HashMap().getClass, new ScalaMapSerializer(mutable.HashMap() ++ _))
- // TODO: add support for immutable maps too; this is more annoying because there are many
- // subclasses of immutable.Map for small maps (with <= 4 entries)
- val map1 = Map[Any, Any](1 -> 1)
- val map2 = Map[Any, Any](1 -> 1, 2 -> 2)
- val map3 = Map[Any, Any](1 -> 1, 2 -> 2, 3 -> 3)
- val map4 = Map[Any, Any](1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4)
- val map5 = Map[Any, Any](1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4, 5 -> 5)
- kryo.register(map1.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap))
- kryo.register(map2.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap))
- kryo.register(map3.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap))
- kryo.register(map4.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap))
- kryo.register(map5.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap))
-
- // Allow the user to register their own classes by setting spark.kryo.registrator
- val regCls = System.getProperty("spark.kryo.registrator")
- if (regCls != null) {
- logInfo("Running user registrator: " + regCls)
- val classLoader = Thread.currentThread.getContextClassLoader
- val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator]
- reg.registerClasses(kryo)
- }
-
- // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops
- kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean)
-
- kryo
- }
-
- def newInstance(): SerializerInstance = {
- this.kryo.get().setClassLoader(Thread.currentThread().getContextClassLoader)
- new KryoSerializerInstance(this)
- }
-}
diff --git a/core/src/main/scala/spark/deploy/DeployMessage.scala b/core/src/main/scala/spark/deploy/DeployMessage.scala
deleted file mode 100644
index e1f8aff6f5..0000000000
--- a/core/src/main/scala/spark/deploy/DeployMessage.scala
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.deploy
-
-import spark.deploy.ExecutorState.ExecutorState
-import spark.deploy.master.{WorkerInfo, ApplicationInfo}
-import spark.deploy.worker.ExecutorRunner
-import scala.collection.immutable.List
-import spark.Utils
-
-
-private[spark] sealed trait DeployMessage extends Serializable
-
-// Worker to Master
-
-private[spark]
-case class RegisterWorker(
- id: String,
- host: String,
- port: Int,
- cores: Int,
- memory: Int,
- webUiPort: Int,
- publicAddress: String)
- extends DeployMessage {
- Utils.checkHost(host, "Required hostname")
- assert (port > 0)
-}
-
-private[spark]
-case class ExecutorStateChanged(
- appId: String,
- execId: Int,
- state: ExecutorState,
- message: Option[String],
- exitStatus: Option[Int])
- extends DeployMessage
-
-private[spark] case class Heartbeat(workerId: String) extends DeployMessage
-
-// Master to Worker
-
-private[spark] case class RegisteredWorker(masterWebUiUrl: String) extends DeployMessage
-private[spark] case class RegisterWorkerFailed(message: String) extends DeployMessage
-private[spark] case class KillExecutor(appId: String, execId: Int) extends DeployMessage
-
-private[spark] case class LaunchExecutor(
- appId: String,
- execId: Int,
- appDesc: ApplicationDescription,
- cores: Int,
- memory: Int,
- sparkHome: String)
- extends DeployMessage
-
-// Client to Master
-
-private[spark] case class RegisterApplication(appDescription: ApplicationDescription)
- extends DeployMessage
-
-// Master to Client
-
-private[spark]
-case class RegisteredApplication(appId: String) extends DeployMessage
-
-private[spark]
-case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) {
- Utils.checkHostPort(hostPort, "Required hostport")
-}
-
-private[spark]
-case class ExecutorUpdated(id: Int, state: ExecutorState, message: Option[String],
- exitStatus: Option[Int])
-
-private[spark]
-case class ApplicationRemoved(message: String)
-
-// Internal message in Client
-
-private[spark] case object StopClient
-
-// MasterWebUI To Master
-
-private[spark] case object RequestMasterState
-
-// Master to MasterWebUI
-
-private[spark]
-case class MasterState(host: String, port: Int, workers: Array[WorkerInfo],
- activeApps: Array[ApplicationInfo], completedApps: Array[ApplicationInfo]) {
-
- Utils.checkHost(host, "Required hostname")
- assert (port > 0)
-
- def uri = "spark://" + host + ":" + port
-}
-
-// WorkerWebUI to Worker
-private[spark] case object RequestWorkerState
-
-// Worker to WorkerWebUI
-
-private[spark]
-case class WorkerState(host: String, port: Int, workerId: String, executors: List[ExecutorRunner],
- finishedExecutors: List[ExecutorRunner], masterUrl: String, cores: Int, memory: Int,
- coresUsed: Int, memoryUsed: Int, masterWebUiUrl: String) {
-
- Utils.checkHost(host, "Required hostname")
- assert (port > 0)
-}
diff --git a/core/src/main/scala/spark/package.scala b/core/src/main/scala/spark/package.scala
deleted file mode 100644
index b244bfbf06..0000000000
--- a/core/src/main/scala/spark/package.scala
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Core Spark functionality. [[spark.SparkContext]] serves as the main entry point to Spark, while
- * [[spark.RDD]] is the data type representing a distributed collection, and provides most
- * parallel operations.
- *
- * In addition, [[spark.PairRDDFunctions]] contains operations available only on RDDs of key-value
- * pairs, such as `groupByKey` and `join`; [[spark.DoubleRDDFunctions]] contains operations
- * available only on RDDs of Doubles; and [[spark.SequenceFileRDDFunctions]] contains operations
- * available on RDDs that can be saved as SequenceFiles. These operations are automatically
- * available on any RDD of the right type (e.g. RDD[(Int, Int)] through implicit conversions when
- * you `import spark.SparkContext._`.
- */
-package object spark {
- // For package docs only
-}
diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala
deleted file mode 100644
index 2b5bf18541..0000000000
--- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.rdd
-
-import spark.{Dependency, OneToOneDependency, NarrowDependency, RDD, Partition, TaskContext}
-import java.io.{ObjectOutputStream, IOException}
-
-private[spark] case class CoalescedRDDPartition(
- index: Int,
- @transient rdd: RDD[_],
- parentsIndices: Array[Int]
- ) extends Partition {
- var parents: Seq[Partition] = parentsIndices.map(rdd.partitions(_))
-
- @throws(classOf[IOException])
- private def writeObject(oos: ObjectOutputStream) {
- // Update the reference to parent split at the time of task serialization
- parents = parentsIndices.map(rdd.partitions(_))
- oos.defaultWriteObject()
- }
-}
-
-/**
- * Coalesce the partitions of a parent RDD (`prev`) into fewer partitions, so that each partition of
- * this RDD computes one or more of the parent ones. Will produce exactly `maxPartitions` if the
- * parent had more than this many partitions, or fewer if the parent had fewer.
- *
- * This transformation is useful when an RDD with many partitions gets filtered into a smaller one,
- * or to avoid having a large number of small tasks when processing a directory with many files.
- */
-class CoalescedRDD[T: ClassManifest](
- @transient var prev: RDD[T],
- maxPartitions: Int)
- extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies
-
- override def getPartitions: Array[Partition] = {
- val prevSplits = prev.partitions
- if (prevSplits.length < maxPartitions) {
- prevSplits.map(_.index).map{idx => new CoalescedRDDPartition(idx, prev, Array(idx)) }
- } else {
- (0 until maxPartitions).map { i =>
- val rangeStart = ((i.toLong * prevSplits.length) / maxPartitions).toInt
- val rangeEnd = (((i.toLong + 1) * prevSplits.length) / maxPartitions).toInt
- new CoalescedRDDPartition(i, prev, (rangeStart until rangeEnd).toArray)
- }.toArray
- }
- }
-
- override def compute(split: Partition, context: TaskContext): Iterator[T] = {
- split.asInstanceOf[CoalescedRDDPartition].parents.iterator.flatMap { parentSplit =>
- firstParent[T].iterator(parentSplit, context)
- }
- }
-
- override def getDependencies: Seq[Dependency[_]] = {
- Seq(new NarrowDependency(prev) {
- def getParents(id: Int): Seq[Int] =
- partitions(id).asInstanceOf[CoalescedRDDPartition].parentsIndices
- })
- }
-
- override def clearDependencies() {
- super.clearDependencies()
- prev = null
- }
-}
diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
deleted file mode 100644
index 7c10074dc7..0000000000
--- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
+++ /dev/null
@@ -1,631 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.scheduler.cluster
-
-import java.lang.{Boolean => JBoolean}
-
-import scala.collection.mutable.ArrayBuffer
-import scala.collection.mutable.HashMap
-import scala.collection.mutable.HashSet
-
-import spark._
-import spark.TaskState.TaskState
-import spark.scheduler._
-import java.nio.ByteBuffer
-import java.util.concurrent.atomic.AtomicLong
-import java.util.{TimerTask, Timer}
-
-/**
- * The main TaskScheduler implementation, for running tasks on a cluster. Clients should first call
- * start(), then submit task sets through the runTasks method.
- */
-private[spark] class ClusterScheduler(val sc: SparkContext)
- extends TaskScheduler
- with Logging {
-
- // How often to check for speculative tasks
- val SPECULATION_INTERVAL = System.getProperty("spark.speculation.interval", "100").toLong
- // Threshold above which we warn user initial TaskSet may be starved
- val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong
- // How often to revive offers in case there are pending tasks - that is how often to try to get
- // tasks scheduled in case there are nodes available : default 0 is to disable it - to preserve existing behavior
- // Note that this is required due to delayed scheduling due to data locality waits, etc.
- // TODO: rename property ?
- val TASK_REVIVAL_INTERVAL = System.getProperty("spark.tasks.revive.interval", "0").toLong
-
- /*
- This property controls how aggressive we should be to modulate waiting for node local task scheduling.
- To elaborate, currently there is a time limit (3 sec def) to ensure that spark attempts to wait for node locality of tasks before
- scheduling on other nodes. We have modified this in yarn branch such that offers to task set happen in prioritized order :
- node-local, rack-local and then others
- But once all available node local (and no pref) tasks are scheduled, instead of waiting for 3 sec before
- scheduling to other nodes (which degrades performance for time sensitive tasks and on larger clusters), we can
- modulate that : to also allow rack local nodes or any node. The default is still set to HOST - so that previous behavior is
- maintained. This is to allow tuning the tension between pulling rdd data off node and scheduling computation asap.
-
- TODO: rename property ? The value is one of
- - NODE_LOCAL (default, no change w.r.t current behavior),
- - RACK_LOCAL and
- - ANY
-
- Note that this property makes more sense when used in conjugation with spark.tasks.revive.interval > 0 : else it is not very effective.
-
- Additional Note: For non trivial clusters, there is a 4x - 5x reduction in running time (in some of our experiments) based on whether
- it is left at default NODE_LOCAL, RACK_LOCAL (if cluster is configured to be rack aware) or ANY.
- If cluster is rack aware, then setting it to RACK_LOCAL gives best tradeoff and a 3x - 4x performance improvement while minimizing IO impact.
- Also, it brings down the variance in running time drastically.
- */
- val TASK_SCHEDULING_AGGRESSION = TaskLocality.parse(System.getProperty("spark.tasks.schedule.aggression", "NODE_LOCAL"))
-
- val activeTaskSets = new HashMap[String, TaskSetManager]
-
- val taskIdToTaskSetId = new HashMap[Long, String]
- val taskIdToExecutorId = new HashMap[Long, String]
- val taskSetTaskIds = new HashMap[String, HashSet[Long]]
-
- @volatile private var hasReceivedTask = false
- @volatile private var hasLaunchedTask = false
- private val starvationTimer = new Timer(true)
-
- // Incrementing Mesos task IDs
- val nextTaskId = new AtomicLong(0)
-
- // Which executor IDs we have executors on
- val activeExecutorIds = new HashSet[String]
-
- // TODO: We might want to remove this and merge it with execId datastructures - but later.
- // Which hosts in the cluster are alive (contains hostPort's) - used for process local and node local task locality.
- private val hostPortsAlive = new HashSet[String]
- private val hostToAliveHostPorts = new HashMap[String, HashSet[String]]
-
- // The set of executors we have on each host; this is used to compute hostsAlive, which
- // in turn is used to decide when we can attain data locality on a given host
- private val executorsByHostPort = new HashMap[String, HashSet[String]]
-
- private val executorIdToHostPort = new HashMap[String, String]
-
- // JAR server, if any JARs were added by the user to the SparkContext
- var jarServer: HttpServer = null
-
- // URIs of JARs to pass to executor
- var jarUris: String = ""
-
- // Listener object to pass upcalls into
- var listener: TaskSchedulerListener = null
-
- var backend: SchedulerBackend = null
-
- val mapOutputTracker = SparkEnv.get.mapOutputTracker
-
- var schedulableBuilder: SchedulableBuilder = null
- var rootPool: Pool = null
-
- override def setListener(listener: TaskSchedulerListener) {
- this.listener = listener
- }
-
- def initialize(context: SchedulerBackend) {
- backend = context
- //default scheduler is FIFO
- val schedulingMode = System.getProperty("spark.cluster.schedulingmode", "FIFO")
- //temporarily set rootPool name to empty
- rootPool = new Pool("", SchedulingMode.withName(schedulingMode), 0, 0)
- schedulableBuilder = {
- schedulingMode match {
- case "FIFO" =>
- new FIFOSchedulableBuilder(rootPool)
- case "FAIR" =>
- new FairSchedulableBuilder(rootPool)
- }
- }
- schedulableBuilder.buildPools()
- // resolve executorId to hostPort mapping.
- def executorToHostPort(executorId: String, defaultHostPort: String): String = {
- executorIdToHostPort.getOrElse(executorId, defaultHostPort)
- }
-
- // Unfortunately, this means that SparkEnv is indirectly referencing ClusterScheduler
- // Will that be a design violation ?
- SparkEnv.get.executorIdToHostPort = Some(executorToHostPort)
- }
-
-
- def newTaskId(): Long = nextTaskId.getAndIncrement()
-
- override def start() {
- backend.start()
-
- if (JBoolean.getBoolean("spark.speculation")) {
- new Thread("ClusterScheduler speculation check") {
- setDaemon(true)
-
- override def run() {
- logInfo("Starting speculative execution thread")
- while (true) {
- try {
- Thread.sleep(SPECULATION_INTERVAL)
- } catch {
- case e: InterruptedException => {}
- }
- checkSpeculatableTasks()
- }
- }
- }.start()
- }
-
-
- // Change to always run with some default if TASK_REVIVAL_INTERVAL <= 0 ?
- if (TASK_REVIVAL_INTERVAL > 0) {
- new Thread("ClusterScheduler task offer revival check") {
- setDaemon(true)
-
- override def run() {
- logInfo("Starting speculative task offer revival thread")
- while (true) {
- try {
- Thread.sleep(TASK_REVIVAL_INTERVAL)
- } catch {
- case e: InterruptedException => {}
- }
-
- if (hasPendingTasks()) backend.reviveOffers()
- }
- }
- }.start()
- }
- }
-
- override def submitTasks(taskSet: TaskSet) {
- val tasks = taskSet.tasks
- logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks")
- this.synchronized {
- val manager = new ClusterTaskSetManager(this, taskSet)
- activeTaskSets(taskSet.id) = manager
- schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)
- taskSetTaskIds(taskSet.id) = new HashSet[Long]()
-
- if (hasReceivedTask == false) {
- starvationTimer.scheduleAtFixedRate(new TimerTask() {
- override def run() {
- if (!hasLaunchedTask) {
- logWarning("Initial job has not accepted any resources; " +
- "check your cluster UI to ensure that workers are registered")
- } else {
- this.cancel()
- }
- }
- }, STARVATION_TIMEOUT, STARVATION_TIMEOUT)
- }
- hasReceivedTask = true;
- }
- backend.reviveOffers()
- }
-
- def taskSetFinished(manager: TaskSetManager) {
- this.synchronized {
- activeTaskSets -= manager.taskSet.id
- manager.parent.removeSchedulable(manager)
- logInfo("Remove TaskSet %s from pool %s".format(manager.taskSet.id, manager.parent.name))
- taskIdToTaskSetId --= taskSetTaskIds(manager.taskSet.id)
- taskIdToExecutorId --= taskSetTaskIds(manager.taskSet.id)
- taskSetTaskIds.remove(manager.taskSet.id)
- }
- }
-
- /**
- * Called by cluster manager to offer resources on slaves. We respond by asking our active task
- * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so
- * that tasks are balanced across the cluster.
- */
- def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = {
- synchronized {
- SparkEnv.set(sc.env)
- // Mark each slave as alive and remember its hostname
- for (o <- offers) {
- // DEBUG Code
- Utils.checkHostPort(o.hostPort)
-
- executorIdToHostPort(o.executorId) = o.hostPort
- if (! executorsByHostPort.contains(o.hostPort)) {
- executorsByHostPort(o.hostPort) = new HashSet[String]()
- }
-
- hostPortsAlive += o.hostPort
- hostToAliveHostPorts.getOrElseUpdate(Utils.parseHostPort(o.hostPort)._1, new HashSet[String]).add(o.hostPort)
- executorGained(o.executorId, o.hostPort)
- }
- // Build a list of tasks to assign to each slave
- val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores))
- // merge availableCpus into nodeToAvailableCpus block ?
- val availableCpus = offers.map(o => o.cores).toArray
- val nodeToAvailableCpus = {
- val map = new HashMap[String, Int]()
- for (offer <- offers) {
- val hostPort = offer.hostPort
- val cores = offer.cores
- // DEBUG code
- Utils.checkHostPort(hostPort)
-
- val host = Utils.parseHostPort(hostPort)._1
-
- map.put(host, map.getOrElse(host, 0) + cores)
- }
-
- map
- }
- var launchedTask = false
- val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue()
- for (manager <- sortedTaskSetQueue)
- {
- logInfo("parentName:%s,name:%s,runningTasks:%s".format(manager.parent.name, manager.name, manager.runningTasks))
- }
- for (manager <- sortedTaskSetQueue) {
-
- // Split offers based on node local, rack local and off-rack tasks.
- val processLocalOffers = new HashMap[String, ArrayBuffer[Int]]()
- val nodeLocalOffers = new HashMap[String, ArrayBuffer[Int]]()
- val rackLocalOffers = new HashMap[String, ArrayBuffer[Int]]()
- val otherOffers = new HashMap[String, ArrayBuffer[Int]]()
-
- for (i <- 0 until offers.size) {
- val hostPort = offers(i).hostPort
- // DEBUG code
- Utils.checkHostPort(hostPort)
-
- val numProcessLocalTasks = math.max(0, math.min(manager.numPendingTasksForHostPort(hostPort), availableCpus(i)))
- if (numProcessLocalTasks > 0){
- val list = processLocalOffers.getOrElseUpdate(hostPort, new ArrayBuffer[Int])
- for (j <- 0 until numProcessLocalTasks) list += i
- }
-
- val host = Utils.parseHostPort(hostPort)._1
- val numNodeLocalTasks = math.max(0,
- // Remove process local tasks (which are also host local btw !) from this
- math.min(manager.numPendingTasksForHost(hostPort) - numProcessLocalTasks, nodeToAvailableCpus(host)))
- if (numNodeLocalTasks > 0){
- val list = nodeLocalOffers.getOrElseUpdate(host, new ArrayBuffer[Int])
- for (j <- 0 until numNodeLocalTasks) list += i
- }
-
- val numRackLocalTasks = math.max(0,
- // Remove node local tasks (which are also rack local btw !) from this
- math.min(manager.numRackLocalPendingTasksForHost(hostPort) - numProcessLocalTasks - numNodeLocalTasks, nodeToAvailableCpus(host)))
- if (numRackLocalTasks > 0){
- val list = rackLocalOffers.getOrElseUpdate(host, new ArrayBuffer[Int])
- for (j <- 0 until numRackLocalTasks) list += i
- }
- if (numNodeLocalTasks <= 0 && numRackLocalTasks <= 0){
- // add to others list - spread even this across cluster.
- val list = otherOffers.getOrElseUpdate(host, new ArrayBuffer[Int])
- list += i
- }
- }
-
- val offersPriorityList = new ArrayBuffer[Int](
- processLocalOffers.size + nodeLocalOffers.size + rackLocalOffers.size + otherOffers.size)
-
- // First process local, then host local, then rack, then others
-
- // numNodeLocalOffers contains count of both process local and host offers.
- val numNodeLocalOffers = {
- val processLocalPriorityList = ClusterScheduler.prioritizeContainers(processLocalOffers)
- offersPriorityList ++= processLocalPriorityList
-
- val nodeLocalPriorityList = ClusterScheduler.prioritizeContainers(nodeLocalOffers)
- offersPriorityList ++= nodeLocalPriorityList
-
- processLocalPriorityList.size + nodeLocalPriorityList.size
- }
- val numRackLocalOffers = {
- val rackLocalPriorityList = ClusterScheduler.prioritizeContainers(rackLocalOffers)
- offersPriorityList ++= rackLocalPriorityList
- rackLocalPriorityList.size
- }
- offersPriorityList ++= ClusterScheduler.prioritizeContainers(otherOffers)
-
- var lastLoop = false
- val lastLoopIndex = TASK_SCHEDULING_AGGRESSION match {
- case TaskLocality.NODE_LOCAL => numNodeLocalOffers
- case TaskLocality.RACK_LOCAL => numRackLocalOffers + numNodeLocalOffers
- case TaskLocality.ANY => offersPriorityList.size
- }
-
- do {
- launchedTask = false
- var loopCount = 0
- for (i <- offersPriorityList) {
- val execId = offers(i).executorId
- val hostPort = offers(i).hostPort
-
- // If last loop and within the lastLoopIndex, expand scope - else use null (which will use default/existing)
- val overrideLocality = if (lastLoop && loopCount < lastLoopIndex) TASK_SCHEDULING_AGGRESSION else null
-
- // If last loop, override waiting for host locality - we scheduled all local tasks already and there might be more available ...
- loopCount += 1
-
- manager.slaveOffer(execId, hostPort, availableCpus(i), overrideLocality) match {
- case Some(task) =>
- tasks(i) += task
- val tid = task.taskId
- taskIdToTaskSetId(tid) = manager.taskSet.id
- taskSetTaskIds(manager.taskSet.id) += tid
- taskIdToExecutorId(tid) = execId
- activeExecutorIds += execId
- executorsByHostPort(hostPort) += execId
- availableCpus(i) -= 1
- launchedTask = true
-
- case None => {}
- }
- }
- // Loop once more - when lastLoop = true, then we try to schedule task on all nodes irrespective of
- // data locality (we still go in order of priority : but that would not change anything since
- // if data local tasks had been available, we would have scheduled them already)
- if (lastLoop) {
- // prevent more looping
- launchedTask = false
- } else if (!lastLoop && !launchedTask) {
- // Do this only if TASK_SCHEDULING_AGGRESSION != NODE_LOCAL
- if (TASK_SCHEDULING_AGGRESSION != TaskLocality.NODE_LOCAL) {
- // fudge launchedTask to ensure we loop once more
- launchedTask = true
- // dont loop anymore
- lastLoop = true
- }
- }
- } while (launchedTask)
- }
-
- if (tasks.size > 0) {
- hasLaunchedTask = true
- }
- return tasks
- }
- }
-
- def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) {
- var taskSetToUpdate: Option[TaskSetManager] = None
- var failedExecutor: Option[String] = None
- var taskFailed = false
- synchronized {
- try {
- if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) {
- // We lost this entire executor, so remember that it's gone
- val execId = taskIdToExecutorId(tid)
- if (activeExecutorIds.contains(execId)) {
- removeExecutor(execId)
- failedExecutor = Some(execId)
- }
- }
- taskIdToTaskSetId.get(tid) match {
- case Some(taskSetId) =>
- if (activeTaskSets.contains(taskSetId)) {
- taskSetToUpdate = Some(activeTaskSets(taskSetId))
- }
- if (TaskState.isFinished(state)) {
- taskIdToTaskSetId.remove(tid)
- if (taskSetTaskIds.contains(taskSetId)) {
- taskSetTaskIds(taskSetId) -= tid
- }
- taskIdToExecutorId.remove(tid)
- }
- if (state == TaskState.FAILED) {
- taskFailed = true
- }
- case None =>
- logInfo("Ignoring update from TID " + tid + " because its task set is gone")
- }
- } catch {
- case e: Exception => logError("Exception in statusUpdate", e)
- }
- }
- // Update the task set and DAGScheduler without holding a lock on this, since that can deadlock
- if (taskSetToUpdate != None) {
- taskSetToUpdate.get.statusUpdate(tid, state, serializedData)
- }
- if (failedExecutor != None) {
- listener.executorLost(failedExecutor.get)
- backend.reviveOffers()
- }
- if (taskFailed) {
-
- // Also revive offers if a task had failed for some reason other than host lost
- backend.reviveOffers()
- }
- }
-
- def error(message: String) {
- synchronized {
- if (activeTaskSets.size > 0) {
- // Have each task set throw a SparkException with the error
- for ((taskSetId, manager) <- activeTaskSets) {
- try {
- manager.error(message)
- } catch {
- case e: Exception => logError("Exception in error callback", e)
- }
- }
- } else {
- // No task sets are active but we still got an error. Just exit since this
- // must mean the error is during registration.
- // It might be good to do something smarter here in the future.
- logError("Exiting due to error from cluster scheduler: " + message)
- System.exit(1)
- }
- }
- }
-
- override def stop() {
- if (backend != null) {
- backend.stop()
- }
- if (jarServer != null) {
- jarServer.stop()
- }
-
- // sleeping for an arbitrary 5 seconds : to ensure that messages are sent out.
- // TODO: Do something better !
- Thread.sleep(5000L)
- }
-
- override def defaultParallelism() = backend.defaultParallelism()
-
-
- // Check for speculatable tasks in all our active jobs.
- def checkSpeculatableTasks() {
- var shouldRevive = false
- synchronized {
- shouldRevive = rootPool.checkSpeculatableTasks()
- }
- if (shouldRevive) {
- backend.reviveOffers()
- }
- }
-
- // Check for pending tasks in all our active jobs.
- def hasPendingTasks(): Boolean = {
- synchronized {
- rootPool.hasPendingTasks()
- }
- }
-
- def executorLost(executorId: String, reason: ExecutorLossReason) {
- var failedExecutor: Option[String] = None
-
- synchronized {
- if (activeExecutorIds.contains(executorId)) {
- val hostPort = executorIdToHostPort(executorId)
- logError("Lost executor %s on %s: %s".format(executorId, hostPort, reason))
- removeExecutor(executorId)
- failedExecutor = Some(executorId)
- } else {
- // We may get multiple executorLost() calls with different loss reasons. For example, one
- // may be triggered by a dropped connection from the slave while another may be a report
- // of executor termination from Mesos. We produce log messages for both so we eventually
- // report the termination reason.
- logError("Lost an executor " + executorId + " (already removed): " + reason)
- }
- }
- // Call listener.executorLost without holding the lock on this to prevent deadlock
- if (failedExecutor != None) {
- listener.executorLost(failedExecutor.get)
- backend.reviveOffers()
- }
- }
-
- /** Remove an executor from all our data structures and mark it as lost */
- private def removeExecutor(executorId: String) {
- activeExecutorIds -= executorId
- val hostPort = executorIdToHostPort(executorId)
- if (hostPortsAlive.contains(hostPort)) {
- // DEBUG Code
- Utils.checkHostPort(hostPort)
-
- hostPortsAlive -= hostPort
- hostToAliveHostPorts.getOrElseUpdate(Utils.parseHostPort(hostPort)._1, new HashSet[String]).remove(hostPort)
- }
-
- val execs = executorsByHostPort.getOrElse(hostPort, new HashSet)
- execs -= executorId
- if (execs.isEmpty) {
- executorsByHostPort -= hostPort
- }
- executorIdToHostPort -= executorId
- rootPool.executorLost(executorId, hostPort)
- }
-
- def executorGained(execId: String, hostPort: String) {
- listener.executorGained(execId, hostPort)
- }
-
- def getExecutorsAliveOnHost(host: String): Option[Set[String]] = {
- Utils.checkHost(host)
-
- val retval = hostToAliveHostPorts.get(host)
- if (retval.isDefined) {
- return Some(retval.get.toSet)
- }
-
- None
- }
-
- def isExecutorAliveOnHostPort(hostPort: String): Boolean = {
- // Even if hostPort is a host, it does not matter - it is just a specific check.
- // But we do have to ensure that only hostPort get into hostPortsAlive !
- // So no check against Utils.checkHostPort
- hostPortsAlive.contains(hostPort)
- }
-
- // By default, rack is unknown
- def getRackForHost(value: String): Option[String] = None
-
- // By default, (cached) hosts for rack is unknown
- def getCachedHostsForRack(rack: String): Option[Set[String]] = None
-}
-
-
-object ClusterScheduler {
-
- // Used to 'spray' available containers across the available set to ensure too many containers on same host
- // are not used up. Used in yarn mode and in task scheduling (when there are multiple containers available
- // to execute a task)
- // For example: yarn can returns more containers than we would have requested under ANY, this method
- // prioritizes how to use the allocated containers.
- // flatten the map such that the array buffer entries are spread out across the returned value.
- // given <host, list[container]> == <h1, [c1 .. c5]>, <h2, [c1 .. c3]>, <h3, [c1, c2]>, <h4, c1>, <h5, c1>, i
- // the return value would be something like : h1c1, h2c1, h3c1, h4c1, h5c1, h1c2, h2c2, h3c2, h1c3, h2c3, h1c4, h1c5
- // We then 'use' the containers in this order (consuming only the top K from this list where
- // K = number to be user). This is to ensure that if we have multiple eligible allocations,
- // they dont end up allocating all containers on a small number of hosts - increasing probability of
- // multiple container failure when a host goes down.
- // Note, there is bias for keys with higher number of entries in value to be picked first (by design)
- // Also note that invocation of this method is expected to have containers of same 'type'
- // (host-local, rack-local, off-rack) and not across types : so that reordering is simply better from
- // the available list - everything else being same.
- // That is, we we first consume data local, then rack local and finally off rack nodes. So the
- // prioritization from this method applies to within each category
- def prioritizeContainers[K, T] (map: HashMap[K, ArrayBuffer[T]]): List[T] = {
- val _keyList = new ArrayBuffer[K](map.size)
- _keyList ++= map.keys
-
- // order keyList based on population of value in map
- val keyList = _keyList.sortWith(
- (left, right) => map.get(left).getOrElse(Set()).size > map.get(right).getOrElse(Set()).size
- )
-
- val retval = new ArrayBuffer[T](keyList.size * 2)
- var index = 0
- var found = true
-
- while (found){
- found = false
- for (key <- keyList) {
- val containerList: ArrayBuffer[T] = map.get(key).getOrElse(null)
- assert(containerList != null)
- // Get the index'th entry for this host - if present
- if (index < containerList.size){
- retval += containerList.apply(index)
- found = true
- }
- }
- index += 1
- }
-
- retval.toList
- }
-}
diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala
deleted file mode 100644
index 3d06520675..0000000000
--- a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala
+++ /dev/null
@@ -1,765 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.scheduler.cluster
-
-import java.util.{HashMap => JHashMap, NoSuchElementException, Arrays}
-
-import scala.collection.mutable.ArrayBuffer
-import scala.collection.mutable.HashMap
-import scala.collection.mutable.HashSet
-import scala.math.max
-import scala.math.min
-
-import spark._
-import spark.scheduler._
-import spark.TaskState.TaskState
-import java.nio.ByteBuffer
-
-private[spark] object TaskLocality extends Enumeration("PROCESS_LOCAL", "NODE_LOCAL", "RACK_LOCAL", "ANY") with Logging {
-
- // process local is expected to be used ONLY within tasksetmanager for now.
- val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value
-
- type TaskLocality = Value
-
- def isAllowed(constraint: TaskLocality, condition: TaskLocality): Boolean = {
-
- // Must not be the constraint.
- assert (constraint != TaskLocality.PROCESS_LOCAL)
-
- constraint match {
- case TaskLocality.NODE_LOCAL => condition == TaskLocality.NODE_LOCAL
- case TaskLocality.RACK_LOCAL => condition == TaskLocality.NODE_LOCAL || condition == TaskLocality.RACK_LOCAL
- // For anything else, allow
- case _ => true
- }
- }
-
- def parse(str: String): TaskLocality = {
- // better way to do this ?
- try {
- val retval = TaskLocality.withName(str)
- // Must not specify PROCESS_LOCAL !
- assert (retval != TaskLocality.PROCESS_LOCAL)
-
- retval
- } catch {
- case nEx: NoSuchElementException => {
- logWarning("Invalid task locality specified '" + str + "', defaulting to NODE_LOCAL");
- // default to preserve earlier behavior
- NODE_LOCAL
- }
- }
- }
-}
-
-/**
- * Schedules the tasks within a single TaskSet in the ClusterScheduler.
- */
-private[spark] class ClusterTaskSetManager(
- sched: ClusterScheduler,
- val taskSet: TaskSet)
- extends TaskSetManager
- with Logging {
-
- // Maximum time to wait to run a task in a preferred location (in ms)
- val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "3000").toLong
-
- // CPUs to request per task
- val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toDouble
-
- // Maximum times a task is allowed to fail before failing the job
- val MAX_TASK_FAILURES = 4
-
- // Quantile of tasks at which to start speculation
- val SPECULATION_QUANTILE = System.getProperty("spark.speculation.quantile", "0.75").toDouble
- val SPECULATION_MULTIPLIER = System.getProperty("spark.speculation.multiplier", "1.5").toDouble
-
- // Serializer for closures and tasks.
- val ser = SparkEnv.get.closureSerializer.newInstance()
-
- val tasks = taskSet.tasks
- val numTasks = tasks.length
- val copiesRunning = new Array[Int](numTasks)
- val finished = new Array[Boolean](numTasks)
- val numFailures = new Array[Int](numTasks)
- val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil)
- var tasksFinished = 0
-
- var weight = 1
- var minShare = 0
- var runningTasks = 0
- var priority = taskSet.priority
- var stageId = taskSet.stageId
- var name = "TaskSet_"+taskSet.stageId.toString
- var parent:Schedulable = null
-
- // Last time when we launched a preferred task (for delay scheduling)
- var lastPreferredLaunchTime = System.currentTimeMillis
-
- // List of pending tasks for each node (process local to container). These collections are actually
- // treated as stacks, in which new tasks are added to the end of the
- // ArrayBuffer and removed from the end. This makes it faster to detect
- // tasks that repeatedly fail because whenever a task failed, it is put
- // back at the head of the stack. They are also only cleaned up lazily;
- // when a task is launched, it remains in all the pending lists except
- // the one that it was launched from, but gets removed from them later.
- private val pendingTasksForHostPort = new HashMap[String, ArrayBuffer[Int]]
-
- // List of pending tasks for each node.
- // Essentially, similar to pendingTasksForHostPort, except at host level
- private val pendingTasksForHost = new HashMap[String, ArrayBuffer[Int]]
-
- // List of pending tasks for each node based on rack locality.
- // Essentially, similar to pendingTasksForHost, except at rack level
- private val pendingRackLocalTasksForHost = new HashMap[String, ArrayBuffer[Int]]
-
- // List containing pending tasks with no locality preferences
- val pendingTasksWithNoPrefs = new ArrayBuffer[Int]
-
- // List containing all pending tasks (also used as a stack, as above)
- val allPendingTasks = new ArrayBuffer[Int]
-
- // Tasks that can be speculated. Since these will be a small fraction of total
- // tasks, we'll just hold them in a HashSet.
- val speculatableTasks = new HashSet[Int]
-
- // Task index, start and finish time for each task attempt (indexed by task ID)
- val taskInfos = new HashMap[Long, TaskInfo]
-
- // Did the job fail?
- var failed = false
- var causeOfFailure = ""
-
- // How frequently to reprint duplicate exceptions in full, in milliseconds
- val EXCEPTION_PRINT_INTERVAL =
- System.getProperty("spark.logging.exceptionPrintInterval", "10000").toLong
- // Map of recent exceptions (identified by string representation and
- // top stack frame) to duplicate count (how many times the same
- // exception has appeared) and time the full exception was
- // printed. This should ideally be an LRU map that can drop old
- // exceptions automatically.
- val recentExceptions = HashMap[String, (Int, Long)]()
-
- // Figure out the current map output tracker generation and set it on all tasks
- val generation = sched.mapOutputTracker.getGeneration
- logDebug("Generation for " + taskSet.id + ": " + generation)
- for (t <- tasks) {
- t.generation = generation
- }
-
- // Add all our tasks to the pending lists. We do this in reverse order
- // of task index so that tasks with low indices get launched first.
- for (i <- (0 until numTasks).reverse) {
- addPendingTask(i)
- }
-
- // Note that it follows the hierarchy.
- // if we search for NODE_LOCAL, the output will include PROCESS_LOCAL and
- // if we search for RACK_LOCAL, it will include PROCESS_LOCAL & NODE_LOCAL
- private def findPreferredLocations(_taskPreferredLocations: Seq[String], scheduler: ClusterScheduler,
- taskLocality: TaskLocality.TaskLocality): HashSet[String] = {
-
- if (TaskLocality.PROCESS_LOCAL == taskLocality) {
- // straight forward comparison ! Special case it.
- val retval = new HashSet[String]()
- scheduler.synchronized {
- for (location <- _taskPreferredLocations) {
- if (scheduler.isExecutorAliveOnHostPort(location)) {
- retval += location
- }
- }
- }
-
- return retval
- }
-
- val taskPreferredLocations =
- if (TaskLocality.NODE_LOCAL == taskLocality) {
- _taskPreferredLocations
- } else {
- assert (TaskLocality.RACK_LOCAL == taskLocality)
- // Expand set to include all 'seen' rack local hosts.
- // This works since container allocation/management happens within master - so any rack locality information is updated in msater.
- // Best case effort, and maybe sort of kludge for now ... rework it later ?
- val hosts = new HashSet[String]
- _taskPreferredLocations.foreach(h => {
- val rackOpt = scheduler.getRackForHost(h)
- if (rackOpt.isDefined) {
- val hostsOpt = scheduler.getCachedHostsForRack(rackOpt.get)
- if (hostsOpt.isDefined) {
- hosts ++= hostsOpt.get
- }
- }
-
- // Ensure that irrespective of what scheduler says, host is always added !
- hosts += h
- })
-
- hosts
- }
-
- val retval = new HashSet[String]
- scheduler.synchronized {
- for (prefLocation <- taskPreferredLocations) {
- val aliveLocationsOpt = scheduler.getExecutorsAliveOnHost(Utils.parseHostPort(prefLocation)._1)
- if (aliveLocationsOpt.isDefined) {
- retval ++= aliveLocationsOpt.get
- }
- }
- }
-
- retval
- }
-
- // Add a task to all the pending-task lists that it should be on.
- private def addPendingTask(index: Int) {
- // We can infer hostLocalLocations from rackLocalLocations by joining it against tasks(index).preferredLocations (with appropriate
- // hostPort <-> host conversion). But not doing it for simplicity sake. If this becomes a performance issue, modify it.
- val processLocalLocations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.PROCESS_LOCAL)
- val hostLocalLocations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL)
- val rackLocalLocations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.RACK_LOCAL)
-
- if (rackLocalLocations.size == 0) {
- // Current impl ensures this.
- assert (processLocalLocations.size == 0)
- assert (hostLocalLocations.size == 0)
- pendingTasksWithNoPrefs += index
- } else {
-
- // process local locality
- for (hostPort <- processLocalLocations) {
- // DEBUG Code
- Utils.checkHostPort(hostPort)
-
- val hostPortList = pendingTasksForHostPort.getOrElseUpdate(hostPort, ArrayBuffer())
- hostPortList += index
- }
-
- // host locality (includes process local)
- for (hostPort <- hostLocalLocations) {
- // DEBUG Code
- Utils.checkHostPort(hostPort)
-
- val host = Utils.parseHostPort(hostPort)._1
- val hostList = pendingTasksForHost.getOrElseUpdate(host, ArrayBuffer())
- hostList += index
- }
-
- // rack locality (includes process local and host local)
- for (rackLocalHostPort <- rackLocalLocations) {
- // DEBUG Code
- Utils.checkHostPort(rackLocalHostPort)
-
- val rackLocalHost = Utils.parseHostPort(rackLocalHostPort)._1
- val list = pendingRackLocalTasksForHost.getOrElseUpdate(rackLocalHost, ArrayBuffer())
- list += index
- }
- }
-
- allPendingTasks += index
- }
-
- // Return the pending tasks list for a given host port (process local), or an empty list if
- // there is no map entry for that host
- private def getPendingTasksForHostPort(hostPort: String): ArrayBuffer[Int] = {
- // DEBUG Code
- Utils.checkHostPort(hostPort)
- pendingTasksForHostPort.getOrElse(hostPort, ArrayBuffer())
- }
-
- // Return the pending tasks list for a given host, or an empty list if
- // there is no map entry for that host
- private def getPendingTasksForHost(hostPort: String): ArrayBuffer[Int] = {
- val host = Utils.parseHostPort(hostPort)._1
- pendingTasksForHost.getOrElse(host, ArrayBuffer())
- }
-
- // Return the pending tasks (rack level) list for a given host, or an empty list if
- // there is no map entry for that host
- private def getRackLocalPendingTasksForHost(hostPort: String): ArrayBuffer[Int] = {
- val host = Utils.parseHostPort(hostPort)._1
- pendingRackLocalTasksForHost.getOrElse(host, ArrayBuffer())
- }
-
- // Number of pending tasks for a given host Port (which would be process local)
- def numPendingTasksForHostPort(hostPort: String): Int = {
- getPendingTasksForHostPort(hostPort).count( index => copiesRunning(index) == 0 && !finished(index) )
- }
-
- // Number of pending tasks for a given host (which would be data local)
- def numPendingTasksForHost(hostPort: String): Int = {
- getPendingTasksForHost(hostPort).count( index => copiesRunning(index) == 0 && !finished(index) )
- }
-
- // Number of pending rack local tasks for a given host
- def numRackLocalPendingTasksForHost(hostPort: String): Int = {
- getRackLocalPendingTasksForHost(hostPort).count( index => copiesRunning(index) == 0 && !finished(index) )
- }
-
-
- // Dequeue a pending task from the given list and return its index.
- // Return None if the list is empty.
- // This method also cleans up any tasks in the list that have already
- // been launched, since we want that to happen lazily.
- private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = {
- while (!list.isEmpty) {
- val index = list.last
- list.trimEnd(1)
- if (copiesRunning(index) == 0 && !finished(index)) {
- return Some(index)
- }
- }
- return None
- }
-
- // Return a speculative task for a given host if any are available. The task should not have an
- // attempt running on this host, in case the host is slow. In addition, if locality is set, the
- // task must have a preference for this host/rack/no preferred locations at all.
- private def findSpeculativeTask(hostPort: String, locality: TaskLocality.TaskLocality): Option[Int] = {
-
- assert (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL))
- speculatableTasks.retain(index => !finished(index)) // Remove finished tasks from set
-
- if (speculatableTasks.size > 0) {
- val localTask = speculatableTasks.find {
- index =>
- val locations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL)
- val attemptLocs = taskAttempts(index).map(_.hostPort)
- (locations.size == 0 || locations.contains(hostPort)) && !attemptLocs.contains(hostPort)
- }
-
- if (localTask != None) {
- speculatableTasks -= localTask.get
- return localTask
- }
-
- // check for rack locality
- if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
- val rackTask = speculatableTasks.find {
- index =>
- val locations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.RACK_LOCAL)
- val attemptLocs = taskAttempts(index).map(_.hostPort)
- locations.contains(hostPort) && !attemptLocs.contains(hostPort)
- }
-
- if (rackTask != None) {
- speculatableTasks -= rackTask.get
- return rackTask
- }
- }
-
- // Any task ...
- if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) {
- // Check for attemptLocs also ?
- val nonLocalTask = speculatableTasks.find(i => !taskAttempts(i).map(_.hostPort).contains(hostPort))
- if (nonLocalTask != None) {
- speculatableTasks -= nonLocalTask.get
- return nonLocalTask
- }
- }
- }
- return None
- }
-
- // Dequeue a pending task for a given node and return its index.
- // If localOnly is set to false, allow non-local tasks as well.
- private def findTask(hostPort: String, locality: TaskLocality.TaskLocality): Option[Int] = {
- val processLocalTask = findTaskFromList(getPendingTasksForHostPort(hostPort))
- if (processLocalTask != None) {
- return processLocalTask
- }
-
- val localTask = findTaskFromList(getPendingTasksForHost(hostPort))
- if (localTask != None) {
- return localTask
- }
-
- if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
- val rackLocalTask = findTaskFromList(getRackLocalPendingTasksForHost(hostPort))
- if (rackLocalTask != None) {
- return rackLocalTask
- }
- }
-
- // Look for no pref tasks AFTER rack local tasks - this has side effect that we will get to failed tasks later rather than sooner.
- // TODO: That code path needs to be revisited (adding to no prefs list when host:port goes down).
- val noPrefTask = findTaskFromList(pendingTasksWithNoPrefs)
- if (noPrefTask != None) {
- return noPrefTask
- }
-
- if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) {
- val nonLocalTask = findTaskFromList(allPendingTasks)
- if (nonLocalTask != None) {
- return nonLocalTask
- }
- }
-
- // Finally, if all else has failed, find a speculative task
- return findSpeculativeTask(hostPort, locality)
- }
-
- private def isProcessLocalLocation(task: Task[_], hostPort: String): Boolean = {
- Utils.checkHostPort(hostPort)
-
- val locs = task.preferredLocations
-
- locs.contains(hostPort)
- }
-
- private def isHostLocalLocation(task: Task[_], hostPort: String): Boolean = {
- val locs = task.preferredLocations
-
- // If no preference, consider it as host local
- if (locs.isEmpty) return true
-
- val host = Utils.parseHostPort(hostPort)._1
- locs.find(h => Utils.parseHostPort(h)._1 == host).isDefined
- }
-
- // Does a host count as a rack local preferred location for a task? (assumes host is NOT preferred location).
- // This is true if either the task has preferred locations and this host is one, or it has
- // no preferred locations (in which we still count the launch as preferred).
- private def isRackLocalLocation(task: Task[_], hostPort: String): Boolean = {
-
- val locs = task.preferredLocations
-
- val preferredRacks = new HashSet[String]()
- for (preferredHost <- locs) {
- val rack = sched.getRackForHost(preferredHost)
- if (None != rack) preferredRacks += rack.get
- }
-
- if (preferredRacks.isEmpty) return false
-
- val hostRack = sched.getRackForHost(hostPort)
-
- return None != hostRack && preferredRacks.contains(hostRack.get)
- }
-
- // Respond to an offer of a single slave from the scheduler by finding a task
- def slaveOffer(execId: String, hostPort: String, availableCpus: Double, overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] = {
-
- if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) {
- // If explicitly specified, use that
- val locality = if (overrideLocality != null) overrideLocality else {
- // expand only if we have waited for more than LOCALITY_WAIT for a host local task ...
- val time = System.currentTimeMillis
- if (time - lastPreferredLaunchTime < LOCALITY_WAIT) TaskLocality.NODE_LOCAL else TaskLocality.ANY
- }
-
- findTask(hostPort, locality) match {
- case Some(index) => {
- // Found a task; do some bookkeeping and return a Mesos task for it
- val task = tasks(index)
- val taskId = sched.newTaskId()
- // Figure out whether this should count as a preferred launch
- val taskLocality =
- if (isProcessLocalLocation(task, hostPort)) TaskLocality.PROCESS_LOCAL
- else if (isHostLocalLocation(task, hostPort)) TaskLocality.NODE_LOCAL
- else if (isRackLocalLocation(task, hostPort)) TaskLocality.RACK_LOCAL
- else TaskLocality.ANY
- val prefStr = taskLocality.toString
- logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format(
- taskSet.id, index, taskId, execId, hostPort, prefStr))
- // Do various bookkeeping
- copiesRunning(index) += 1
- val time = System.currentTimeMillis
- val info = new TaskInfo(taskId, index, time, execId, hostPort, taskLocality)
- taskInfos(taskId) = info
- taskAttempts(index) = info :: taskAttempts(index)
- if (taskLocality == TaskLocality.PROCESS_LOCAL || taskLocality == TaskLocality.NODE_LOCAL) {
- lastPreferredLaunchTime = time
- }
- // Serialize and return the task
- val startTime = System.currentTimeMillis
- val serializedTask = Task.serializeWithDependencies(
- task, sched.sc.addedFiles, sched.sc.addedJars, ser)
- val timeTaken = System.currentTimeMillis - startTime
- increaseRunningTasks(1)
- logInfo("Serialized task %s:%d as %d bytes in %d ms".format(
- taskSet.id, index, serializedTask.limit, timeTaken))
- val taskName = "task %s:%d".format(taskSet.id, index)
- return Some(new TaskDescription(taskId, execId, taskName, serializedTask))
- }
- case _ =>
- }
- }
- return None
- }
-
- def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) {
- state match {
- case TaskState.FINISHED =>
- taskFinished(tid, state, serializedData)
- case TaskState.LOST =>
- taskLost(tid, state, serializedData)
- case TaskState.FAILED =>
- taskLost(tid, state, serializedData)
- case TaskState.KILLED =>
- taskLost(tid, state, serializedData)
- case _ =>
- }
- }
-
- def taskFinished(tid: Long, state: TaskState, serializedData: ByteBuffer) {
- val info = taskInfos(tid)
- if (info.failed) {
- // We might get two task-lost messages for the same task in coarse-grained Mesos mode,
- // or even from Mesos itself when acks get delayed.
- return
- }
- val index = info.index
- info.markSuccessful()
- decreaseRunningTasks(1)
- if (!finished(index)) {
- tasksFinished += 1
- logInfo("Finished TID %s in %d ms (progress: %d/%d)".format(
- tid, info.duration, tasksFinished, numTasks))
- // Deserialize task result and pass it to the scheduler
- try {
- val result = ser.deserialize[TaskResult[_]](serializedData)
- result.metrics.resultSize = serializedData.limit()
- sched.listener.taskEnded(tasks(index), Success, result.value, result.accumUpdates, info, result.metrics)
- } catch {
- case cnf: ClassNotFoundException =>
- val loader = Thread.currentThread().getContextClassLoader
- throw new SparkException("ClassNotFound with classloader: " + loader, cnf)
- case ex => throw ex
- }
- // Mark finished and stop if we've finished all the tasks
- finished(index) = true
- if (tasksFinished == numTasks) {
- sched.taskSetFinished(this)
- }
- } else {
- logInfo("Ignoring task-finished event for TID " + tid +
- " because task " + index + " is already finished")
- }
- }
-
- def taskLost(tid: Long, state: TaskState, serializedData: ByteBuffer) {
- val info = taskInfos(tid)
- if (info.failed) {
- // We might get two task-lost messages for the same task in coarse-grained Mesos mode,
- // or even from Mesos itself when acks get delayed.
- return
- }
- val index = info.index
- info.markFailed()
- decreaseRunningTasks(1)
- if (!finished(index)) {
- logInfo("Lost TID %s (task %s:%d)".format(tid, taskSet.id, index))
- copiesRunning(index) -= 1
- // Check if the problem is a map output fetch failure. In that case, this
- // task will never succeed on any node, so tell the scheduler about it.
- if (serializedData != null && serializedData.limit() > 0) {
- val reason = ser.deserialize[TaskEndReason](serializedData, getClass.getClassLoader)
- reason match {
- case fetchFailed: FetchFailed =>
- logInfo("Loss was due to fetch failure from " + fetchFailed.bmAddress)
- sched.listener.taskEnded(tasks(index), fetchFailed, null, null, info, null)
- finished(index) = true
- tasksFinished += 1
- sched.taskSetFinished(this)
- decreaseRunningTasks(runningTasks)
- return
-
- case taskResultTooBig: TaskResultTooBigFailure =>
- logInfo("Loss was due to task %s result exceeding Akka frame size; " +
- "aborting job".format(tid))
- abort("Task %s result exceeded Akka frame size".format(tid))
- return
-
- case ef: ExceptionFailure =>
- sched.listener.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null))
- val key = ef.description
- val now = System.currentTimeMillis
- val (printFull, dupCount) = {
- if (recentExceptions.contains(key)) {
- val (dupCount, printTime) = recentExceptions(key)
- if (now - printTime > EXCEPTION_PRINT_INTERVAL) {
- recentExceptions(key) = (0, now)
- (true, 0)
- } else {
- recentExceptions(key) = (dupCount + 1, printTime)
- (false, dupCount + 1)
- }
- } else {
- recentExceptions(key) = (0, now)
- (true, 0)
- }
- }
- if (printFull) {
- val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString))
- logInfo("Loss was due to %s\n%s\n%s".format(
- ef.className, ef.description, locs.mkString("\n")))
- } else {
- logInfo("Loss was due to %s [duplicate %d]".format(ef.description, dupCount))
- }
-
- case _ => {}
- }
- }
- // On non-fetch failures, re-enqueue the task as pending for a max number of retries
- addPendingTask(index)
- // Count failed attempts only on FAILED and LOST state (not on KILLED)
- if (state == TaskState.FAILED || state == TaskState.LOST) {
- numFailures(index) += 1
- if (numFailures(index) > MAX_TASK_FAILURES) {
- logError("Task %s:%d failed more than %d times; aborting job".format(
- taskSet.id, index, MAX_TASK_FAILURES))
- abort("Task %s:%d failed more than %d times".format(taskSet.id, index, MAX_TASK_FAILURES))
- }
- }
- } else {
- logInfo("Ignoring task-lost event for TID " + tid +
- " because task " + index + " is already finished")
- }
- }
-
- def error(message: String) {
- // Save the error message
- abort("Error: " + message)
- }
-
- def abort(message: String) {
- failed = true
- causeOfFailure = message
- // TODO: Kill running tasks if we were not terminated due to a Mesos error
- sched.listener.taskSetFailed(taskSet, message)
- decreaseRunningTasks(runningTasks)
- sched.taskSetFinished(this)
- }
-
- override def increaseRunningTasks(taskNum: Int) {
- runningTasks += taskNum
- if (parent != null) {
- parent.increaseRunningTasks(taskNum)
- }
- }
-
- override def decreaseRunningTasks(taskNum: Int) {
- runningTasks -= taskNum
- if (parent != null) {
- parent.decreaseRunningTasks(taskNum)
- }
- }
-
- //TODO: for now we just find Pool not TaskSetManager, we can extend this function in future if needed
- override def getSchedulableByName(name: String): Schedulable = {
- return null
- }
-
- override def addSchedulable(schedulable:Schedulable) {
- //nothing
- }
-
- override def removeSchedulable(schedulable:Schedulable) {
- //nothing
- }
-
- override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = {
- var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager]
- sortedTaskSetQueue += this
- return sortedTaskSetQueue
- }
-
- override def executorLost(execId: String, hostPort: String) {
- logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id)
-
- // If some task has preferred locations only on hostname, and there are no more executors there,
- // put it in the no-prefs list to avoid the wait from delay scheduling
-
- // host local tasks - should we push this to rack local or no pref list ? For now, preserving behavior and moving to
- // no prefs list. Note, this was done due to impliations related to 'waiting' for data local tasks, etc.
- // Note: NOT checking process local list - since host local list is super set of that. We need to ad to no prefs only if
- // there is no host local node for the task (not if there is no process local node for the task)
- for (index <- getPendingTasksForHost(Utils.parseHostPort(hostPort)._1)) {
- // val newLocs = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.RACK_LOCAL)
- val newLocs = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL)
- if (newLocs.isEmpty) {
- pendingTasksWithNoPrefs += index
- }
- }
-
- // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage
- if (tasks(0).isInstanceOf[ShuffleMapTask]) {
- for ((tid, info) <- taskInfos if info.executorId == execId) {
- val index = taskInfos(tid).index
- if (finished(index)) {
- finished(index) = false
- copiesRunning(index) -= 1
- tasksFinished -= 1
- addPendingTask(index)
- // Tell the DAGScheduler that this task was resubmitted so that it doesn't think our
- // stage finishes when a total of tasks.size tasks finish.
- sched.listener.taskEnded(tasks(index), Resubmitted, null, null, info, null)
- }
- }
- }
- // Also re-enqueue any tasks that were running on the node
- for ((tid, info) <- taskInfos if info.running && info.executorId == execId) {
- taskLost(tid, TaskState.KILLED, null)
- }
- }
-
- /**
- * Check for tasks to be speculated and return true if there are any. This is called periodically
- * by the ClusterScheduler.
- *
- * TODO: To make this scale to large jobs, we need to maintain a list of running tasks, so that
- * we don't scan the whole task set. It might also help to make this sorted by launch time.
- */
- override def checkSpeculatableTasks(): Boolean = {
- // Can't speculate if we only have one task, or if all tasks have finished.
- if (numTasks == 1 || tasksFinished == numTasks) {
- return false
- }
- var foundTasks = false
- val minFinishedForSpeculation = (SPECULATION_QUANTILE * numTasks).floor.toInt
- logDebug("Checking for speculative tasks: minFinished = " + minFinishedForSpeculation)
- if (tasksFinished >= minFinishedForSpeculation) {
- val time = System.currentTimeMillis()
- val durations = taskInfos.values.filter(_.successful).map(_.duration).toArray
- Arrays.sort(durations)
- val medianDuration = durations(min((0.5 * numTasks).round.toInt, durations.size - 1))
- val threshold = max(SPECULATION_MULTIPLIER * medianDuration, 100)
- // TODO: Threshold should also look at standard deviation of task durations and have a lower
- // bound based on that.
- logDebug("Task length threshold for speculation: " + threshold)
- for ((tid, info) <- taskInfos) {
- val index = info.index
- if (!finished(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold &&
- !speculatableTasks.contains(index)) {
- logInfo(
- "Marking task %s:%d (on %s) as speculatable because it ran more than %.0f ms".format(
- taskSet.id, index, info.hostPort, threshold))
- speculatableTasks += index
- foundTasks = true
- }
- }
- }
- return foundTasks
- }
-
- override def hasPendingTasks(): Boolean = {
- numTasks > 0 && tasksFinished < numTasks
- }
-}
diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala
deleted file mode 100644
index ac9e5ef94d..0000000000
--- a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.scheduler.cluster
-
-import spark.TaskState.TaskState
-import java.nio.ByteBuffer
-import spark.util.SerializableBuffer
-import spark.Utils
-
-private[spark] sealed trait StandaloneClusterMessage extends Serializable
-
-// Driver to executors
-private[spark]
-case class LaunchTask(task: TaskDescription) extends StandaloneClusterMessage
-
-private[spark]
-case class RegisteredExecutor(sparkProperties: Seq[(String, String)])
- extends StandaloneClusterMessage
-
-private[spark]
-case class RegisterExecutorFailed(message: String) extends StandaloneClusterMessage
-
-// Executors to driver
-private[spark]
-case class RegisterExecutor(executorId: String, hostPort: String, cores: Int)
- extends StandaloneClusterMessage {
- Utils.checkHostPort(hostPort, "Expected host port")
-}
-
-private[spark]
-case class StatusUpdate(executorId: String, taskId: Long, state: TaskState, data: SerializableBuffer)
- extends StandaloneClusterMessage
-
-private[spark]
-object StatusUpdate {
- /** Alternate factory method that takes a ByteBuffer directly for the data field */
- def apply(executorId: String, taskId: Long, state: TaskState, data: ByteBuffer): StatusUpdate = {
- StatusUpdate(executorId, taskId, state, new SerializableBuffer(data))
- }
-}
-
-// Internal messages in driver
-private[spark] case object ReviveOffers extends StandaloneClusterMessage
-private[spark] case object StopDriver extends StandaloneClusterMessage
-
-private[spark] case class RemoveExecutor(executorId: String, reason: String)
- extends StandaloneClusterMessage
diff --git a/core/src/main/scala/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/spark/storage/BlockManagerMessages.scala
deleted file mode 100644
index 01de4ccb8f..0000000000
--- a/core/src/main/scala/spark/storage/BlockManagerMessages.scala
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.storage
-
-import java.io.{Externalizable, ObjectInput, ObjectOutput}
-
-import akka.actor.ActorRef
-
-
-//////////////////////////////////////////////////////////////////////////////////
-// Messages from the master to slaves.
-//////////////////////////////////////////////////////////////////////////////////
-private[spark]
-sealed trait ToBlockManagerSlave
-
-// Remove a block from the slaves that have it. This can only be used to remove
-// blocks that the master knows about.
-private[spark]
-case class RemoveBlock(blockId: String) extends ToBlockManagerSlave
-
-// Remove all blocks belonging to a specific RDD.
-private[spark] case class RemoveRdd(rddId: Int) extends ToBlockManagerSlave
-
-
-//////////////////////////////////////////////////////////////////////////////////
-// Messages from slaves to the master.
-//////////////////////////////////////////////////////////////////////////////////
-private[spark]
-sealed trait ToBlockManagerMaster
-
-private[spark]
-case class RegisterBlockManager(
- blockManagerId: BlockManagerId,
- maxMemSize: Long,
- sender: ActorRef)
- extends ToBlockManagerMaster
-
-private[spark]
-case class HeartBeat(blockManagerId: BlockManagerId) extends ToBlockManagerMaster
-
-private[spark]
-class UpdateBlockInfo(
- var blockManagerId: BlockManagerId,
- var blockId: String,
- var storageLevel: StorageLevel,
- var memSize: Long,
- var diskSize: Long)
- extends ToBlockManagerMaster
- with Externalizable {
-
- def this() = this(null, null, null, 0, 0) // For deserialization only
-
- override def writeExternal(out: ObjectOutput) {
- blockManagerId.writeExternal(out)
- out.writeUTF(blockId)
- storageLevel.writeExternal(out)
- out.writeLong(memSize)
- out.writeLong(diskSize)
- }
-
- override def readExternal(in: ObjectInput) {
- blockManagerId = BlockManagerId(in)
- blockId = in.readUTF()
- storageLevel = StorageLevel(in)
- memSize = in.readLong()
- diskSize = in.readLong()
- }
-}
-
-private[spark]
-object UpdateBlockInfo {
- def apply(blockManagerId: BlockManagerId,
- blockId: String,
- storageLevel: StorageLevel,
- memSize: Long,
- diskSize: Long): UpdateBlockInfo = {
- new UpdateBlockInfo(blockManagerId, blockId, storageLevel, memSize, diskSize)
- }
-
- // For pattern-matching
- def unapply(h: UpdateBlockInfo): Option[(BlockManagerId, String, StorageLevel, Long, Long)] = {
- Some((h.blockManagerId, h.blockId, h.storageLevel, h.memSize, h.diskSize))
- }
-}
-
-private[spark]
-case class GetLocations(blockId: String) extends ToBlockManagerMaster
-
-private[spark]
-case class GetLocationsMultipleBlockIds(blockIds: Array[String]) extends ToBlockManagerMaster
-
-private[spark]
-case class GetPeers(blockManagerId: BlockManagerId, size: Int) extends ToBlockManagerMaster
-
-private[spark]
-case class RemoveExecutor(execId: String) extends ToBlockManagerMaster
-
-private[spark]
-case object StopBlockManagerMaster extends ToBlockManagerMaster
-
-private[spark]
-case object GetMemoryStatus extends ToBlockManagerMaster
-
-private[spark]
-case object ExpireDeadHosts extends ToBlockManagerMaster
-
-private[spark]
-case object GetStorageStatus extends ToBlockManagerMaster
diff --git a/core/src/main/scala/spark/ui/jobs/IndexPage.scala b/core/src/main/scala/spark/ui/jobs/IndexPage.scala
deleted file mode 100644
index f31af3cda6..0000000000
--- a/core/src/main/scala/spark/ui/jobs/IndexPage.scala
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.ui.jobs
-
-import java.util.Date
-
-import javax.servlet.http.HttpServletRequest
-
-import scala.Some
-import scala.xml.{NodeSeq, Node}
-
-import spark.scheduler.Stage
-import spark.ui.UIUtils._
-import spark.ui.Page._
-import spark.storage.StorageLevel
-
-/** Page showing list of all ongoing and recently finished stages */
-private[spark] class IndexPage(parent: JobProgressUI) {
- def listener = parent.listener
- val dateFmt = parent.dateFmt
-
- def render(request: HttpServletRequest): Seq[Node] = {
- val activeStages = listener.activeStages.toSeq
- val completedStages = listener.completedStages.reverse.toSeq
- val failedStages = listener.failedStages.reverse.toSeq
-
- /** Special table which merges two header cells. */
- def stageTable[T](makeRow: T => Seq[Node], rows: Seq[T]): Seq[Node] = {
- <table class="table table-bordered table-striped table-condensed sortable">
- <thead>
- <th>Stage Id</th>
- <th>Origin</th>
- <th>Submitted</th>
- <th>Duration</th>
- <th colspan="2">Tasks: Complete/Total</th>
- <th>Shuffle Activity</th>
- <th>Stored RDD</th>
- </thead>
- <tbody>
- {rows.map(r => makeRow(r))}
- </tbody>
- </table>
- }
-
- val activeStageTable: NodeSeq = stageTable(stageRow, activeStages)
- val completedStageTable = stageTable(stageRow, completedStages)
- val failedStageTable: NodeSeq = stageTable(stageRow, failedStages)
-
- val content = <h2>Active Stages</h2> ++ activeStageTable ++
- <h2>Completed Stages</h2> ++ completedStageTable ++
- <h2>Failed Stages</h2> ++ failedStageTable
-
- headerSparkPage(content, parent.sc, "Spark Stages", Jobs)
- }
-
- def getElapsedTime(submitted: Option[Long], completed: Long): String = {
- submitted match {
- case Some(t) => parent.formatDuration(completed - t)
- case _ => "Unknown"
- }
- }
-
- def makeProgressBar(completed: Int, total: Int): Seq[Node] = {
- val width=130
- val height=15
- val completeWidth = (completed.toDouble / total) * width
-
- <svg width={width.toString} height={height.toString}>
- <rect width={width.toString} height={height.toString}
- fill="white" stroke="rgb(51,51,51)" stroke-width="1" />
- <rect width={completeWidth.toString} height={height.toString}
- fill="rgb(0,136,204)" stroke="black" stroke-width="1" />
- </svg>
- }
-
-
- def stageRow(s: Stage): Seq[Node] = {
- val submissionTime = s.submissionTime match {
- case Some(t) => dateFmt.format(new Date(t))
- case None => "Unknown"
- }
- val (read, write) = (listener.hasShuffleRead(s.id), listener.hasShuffleWrite(s.id))
- val shuffleInfo = (read, write) match {
- case (true, true) => "Read/Write"
- case (true, false) => "Read"
- case (false, true) => "Write"
- case _ => ""
- }
- val completedTasks = listener.stageToTasksComplete.getOrElse(s.id, 0)
- val totalTasks = s.numPartitions
-
- <tr>
- <td>{s.id}</td>
- <td><a href={"/stages/stage?id=%s".format(s.id)}>{s.name}</a></td>
- <td>{submissionTime}</td>
- <td>{getElapsedTime(s.submissionTime,
- s.completionTime.getOrElse(System.currentTimeMillis()))}</td>
- <td class="progress-cell">{makeProgressBar(completedTasks, totalTasks)}</td>
- <td style="border-left: 0; text-align: center;">{completedTasks} / {totalTasks}
- {listener.stageToTasksFailed.getOrElse(s.id, 0) match {
- case f if f > 0 => "(%s failed)".format(f)
- case _ =>
- }}
- </td>
- <td>{shuffleInfo}</td>
- <td>{if (s.rdd.getStorageLevel != StorageLevel.NONE) {
- <a href={"/storage/rdd?id=%s".format(s.rdd.id)}>
- {Option(s.rdd.name).getOrElse(s.rdd.id)}
- </a>
- }}
- </td>
- </tr>
- }
-}
diff --git a/core/src/main/scala/spark/ui/jobs/JobProgressUI.scala b/core/src/main/scala/spark/ui/jobs/JobProgressUI.scala
deleted file mode 100644
index 44dcf82d11..0000000000
--- a/core/src/main/scala/spark/ui/jobs/JobProgressUI.scala
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.ui.jobs
-
-import akka.util.Duration
-
-import java.text.SimpleDateFormat
-
-import javax.servlet.http.HttpServletRequest
-
-import org.eclipse.jetty.server.Handler
-
-import scala.Seq
-import scala.collection.mutable.{HashSet, ListBuffer, HashMap, ArrayBuffer}
-
-import spark.ui.JettyUtils._
-import spark.{ExceptionFailure, SparkContext, Success, Utils}
-import spark.scheduler._
-import spark.scheduler.cluster.TaskInfo
-import spark.executor.TaskMetrics
-import collection.mutable
-
-/** Web UI showing progress status of all jobs in the given SparkContext. */
-private[spark] class JobProgressUI(val sc: SparkContext) {
- private var _listener: Option[JobProgressListener] = None
- def listener = _listener.get
- val dateFmt = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
-
- private val indexPage = new IndexPage(this)
- private val stagePage = new StagePage(this)
-
- def start() {
- _listener = Some(new JobProgressListener)
- sc.addSparkListener(listener)
- }
-
- def formatDuration(ms: Long) = Utils.msDurationToString(ms)
-
- def getHandlers = Seq[(String, Handler)](
- ("/stages/stage", (request: HttpServletRequest) => stagePage.render(request)),
- ("/stages", (request: HttpServletRequest) => indexPage.render(request))
- )
-}
-
-private[spark] class JobProgressListener extends SparkListener {
- // How many stages to remember
- val RETAINED_STAGES = System.getProperty("spark.ui.retained_stages", "1000").toInt
-
- val activeStages = HashSet[Stage]()
- val completedStages = ListBuffer[Stage]()
- val failedStages = ListBuffer[Stage]()
-
- val stageToTasksComplete = HashMap[Int, Int]()
- val stageToTasksFailed = HashMap[Int, Int]()
- val stageToTaskInfos =
- HashMap[Int, ArrayBuffer[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]]()
-
- override def onJobStart(jobStart: SparkListenerJobStart) {}
-
- override def onStageCompleted(stageCompleted: StageCompleted) = {
- val stage = stageCompleted.stageInfo.stage
- activeStages -= stage
- completedStages += stage
- trimIfNecessary(completedStages)
- }
-
- /** If stages is too large, remove and garbage collect old stages */
- def trimIfNecessary(stages: ListBuffer[Stage]) {
- if (stages.size > RETAINED_STAGES) {
- val toRemove = RETAINED_STAGES / 10
- stages.takeRight(toRemove).foreach( s => {
- stageToTaskInfos.remove(s.id)
- })
- stages.trimEnd(toRemove)
- }
- }
-
- override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) =
- activeStages += stageSubmitted.stage
-
- override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
- val sid = taskEnd.task.stageId
- val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) =
- taskEnd.reason match {
- case e: ExceptionFailure =>
- stageToTasksFailed(sid) = stageToTasksFailed.getOrElse(sid, 0) + 1
- (Some(e), e.metrics)
- case _ =>
- stageToTasksComplete(sid) = stageToTasksComplete.getOrElse(sid, 0) + 1
- (None, Some(taskEnd.taskMetrics))
- }
- val taskList = stageToTaskInfos.getOrElse(
- sid, ArrayBuffer[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]())
- taskList += ((taskEnd.taskInfo, metrics, failureInfo))
- stageToTaskInfos(sid) = taskList
- }
-
- override def onJobEnd(jobEnd: SparkListenerJobEnd) {
- jobEnd match {
- case end: SparkListenerJobEnd =>
- end.jobResult match {
- case JobFailed(ex, Some(stage)) =>
- activeStages -= stage
- failedStages += stage
- trimIfNecessary(failedStages)
- case _ =>
- }
- case _ =>
- }
- }
-
- /** Is this stage's input from a shuffle read. */
- def hasShuffleRead(stageID: Int): Boolean = {
- // This is written in a slightly complicated way to avoid having to scan all tasks
- for (s <- stageToTaskInfos.get(stageID).getOrElse(Seq())) {
- if (s._2 != null) return s._2.flatMap(m => m.shuffleReadMetrics).isDefined
- }
- return false // No tasks have finished for this stage
- }
-
- /** Is this stage's output to a shuffle write. */
- def hasShuffleWrite(stageID: Int): Boolean = {
- // This is written in a slightly complicated way to avoid having to scan all tasks
- for (s <- stageToTaskInfos.get(stageID).getOrElse(Seq())) {
- if (s._2 != null) return s._2.flatMap(m => m.shuffleWriteMetrics).isDefined
- }
- return false // No tasks have finished for this stage
- }
-}
diff --git a/core/src/main/scala/spark/ui/jobs/StagePage.scala b/core/src/main/scala/spark/ui/jobs/StagePage.scala
deleted file mode 100644
index 292966f23a..0000000000
--- a/core/src/main/scala/spark/ui/jobs/StagePage.scala
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.ui.jobs
-
-import java.util.Date
-
-import javax.servlet.http.HttpServletRequest
-
-import scala.xml.Node
-
-import spark.ui.UIUtils._
-import spark.ui.Page._
-import spark.util.Distribution
-import spark.{ExceptionFailure, Utils}
-import spark.scheduler.cluster.TaskInfo
-import spark.executor.TaskMetrics
-
-/** Page showing statistics and task list for a given stage */
-private[spark] class StagePage(parent: JobProgressUI) {
- def listener = parent.listener
- val dateFmt = parent.dateFmt
-
- def render(request: HttpServletRequest): Seq[Node] = {
- val stageId = request.getParameter("id").toInt
-
- if (!listener.stageToTaskInfos.contains(stageId)) {
- val content =
- <div>
- <h2>Summary Metrics</h2> No tasks have finished yet
- <h2>Tasks</h2> No tasks have finished yet
- </div>
- return headerSparkPage(content, parent.sc, "Stage Details: %s".format(stageId), Jobs)
- }
-
- val tasks = listener.stageToTaskInfos(stageId)
-
- val shuffleRead = listener.hasShuffleRead(stageId)
- val shuffleWrite = listener.hasShuffleWrite(stageId)
-
- val taskHeaders: Seq[String] =
- Seq("Task ID", "Duration", "Locality Level", "Worker", "Launch Time") ++
- {if (shuffleRead) Seq("Shuffle Read") else Nil} ++
- {if (shuffleWrite) Seq("Shuffle Write") else Nil} ++
- Seq("Details")
-
- val taskTable = listingTable(taskHeaders, taskRow, tasks)
-
- // Excludes tasks which failed and have incomplete metrics
- val validTasks = tasks.filter(t => Option(t._2).isDefined)
-
- val summaryTable: Option[Seq[Node]] =
- if (validTasks.size == 0) {
- None
- }
- else {
- val serviceTimes = validTasks.map{case (info, metrics, exception) =>
- metrics.get.executorRunTime.toDouble}
- val serviceQuantiles = "Duration" +: Distribution(serviceTimes).get.getQuantiles().map(
- ms => parent.formatDuration(ms.toLong))
-
- def getQuantileCols(data: Seq[Double]) =
- Distribution(data).get.getQuantiles().map(d => Utils.memoryBytesToString(d.toLong))
-
- val shuffleReadSizes = validTasks.map {
- case(info, metrics, exception) =>
- metrics.get.shuffleReadMetrics.map(_.remoteBytesRead).getOrElse(0L).toDouble
- }
- val shuffleReadQuantiles = "Shuffle Read (Remote)" +: getQuantileCols(shuffleReadSizes)
-
- val shuffleWriteSizes = validTasks.map {
- case(info, metrics, exception) =>
- metrics.get.shuffleWriteMetrics.map(_.shuffleBytesWritten).getOrElse(0L).toDouble
- }
- val shuffleWriteQuantiles = "Shuffle Write" +: getQuantileCols(shuffleWriteSizes)
-
- val listings: Seq[Seq[String]] = Seq(serviceQuantiles,
- if (shuffleRead) shuffleReadQuantiles else Nil,
- if (shuffleWrite) shuffleWriteQuantiles else Nil)
-
- val quantileHeaders = Seq("Metric", "Min", "25%", "50%", "75%", "Max")
- def quantileRow(data: Seq[String]): Seq[Node] = <tr> {data.map(d => <td>{d}</td>)} </tr>
- Some(listingTable(quantileHeaders, quantileRow, listings))
- }
-
- val content =
- <h2>Summary Metrics</h2> ++ summaryTable.getOrElse(Nil) ++ <h2>Tasks</h2> ++ taskTable;
-
- headerSparkPage(content, parent.sc, "Stage Details: %s".format(stageId), Jobs)
- }
-
-
- def taskRow(taskData: (TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])): Seq[Node] = {
- def fmtStackTrace(trace: Seq[StackTraceElement]): Seq[Node] =
- trace.map(e => <span style="display:block;">{e.toString}</span>)
- val (info, metrics, exception) = taskData
- <tr>
- <td>{info.taskId}</td>
- <td sorttable_customkey={metrics.map{m => m.executorRunTime.toString}.getOrElse("1")}>
- {metrics.map{m => parent.formatDuration(m.executorRunTime)}.getOrElse("")}
- </td>
- <td>{info.taskLocality}</td>
- <td>{info.hostPort}</td>
- <td>{dateFmt.format(new Date(info.launchTime))}</td>
- {metrics.flatMap{m => m.shuffleReadMetrics}.map{s =>
- <td>{Utils.memoryBytesToString(s.remoteBytesRead)}</td>}.getOrElse("")}
- {metrics.flatMap{m => m.shuffleWriteMetrics}.map{s =>
- <td>{Utils.memoryBytesToString(s.shuffleBytesWritten)}</td>}.getOrElse("")}
- <td>{exception.map(e =>
- <span>
- {e.className} ({e.description})<br/>
- {fmtStackTrace(e.stackTrace)}
- </span>).getOrElse("")}
- </td>
- </tr>
- }
-}
diff --git a/core/src/test/resources/test_metrics_config.properties b/core/src/test/resources/test_metrics_config.properties
new file mode 100644
index 0000000000..056a158456
--- /dev/null
+++ b/core/src/test/resources/test_metrics_config.properties
@@ -0,0 +1,6 @@
+*.sink.console.period = 10
+*.sink.console.unit = seconds
+*.source.jvm.class = org.apache.spark.metrics.source.JvmSource
+master.sink.console.period = 20
+master.sink.console.unit = minutes
+
diff --git a/core/src/test/resources/test_metrics_system.properties b/core/src/test/resources/test_metrics_system.properties
new file mode 100644
index 0000000000..6f5ecea93a
--- /dev/null
+++ b/core/src/test/resources/test_metrics_system.properties
@@ -0,0 +1,7 @@
+*.sink.console.period = 10
+*.sink.console.unit = seconds
+test.sink.console.class = org.apache.spark.metrics.sink.ConsoleSink
+test.sink.dummy.class = org.apache.spark.metrics.sink.DummySink
+test.source.dummy.class = org.apache.spark.metrics.source.DummySource
+test.sink.console.period = 20
+test.sink.console.unit = minutes
diff --git a/core/src/test/scala/spark/AccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala
index 0af175f316..4434f3b87c 100644
--- a/core/src/test/scala/spark/AccumulatorSuite.scala
+++ b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.FunSuite
import org.scalatest.matchers.ShouldMatchers
@@ -23,7 +23,7 @@ import collection.mutable
import java.util.Random
import scala.math.exp
import scala.math.signum
-import spark.SparkContext._
+import org.apache.spark.SparkContext._
class AccumulatorSuite extends FunSuite with ShouldMatchers with LocalSparkContext {
diff --git a/core/src/test/scala/spark/BroadcastSuite.scala b/core/src/test/scala/org/apache/spark/BroadcastSuite.scala
index 785721ece8..b3a53d928b 100644
--- a/core/src/test/scala/spark/BroadcastSuite.scala
+++ b/core/src/test/scala/org/apache/spark/BroadcastSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.FunSuite
diff --git a/core/src/test/scala/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala
index a84c89e3c9..d9103aebb7 100644
--- a/core/src/test/scala/spark/CheckpointSuite.scala
+++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala
@@ -15,13 +15,14 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.FunSuite
import java.io.File
-import spark.rdd._
-import spark.SparkContext._
+import org.apache.spark.rdd._
+import org.apache.spark.SparkContext._
import storage.StorageLevel
+import org.apache.spark.util.Utils
class CheckpointSuite extends FunSuite with LocalSparkContext with Logging {
initLogging()
@@ -99,7 +100,7 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging {
test("ShuffledRDD") {
testCheckpointing(rdd => {
// Creating ShuffledRDD directly as PairRDDFunctions.combineByKey produces a MapPartitionedRDD
- new ShuffledRDD(rdd.map(x => (x % 2, 1)), partitioner)
+ new ShuffledRDD[Int, Int, (Int, Int)](rdd.map(x => (x % 2, 1)), partitioner)
})
}
diff --git a/core/src/test/scala/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
index e11efe459c..7a856d4081 100644
--- a/core/src/test/scala/spark/DistributedSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import network.ConnectionManagerId
import org.scalatest.FunSuite
diff --git a/core/src/test/scala/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala
index ed16b9d8ef..01a72d8401 100644
--- a/core/src/test/scala/spark/DriverSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.io.File
@@ -26,6 +26,7 @@ import org.scalatest.FunSuite
import org.scalatest.concurrent.Timeouts
import org.scalatest.prop.TableDrivenPropertyChecks._
import org.scalatest.time.SpanSugar._
+import org.apache.spark.util.Utils
class DriverSuite extends FunSuite with Timeouts {
test("driver should exit after finishing") {
@@ -34,7 +35,7 @@ class DriverSuite extends FunSuite with Timeouts {
val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]"))
forAll(masters) { (master: String) =>
failAfter(30 seconds) {
- Utils.execute(Seq("./run", "spark.DriverWithoutCleanup", master),
+ Utils.execute(Seq("./spark-class", "org.apache.spark.DriverWithoutCleanup", master),
new File(System.getenv("SPARK_HOME")))
}
}
diff --git a/core/src/test/scala/spark/FailureSuite.scala b/core/src/test/scala/org/apache/spark/FailureSuite.scala
index 6c847b8fef..af448fcb37 100644
--- a/core/src/test/scala/spark/FailureSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FailureSuite.scala
@@ -15,14 +15,12 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.FunSuite
-import org.scalatest.prop.Checkers
-
-import scala.collection.mutable.ArrayBuffer
import SparkContext._
+import org.apache.spark.util.NonSerializable
// Common state shared by FailureSuite-launched tasks. We use a global object
// for this because any local variables used in the task closures will rightfully
@@ -40,7 +38,7 @@ object FailureSuiteState {
}
class FailureSuite extends FunSuite with LocalSparkContext {
-
+
// Run a 3-task map job in which task 1 deterministically fails once, and check
// whether the job completes successfully and we ran 4 tasks in total.
test("failure in a single-stage job") {
@@ -66,7 +64,7 @@ class FailureSuite extends FunSuite with LocalSparkContext {
test("failure in a two-stage job") {
sc = new SparkContext("local[1,1]", "test")
val results = sc.makeRDD(1 to 3).map(x => (x, x)).groupByKey(3).map {
- case (k, v) =>
+ case (k, v) =>
FailureSuiteState.synchronized {
FailureSuiteState.tasksRun += 1
if (k == 1 && FailureSuiteState.tasksFailed == 0) {
@@ -87,12 +85,40 @@ class FailureSuite extends FunSuite with LocalSparkContext {
sc = new SparkContext("local[1,1]", "test")
val results = sc.makeRDD(1 to 3).map(x => new NonSerializable)
- val thrown = intercept[spark.SparkException] {
+ val thrown = intercept[SparkException] {
results.collect()
}
- assert(thrown.getClass === classOf[spark.SparkException])
+ assert(thrown.getClass === classOf[SparkException])
+ assert(thrown.getMessage.contains("NotSerializableException"))
+
+ FailureSuiteState.clear()
+ }
+
+ test("failure because task closure is not serializable") {
+ sc = new SparkContext("local[1,1]", "test")
+ val a = new NonSerializable
+
+ // Non-serializable closure in the final result stage
+ val thrown = intercept[SparkException] {
+ sc.parallelize(1 to 10, 2).map(x => a).count()
+ }
+ assert(thrown.getClass === classOf[SparkException])
assert(thrown.getMessage.contains("NotSerializableException"))
+ // Non-serializable closure in an earlier stage
+ val thrown1 = intercept[SparkException] {
+ sc.parallelize(1 to 10, 2).map(x => (x, a)).partitionBy(new HashPartitioner(3)).count()
+ }
+ assert(thrown1.getClass === classOf[SparkException])
+ assert(thrown1.getMessage.contains("NotSerializableException"))
+
+ // Non-serializable closure in foreach function
+ val thrown2 = intercept[SparkException] {
+ sc.parallelize(1 to 10, 2).foreach(x => println(a))
+ }
+ assert(thrown2.getClass === classOf[SparkException])
+ assert(thrown2.getMessage.contains("NotSerializableException"))
+
FailureSuiteState.clear()
}
diff --git a/core/src/test/scala/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
index 242ae971f8..35d1d41af1 100644
--- a/core/src/test/scala/spark/FileServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import com.google.common.io.Files
import org.scalatest.FunSuite
diff --git a/core/src/test/scala/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala
index 1e2c257c4b..7b82a4cdd9 100644
--- a/core/src/test/scala/spark/FileSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.io.{FileWriter, PrintWriter, File}
diff --git a/core/src/test/scala/spark/JavaAPISuite.java b/core/src/test/scala/org/apache/spark/JavaAPISuite.java
index aaf03e683b..8a869c9005 100644
--- a/core/src/test/scala/spark/JavaAPISuite.java
+++ b/core/src/test/scala/org/apache/spark/JavaAPISuite.java
@@ -15,13 +15,14 @@
* limitations under the License.
*/
-package spark;
+package org.apache.spark;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
+import com.google.common.base.Optional;
import scala.Tuple2;
import com.google.common.base.Charsets;
@@ -37,15 +38,15 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import spark.api.java.JavaDoubleRDD;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.*;
-import spark.partial.BoundedDouble;
-import spark.partial.PartialResult;
-import spark.storage.StorageLevel;
-import spark.util.StatCounter;
+import org.apache.spark.api.java.JavaDoubleRDD;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.*;
+import org.apache.spark.partial.BoundedDouble;
+import org.apache.spark.partial.PartialResult;
+import org.apache.spark.storage.StorageLevel;
+import org.apache.spark.util.StatCounter;
// The test suite itself is Serializable so that anonymous Function implementations can be
@@ -198,6 +199,35 @@ public class JavaAPISuite implements Serializable {
}
@Test
+ public void leftOuterJoin() {
+ JavaPairRDD<Integer, Integer> rdd1 = sc.parallelizePairs(Arrays.asList(
+ new Tuple2<Integer, Integer>(1, 1),
+ new Tuple2<Integer, Integer>(1, 2),
+ new Tuple2<Integer, Integer>(2, 1),
+ new Tuple2<Integer, Integer>(3, 1)
+ ));
+ JavaPairRDD<Integer, Character> rdd2 = sc.parallelizePairs(Arrays.asList(
+ new Tuple2<Integer, Character>(1, 'x'),
+ new Tuple2<Integer, Character>(2, 'y'),
+ new Tuple2<Integer, Character>(2, 'z'),
+ new Tuple2<Integer, Character>(4, 'w')
+ ));
+ List<Tuple2<Integer,Tuple2<Integer,Optional<Character>>>> joined =
+ rdd1.leftOuterJoin(rdd2).collect();
+ Assert.assertEquals(5, joined.size());
+ Tuple2<Integer,Tuple2<Integer,Optional<Character>>> firstUnmatched =
+ rdd1.leftOuterJoin(rdd2).filter(
+ new Function<Tuple2<Integer, Tuple2<Integer, Optional<Character>>>, Boolean>() {
+ @Override
+ public Boolean call(Tuple2<Integer, Tuple2<Integer, Optional<Character>>> tup)
+ throws Exception {
+ return !tup._2()._2().isPresent();
+ }
+ }).first();
+ Assert.assertEquals(3, firstUnmatched._1().intValue());
+ }
+
+ @Test
public void foldReduce() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
Function2<Integer, Integer, Integer> add = new Function2<Integer, Integer, Integer>() {
@@ -326,7 +356,9 @@ public class JavaAPISuite implements Serializable {
Assert.assertEquals(20/6.0, rdd.mean(), 0.01);
Assert.assertEquals(20/6.0, rdd.mean(), 0.01);
Assert.assertEquals(6.22222, rdd.variance(), 0.01);
+ Assert.assertEquals(7.46667, rdd.sampleVariance(), 0.01);
Assert.assertEquals(2.49444, rdd.stdev(), 0.01);
+ Assert.assertEquals(2.73252, rdd.sampleStdev(), 0.01);
Double first = rdd.first();
List<Double> take = rdd.take(5);
@@ -716,7 +748,7 @@ public class JavaAPISuite implements Serializable {
}
};
- JavaRDD<Integer> sizes = rdd1.zipPartitions(sizesFn, rdd2);
+ JavaRDD<Integer> sizes = rdd1.zipPartitions(rdd2, sizesFn);
Assert.assertEquals("[3, 2, 3, 2]", sizes.collect().toString());
}
diff --git a/core/src/test/scala/spark/LocalSparkContext.scala b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala
index ddc212d290..6ec124da9c 100644
--- a/core/src/test/scala/spark/LocalSparkContext.scala
+++ b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.Suite
import org.scalatest.BeforeAndAfterEach
diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
index ce6cec0451..6013320eaa 100644
--- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.FunSuite
import akka.actor._
-import spark.scheduler.MapStatus
-import spark.storage.BlockManagerId
-import spark.util.AkkaUtils
+import org.apache.spark.scheduler.MapStatus
+import org.apache.spark.storage.BlockManagerId
+import org.apache.spark.util.AkkaUtils
class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
@@ -112,22 +112,22 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
"akka://spark@localhost:" + boundPort + "/user/MapOutputTracker")
masterTracker.registerShuffle(10, 1)
- masterTracker.incrementGeneration()
- slaveTracker.updateGeneration(masterTracker.getGeneration)
+ masterTracker.incrementEpoch()
+ slaveTracker.updateEpoch(masterTracker.getEpoch)
intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
val compressedSize1000 = MapOutputTracker.compressSize(1000L)
val size1000 = MapOutputTracker.decompressSize(compressedSize1000)
masterTracker.registerMapOutput(10, 0, new MapStatus(
BlockManagerId("a", "hostA", 1000, 0), Array(compressedSize1000)))
- masterTracker.incrementGeneration()
- slaveTracker.updateGeneration(masterTracker.getGeneration)
+ masterTracker.incrementEpoch()
+ slaveTracker.updateEpoch(masterTracker.getEpoch)
assert(slaveTracker.getServerStatuses(10, 0).toSeq ===
Seq((BlockManagerId("a", "hostA", 1000, 0), size1000)))
masterTracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000, 0))
- masterTracker.incrementGeneration()
- slaveTracker.updateGeneration(masterTracker.getGeneration)
+ masterTracker.incrementEpoch()
+ slaveTracker.updateEpoch(masterTracker.getEpoch)
intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
// failure should be cached
diff --git a/core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala b/core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala
new file mode 100644
index 0000000000..5a18dd13ff
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala
@@ -0,0 +1,28 @@
+package org.apache.spark
+
+import org.scalatest.FunSuite
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.{RDD, PartitionPruningRDD}
+
+
+class PartitionPruningRDDSuite extends FunSuite with SharedSparkContext {
+
+ test("Pruned Partitions inherit locality prefs correctly") {
+ class TestPartition(i: Int) extends Partition {
+ def index = i
+ }
+ val rdd = new RDD[Int](sc, Nil) {
+ override protected def getPartitions = {
+ Array[Partition](
+ new TestPartition(1),
+ new TestPartition(2),
+ new TestPartition(3))
+ }
+ def compute(split: Partition, context: TaskContext) = {Iterator()}
+ }
+ val prunedRDD = PartitionPruningRDD.create(rdd, {x => if (x==2) true else false})
+ val p = prunedRDD.partitions(0)
+ assert(p.index == 2)
+ assert(prunedRDD.partitions.length == 1)
+ }
+}
diff --git a/core/src/test/scala/spark/PartitioningSuite.scala b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala
index b1e0b2b4d0..7d938917f2 100644
--- a/core/src/test/scala/spark/PartitioningSuite.scala
+++ b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala
@@ -15,13 +15,16 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
-import org.scalatest.FunSuite
-import scala.collection.mutable.ArrayBuffer
-import SparkContext._
-import spark.util.StatCounter
import scala.math.abs
+import scala.collection.mutable.ArrayBuffer
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.SparkContext._
+import org.apache.spark.util.StatCounter
+import org.apache.spark.rdd.RDD
class PartitioningSuite extends FunSuite with SharedSparkContext {
diff --git a/core/src/test/scala/spark/PipedRDDSuite.scala b/core/src/test/scala/org/apache/spark/PipedRDDSuite.scala
index 35c04710a3..2e851d892d 100644
--- a/core/src/test/scala/spark/PipedRDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/PipedRDDSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.FunSuite
import SparkContext._
diff --git a/core/src/test/scala/spark/SharedSparkContext.scala b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala
index 70c24515be..97cbca09bf 100644
--- a/core/src/test/scala/spark/SharedSparkContext.scala
+++ b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.Suite
import org.scalatest.BeforeAndAfterAll
diff --git a/core/src/test/scala/spark/ShuffleNettySuite.scala b/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala
index 6bad6c1d13..e121b162ad 100644
--- a/core/src/test/scala/spark/ShuffleNettySuite.scala
+++ b/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.BeforeAndAfterAll
diff --git a/core/src/test/scala/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala
index 3a56c26bef..db717865db 100644
--- a/core/src/test/scala/spark/ShuffleSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala
@@ -15,27 +15,22 @@
* limitations under the License.
*/
-package spark
-
-import scala.collection.mutable.ArrayBuffer
-import scala.collection.mutable.HashSet
+package org.apache.spark
import org.scalatest.FunSuite
import org.scalatest.matchers.ShouldMatchers
-import org.scalatest.prop.Checkers
-import org.scalacheck.Arbitrary._
-import org.scalacheck.Gen
-import org.scalacheck.Prop._
-import com.google.common.io.Files
+import org.apache.spark.SparkContext._
+import org.apache.spark.ShuffleSuite.NonJavaSerializableClass
+import org.apache.spark.rdd.{RDD, SubtractedRDD, CoGroupedRDD, OrderedRDDFunctions, ShuffledRDD}
+import org.apache.spark.util.MutablePair
+import org.apache.spark.serializer.KryoSerializer
-import spark.rdd.ShuffledRDD
-import spark.SparkContext._
class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext {
- test("groupByKey with compression") {
+ test("groupByKey without compression") {
try {
- System.setProperty("spark.shuffle.compress", "true")
+ System.setProperty("spark.shuffle.compress", "false")
sc = new SparkContext("local", "test")
val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (2, 1)), 4)
val groups = pairs.groupByKey(4).collect()
@@ -45,7 +40,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext {
val valuesFor2 = groups.find(_._1 == 2).get._2
assert(valuesFor2.toList.sorted === List(1))
} finally {
- System.setProperty("spark.blockManager.compress", "false")
+ System.setProperty("spark.shuffle.compress", "true")
}
}
@@ -55,12 +50,12 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext {
val a = sc.parallelize(1 to 10, 2)
val b = a.map { x =>
- (x, new ShuffleSuite.NonJavaSerializableClass(x * 2))
+ (x, new NonJavaSerializableClass(x * 2))
}
// If the Kryo serializer is not used correctly, the shuffle would fail because the
// default Java serializer cannot handle the non serializable class.
- val c = new ShuffledRDD(b, new HashPartitioner(NUM_BLOCKS),
- classOf[spark.KryoSerializer].getName)
+ val c = new ShuffledRDD[Int, NonJavaSerializableClass, (Int, NonJavaSerializableClass)](
+ b, new HashPartitioner(NUM_BLOCKS)).setSerializer(classOf[KryoSerializer].getName)
val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId
assert(c.count === 10)
@@ -77,11 +72,12 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext {
sc = new SparkContext("local-cluster[2,1,512]", "test")
val a = sc.parallelize(1 to 10, 2)
val b = a.map { x =>
- (x, new ShuffleSuite.NonJavaSerializableClass(x * 2))
+ (x, new NonJavaSerializableClass(x * 2))
}
// If the Kryo serializer is not used correctly, the shuffle would fail because the
// default Java serializer cannot handle the non serializable class.
- val c = new ShuffledRDD(b, new HashPartitioner(3), classOf[spark.KryoSerializer].getName)
+ val c = new ShuffledRDD[Int, NonJavaSerializableClass, (Int, NonJavaSerializableClass)](
+ b, new HashPartitioner(3)).setSerializer(classOf[KryoSerializer].getName)
assert(c.count === 10)
}
@@ -96,7 +92,8 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext {
// NOTE: The default Java serializer doesn't create zero-sized blocks.
// So, use Kryo
- val c = new ShuffledRDD(b, new HashPartitioner(10), classOf[spark.KryoSerializer].getName)
+ val c = new ShuffledRDD[Int, Int, (Int, Int)](b, new HashPartitioner(10))
+ .setSerializer(classOf[KryoSerializer].getName)
val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId
assert(c.count === 4)
@@ -121,7 +118,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext {
val b = a.map(x => (x, x*2))
// NOTE: The default Java serializer should create zero-sized blocks
- val c = new ShuffledRDD(b, new HashPartitioner(10))
+ val c = new ShuffledRDD[Int, Int, (Int, Int)](b, new HashPartitioner(10))
val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId
assert(c.count === 4)
@@ -135,6 +132,72 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext {
// We should have at most 4 non-zero sized partitions
assert(nonEmptyBlocks.size <= 4)
}
+
+ test("shuffle using mutable pairs") {
+ // Use a local cluster with 2 processes to make sure there are both local and remote blocks
+ sc = new SparkContext("local-cluster[2,1,512]", "test")
+ def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2)
+ val data = Array(p(1, 1), p(1, 2), p(1, 3), p(2, 1))
+ val pairs: RDD[MutablePair[Int, Int]] = sc.parallelize(data, 2)
+ val results = new ShuffledRDD[Int, Int, MutablePair[Int, Int]](pairs, new HashPartitioner(2))
+ .collect()
+
+ data.foreach { pair => results should contain (pair) }
+ }
+
+ test("sorting using mutable pairs") {
+ // This is not in SortingSuite because of the local cluster setup.
+ // Use a local cluster with 2 processes to make sure there are both local and remote blocks
+ sc = new SparkContext("local-cluster[2,1,512]", "test")
+ def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2)
+ val data = Array(p(1, 11), p(3, 33), p(100, 100), p(2, 22))
+ val pairs: RDD[MutablePair[Int, Int]] = sc.parallelize(data, 2)
+ val results = new OrderedRDDFunctions[Int, Int, MutablePair[Int, Int]](pairs)
+ .sortByKey().collect()
+ results(0) should be (p(1, 11))
+ results(1) should be (p(2, 22))
+ results(2) should be (p(3, 33))
+ results(3) should be (p(100, 100))
+ }
+
+ test("cogroup using mutable pairs") {
+ // Use a local cluster with 2 processes to make sure there are both local and remote blocks
+ sc = new SparkContext("local-cluster[2,1,512]", "test")
+ def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2)
+ val data1 = Seq(p(1, 1), p(1, 2), p(1, 3), p(2, 1))
+ val data2 = Seq(p(1, "11"), p(1, "12"), p(2, "22"), p(3, "3"))
+ val pairs1: RDD[MutablePair[Int, Int]] = sc.parallelize(data1, 2)
+ val pairs2: RDD[MutablePair[Int, String]] = sc.parallelize(data2, 2)
+ val results = new CoGroupedRDD[Int](Seq(pairs1, pairs2), new HashPartitioner(2)).collectAsMap()
+
+ assert(results(1)(0).length === 3)
+ assert(results(1)(0).contains(1))
+ assert(results(1)(0).contains(2))
+ assert(results(1)(0).contains(3))
+ assert(results(1)(1).length === 2)
+ assert(results(1)(1).contains("11"))
+ assert(results(1)(1).contains("12"))
+ assert(results(2)(0).length === 1)
+ assert(results(2)(0).contains(1))
+ assert(results(2)(1).length === 1)
+ assert(results(2)(1).contains("22"))
+ assert(results(3)(0).length === 0)
+ assert(results(3)(1).contains("3"))
+ }
+
+ test("subtract mutable pairs") {
+ // Use a local cluster with 2 processes to make sure there are both local and remote blocks
+ sc = new SparkContext("local-cluster[2,1,512]", "test")
+ def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2)
+ val data1 = Seq(p(1, 1), p(1, 2), p(1, 3), p(2, 1), p(3, 33))
+ val data2 = Seq(p(1, "11"), p(1, "12"), p(2, "22"))
+ val pairs1: RDD[MutablePair[Int, Int]] = sc.parallelize(data1, 2)
+ val pairs2: RDD[MutablePair[Int, String]] = sc.parallelize(data2, 2)
+ val results = new SubtractedRDD(pairs1, pairs2, new HashPartitioner(2)).collect()
+ results should have length (1)
+ // substracted rdd return results as Tuple2
+ results(0) should be ((3, 33))
+ }
}
object ShuffleSuite {
diff --git a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala
new file mode 100644
index 0000000000..939fe51801
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+import org.scalatest.FunSuite
+import org.apache.spark.SparkContext._
+
+class SparkContextInfoSuite extends FunSuite with LocalSparkContext {
+ test("getPersistentRDDs only returns RDDs that are marked as cached") {
+ sc = new SparkContext("local", "test")
+ assert(sc.getPersistentRDDs.isEmpty === true)
+
+ val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2)
+ assert(sc.getPersistentRDDs.isEmpty === true)
+
+ rdd.cache()
+ assert(sc.getPersistentRDDs.size === 1)
+ assert(sc.getPersistentRDDs.values.head === rdd)
+ }
+
+ test("getPersistentRDDs returns an immutable map") {
+ sc = new SparkContext("local", "test")
+ val rdd1 = sc.makeRDD(Array(1, 2, 3, 4), 2).cache()
+
+ val myRdds = sc.getPersistentRDDs
+ assert(myRdds.size === 1)
+ assert(myRdds.values.head === rdd1)
+
+ val rdd2 = sc.makeRDD(Array(5, 6, 7, 8), 1).cache()
+
+ // getPersistentRDDs should have 2 RDDs, but myRdds should not change
+ assert(sc.getPersistentRDDs.size === 2)
+ assert(myRdds.size === 1)
+ }
+
+ test("getRDDStorageInfo only reports on RDDs that actually persist data") {
+ sc = new SparkContext("local", "test")
+ val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache()
+
+ assert(sc.getRDDStorageInfo.size === 0)
+
+ rdd.collect()
+ assert(sc.getRDDStorageInfo.size === 1)
+ }
+}
diff --git a/core/src/test/scala/spark/ThreadingSuite.scala b/core/src/test/scala/org/apache/spark/ThreadingSuite.scala
index f2acd0bd3c..69383ddfb8 100644
--- a/core/src/test/scala/spark/ThreadingSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ThreadingSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import java.util.concurrent.Semaphore
import java.util.concurrent.atomic.AtomicBoolean
diff --git a/core/src/test/scala/spark/UnpersistSuite.scala b/core/src/test/scala/org/apache/spark/UnpersistSuite.scala
index 93977d16f4..46a2da1724 100644
--- a/core/src/test/scala/spark/UnpersistSuite.scala
+++ b/core/src/test/scala/org/apache/spark/UnpersistSuite.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.FunSuite
import org.scalatest.concurrent.Timeouts._
import org.scalatest.time.{Span, Millis}
-import spark.SparkContext._
+import org.apache.spark.SparkContext._
class UnpersistSuite extends FunSuite with LocalSparkContext {
test("unpersist RDD") {
diff --git a/core/src/test/scala/spark/ZippedPartitionsSuite.scala b/core/src/test/scala/org/apache/spark/ZippedPartitionsSuite.scala
index 5e6d7b09d8..618b9c113b 100644
--- a/core/src/test/scala/spark/ZippedPartitionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ZippedPartitionsSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import scala.collection.immutable.NumericRange
@@ -40,7 +40,7 @@ class ZippedPartitionsSuite extends FunSuite with SharedSparkContext {
val data2 = sc.makeRDD(Array("1", "2", "3", "4", "5", "6"), 2)
val data3 = sc.makeRDD(Array(1.0, 2.0), 2)
- val zippedRDD = data1.zipPartitions(ZippedPartitionsSuite.procZippedData, data2, data3)
+ val zippedRDD = data1.zipPartitions(data2, data3)(ZippedPartitionsSuite.procZippedData)
val obtainedSizes = zippedRDD.collect()
val expectedSizes = Array(2, 3, 1, 2, 3, 1)
diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
new file mode 100644
index 0000000000..fd6f69041a
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.io
+
+import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
+
+import org.scalatest.FunSuite
+
+
+class CompressionCodecSuite extends FunSuite {
+
+ def testCodec(codec: CompressionCodec) {
+ // Write 1000 integers to the output stream, compressed.
+ val outputStream = new ByteArrayOutputStream()
+ val out = codec.compressedOutputStream(outputStream)
+ for (i <- 1 until 1000) {
+ out.write(i % 256)
+ }
+ out.close()
+
+ // Read the 1000 integers back.
+ val inputStream = new ByteArrayInputStream(outputStream.toByteArray)
+ val in = codec.compressedInputStream(inputStream)
+ for (i <- 1 until 1000) {
+ assert(in.read() === i % 256)
+ }
+ in.close()
+ }
+
+ test("default compression codec") {
+ val codec = CompressionCodec.createCodec()
+ assert(codec.getClass === classOf[SnappyCompressionCodec])
+ testCodec(codec)
+ }
+
+ test("lzf compression codec") {
+ val codec = CompressionCodec.createCodec(classOf[LZFCompressionCodec].getName)
+ assert(codec.getClass === classOf[LZFCompressionCodec])
+ testCodec(codec)
+ }
+
+ test("snappy compression codec") {
+ val codec = CompressionCodec.createCodec(classOf[SnappyCompressionCodec].getName)
+ assert(codec.getClass === classOf[SnappyCompressionCodec])
+ testCodec(codec)
+ }
+}
diff --git a/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala b/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala
new file mode 100644
index 0000000000..58c94a162d
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.metrics
+
+import org.scalatest.{BeforeAndAfter, FunSuite}
+
+class MetricsConfigSuite extends FunSuite with BeforeAndAfter {
+ var filePath: String = _
+
+ before {
+ filePath = getClass.getClassLoader.getResource("test_metrics_config.properties").getFile()
+ }
+
+ test("MetricsConfig with default properties") {
+ val conf = new MetricsConfig(Option("dummy-file"))
+ conf.initialize()
+
+ assert(conf.properties.size() === 5)
+ assert(conf.properties.getProperty("test-for-dummy") === null)
+
+ val property = conf.getInstance("random")
+ assert(property.size() === 3)
+ assert(property.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet")
+ assert(property.getProperty("sink.servlet.uri") === "/metrics/json")
+ assert(property.getProperty("sink.servlet.sample") === "false")
+ }
+
+ test("MetricsConfig with properties set") {
+ val conf = new MetricsConfig(Option(filePath))
+ conf.initialize()
+
+ val masterProp = conf.getInstance("master")
+ assert(masterProp.size() === 6)
+ assert(masterProp.getProperty("sink.console.period") === "20")
+ assert(masterProp.getProperty("sink.console.unit") === "minutes")
+ assert(masterProp.getProperty("source.jvm.class") === "org.apache.spark.metrics.source.JvmSource")
+ assert(masterProp.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet")
+ assert(masterProp.getProperty("sink.servlet.uri") === "/metrics/master/json")
+ assert(masterProp.getProperty("sink.servlet.sample") === "false")
+
+ val workerProp = conf.getInstance("worker")
+ assert(workerProp.size() === 6)
+ assert(workerProp.getProperty("sink.console.period") === "10")
+ assert(workerProp.getProperty("sink.console.unit") === "seconds")
+ assert(workerProp.getProperty("source.jvm.class") === "org.apache.spark.metrics.source.JvmSource")
+ assert(workerProp.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet")
+ assert(workerProp.getProperty("sink.servlet.uri") === "/metrics/json")
+ assert(workerProp.getProperty("sink.servlet.sample") === "false")
+ }
+
+ test("MetricsConfig with subProperties") {
+ val conf = new MetricsConfig(Option(filePath))
+ conf.initialize()
+
+ val propCategories = conf.propertyCategories
+ assert(propCategories.size === 3)
+
+ val masterProp = conf.getInstance("master")
+ val sourceProps = conf.subProperties(masterProp, MetricsSystem.SOURCE_REGEX)
+ assert(sourceProps.size === 1)
+ assert(sourceProps("jvm").getProperty("class") === "org.apache.spark.metrics.source.JvmSource")
+
+ val sinkProps = conf.subProperties(masterProp, MetricsSystem.SINK_REGEX)
+ assert(sinkProps.size === 2)
+ assert(sinkProps.contains("console"))
+ assert(sinkProps.contains("servlet"))
+
+ val consoleProps = sinkProps("console")
+ assert(consoleProps.size() === 2)
+
+ val servletProps = sinkProps("servlet")
+ assert(servletProps.size() === 3)
+ }
+}
diff --git a/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala b/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala
new file mode 100644
index 0000000000..7181333adf
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.metrics
+
+import org.scalatest.{BeforeAndAfter, FunSuite}
+import org.apache.spark.deploy.master.MasterSource
+
+class MetricsSystemSuite extends FunSuite with BeforeAndAfter {
+ var filePath: String = _
+
+ before {
+ filePath = getClass.getClassLoader.getResource("test_metrics_system.properties").getFile()
+ System.setProperty("spark.metrics.conf", filePath)
+ }
+
+ test("MetricsSystem with default config") {
+ val metricsSystem = MetricsSystem.createMetricsSystem("default")
+ val sources = metricsSystem.sources
+ val sinks = metricsSystem.sinks
+
+ assert(sources.length === 0)
+ assert(sinks.length === 0)
+ assert(!metricsSystem.getServletHandlers.isEmpty)
+ }
+
+ test("MetricsSystem with sources add") {
+ val metricsSystem = MetricsSystem.createMetricsSystem("test")
+ val sources = metricsSystem.sources
+ val sinks = metricsSystem.sinks
+
+ assert(sources.length === 0)
+ assert(sinks.length === 1)
+ assert(!metricsSystem.getServletHandlers.isEmpty)
+
+ val source = new MasterSource(null)
+ metricsSystem.registerSource(source)
+ assert(sources.length === 1)
+ }
+}
diff --git a/core/src/test/scala/spark/rdd/JdbcRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala
index dc8ca941c1..3d39a31252 100644
--- a/core/src/test/scala/spark/rdd/JdbcRDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark
import org.scalatest.{ BeforeAndAfter, FunSuite }
-import spark.SparkContext._
-import spark.rdd.JdbcRDD
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.JdbcRDD
import java.sql._
class JdbcRDDSuite extends FunSuite with BeforeAndAfter with LocalSparkContext {
diff --git a/core/src/test/scala/spark/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
index b102eaf4e6..31f97fc139 100644
--- a/core/src/test/scala/spark/PairRDDFunctionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
@@ -15,21 +15,17 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.rdd
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashSet
import org.scalatest.FunSuite
-import org.scalatest.prop.Checkers
-import org.scalacheck.Arbitrary._
-import org.scalacheck.Gen
-import org.scalacheck.Prop._
import com.google.common.io.Files
+import org.apache.spark.SparkContext._
+import org.apache.spark.{Partitioner, SharedSparkContext}
-import spark.rdd.ShuffledRDD
-import spark.SparkContext._
class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
test("groupByKey") {
diff --git a/core/src/test/scala/spark/rdd/ParallelCollectionSplitSuite.scala b/core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala
index d1276d541f..a80afdee7e 100644
--- a/core/src/test/scala/spark/rdd/ParallelCollectionSplitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.rdd
+package org.apache.spark.rdd
import scala.collection.immutable.NumericRange
diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
index cbddf4e523..adc971050e 100644
--- a/core/src/test/scala/spark/RDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
@@ -15,14 +15,17 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.rdd
import scala.collection.mutable.HashMap
import org.scalatest.FunSuite
import org.scalatest.concurrent.Timeouts._
import org.scalatest.time.{Span, Millis}
-import spark.SparkContext._
-import spark.rdd.{CoalescedRDD, CoGroupedRDD, EmptyRDD, PartitionPruningRDD, ShuffledRDD}
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd._
+import scala.collection.parallel.mutable
+import org.apache.spark._
+import org.apache.spark.rdd.CoalescedRDDPartition
class RDDSuite extends FunSuite with SharedSparkContext {
@@ -170,9 +173,69 @@ class RDDSuite extends FunSuite with SharedSparkContext {
// we can optionally shuffle to keep the upstream parallel
val coalesced5 = data.coalesce(1, shuffle = true)
- assert(coalesced5.dependencies.head.rdd.dependencies.head.rdd.asInstanceOf[ShuffledRDD[_, _]] !=
+ assert(coalesced5.dependencies.head.rdd.dependencies.head.rdd.asInstanceOf[ShuffledRDD[_, _, _]] !=
null)
}
+ test("cogrouped RDDs with locality") {
+ val data3 = sc.makeRDD(List((1,List("a","c")), (2,List("a","b","c")), (3,List("b"))))
+ val coal3 = data3.coalesce(3)
+ val list3 = coal3.partitions.map(p => p.asInstanceOf[CoalescedRDDPartition].preferredLocation)
+ assert(list3.sorted === Array("a","b","c"), "Locality preferences are dropped")
+
+ // RDD with locality preferences spread (non-randomly) over 6 machines, m0 through m5
+ val data = sc.makeRDD((1 to 9).map(i => (i, (i to (i+2)).map{ j => "m" + (j%6)})))
+ val coalesced1 = data.coalesce(3)
+ assert(coalesced1.collect().toList.sorted === (1 to 9).toList, "Data got *lost* in coalescing")
+
+ val splits = coalesced1.glom().collect().map(_.toList).toList
+ assert(splits.length === 3, "Supposed to coalesce to 3 but got " + splits.length)
+
+ assert(splits.forall(_.length >= 1) === true, "Some partitions were empty")
+
+ // If we try to coalesce into more partitions than the original RDD, it should just
+ // keep the original number of partitions.
+ val coalesced4 = data.coalesce(20)
+ val listOfLists = coalesced4.glom().collect().map(_.toList).toList
+ val sortedList = listOfLists.sortWith{ (x, y) => !x.isEmpty && (y.isEmpty || (x(0) < y(0))) }
+ assert( sortedList === (1 to 9).
+ map{x => List(x)}.toList, "Tried coalescing 9 partitions to 20 but didn't get 9 back")
+ }
+
+ test("cogrouped RDDs with locality, large scale (10K partitions)") {
+ // large scale experiment
+ import collection.mutable
+ val rnd = scala.util.Random
+ val partitions = 10000
+ val numMachines = 50
+ val machines = mutable.ListBuffer[String]()
+ (1 to numMachines).foreach(machines += "m"+_)
+
+ val blocks = (1 to partitions).map(i =>
+ { (i, Array.fill(3)(machines(rnd.nextInt(machines.size))).toList) } )
+
+ val data2 = sc.makeRDD(blocks)
+ val coalesced2 = data2.coalesce(numMachines*2)
+
+ // test that you get over 90% locality in each group
+ val minLocality = coalesced2.partitions
+ .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction)
+ .foldLeft(1.)((perc, loc) => math.min(perc,loc))
+ assert(minLocality >= 0.90, "Expected 90% locality but got " + (minLocality*100.).toInt + "%")
+
+ // test that the groups are load balanced with 100 +/- 20 elements in each
+ val maxImbalance = coalesced2.partitions
+ .map(part => part.asInstanceOf[CoalescedRDDPartition].parents.size)
+ .foldLeft(0)((dev, curr) => math.max(math.abs(100-curr),dev))
+ assert(maxImbalance <= 20, "Expected 100 +/- 20 per partition, but got " + maxImbalance)
+
+ val data3 = sc.makeRDD(blocks).map(i => i*2) // derived RDD to test *current* pref locs
+ val coalesced3 = data3.coalesce(numMachines*2)
+ val minLocality2 = coalesced3.partitions
+ .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction)
+ .foldLeft(1.)((perc, loc) => math.min(perc,loc))
+ assert(minLocality2 >= 0.90, "Expected 90% locality for derived RDD but got " +
+ (minLocality2*100.).toInt + "%")
+ }
test("zipped RDDs") {
val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
diff --git a/core/src/test/scala/spark/SortingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala
index b933c4aab8..2f7bd370fc 100644
--- a/core/src/test/scala/spark/SortingSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala
@@ -15,12 +15,14 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.rdd
import org.scalatest.FunSuite
import org.scalatest.BeforeAndAfter
import org.scalatest.matchers.ShouldMatchers
-import SparkContext._
+
+import org.apache.spark.{Logging, SharedSparkContext}
+import org.apache.spark.SparkContext._
class SortingSuite extends FunSuite with SharedSparkContext with ShouldMatchers with Logging {
diff --git a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
index f802b66cf1..94f66c94c6 100644
--- a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
@@ -15,29 +15,26 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import scala.collection.mutable.{Map, HashMap}
import org.scalatest.FunSuite
import org.scalatest.BeforeAndAfter
-import spark.LocalSparkContext
+import org.apache.spark.LocalSparkContext
+import org.apache.spark.MapOutputTracker
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext
+import org.apache.spark.Partition
+import org.apache.spark.TaskContext
+import org.apache.spark.{Dependency, ShuffleDependency, OneToOneDependency}
+import org.apache.spark.{FetchFailed, Success, TaskEndReason}
+import org.apache.spark.storage.{BlockManagerId, BlockManagerMaster}
-import spark.storage.BlockManager
-import spark.storage.BlockManagerId
-import spark.storage.BlockManagerMaster
-import spark.{Dependency, ShuffleDependency, OneToOneDependency}
-import spark.FetchFailedException
-import spark.MapOutputTracker
-import spark.RDD
-import spark.SparkContext
-import spark.SparkException
-import spark.Partition
-import spark.TaskContext
-import spark.TaskEndReason
-
-import spark.{FetchFailed, Success}
+import org.apache.spark.scheduler.cluster.Pool
+import org.apache.spark.scheduler.cluster.SchedulingMode
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
/**
* Tests for DAGScheduler. These tests directly call the event processing functions in DAGScheduler
@@ -56,11 +53,13 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont
/** Set of TaskSets the DAGScheduler has requested executed. */
val taskSets = scala.collection.mutable.Buffer[TaskSet]()
val taskScheduler = new TaskScheduler() {
+ override def rootPool: Pool = null
+ override def schedulingMode: SchedulingMode = SchedulingMode.NONE
override def start() = {}
override def stop() = {}
override def submitTasks(taskSet: TaskSet) = {
// normally done by TaskSetManager
- taskSet.tasks.foreach(_.generation = mapOutputTracker.getGeneration)
+ taskSet.tasks.foreach(_.epoch = mapOutputTracker.getEpoch)
taskSets += taskSet
}
override def setListener(listener: TaskSchedulerListener) = {}
@@ -300,10 +299,10 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont
val reduceRdd = makeRdd(2, List(shuffleDep))
submit(reduceRdd, Array(0, 1))
// pretend we were told hostA went away
- val oldGeneration = mapOutputTracker.getGeneration
+ val oldEpoch = mapOutputTracker.getEpoch
runEvent(ExecutorLost("exec-hostA"))
- val newGeneration = mapOutputTracker.getGeneration
- assert(newGeneration > oldGeneration)
+ val newEpoch = mapOutputTracker.getEpoch
+ assert(newEpoch > oldEpoch)
val noAccum = Map[Long, Any]()
val taskSet = taskSets(0)
// should be ignored for being too old
@@ -312,8 +311,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont
runEvent(CompletionEvent(taskSet.tasks(0), Success, makeMapStatus("hostB", 1), noAccum, null, null))
// should be ignored for being too old
runEvent(CompletionEvent(taskSet.tasks(0), Success, makeMapStatus("hostA", 1), noAccum, null, null))
- // should work because it's a new generation
- taskSet.tasks(1).generation = newGeneration
+ // should work because it's a new epoch
+ taskSet.tasks(1).epoch = newEpoch
runEvent(CompletionEvent(taskSet.tasks(1), Success, makeMapStatus("hostA", 1), noAccum, null, null))
assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) ===
Array(makeBlockManagerId("hostB"), makeBlockManagerId("hostA")))
@@ -402,12 +401,14 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont
assert(results === Map(0 -> 42))
}
- /** Assert that the supplied TaskSet has exactly the given preferredLocations. Note, converts taskSet's locations to host only. */
- private def assertLocations(taskSet: TaskSet, locations: Seq[Seq[String]]) {
- assert(locations.size === taskSet.tasks.size)
- for ((expectLocs, taskLocs) <-
- taskSet.tasks.map(_.preferredLocations).zip(locations)) {
- assert(expectLocs.map(loc => spark.Utils.parseHostPort(loc)._1) === taskLocs)
+ /**
+ * Assert that the supplied TaskSet has exactly the given hosts as its preferred locations.
+ * Note that this checks only the host and not the executor ID.
+ */
+ private def assertLocations(taskSet: TaskSet, hosts: Seq[Seq[String]]) {
+ assert(hosts.size === taskSet.tasks.size)
+ for ((taskLocs, expectedLocs) <- taskSet.tasks.map(_.preferredLocations).zip(hosts)) {
+ assert(taskLocs.map(_.host) === expectedLocs)
}
}
diff --git a/core/src/test/scala/spark/scheduler/JobLoggerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala
index 0f855c38da..cece60dda7 100644
--- a/core/src/test/scala/spark/scheduler/JobLoggerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala
@@ -15,15 +15,19 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import java.util.Properties
import java.util.concurrent.LinkedBlockingQueue
+
+import scala.collection.mutable
+
import org.scalatest.FunSuite
import org.scalatest.matchers.ShouldMatchers
-import scala.collection.mutable
-import spark._
-import spark.SparkContext._
+
+import org.apache.spark._
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.RDD
class JobLoggerSuite extends FunSuite with LocalSparkContext with ShouldMatchers {
@@ -57,7 +61,7 @@ class JobLoggerSuite extends FunSuite with LocalSparkContext with ShouldMatchers
val shuffleMapStage = new Stage(1, parentRdd, Some(shuffleDep), Nil, jobID, None)
val rootStage = new Stage(0, rootRdd, None, List(shuffleMapStage), jobID, None)
- joblogger.onStageSubmitted(SparkListenerStageSubmitted(rootStage, 4))
+ joblogger.onStageSubmitted(SparkListenerStageSubmitted(rootStage, 4, null))
joblogger.getRddNameTest(parentRdd) should be (parentRdd.getClass.getName)
parentRdd.setName("MyRDD")
joblogger.getRddNameTest(parentRdd) should be ("MyRDD")
diff --git a/core/src/test/scala/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
index 392d67d67b..aac7c207cb 100644
--- a/core/src/test/scala/spark/scheduler/SparkListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import org.scalatest.FunSuite
-import spark.{SparkContext, LocalSparkContext}
+import org.apache.spark.{SparkContext, LocalSparkContext}
import scala.collection.mutable
import org.scalatest.matchers.ShouldMatchers
-import spark.SparkContext._
+import org.apache.spark.SparkContext._
/**
*
diff --git a/core/src/test/scala/spark/scheduler/TaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala
index 95a6eee2fc..e31a116a75 100644
--- a/core/src/test/scala/spark/scheduler/TaskContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala
@@ -15,15 +15,15 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import org.scalatest.FunSuite
import org.scalatest.BeforeAndAfter
-import spark.TaskContext
-import spark.RDD
-import spark.SparkContext
-import spark.Partition
-import spark.LocalSparkContext
+import org.apache.spark.TaskContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext
+import org.apache.spark.Partition
+import org.apache.spark.LocalSparkContext
class TaskContextSuite extends FunSuite with BeforeAndAfter with LocalSparkContext {
diff --git a/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala
index 8f81d0b6ee..92ad9f09b2 100644
--- a/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala
@@ -15,25 +15,25 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler.cluster
import org.scalatest.FunSuite
import org.scalatest.BeforeAndAfter
-import spark._
-import spark.scheduler._
-import spark.scheduler.cluster._
+import org.apache.spark._
+import org.apache.spark.scheduler._
+import org.apache.spark.scheduler.cluster._
import scala.collection.mutable.ArrayBuffer
import java.util.Properties
-class DummyTaskSetManager(
+class FakeTaskSetManager(
initPriority: Int,
initStageId: Int,
initNumTasks: Int,
clusterScheduler: ClusterScheduler,
taskSet: TaskSet)
- extends ClusterTaskSetManager(clusterScheduler,taskSet) {
+ extends ClusterTaskSetManager(clusterScheduler, taskSet) {
parent = null
weight = 1
@@ -72,10 +72,16 @@ class DummyTaskSetManager(
override def executorLost(executorId: String, host: String): Unit = {
}
- override def slaveOffer(execId: String, host: String, avaiableCpus: Double, overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] = {
+ override def resourceOffer(
+ execId: String,
+ host: String,
+ availableCpus: Int,
+ maxLocality: TaskLocality.TaskLocality)
+ : Option[TaskDescription] =
+ {
if (tasksFinished + runningTasks < numTasks) {
increaseRunningTasks(1)
- return Some(new TaskDescription(0, execId, "task 0:0", null))
+ return Some(new TaskDescription(0, execId, "task 0:0", 0, null))
}
return None
}
@@ -98,17 +104,10 @@ class DummyTaskSetManager(
}
}
-class DummyTask(stageId: Int) extends Task[Int](stageId)
-{
- def run(attemptId: Long): Int = {
- return 0
- }
-}
-
class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging {
- def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: ClusterScheduler, taskSet: TaskSet): DummyTaskSetManager = {
- new DummyTaskSetManager(priority, stage, numTasks, cs , taskSet)
+ def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: ClusterScheduler, taskSet: TaskSet): FakeTaskSetManager = {
+ new FakeTaskSetManager(priority, stage, numTasks, cs , taskSet)
}
def resourceOffer(rootPool: Pool): Int = {
@@ -118,7 +117,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging
logInfo("parentName:%s, parent running tasks:%d, name:%s,runningTasks:%d".format(manager.parent.name, manager.parent.runningTasks, manager.name, manager.runningTasks))
}
for (taskSet <- taskSetQueue) {
- taskSet.slaveOffer("execId_1", "hostname_1", 1) match {
+ taskSet.resourceOffer("execId_1", "hostname_1", 1, TaskLocality.ANY) match {
case Some(task) =>
return taskSet.stageId
case None => {}
@@ -135,7 +134,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging
sc = new SparkContext("local", "ClusterSchedulerSuite")
val clusterScheduler = new ClusterScheduler(sc)
var tasks = ArrayBuffer[Task[_]]()
- val task = new DummyTask(0)
+ val task = new FakeTask(0)
tasks += task
val taskSet = new TaskSet(tasks.toArray,0,0,0,null)
@@ -162,7 +161,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging
sc = new SparkContext("local", "ClusterSchedulerSuite")
val clusterScheduler = new ClusterScheduler(sc)
var tasks = ArrayBuffer[Task[_]]()
- val task = new DummyTask(0)
+ val task = new FakeTask(0)
tasks += task
val taskSet = new TaskSet(tasks.toArray,0,0,0,null)
@@ -219,7 +218,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging
sc = new SparkContext("local", "ClusterSchedulerSuite")
val clusterScheduler = new ClusterScheduler(sc)
var tasks = ArrayBuffer[Task[_]]()
- val task = new DummyTask(0)
+ val task = new FakeTask(0)
tasks += task
val taskSet = new TaskSet(tasks.toArray,0,0,0,null)
diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala
new file mode 100644
index 0000000000..ff70a2cdf0
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler.cluster
+
+import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable
+
+import org.scalatest.FunSuite
+
+import org.apache.spark._
+import org.apache.spark.scheduler._
+import org.apache.spark.executor.TaskMetrics
+import java.nio.ByteBuffer
+import org.apache.spark.util.{Utils, FakeClock}
+
+/**
+ * A mock ClusterScheduler implementation that just remembers information about tasks started and
+ * feedback received from the TaskSetManagers. Note that it's important to initialize this with
+ * a list of "live" executors and their hostnames for isExecutorAlive and hasExecutorsAliveOnHost
+ * to work, and these are required for locality in ClusterTaskSetManager.
+ */
+class FakeClusterScheduler(sc: SparkContext, liveExecutors: (String, String)* /* execId, host */)
+ extends ClusterScheduler(sc)
+{
+ val startedTasks = new ArrayBuffer[Long]
+ val endedTasks = new mutable.HashMap[Long, TaskEndReason]
+ val finishedManagers = new ArrayBuffer[TaskSetManager]
+
+ val executors = new mutable.HashMap[String, String] ++ liveExecutors
+
+ listener = new TaskSchedulerListener {
+ def taskStarted(task: Task[_], taskInfo: TaskInfo) {
+ startedTasks += taskInfo.index
+ }
+
+ def taskEnded(
+ task: Task[_],
+ reason: TaskEndReason,
+ result: Any,
+ accumUpdates: mutable.Map[Long, Any],
+ taskInfo: TaskInfo,
+ taskMetrics: TaskMetrics)
+ {
+ endedTasks(taskInfo.index) = reason
+ }
+
+ def executorGained(execId: String, host: String) {}
+
+ def executorLost(execId: String) {}
+
+ def taskSetFailed(taskSet: TaskSet, reason: String) {}
+ }
+
+ def removeExecutor(execId: String): Unit = executors -= execId
+
+ override def taskSetFinished(manager: TaskSetManager): Unit = finishedManagers += manager
+
+ override def isExecutorAlive(execId: String): Boolean = executors.contains(execId)
+
+ override def hasExecutorsAliveOnHost(host: String): Boolean = executors.values.exists(_ == host)
+}
+
+class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging {
+ import TaskLocality.{ANY, PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL}
+
+ val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "3000").toLong
+
+ test("TaskSet with no preferences") {
+ sc = new SparkContext("local", "test")
+ val sched = new FakeClusterScheduler(sc, ("exec1", "host1"))
+ val taskSet = createTaskSet(1)
+ val manager = new ClusterTaskSetManager(sched, taskSet)
+
+ // Offer a host with no CPUs
+ assert(manager.resourceOffer("exec1", "host1", 0, ANY) === None)
+
+ // Offer a host with process-local as the constraint; this should work because the TaskSet
+ // above won't have any locality preferences
+ val taskOption = manager.resourceOffer("exec1", "host1", 2, TaskLocality.PROCESS_LOCAL)
+ assert(taskOption.isDefined)
+ val task = taskOption.get
+ assert(task.executorId === "exec1")
+ assert(sched.startedTasks.contains(0))
+
+ // Re-offer the host -- now we should get no more tasks
+ assert(manager.resourceOffer("exec1", "host1", 2, PROCESS_LOCAL) === None)
+
+ // Tell it the task has finished
+ manager.statusUpdate(0, TaskState.FINISHED, createTaskResult(0))
+ assert(sched.endedTasks(0) === Success)
+ assert(sched.finishedManagers.contains(manager))
+ }
+
+ test("multiple offers with no preferences") {
+ sc = new SparkContext("local", "test")
+ val sched = new FakeClusterScheduler(sc, ("exec1", "host1"))
+ val taskSet = createTaskSet(3)
+ val manager = new ClusterTaskSetManager(sched, taskSet)
+
+ // First three offers should all find tasks
+ for (i <- 0 until 3) {
+ val taskOption = manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL)
+ assert(taskOption.isDefined)
+ val task = taskOption.get
+ assert(task.executorId === "exec1")
+ }
+ assert(sched.startedTasks.toSet === Set(0, 1, 2))
+
+ // Re-offer the host -- now we should get no more tasks
+ assert(manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) === None)
+
+ // Finish the first two tasks
+ manager.statusUpdate(0, TaskState.FINISHED, createTaskResult(0))
+ manager.statusUpdate(1, TaskState.FINISHED, createTaskResult(1))
+ assert(sched.endedTasks(0) === Success)
+ assert(sched.endedTasks(1) === Success)
+ assert(!sched.finishedManagers.contains(manager))
+
+ // Finish the last task
+ manager.statusUpdate(2, TaskState.FINISHED, createTaskResult(2))
+ assert(sched.endedTasks(2) === Success)
+ assert(sched.finishedManagers.contains(manager))
+ }
+
+ test("basic delay scheduling") {
+ sc = new SparkContext("local", "test")
+ val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2"))
+ val taskSet = createTaskSet(4,
+ Seq(TaskLocation("host1", "exec1")),
+ Seq(TaskLocation("host2", "exec2")),
+ Seq(TaskLocation("host1"), TaskLocation("host2", "exec2")),
+ Seq() // Last task has no locality prefs
+ )
+ val clock = new FakeClock
+ val manager = new ClusterTaskSetManager(sched, taskSet, clock)
+
+ // First offer host1, exec1: first task should be chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0)
+
+ // Offer host1, exec1 again: the last task, which has no prefs, should be chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 3)
+
+ // Offer host1, exec1 again, at PROCESS_LOCAL level: nothing should get chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) === None)
+
+ clock.advance(LOCALITY_WAIT)
+
+ // Offer host1, exec1 again, at PROCESS_LOCAL level: nothing should get chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) === None)
+
+ // Offer host1, exec1 again, at NODE_LOCAL level: we should choose task 2
+ assert(manager.resourceOffer("exec1", "host1", 1, NODE_LOCAL).get.index == 2)
+
+ // Offer host1, exec1 again, at NODE_LOCAL level: nothing should get chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, NODE_LOCAL) === None)
+
+ // Offer host1, exec1 again, at ANY level: nothing should get chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None)
+
+ clock.advance(LOCALITY_WAIT)
+
+ // Offer host1, exec1 again, at ANY level: task 1 should get chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 1)
+
+ // Offer host1, exec1 again, at ANY level: nothing should be chosen as we've launched all tasks
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None)
+ }
+
+ test("delay scheduling with fallback") {
+ sc = new SparkContext("local", "test")
+ val sched = new FakeClusterScheduler(sc,
+ ("exec1", "host1"), ("exec2", "host2"), ("exec3", "host3"))
+ val taskSet = createTaskSet(5,
+ Seq(TaskLocation("host1")),
+ Seq(TaskLocation("host2")),
+ Seq(TaskLocation("host2")),
+ Seq(TaskLocation("host3")),
+ Seq(TaskLocation("host2"))
+ )
+ val clock = new FakeClock
+ val manager = new ClusterTaskSetManager(sched, taskSet, clock)
+
+ // First offer host1: first task should be chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0)
+
+ // Offer host1 again: nothing should get chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None)
+
+ clock.advance(LOCALITY_WAIT)
+
+ // Offer host1 again: second task (on host2) should get chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 1)
+
+ // Offer host1 again: third task (on host2) should get chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 2)
+
+ // Offer host2: fifth task (also on host2) should get chosen
+ assert(manager.resourceOffer("exec2", "host2", 1, ANY).get.index === 4)
+
+ // Now that we've launched a local task, we should no longer launch the task for host3
+ assert(manager.resourceOffer("exec2", "host2", 1, ANY) === None)
+
+ clock.advance(LOCALITY_WAIT)
+
+ // After another delay, we can go ahead and launch that task non-locally
+ assert(manager.resourceOffer("exec2", "host2", 1, ANY).get.index === 3)
+ }
+
+ test("delay scheduling with failed hosts") {
+ sc = new SparkContext("local", "test")
+ val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2"))
+ val taskSet = createTaskSet(3,
+ Seq(TaskLocation("host1")),
+ Seq(TaskLocation("host2")),
+ Seq(TaskLocation("host3"))
+ )
+ val clock = new FakeClock
+ val manager = new ClusterTaskSetManager(sched, taskSet, clock)
+
+ // First offer host1: first task should be chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0)
+
+ // Offer host1 again: third task should be chosen immediately because host3 is not up
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 2)
+
+ // After this, nothing should get chosen
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None)
+
+ // Now mark host2 as dead
+ sched.removeExecutor("exec2")
+ manager.executorLost("exec2", "host2")
+
+ // Task 1 should immediately be launched on host1 because its original host is gone
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 1)
+
+ // Now that all tasks have launched, nothing new should be launched anywhere else
+ assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None)
+ assert(manager.resourceOffer("exec2", "host2", 1, ANY) === None)
+ }
+
+ /**
+ * Utility method to create a TaskSet, potentially setting a particular sequence of preferred
+ * locations for each task (given as varargs) if this sequence is not empty.
+ */
+ def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = {
+ if (prefLocs.size != 0 && prefLocs.size != numTasks) {
+ throw new IllegalArgumentException("Wrong number of task locations")
+ }
+ val tasks = Array.tabulate[Task[_]](numTasks) { i =>
+ new FakeTask(i, if (prefLocs.size != 0) prefLocs(i) else Nil)
+ }
+ new TaskSet(tasks, 0, 0, 0, null)
+ }
+
+ def createTaskResult(id: Int): ByteBuffer = {
+ ByteBuffer.wrap(Utils.serialize(new TaskResult[Int](id, mutable.Map.empty, new TaskMetrics)))
+ }
+}
diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala
new file mode 100644
index 0000000000..2f12aaed18
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler.cluster
+
+import org.apache.spark.scheduler.{TaskLocation, Task}
+
+class FakeTask(stageId: Int, prefLocs: Seq[TaskLocation] = Nil) extends Task[Int](stageId) {
+ override def run(attemptId: Long): Int = 0
+
+ override def preferredLocations: Seq[TaskLocation] = prefLocs
+}
diff --git a/core/src/test/scala/spark/scheduler/LocalSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/local/LocalSchedulerSuite.scala
index 14bb58731b..111340a65c 100644
--- a/core/src/test/scala/spark/scheduler/LocalSchedulerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/local/LocalSchedulerSuite.scala
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler.local
import org.scalatest.FunSuite
import org.scalatest.BeforeAndAfter
-import spark._
-import spark.scheduler._
-import spark.scheduler.cluster._
+import org.apache.spark._
+import org.apache.spark.scheduler._
+import org.apache.spark.scheduler.cluster._
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.{ConcurrentMap, HashMap}
import java.util.concurrent.Semaphore
@@ -57,23 +57,23 @@ object TaskThreadInfo {
* 1. each thread contains one job.
* 2. each job contains one stage.
* 3. each stage only contains one task.
- * 4. each task(launched) must be lanched orderly(using threadToStarted) to make sure
- * it will get cpu core resource, and will wait to finished after user manually
- * release "Lock" and then cluster will contain another free cpu cores.
- * 5. each task(pending) must use "sleep" to make sure it has been added to taskSetManager queue,
+ * 4. each task(launched) must be lanched orderly(using threadToStarted) to make sure
+ * it will get cpu core resource, and will wait to finished after user manually
+ * release "Lock" and then cluster will contain another free cpu cores.
+ * 5. each task(pending) must use "sleep" to make sure it has been added to taskSetManager queue,
* thus it will be scheduled later when cluster has free cpu cores.
*/
class LocalSchedulerSuite extends FunSuite with LocalSparkContext {
def createThread(threadIndex: Int, poolName: String, sc: SparkContext, sem: Semaphore) {
-
+
TaskThreadInfo.threadToRunning(threadIndex) = false
val nums = sc.parallelize(threadIndex to threadIndex, 1)
TaskThreadInfo.threadToLock(threadIndex) = new Lock()
TaskThreadInfo.threadToStarted(threadIndex) = new CountDownLatch(1)
new Thread {
if (poolName != null) {
- sc.addLocalProperties("spark.scheduler.cluster.fair.pool",poolName)
+ sc.setLocalProperty("spark.scheduler.cluster.fair.pool", poolName)
}
override def run() {
val ans = nums.map(number => {
@@ -88,7 +88,7 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext {
}
}.start()
}
-
+
test("Local FIFO scheduler end-to-end test") {
System.setProperty("spark.cluster.schedulingmode", "FIFO")
sc = new SparkContext("local[4]", "test")
@@ -103,8 +103,8 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext {
createThread(4,null,sc,sem)
TaskThreadInfo.threadToStarted(4).await()
// thread 5 and 6 (stage pending)must meet following two points
- // 1. stages (taskSetManager) of jobs in thread 5 and 6 should be add to taskSetManager
- // queue before executing TaskThreadInfo.threadToLock(1).jobFinished()
+ // 1. stages (taskSetManager) of jobs in thread 5 and 6 should be add to taskSetManager
+ // queue before executing TaskThreadInfo.threadToLock(1).jobFinished()
// 2. priority of stage in thread 5 should be prior to priority of stage in thread 6
// So I just use "sleep" 1s here for each thread.
// TODO: any better solution?
@@ -112,24 +112,24 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext {
Thread.sleep(1000)
createThread(6,null,sc,sem)
Thread.sleep(1000)
-
+
assert(TaskThreadInfo.threadToRunning(1) === true)
assert(TaskThreadInfo.threadToRunning(2) === true)
assert(TaskThreadInfo.threadToRunning(3) === true)
assert(TaskThreadInfo.threadToRunning(4) === true)
assert(TaskThreadInfo.threadToRunning(5) === false)
assert(TaskThreadInfo.threadToRunning(6) === false)
-
+
TaskThreadInfo.threadToLock(1).jobFinished()
TaskThreadInfo.threadToStarted(5).await()
-
+
assert(TaskThreadInfo.threadToRunning(1) === false)
assert(TaskThreadInfo.threadToRunning(2) === true)
assert(TaskThreadInfo.threadToRunning(3) === true)
assert(TaskThreadInfo.threadToRunning(4) === true)
assert(TaskThreadInfo.threadToRunning(5) === true)
assert(TaskThreadInfo.threadToRunning(6) === false)
-
+
TaskThreadInfo.threadToLock(3).jobFinished()
TaskThreadInfo.threadToStarted(6).await()
@@ -139,7 +139,7 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext {
assert(TaskThreadInfo.threadToRunning(4) === true)
assert(TaskThreadInfo.threadToRunning(5) === true)
assert(TaskThreadInfo.threadToRunning(6) === true)
-
+
TaskThreadInfo.threadToLock(2).jobFinished()
TaskThreadInfo.threadToLock(4).jobFinished()
TaskThreadInfo.threadToLock(5).jobFinished()
@@ -160,18 +160,18 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext {
TaskThreadInfo.threadToStarted(20).await()
createThread(30,"3",sc,sem)
TaskThreadInfo.threadToStarted(30).await()
-
+
assert(TaskThreadInfo.threadToRunning(10) === true)
assert(TaskThreadInfo.threadToRunning(20) === true)
assert(TaskThreadInfo.threadToRunning(30) === true)
-
+
createThread(11,"1",sc,sem)
TaskThreadInfo.threadToStarted(11).await()
createThread(21,"2",sc,sem)
TaskThreadInfo.threadToStarted(21).await()
createThread(31,"3",sc,sem)
TaskThreadInfo.threadToStarted(31).await()
-
+
assert(TaskThreadInfo.threadToRunning(11) === true)
assert(TaskThreadInfo.threadToRunning(21) === true)
assert(TaskThreadInfo.threadToRunning(31) === true)
@@ -185,19 +185,19 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext {
assert(TaskThreadInfo.threadToRunning(12) === true)
assert(TaskThreadInfo.threadToRunning(22) === true)
assert(TaskThreadInfo.threadToRunning(32) === false)
-
+
TaskThreadInfo.threadToLock(10).jobFinished()
TaskThreadInfo.threadToStarted(32).await()
-
+
assert(TaskThreadInfo.threadToRunning(32) === true)
- //1. Similar with above scenario, sleep 1s for stage of 23 and 33 to be added to taskSetManager
+ //1. Similar with above scenario, sleep 1s for stage of 23 and 33 to be added to taskSetManager
// queue so that cluster will assign free cpu core to stage 23 after stage 11 finished.
//2. priority of 23 and 33 will be meaningless as using fair scheduler here.
createThread(23,"2",sc,sem)
createThread(33,"3",sc,sem)
Thread.sleep(1000)
-
+
TaskThreadInfo.threadToLock(11).jobFinished()
TaskThreadInfo.threadToStarted(23).await()
@@ -206,7 +206,7 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext {
TaskThreadInfo.threadToLock(12).jobFinished()
TaskThreadInfo.threadToStarted(33).await()
-
+
assert(TaskThreadInfo.threadToRunning(33) === true)
TaskThreadInfo.threadToLock(20).jobFinished()
@@ -217,7 +217,7 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext {
TaskThreadInfo.threadToLock(31).jobFinished()
TaskThreadInfo.threadToLock(32).jobFinished()
TaskThreadInfo.threadToLock(33).jobFinished()
-
- sem.acquire(11)
+
+ sem.acquire(11)
}
}
diff --git a/core/src/test/scala/spark/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
index c3323dcbb3..0164dda0ba 100644
--- a/core/src/test/scala/spark/KryoSerializerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
@@ -15,17 +15,17 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.serializer
import scala.collection.mutable
-import scala.collection.immutable
-import org.scalatest.FunSuite
-import com.esotericsoftware.kryo._
+import com.esotericsoftware.kryo.Kryo
-import SparkContext._
+import org.scalatest.FunSuite
+import org.apache.spark.SharedSparkContext
+import org.apache.spark.serializer.KryoTest._
-class KryoSerializerSuite extends FunSuite {
+class KryoSerializerSuite extends FunSuite with SharedSparkContext {
test("basic types") {
val ser = (new KryoSerializer).newInstance()
def check[T](t: T) {
@@ -53,6 +53,7 @@ class KryoSerializerSuite extends FunSuite {
check(Array(true, false, true))
check(Array('a', 'b', 'c'))
check(Array[Int]())
+ check(Array(Array("1", "2"), Array("1", "2", "3", "4")))
}
test("pairs") {
@@ -103,7 +104,6 @@ class KryoSerializerSuite extends FunSuite {
}
test("custom registrator") {
- import spark.test._
System.setProperty("spark.kryo.registrator", classOf[MyRegistrator].getName)
val ser = (new KryoSerializer).newInstance()
@@ -123,14 +123,65 @@ class KryoSerializerSuite extends FunSuite {
val hashMap = new java.util.HashMap[String, String]
hashMap.put("foo", "bar")
check(hashMap)
-
+
+ System.clearProperty("spark.kryo.registrator")
+ }
+
+ test("kryo with collect") {
+ val control = 1 :: 2 :: Nil
+ val result = sc.parallelize(control, 2).map(new ClassWithoutNoArgConstructor(_)).collect().map(_.x)
+ assert(control === result.toSeq)
+ }
+
+ test("kryo with parallelize") {
+ val control = 1 :: 2 :: Nil
+ val result = sc.parallelize(control.map(new ClassWithoutNoArgConstructor(_))).map(_.x).collect()
+ assert (control === result.toSeq)
+ }
+
+ test("kryo with parallelize for specialized tuples") {
+ assert (sc.parallelize( Array((1, 11), (2, 22), (3, 33)) ).count === 3)
+ }
+
+ test("kryo with parallelize for primitive arrays") {
+ assert (sc.parallelize( Array(1, 2, 3) ).count === 3)
+ }
+
+ test("kryo with collect for specialized tuples") {
+ assert (sc.parallelize( Array((1, 11), (2, 22), (3, 33)) ).collect().head === (1, 11))
+ }
+
+ test("kryo with reduce") {
+ val control = 1 :: 2 :: Nil
+ val result = sc.parallelize(control, 2).map(new ClassWithoutNoArgConstructor(_))
+ .reduce((t1, t2) => new ClassWithoutNoArgConstructor(t1.x + t2.x)).x
+ assert(control.sum === result)
+ }
+
+ // TODO: this still doesn't work
+ ignore("kryo with fold") {
+ val control = 1 :: 2 :: Nil
+ val result = sc.parallelize(control, 2).map(new ClassWithoutNoArgConstructor(_))
+ .fold(new ClassWithoutNoArgConstructor(10))((t1, t2) => new ClassWithoutNoArgConstructor(t1.x + t2.x)).x
+ assert(10 + control.sum === result)
+ }
+
+ override def beforeAll() {
+ System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+ System.setProperty("spark.kryo.registrator", classOf[MyRegistrator].getName)
+ super.beforeAll()
+ }
+
+ override def afterAll() {
+ super.afterAll()
System.clearProperty("spark.kryo.registrator")
+ System.clearProperty("spark.serializer")
}
}
-package test {
+object KryoTest {
case class CaseClass(i: Int, s: String) {}
-
+
class ClassWithNoArgConstructor {
var x: Int = 0
override def equals(other: Any) = other match {
diff --git a/core/src/test/scala/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
index b719d65342..038a9acb85 100644
--- a/core/src/test/scala/spark/storage/BlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.storage
+package org.apache.spark.storage
import java.nio.ByteBuffer
@@ -29,11 +29,8 @@ import org.scalatest.concurrent.Timeouts._
import org.scalatest.matchers.ShouldMatchers._
import org.scalatest.time.SpanSugar._
-import spark.JavaSerializer
-import spark.KryoSerializer
-import spark.SizeEstimator
-import spark.util.AkkaUtils
-import spark.util.ByteBufferInputStream
+import org.apache.spark.util.{SizeEstimator, Utils, AkkaUtils, ByteBufferInputStream}
+import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodTester {
@@ -56,7 +53,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT
System.setProperty("spark.hostPort", "localhost:" + boundPort)
master = new BlockManagerMaster(
- actorSystem.actorOf(Props(new spark.storage.BlockManagerMasterActor(true))))
+ actorSystem.actorOf(Props(new BlockManagerMasterActor(true))))
// Set the arch to 64-bit and compressedOops to true to get a deterministic test-case
oldArch = System.setProperty("os.arch", "amd64")
@@ -65,7 +62,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT
val initialize = PrivateMethod[Unit]('initialize)
SizeEstimator invokePrivate initialize()
// Set some value ...
- System.setProperty("spark.hostPort", spark.Utils.localHostName() + ":" + 1111)
+ System.setProperty("spark.hostPort", Utils.localHostName() + ":" + 1111)
}
after {
@@ -105,10 +102,10 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT
assert(level2 === level1, "level2 is not same as level1")
assert(level2.eq(level1), "level2 is not the same object as level1")
assert(level3 != level1, "level3 is same as level1")
- val bytes1 = spark.Utils.serialize(level1)
- val level1_ = spark.Utils.deserialize[StorageLevel](bytes1)
- val bytes2 = spark.Utils.serialize(level2)
- val level2_ = spark.Utils.deserialize[StorageLevel](bytes2)
+ val bytes1 = Utils.serialize(level1)
+ val level1_ = Utils.deserialize[StorageLevel](bytes1)
+ val bytes2 = Utils.serialize(level2)
+ val level2_ = Utils.deserialize[StorageLevel](bytes2)
assert(level1_ === level1, "Deserialized level1 not same as original level1")
assert(level1_.eq(level1), "Deserialized level1 not the same object as original level2")
assert(level2_ === level2, "Deserialized level2 not same as original level2")
@@ -122,10 +119,10 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT
assert(id2 === id1, "id2 is not same as id1")
assert(id2.eq(id1), "id2 is not the same object as id1")
assert(id3 != id1, "id3 is same as id1")
- val bytes1 = spark.Utils.serialize(id1)
- val id1_ = spark.Utils.deserialize[BlockManagerId](bytes1)
- val bytes2 = spark.Utils.serialize(id2)
- val id2_ = spark.Utils.deserialize[BlockManagerId](bytes2)
+ val bytes1 = Utils.serialize(id1)
+ val id1_ = Utils.deserialize[BlockManagerId](bytes1)
+ val bytes2 = Utils.serialize(id2)
+ val id2_ = Utils.deserialize[BlockManagerId](bytes2)
assert(id1_ === id1, "Deserialized id1 is not same as original id1")
assert(id1_.eq(id1), "Deserialized id1 is not the same object as original id1")
assert(id2_ === id2, "Deserialized id2 is not same as original id2")
diff --git a/core/src/test/scala/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala
index 56c1fed6ad..3321fb5eb7 100644
--- a/core/src/test/scala/spark/ui/UISuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.ui
+package org.apache.spark.ui
import scala.util.{Failure, Success, Try}
import java.net.ServerSocket
@@ -24,14 +24,15 @@ import org.eclipse.jetty.server.Server
class UISuite extends FunSuite {
test("jetty port increases under contention") {
- val startPort = 33333
+ val startPort = 3030
val server = new Server(startPort)
server.start()
val (jettyServer1, boundPort1) = JettyUtils.startJettyServer("localhost", startPort, Seq())
val (jettyServer2, boundPort2) = JettyUtils.startJettyServer("localhost", startPort, Seq())
- assert(boundPort1 === startPort + 1)
- assert(boundPort2 === startPort + 2)
+ // Allow some wiggle room in case ports on the machine are under contention
+ assert(boundPort1 > startPort && boundPort1 < startPort + 10)
+ assert(boundPort2 > boundPort1 && boundPort2 < boundPort1 + 10)
}
test("jetty binds to port 0 correctly") {
diff --git a/core/src/test/scala/spark/ClosureCleanerSuite.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala
index 7d2831e19c..0ed366fb70 100644
--- a/core/src/test/scala/spark/ClosureCleanerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala
@@ -15,13 +15,14 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.util
import java.io.NotSerializableException
import org.scalatest.FunSuite
-import spark.LocalSparkContext._
-import SparkContext._
+
+import org.apache.spark.SparkContext
+import org.apache.spark.LocalSparkContext._
class ClosureCleanerSuite extends FunSuite {
test("closures inside an object") {
diff --git a/core/src/test/scala/spark/util/DistributionSuite.scala b/core/src/test/scala/org/apache/spark/util/DistributionSuite.scala
index 6578b55e82..63642461e4 100644
--- a/core/src/test/scala/spark/util/DistributionSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/DistributionSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import org.scalatest.FunSuite
import org.scalatest.matchers.ShouldMatchers
diff --git a/core/src/test/scala/org/apache/spark/util/FakeClock.scala b/core/src/test/scala/org/apache/spark/util/FakeClock.scala
new file mode 100644
index 0000000000..0a45917b08
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/util/FakeClock.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util
+
+class FakeClock extends Clock {
+ private var time = 0L
+
+ def advance(millis: Long): Unit = time += millis
+
+ def getTime(): Long = time
+}
diff --git a/core/src/test/scala/spark/util/NextIteratorSuite.scala b/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala
index fdbd43d941..45867463a5 100644
--- a/core/src/test/scala/spark/util/NextIteratorSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import org.scalatest.FunSuite
import org.scalatest.matchers.ShouldMatchers
diff --git a/core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/RateLimitedOutputStreamSuite.scala
index 4c0044202f..a9dd0b1a5b 100644
--- a/core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/RateLimitedOutputStreamSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.util
+package org.apache.spark.util
import org.scalatest.FunSuite
import java.io.ByteArrayOutputStream
diff --git a/core/src/test/scala/spark/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala
index 1ef812dfbd..4e40dcbdee 100644
--- a/core/src/test/scala/spark/SizeEstimatorSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.util
import org.scalatest.FunSuite
import org.scalatest.BeforeAndAfterAll
diff --git a/core/src/test/scala/spark/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 31c3b25c50..e2859caf58 100644
--- a/core/src/test/scala/spark/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark
+package org.apache.spark.util
import com.google.common.base.Charsets
import com.google.common.io.Files
@@ -26,14 +26,14 @@ import scala.util.Random
class UtilsSuite extends FunSuite {
- test("memoryBytesToString") {
- assert(Utils.memoryBytesToString(10) === "10.0 B")
- assert(Utils.memoryBytesToString(1500) === "1500.0 B")
- assert(Utils.memoryBytesToString(2000000) === "1953.1 KB")
- assert(Utils.memoryBytesToString(2097152) === "2.0 MB")
- assert(Utils.memoryBytesToString(2306867) === "2.2 MB")
- assert(Utils.memoryBytesToString(5368709120L) === "5.0 GB")
- assert(Utils.memoryBytesToString(5L * 1024L * 1024L * 1024L * 1024L) === "5.0 TB")
+ test("bytesToString") {
+ assert(Utils.bytesToString(10) === "10.0 B")
+ assert(Utils.bytesToString(1500) === "1500.0 B")
+ assert(Utils.bytesToString(2000000) === "1953.1 KB")
+ assert(Utils.bytesToString(2097152) === "2.0 MB")
+ assert(Utils.bytesToString(2306867) === "2.2 MB")
+ assert(Utils.bytesToString(5368709120L) === "5.0 GB")
+ assert(Utils.bytesToString(5L * 1024L * 1024L * 1024L * 1024L) === "5.0 TB")
}
test("copyStream") {
diff --git a/docs/README.md b/docs/README.md
index c2b3497bb3..dfcf753553 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,6 +1,6 @@
Welcome to the Spark documentation!
-This readme will walk you through navigating and building the Spark documentation, which is included here with the Spark source code. You can also find documentation specific to release versions of Spark at http://spark-project.org/documentation.html.
+This readme will walk you through navigating and building the Spark documentation, which is included here with the Spark source code. You can also find documentation specific to release versions of Spark at http://spark.incubator.apache.org/documentation.html.
Read on to learn more about viewing documentation in plain text (i.e., markdown) or building the documentation yourself. Why build it yourself? So that you have the docs that corresponds to whichever version of Spark you currently have checked out of revision control.
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html
index f06ab2d5b0..84749fda4e 100755
--- a/docs/_layouts/global.html
+++ b/docs/_layouts/global.html
@@ -61,19 +61,25 @@
<a href="#" class="dropdown-toggle" data-toggle="dropdown">Programming Guides<b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="quick-start.html">Quick Start</a></li>
- <li><a href="scala-programming-guide.html">Scala</a></li>
- <li><a href="java-programming-guide.html">Java</a></li>
- <li><a href="python-programming-guide.html">Python</a></li>
+ <li><a href="scala-programming-guide.html">Spark in Scala</a></li>
+ <li><a href="java-programming-guide.html">Spark in Java</a></li>
+ <li><a href="python-programming-guide.html">Spark in Python</a></li>
+ <li class="divider"></li>
<li><a href="streaming-programming-guide.html">Spark Streaming</a></li>
+ <li><a href="mllib-guide.html">MLlib (Machine Learning)</a></li>
+ <li><a href="bagel-programming-guide.html">Bagel (Pregel on Spark)</a></li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown">API Docs<b class="caret"></b></a>
<ul class="dropdown-menu">
- <li><a href="api/core/index.html">Spark Java/Scala (Scaladoc)</a></li>
- <li><a href="api/pyspark/index.html">Spark Python (Epydoc)</a></li>
- <li><a href="api/streaming/index.html">Spark Streaming Java/Scala (Scaladoc) </a></li>
+ <li><a href="api/core/index.html">Spark Core for Java/Scala</a></li>
+ <li><a href="api/pyspark/index.html">Spark Core for Python</a></li>
+ <li class="divider"></li>
+ <li><a href="api/streaming/index.html">Spark Streaming</a></li>
+ <li><a href="api/mllib/index.html">MLlib (Machine Learning)</a></li>
+ <li><a href="api/bagel/index.html">Bagel (Pregel on Spark)</a></li>
</ul>
</li>
@@ -90,10 +96,10 @@
<li class="dropdown">
<a href="api.html" class="dropdown-toggle" data-toggle="dropdown">More<b class="caret"></b></a>
<ul class="dropdown-menu">
- <li><a href="building-with-maven.html">Building Spark with Maven</a></li>
<li><a href="configuration.html">Configuration</a></li>
<li><a href="tuning.html">Tuning Guide</a></li>
- <li><a href="bagel-programming-guide.html">Bagel (Pregel on Spark)</a></li>
+ <li><a href="hardware-provisioning.html">Hardware Provisioning</a></li>
+ <li><a href="building-with-maven.html">Building Spark with Maven</a></li>
<li><a href="contributing-to-spark.html">Contributing to Spark</a></li>
</ul>
</li>
@@ -135,9 +141,15 @@
<hr>-->
- <!--<footer>
- <p></p>
- </footer>-->
+ <footer>
+ <hr>
+ <p style="text-align: center; veritcal-align: middle; color: #999;">
+ Apache Spark is an effort undergoing incubation at the Apache Software Foundation.
+ <a href="http://incubator.apache.org">
+ <img style="margin-left: 20px;" src="img/incubator-logo.png" />
+ </a>
+ </p>
+ </footer>
</div> <!-- /container -->
diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb
index 45ef4bba82..c574ea7f5c 100644
--- a/docs/_plugins/copy_api_dirs.rb
+++ b/docs/_plugins/copy_api_dirs.rb
@@ -18,9 +18,9 @@
require 'fileutils'
include FileUtils
-if ENV['SKIP_API'] != '1'
+if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1')
# Build Scaladoc for Java/Scala
- projects = ["core", "examples", "repl", "bagel", "streaming"]
+ projects = ["core", "examples", "repl", "bagel", "streaming", "mllib"]
puts "Moving to project root and building scaladoc."
curr_dir = pwd
diff --git a/docs/bagel-programming-guide.md b/docs/bagel-programming-guide.md
index 8a0fa42d94..583684913d 100644
--- a/docs/bagel-programming-guide.md
+++ b/docs/bagel-programming-guide.md
@@ -9,16 +9,15 @@ In the Pregel programming model, jobs run as a sequence of iterations called _su
This guide shows the programming model and features of Bagel by walking through an example implementation of PageRank on Bagel.
-## Linking with Bagel
+# Linking with Bagel
-To write a Bagel application, you will need to add Spark, its dependencies, and Bagel to your CLASSPATH:
+To use Bagel in your program, add the following SBT or Maven dependency:
-1. Run `sbt/sbt update` to fetch Spark's dependencies, if you haven't already done so.
-2. Run `sbt/sbt assembly` to build Spark and its dependencies into one JAR (`core/target/spark-core-assembly-{{site.SPARK_VERSION}}.jar`)
-3. Run `sbt/sbt package` build the Bagel JAR (`bagel/target/scala_{{site.SCALA_VERSION}}/spark-bagel_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}.jar`).
-4. Add these two JARs to your CLASSPATH.
+ groupId = org.apache.spark
+ artifactId = spark-bagel_{{site.SCALA_VERSION}}
+ version = {{site.SPARK_VERSION}}
-## Programming Model
+# Programming Model
Bagel operates on a graph represented as a [distributed dataset](scala-programming-guide.html) of (K, V) pairs, where keys are vertex IDs and values are vertices plus their associated state. In each superstep, Bagel runs a user-specified compute function on each vertex that takes as input the current vertex state and a list of messages sent to that vertex during the previous superstep, and returns the new vertex state and a list of outgoing messages.
@@ -29,8 +28,8 @@ representing the current PageRank of the vertex, and similarly extend
the `Message` and `Edge` classes. Note that these need to be marked `@serializable` to allow Spark to transfer them across machines. We also import the Bagel types and implicit conversions.
{% highlight scala %}
-import spark.bagel._
-import spark.bagel.Bagel._
+import org.apache.spark.bagel._
+import org.apache.spark.bagel.Bagel._
@serializable class PREdge(val targetId: String) extends Edge
@@ -89,7 +88,7 @@ Finally, we print the results.
println(result.map(v => "%s\t%s\n".format(v.id, v.rank)).collect.mkString)
{% endhighlight %}
-### Combiners
+## Combiners
Sending a message to another vertex generally involves expensive communication over the network. For certain algorithms, it's possible to reduce the amount of communication using _combiners_. For example, if the compute function receives integer messages and only uses their sum, it's possible for Bagel to combine multiple messages to the same vertex by summing them.
@@ -97,7 +96,7 @@ For combiner support, Bagel can optionally take a set of combiner functions that
_Example: PageRank with combiners_
-### Aggregators
+## Aggregators
Aggregators perform a reduce across all vertices after each superstep, and provide the result to each vertex in the next superstep.
@@ -105,11 +104,11 @@ For aggregator support, Bagel can optionally take an aggregator function that re
_Example_
-### Operations
+## Operations
-Here are the actions and types in the Bagel API. See [Bagel.scala](https://github.com/mesos/spark/blob/master/bagel/src/main/scala/spark/bagel/Bagel.scala) for details.
+Here are the actions and types in the Bagel API. See [Bagel.scala](https://github.com/apache/incubator-spark/blob/master/bagel/src/main/scala/spark/bagel/Bagel.scala) for details.
-#### Actions
+### Actions
{% highlight scala %}
/*** Full form ***/
@@ -133,7 +132,7 @@ Bagel.run(sc, vertices, messages, numSplits)(compute)
// and returns (newVertex: V, outMessages: Array[M])
{% endhighlight %}
-#### Types
+### Types
{% highlight scala %}
trait Combiner[M, C] {
@@ -156,6 +155,10 @@ trait Message[K] {
}
{% endhighlight %}
-## Where to Go from Here
+# Where to Go from Here
-Two example jobs, PageRank and shortest path, are included in `bagel/src/main/scala/spark/bagel/examples`. You can run them by passing the class name to the `run` script included in Spark -- for example, `./run spark.bagel.examples.WikipediaPageRank`. Each example program prints usage help when run without any arguments.
+Two example jobs, PageRank and shortest path, are included in `examples/src/main/scala/org/apache/spark/examples/bagel`. You can run them by passing the class name to the `run-example` script included in Spark; e.g.:
+
+ ./run-example org.apache.spark.examples.bagel.WikipediaPageRank
+
+Each example program prints usage help when run without any arguments.
diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md
index 04cd79d039..7ecb601ddd 100644
--- a/docs/building-with-maven.md
+++ b/docs/building-with-maven.md
@@ -8,50 +8,60 @@ title: Building Spark with Maven
Building Spark using Maven Requires Maven 3 (the build process is tested with Maven 3.0.4) and Java 1.6 or newer.
-Building with Maven requires that a Hadoop profile be specified explicitly at the command line, there is no default. There are two profiles to choose from, one for building for Hadoop 1 or Hadoop 2.
-for Hadoop 1 (using 0.20.205.0) use:
+## Setting up Maven's Memory Usage ##
- $ mvn -Phadoop1 clean install
+You'll need to configure Maven to use more memory than usual by setting `MAVEN_OPTS`. We recommend the following settings:
+ export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
-for Hadoop 2 (using 2.0.0-mr1-cdh4.1.1) use:
+If you don't run this, you may see errors like the following:
- $ mvn -Phadoop2 clean install
+ [INFO] Compiling 203 Scala sources and 9 Java sources to /Users/me/Development/spark/core/target/scala-{{site.SCALA_VERSION}}/classes...
+ [ERROR] PermGen space -> [Help 1]
-It uses the scala-maven-plugin which supports incremental and continuous compilation. E.g.
+ [INFO] Compiling 203 Scala sources and 9 Java sources to /Users/me/Development/spark/core/target/scala-{{site.SCALA_VERSION}}/classes...
+ [ERROR] Java heap space -> [Help 1]
- $ mvn -Phadoop2 scala:cc
+You can fix this by setting the `MAVEN_OPTS` variable as discussed before.
-…should run continuous compilation (i.e. wait for changes). However, this has not been tested extensively.
+## Specifying the Hadoop version ##
-## Spark Tests in Maven ##
+Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default.
-Tests are run by default via the scalatest-maven-plugin. With this you can do things like:
+For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop versions without YARN, use:
-Skip test execution (but not compilation):
+ # Apache Hadoop 1.2.1
+ $ mvn -Dhadoop.version=1.2.1 -DskipTests clean package
- $ mvn -DskipTests -Phadoop2 clean install
+ # Cloudera CDH 4.2.0 with MapReduce v1
+ $ mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package
-To run a specific test suite:
+For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, you should also enable the "hadoop2-yarn" profile:
- $ mvn -Phadoop2 -Dsuites=spark.repl.ReplSuite test
+ # Apache Hadoop 2.0.5-alpha
+ $ mvn -Phadoop2-yarn -Dhadoop.version=2.0.5-alpha -DskipTests clean package
+ # Cloudera CDH 4.2.0 with MapReduce v2
+ $ mvn -Phadoop2-yarn -Dhadoop.version=2.0.0-cdh4.2.0 -DskipTests clean package
-## Setting up JVM Memory Usage Via Maven ##
-You might run into the following errors if you're using a vanilla installation of Maven:
+## Spark Tests in Maven ##
- [INFO] Compiling 203 Scala sources and 9 Java sources to /Users/me/Development/spark/core/target/scala-{{site.SCALA_VERSION}}/classes...
- [ERROR] PermGen space -> [Help 1]
+Tests are run by default via the [ScalaTest Maven plugin](http://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin). Some of the require Spark to be packaged first, so always run `mvn package` with `-DskipTests` the first time. You can then run the tests with `mvn -Dhadoop.version=... test`.
- [INFO] Compiling 203 Scala sources and 9 Java sources to /Users/me/Development/spark/core/target/scala-{{site.SCALA_VERSION}}/classes...
- [ERROR] Java heap space -> [Help 1]
+The ScalaTest plugin also supports running only a specific test suite as follows:
+
+ $ mvn -Dhadoop.version=... -Dsuites=spark.repl.ReplSuite test
+
+
+## Continuous Compilation ##
-To fix these, you can do the following:
+We use the scala-maven-plugin which supports incremental and continuous compilation. E.g.
- export MAVEN_OPTS="-Xmx1024m -XX:MaxPermSize=128M"
+ $ mvn scala:cc
+should run continuous compilation (i.e. wait for changes). However, this has not been tested extensively.
## Using With IntelliJ IDEA ##
@@ -59,8 +69,8 @@ This setup works fine in IntelliJ IDEA 11.1.4. After opening the project via the
## Building Spark Debian Packages ##
-It includes support for building a Debian package containing a 'fat-jar' which includes the repl, the examples and bagel. This can be created by specifying the deb profile:
+It includes support for building a Debian package containing a 'fat-jar' which includes the repl, the examples and bagel. This can be created by specifying the following profiles:
- $ mvn -Phadoop2,deb clean install
+ $ mvn -Prepl-bin -Pdeb clean package
The debian package can then be found under repl/target. We added the short commit hash to the file name so that we can distinguish individual packages build for SNAPSHOT versions.
diff --git a/docs/configuration.md b/docs/configuration.md
index 5c06897cae..58e9434bdc 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -5,50 +5,14 @@ title: Spark Configuration
Spark provides three main locations to configure the system:
-* [Environment variables](#environment-variables) for launching Spark workers, which can
- be set either in your driver program or in the `conf/spark-env.sh` script.
-* [Java system properties](#system-properties), which control internal configuration parameters and can be set either
- programmatically (by calling `System.setProperty` *before* creating a `SparkContext`) or through the
- `SPARK_JAVA_OPTS` environment variable in `spark-env.sh`.
+* [Java system properties](#system-properties), which control internal configuration parameters and can be set
+ either programmatically (by calling `System.setProperty` *before* creating a `SparkContext`) or through
+ JVM arguments.
+* [Environment variables](#environment-variables) for configuring per-machine settings such as the IP address,
+ which can be set in the `conf/spark-env.sh` script.
* [Logging configuration](#configuring-logging), which is done through `log4j.properties`.
-# Environment Variables
-
-Spark determines how to initialize the JVM on worker nodes, or even on the local node when you run `spark-shell`,
-by running the `conf/spark-env.sh` script in the directory where it is installed. This script does not exist by default
-in the Git repository, but but you can create it by copying `conf/spark-env.sh.template`. Make sure that you make
-the copy executable.
-
-Inside `spark-env.sh`, you *must* set at least the following two variables:
-
-* `SCALA_HOME`, to point to your Scala installation, or `SCALA_LIBRARY_PATH` to point to the directory for Scala
- library JARs (if you install Scala as a Debian or RPM package, there is no `SCALA_HOME`, but these libraries
- are in a separate path, typically /usr/share/java; look for `scala-library.jar`).
-* `MESOS_NATIVE_LIBRARY`, if you are [running on a Mesos cluster](running-on-mesos.html).
-
-In addition, there are four other variables that control execution. These should be set *in the environment that
-launches the job's driver program* instead of `spark-env.sh`, because they will be automatically propagated to
-workers. Setting these per-job instead of in `spark-env.sh` ensures that different jobs can have different settings
-for these variables.
-
-* `SPARK_JAVA_OPTS`, to add JVM options. This includes any system properties that you'd like to pass with `-D`.
-* `SPARK_CLASSPATH`, to add elements to Spark's classpath.
-* `SPARK_LIBRARY_PATH`, to add search directories for native libraries.
-* `SPARK_MEM`, to set the amount of memory used per node. This should be in the same format as the
- JVM's -Xmx option, e.g. `300m` or `1g`. Note that this option will soon be deprecated in favor of
- the `spark.executor.memory` system property, so we recommend using that in new code.
-
-Beware that if you do set these variables in `spark-env.sh`, they will override the values set by user programs,
-which is undesirable; if you prefer, you can choose to have `spark-env.sh` set them only if the user program
-hasn't, as follows:
-
-{% highlight bash %}
-if [ -z "$SPARK_JAVA_OPTS" ] ; then
- SPARK_JAVA_OPTS="-verbose:gc"
-fi
-{% endhighlight %}
-
# System Properties
To set a system property for configuring Spark, you need to either pass it with a -D flag to the JVM (for example `java -Dspark.cores.max=5 MyProgram`) or call `System.setProperty` in your code *before* creating your Spark context, as follows:
@@ -67,18 +31,18 @@ there are at least five properties that you will commonly want to control:
<td>spark.executor.memory</td>
<td>512m</td>
<td>
- Amount of memory to use per executor process, in the same format as JVM memory strings (e.g. `512m`, `2g`).
+ Amount of memory to use per executor process, in the same format as JVM memory strings (e.g. <code>512m</code>, <code>2g</code>).
</td>
</tr>
<tr>
<td>spark.serializer</td>
- <td>spark.JavaSerializer</td>
+ <td>org.apache.spark.serializer.<br />JavaSerializer</td>
<td>
Class to use for serializing objects that will be sent over the network or need to be cached
in serialized form. The default of Java serialization works with any Serializable Java object but is
- quite slow, so we recommend <a href="tuning.html">using <code>spark.KryoSerializer</code>
- and configuring Kryo serialization</a> when speed is necessary. Can be any subclass of
- <a href="api/core/index.html#spark.Serializer"><code>spark.Serializer</code></a>).
+ quite slow, so we recommend <a href="tuning.html">using <code>org.apache.spark.serializer.KryoSerializer</code>
+ and configuring Kryo serialization</a> when speed is necessary. Can be any subclass of
+ <a href="api/core/index.html#org.apache.spark.serializer.Serializer"><code>org.apache.spark.Serializer</code></a>.
</td>
</tr>
<tr>
@@ -86,8 +50,8 @@ there are at least five properties that you will commonly want to control:
<td>(none)</td>
<td>
If you use Kryo serialization, set this class to register your custom classes with Kryo.
- You need to set it to a class that extends
- <a href="api/core/index.html#spark.KryoRegistrator"><code>spark.KryoRegistrator</code></a>).
+ It should be set to a class that extends
+ <a href="api/core/index.html#org.apache.spark.serializer.KryoRegistrator"><code>KryoRegistrator</code></a>.
See the <a href="tuning.html#data-serialization">tuning guide</a> for more details.
</td>
</tr>
@@ -97,7 +61,7 @@ there are at least five properties that you will commonly want to control:
<td>
Directory to use for "scratch" space in Spark, including map output files and RDDs that get stored
on disk. This should be on a fast, local disk in your system. It can also be a comma-separated
- list of multiple directories.
+ list of multiple directories on different disks.
</td>
</tr>
<tr>
@@ -106,7 +70,8 @@ there are at least five properties that you will commonly want to control:
<td>
When running on a <a href="spark-standalone.html">standalone deploy cluster</a> or a
<a href="running-on-mesos.html#mesos-run-modes">Mesos cluster in "coarse-grained"
- sharing mode</a>, how many CPU cores to request at most. The default will use all available cores.
+ sharing mode</a>, how many CPU cores to request at most. The default will use all available cores
+ offered by the cluster manager.
</td>
</tr>
</table>
@@ -146,7 +111,7 @@ Apart from these, the following properties are also available, and may be useful
</tr>
<tr>
<td>spark.ui.port</td>
- <td>33000</td>
+ <td>3030</td>
<td>
Port for your application's dashboard, which shows memory and workload data
</td>
@@ -181,6 +146,21 @@ Apart from these, the following properties are also available, and may be useful
</td>
</tr>
<tr>
+ <td>spark.io.compression.codec</td>
+ <td>org.apache.spark.io.<br />SnappyCompressionCodec</td>
+ <td>
+ The compression codec class to use for various compressions. By default, Spark provides two
+ codecs: <code>org.apache.spark.io.LZFCompressionCodec</code> and <code>org.apache.spark.io.SnappyCompressionCodec</code>.
+ </td>
+</tr>
+<tr>
+ <td>spark.io.compression.snappy.block.size</td>
+ <td>32768</td>
+ <td>
+ Block size (in bytes) used in Snappy compression, in the case when Snappy compression codec is used.
+ </td>
+</tr>
+<tr>
<td>spark.reducer.maxMbInFlight</td>
<td>48</td>
<td>
@@ -191,7 +171,7 @@ Apart from these, the following properties are also available, and may be useful
</tr>
<tr>
<td>spark.closure.serializer</td>
- <td>spark.JavaSerializer</td>
+ <td>org.apache.spark.serializer.<br />JavaSerializer</td>
<td>
Serializer class to use for closures. Generally Java is fine unless your distributed functions
(e.g. map functions) reference large objects in the driver program.
@@ -218,7 +198,7 @@ Apart from these, the following properties are also available, and may be useful
</tr>
<tr>
<td>spark.broadcast.factory</td>
- <td>spark.broadcast.HttpBroadcastFactory</td>
+ <td>org.apache.spark.broadcast.<br />HttpBroadcastFactory</td>
<td>
Which broadcast implementation to use.
</td>
@@ -228,8 +208,34 @@ Apart from these, the following properties are also available, and may be useful
<td>3000</td>
<td>
Number of milliseconds to wait to launch a data-local task before giving up and launching it
- in a non-data-local location. You should increase this if your tasks are long and you are seeing
- poor data locality, but the default generally works well.
+ on a less-local node. The same wait will be used to step through multiple locality levels
+ (process-local, node-local, rack-local and then any). It is also possible to customize the
+ waiting time for each level by setting <code>spark.locality.wait.node</code>, etc.
+ You should increase this setting if your tasks are long and see poor locality, but the
+ default usually works well.
+ </td>
+</tr>
+<tr>
+ <td>spark.locality.wait.process</td>
+ <td>spark.locality.wait</td>
+ <td>
+ Customize the locality wait for process locality. This affects tasks that attempt to access
+ cached data in a particular executor process.
+ </td>
+</tr>
+<tr>
+ <td>spark.locality.wait.node</td>
+ <td>spark.locality.wait</td>
+ <td>
+ Customize the locality wait for node locality. For example, you can set this to 0 to skip
+ node locality and search immediately for rack locality (if your cluster has rack information).
+ </td>
+</tr>
+<tr>
+ <td>spark.locality.wait.rack</td>
+ <td>spark.locality.wait</td>
+ <td>
+ Customize the locality wait for rack locality.
</td>
</tr>
<tr>
@@ -280,7 +286,7 @@ Apart from these, the following properties are also available, and may be useful
</tr>
<tr>
<td>spark.cleaner.ttl</td>
- <td>(disable)</td>
+ <td>(infinite)</td>
<td>
Duration (seconds) of how long Spark will remember any metadata (stages generated, tasks generated, etc.).
Periodic cleanups will ensure that metadata older than this duration will be forgetten. This is
@@ -295,9 +301,43 @@ Apart from these, the following properties are also available, and may be useful
Duration (milliseconds) of how long to batch new objects coming from network receivers.
</td>
</tr>
+<tr>
+ <td>spark.task.maxFailures</td>
+ <td>4</td>
+ <td>
+ Number of individual task failures before giving up on the job.
+ Should be greater than or equal to 1. Number of allowed retries = this value - 1.
+ </td>
+</tr>
</table>
+# Environment Variables
+
+Certain Spark settings can also be configured through environment variables, which are read from the `conf/spark-env.sh`
+script in the directory where Spark is installed. These variables are meant to be for machine-specific settings, such
+as library search paths. While Java system properties can also be set here, for application settings, we recommend setting
+these properties within the application instead of in `spark-env.sh` so that different applications can use different
+settings.
+
+Note that `conf/spark-env.sh` does not exist by default when Spark is installed. However, you can copy
+`conf/spark-env.sh.template` to create it. Make sure you make the copy executable.
+
+The following variables can be set in `spark-env.sh`:
+
+* `SPARK_LOCAL_IP`, to configure which IP address of the machine to bind to.
+* `SPARK_LIBRARY_PATH`, to add search directories for native libraries.
+* `SPARK_CLASSPATH`, to add elements to Spark's classpath that you want to be present for _all_ applications.
+ Note that applications can also add dependencies for themselves through `SparkContext.addJar` -- we recommend
+ doing that when possible.
+* `SPARK_JAVA_OPTS`, to add JVM options. This includes Java options like garbage collector settings and any system
+ properties that you'd like to pass with `-D` (e.g., `-Dspark.local.dir=/disk1,/disk2`).
+* Options for the Spark [standalone cluster scripts](spark-standalone.html#cluster-launch-scripts), such as number of cores
+ to use on each machine and maximum memory.
+
+Since `spark-env.sh` is a shell script, some of these can be set programmatically -- for example, you might
+compute `SPARK_LOCAL_IP` by looking up the IP of a specific network interface.
+
# Configuring Logging
Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a `log4j.properties`
diff --git a/docs/css/bootstrap.min.css b/docs/css/bootstrap.min.css
index eb48138e08..3444ed79e2 100644
--- a/docs/css/bootstrap.min.css
+++ b/docs/css/bootstrap.min.css
@@ -6,4 +6,4 @@
* http://www.apache.org/licenses/LICENSE-2.0
*
* Designed and built with all the love in the world @twitter by @mdo and @fat.
- */article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}audio:not([controls]){display:none}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}a:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}a:hover,a:active{outline:0}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sup{top:-0.5em}sub{bottom:-0.25em}img{height:auto;max-width:100%;vertical-align:middle;border:0;-ms-interpolation-mode:bicubic}#map_canvas img{max-width:none}button,input,select,textarea{margin:0;font-size:100%;vertical-align:middle}button,input{*overflow:visible;line-height:normal}button::-moz-focus-inner,input::-moz-focus-inner{padding:0;border:0}button,input[type="button"],input[type="reset"],input[type="submit"]{cursor:pointer;-webkit-appearance:button}input[type="search"]{-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;-webkit-appearance:textfield}input[type="search"]::-webkit-search-decoration,input[type="search"]::-webkit-search-cancel-button{-webkit-appearance:none}textarea{overflow:auto;vertical-align:top}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;line-height:0;content:""}.clearfix:after{clear:both}.hide-text{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.input-block-level{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}body{margin:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;line-height:20px;color:#333;background-color:#fff}a{color:#08c;text-decoration:none}a:hover{color:#005580;text-decoration:underline}.img-rounded{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.img-polaroid{padding:4px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.1);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.1);box-shadow:0 1px 3px rgba(0,0,0,0.1)}.img-circle{-webkit-border-radius:500px;-moz-border-radius:500px;border-radius:500px}.row{margin-left:-20px;*zoom:1}.row:before,.row:after{display:table;line-height:0;content:""}.row:after{clear:both}[class*="span"]{float:left;margin-left:20px}.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.span12{width:940px}.span11{width:860px}.span10{width:780px}.span9{width:700px}.span8{width:620px}.span7{width:540px}.span6{width:460px}.span5{width:380px}.span4{width:300px}.span3{width:220px}.span2{width:140px}.span1{width:60px}.offset12{margin-left:980px}.offset11{margin-left:900px}.offset10{margin-left:820px}.offset9{margin-left:740px}.offset8{margin-left:660px}.offset7{margin-left:580px}.offset6{margin-left:500px}.offset5{margin-left:420px}.offset4{margin-left:340px}.offset3{margin-left:260px}.offset2{margin-left:180px}.offset1{margin-left:100px}.row-fluid{width:100%;*zoom:1}.row-fluid:before,.row-fluid:after{display:table;line-height:0;content:""}.row-fluid:after{clear:both}.row-fluid [class*="span"]{display:block;float:left;width:100%;min-height:30px;margin-left:2.127659574468085%;*margin-left:2.074468085106383%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="span"]:first-child{margin-left:0}.row-fluid .span12{width:100%;*width:99.94680851063829%}.row-fluid .span11{width:91.48936170212765%;*width:91.43617021276594%}.row-fluid .span10{width:82.97872340425532%;*width:82.92553191489361%}.row-fluid .span9{width:74.46808510638297%;*width:74.41489361702126%}.row-fluid .span8{width:65.95744680851064%;*width:65.90425531914893%}.row-fluid .span7{width:57.44680851063829%;*width:57.39361702127659%}.row-fluid .span6{width:48.93617021276595%;*width:48.88297872340425%}.row-fluid .span5{width:40.42553191489362%;*width:40.37234042553192%}.row-fluid .span4{width:31.914893617021278%;*width:31.861702127659576%}.row-fluid .span3{width:23.404255319148934%;*width:23.351063829787233%}.row-fluid .span2{width:14.893617021276595%;*width:14.840425531914894%}.row-fluid .span1{width:6.382978723404255%;*width:6.329787234042553%}.row-fluid .offset12{margin-left:104.25531914893617%;*margin-left:104.14893617021275%}.row-fluid .offset12:first-child{margin-left:102.12765957446808%;*margin-left:102.02127659574467%}.row-fluid .offset11{margin-left:95.74468085106382%;*margin-left:95.6382978723404%}.row-fluid .offset11:first-child{margin-left:93.61702127659574%;*margin-left:93.51063829787232%}.row-fluid .offset10{margin-left:87.23404255319149%;*margin-left:87.12765957446807%}.row-fluid .offset10:first-child{margin-left:85.1063829787234%;*margin-left:84.99999999999999%}.row-fluid .offset9{margin-left:78.72340425531914%;*margin-left:78.61702127659572%}.row-fluid .offset9:first-child{margin-left:76.59574468085106%;*margin-left:76.48936170212764%}.row-fluid .offset8{margin-left:70.2127659574468%;*margin-left:70.10638297872339%}.row-fluid .offset8:first-child{margin-left:68.08510638297872%;*margin-left:67.9787234042553%}.row-fluid .offset7{margin-left:61.70212765957446%;*margin-left:61.59574468085106%}.row-fluid .offset7:first-child{margin-left:59.574468085106375%;*margin-left:59.46808510638297%}.row-fluid .offset6{margin-left:53.191489361702125%;*margin-left:53.085106382978715%}.row-fluid .offset6:first-child{margin-left:51.063829787234035%;*margin-left:50.95744680851063%}.row-fluid .offset5{margin-left:44.68085106382979%;*margin-left:44.57446808510638%}.row-fluid .offset5:first-child{margin-left:42.5531914893617%;*margin-left:42.4468085106383%}.row-fluid .offset4{margin-left:36.170212765957444%;*margin-left:36.06382978723405%}.row-fluid .offset4:first-child{margin-left:34.04255319148936%;*margin-left:33.93617021276596%}.row-fluid .offset3{margin-left:27.659574468085104%;*margin-left:27.5531914893617%}.row-fluid .offset3:first-child{margin-left:25.53191489361702%;*margin-left:25.425531914893618%}.row-fluid .offset2{margin-left:19.148936170212764%;*margin-left:19.04255319148936%}.row-fluid .offset2:first-child{margin-left:17.02127659574468%;*margin-left:16.914893617021278%}.row-fluid .offset1{margin-left:10.638297872340425%;*margin-left:10.53191489361702%}.row-fluid .offset1:first-child{margin-left:8.51063829787234%;*margin-left:8.404255319148938%}[class*="span"].hide,.row-fluid [class*="span"].hide{display:none}[class*="span"].pull-right,.row-fluid [class*="span"].pull-right{float:right}.container{margin-right:auto;margin-left:auto;*zoom:1}.container:before,.container:after{display:table;line-height:0;content:""}.container:after{clear:both}.container-fluid{padding-right:20px;padding-left:20px;*zoom:1}.container-fluid:before,.container-fluid:after{display:table;line-height:0;content:""}.container-fluid:after{clear:both}p{margin:0 0 10px}.lead{margin-bottom:20px;font-size:20px;font-weight:200;line-height:30px}small{font-size:85%}strong{font-weight:bold}em{font-style:italic}cite{font-style:normal}.muted{color:#999}h1,h2,h3,h4,h5,h6{margin:10px 0;font-family:inherit;font-weight:bold;line-height:1;color:inherit;text-rendering:optimizelegibility}h1 small,h2 small,h3 small,h4 small,h5 small,h6 small{font-weight:normal;line-height:1;color:#999}h1{font-size:36px;line-height:40px}h2{font-size:30px;line-height:40px}h3{font-size:24px;line-height:40px}h4{font-size:18px;line-height:20px}h5{font-size:14px;line-height:20px}h6{font-size:12px;line-height:20px}h1 small{font-size:24px}h2 small{font-size:18px}h3 small{font-size:14px}h4 small{font-size:14px}.page-header{padding-bottom:9px;margin:20px 0 30px;border-bottom:1px solid #eee}ul,ol{padding:0;margin:0 0 10px 25px}ul ul,ul ol,ol ol,ol ul{margin-bottom:0}li{line-height:20px}ul.unstyled,ol.unstyled{margin-left:0;list-style:none}dl{margin-bottom:20px}dt,dd{line-height:20px}dt{font-weight:bold}dd{margin-left:10px}.dl-horizontal dt{float:left;width:120px;overflow:hidden;clear:left;text-align:right;text-overflow:ellipsis;white-space:nowrap}.dl-horizontal dd{margin-left:130px}hr{margin:20px 0;border:0;border-top:1px solid #eee;border-bottom:1px solid #fff}abbr[title]{cursor:help;border-bottom:1px dotted #999}abbr.initialism{font-size:90%;text-transform:uppercase}blockquote{padding:0 0 0 15px;margin:0 0 20px;border-left:5px solid #eee}blockquote p{margin-bottom:0;font-size:16px;font-weight:300;line-height:25px}blockquote small{display:block;line-height:20px;color:#999}blockquote small:before{content:'\2014 \00A0'}blockquote.pull-right{float:right;padding-right:15px;padding-left:0;border-right:5px solid #eee;border-left:0}blockquote.pull-right p,blockquote.pull-right small{text-align:right}blockquote.pull-right small:before{content:''}blockquote.pull-right small:after{content:'\00A0 \2014'}q:before,q:after,blockquote:before,blockquote:after{content:""}address{display:block;margin-bottom:20px;font-style:normal;line-height:20px}code,pre{padding:0 3px 2px;font-family:Monaco,Menlo,Consolas,"Courier New",monospace;font-size:12px;color:#333;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}code{padding:2px 4px;color:#d14;background-color:#f7f7f9;border:1px solid #e1e1e8}pre{display:block;padding:9.5px;margin:0 0 10px;font-size:13px;line-height:20px;word-break:break-all;word-wrap:break-word;white-space:pre;white-space:pre-wrap;background-color:#f5f5f5;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.15);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}pre.prettyprint{margin-bottom:20px}pre code{padding:0;color:inherit;background-color:transparent;border:0}.pre-scrollable{max-height:340px;overflow-y:scroll}form{margin:0 0 20px}fieldset{padding:0;margin:0;border:0}legend{display:block;width:100%;padding:0;margin-bottom:20px;font-size:21px;line-height:40px;color:#333;border:0;border-bottom:1px solid #e5e5e5}legend small{font-size:15px;color:#999}label,input,button,select,textarea{font-size:14px;font-weight:normal;line-height:20px}input,button,select,textarea{font-family:"Helvetica Neue",Helvetica,Arial,sans-serif}label{display:block;margin-bottom:5px}select,textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{display:inline-block;height:20px;padding:4px 6px;margin-bottom:9px;font-size:14px;line-height:20px;color:#555;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}input,textarea{width:210px}textarea{height:auto}textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{background-color:#fff;border:1px solid #ccc;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-webkit-transition:border linear .2s,box-shadow linear .2s;-moz-transition:border linear .2s,box-shadow linear .2s;-o-transition:border linear .2s,box-shadow linear .2s;transition:border linear .2s,box-shadow linear .2s}textarea:focus,input[type="text"]:focus,input[type="password"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus,.uneditable-input:focus{border-color:rgba(82,168,236,0.8);outline:0;outline:thin dotted \9;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6)}input[type="radio"],input[type="checkbox"]{margin:4px 0 0;margin-top:1px \9;*margin-top:0;line-height:normal;cursor:pointer}input[type="file"],input[type="image"],input[type="submit"],input[type="reset"],input[type="button"],input[type="radio"],input[type="checkbox"]{width:auto}select,input[type="file"]{height:30px;*margin-top:4px;line-height:30px}select{width:220px;background-color:#fff;border:1px solid #bbb}select[multiple],select[size]{height:auto}select:focus,input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.uneditable-input,.uneditable-textarea{color:#999;cursor:not-allowed;background-color:#fcfcfc;border-color:#ccc;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);box-shadow:inset 0 1px 2px rgba(0,0,0,0.025)}.uneditable-input{overflow:hidden;white-space:nowrap}.uneditable-textarea{width:auto;height:auto}input:-moz-placeholder,textarea:-moz-placeholder{color:#999}input:-ms-input-placeholder,textarea:-ms-input-placeholder{color:#999}input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#999}.radio,.checkbox{min-height:18px;padding-left:18px}.radio input[type="radio"],.checkbox input[type="checkbox"]{float:left;margin-left:-18px}.controls>.radio:first-child,.controls>.checkbox:first-child{padding-top:5px}.radio.inline,.checkbox.inline{display:inline-block;padding-top:5px;margin-bottom:0;vertical-align:middle}.radio.inline+.radio.inline,.checkbox.inline+.checkbox.inline{margin-left:10px}.input-mini{width:60px}.input-small{width:90px}.input-medium{width:150px}.input-large{width:210px}.input-xlarge{width:270px}.input-xxlarge{width:530px}input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"]{float:none;margin-left:0}.input-append input[class*="span"],.input-append .uneditable-input[class*="span"],.input-prepend input[class*="span"],.input-prepend .uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"],.row-fluid .input-prepend [class*="span"],.row-fluid .input-append [class*="span"]{display:inline-block}input,textarea,.uneditable-input{margin-left:0}.controls-row [class*="span"]+[class*="span"]{margin-left:20px}input.span12,textarea.span12,.uneditable-input.span12{width:926px}input.span11,textarea.span11,.uneditable-input.span11{width:846px}input.span10,textarea.span10,.uneditable-input.span10{width:766px}input.span9,textarea.span9,.uneditable-input.span9{width:686px}input.span8,textarea.span8,.uneditable-input.span8{width:606px}input.span7,textarea.span7,.uneditable-input.span7{width:526px}input.span6,textarea.span6,.uneditable-input.span6{width:446px}input.span5,textarea.span5,.uneditable-input.span5{width:366px}input.span4,textarea.span4,.uneditable-input.span4{width:286px}input.span3,textarea.span3,.uneditable-input.span3{width:206px}input.span2,textarea.span2,.uneditable-input.span2{width:126px}input.span1,textarea.span1,.uneditable-input.span1{width:46px}.controls-row{*zoom:1}.controls-row:before,.controls-row:after{display:table;line-height:0;content:""}.controls-row:after{clear:both}.controls-row [class*="span"]{float:left}input[disabled],select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#eee}input[type="radio"][disabled],input[type="checkbox"][disabled],input[type="radio"][readonly],input[type="checkbox"][readonly]{background-color:transparent}.control-group.warning>label,.control-group.warning .help-block,.control-group.warning .help-inline{color:#c09853}.control-group.warning .checkbox,.control-group.warning .radio,.control-group.warning input,.control-group.warning select,.control-group.warning textarea{color:#c09853;border-color:#c09853;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.warning .checkbox:focus,.control-group.warning .radio:focus,.control-group.warning input:focus,.control-group.warning select:focus,.control-group.warning textarea:focus{border-color:#a47e3c;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e}.control-group.warning .input-prepend .add-on,.control-group.warning .input-append .add-on{color:#c09853;background-color:#fcf8e3;border-color:#c09853}.control-group.error>label,.control-group.error .help-block,.control-group.error .help-inline{color:#b94a48}.control-group.error .checkbox,.control-group.error .radio,.control-group.error input,.control-group.error select,.control-group.error textarea{color:#b94a48;border-color:#b94a48;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.error .checkbox:focus,.control-group.error .radio:focus,.control-group.error input:focus,.control-group.error select:focus,.control-group.error textarea:focus{border-color:#953b39;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392}.control-group.error .input-prepend .add-on,.control-group.error .input-append .add-on{color:#b94a48;background-color:#f2dede;border-color:#b94a48}.control-group.success>label,.control-group.success .help-block,.control-group.success .help-inline{color:#468847}.control-group.success .checkbox,.control-group.success .radio,.control-group.success input,.control-group.success select,.control-group.success textarea{color:#468847;border-color:#468847;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.success .checkbox:focus,.control-group.success .radio:focus,.control-group.success input:focus,.control-group.success select:focus,.control-group.success textarea:focus{border-color:#356635;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b}.control-group.success .input-prepend .add-on,.control-group.success .input-append .add-on{color:#468847;background-color:#dff0d8;border-color:#468847}input:focus:required:invalid,textarea:focus:required:invalid,select:focus:required:invalid{color:#b94a48;border-color:#ee5f5b}input:focus:required:invalid:focus,textarea:focus:required:invalid:focus,select:focus:required:invalid:focus{border-color:#e9322d;-webkit-box-shadow:0 0 6px #f8b9b7;-moz-box-shadow:0 0 6px #f8b9b7;box-shadow:0 0 6px #f8b9b7}.form-actions{padding:19px 20px 20px;margin-top:20px;margin-bottom:20px;background-color:#f5f5f5;border-top:1px solid #e5e5e5;*zoom:1}.form-actions:before,.form-actions:after{display:table;line-height:0;content:""}.form-actions:after{clear:both}.help-block,.help-inline{color:#595959}.help-block{display:block;margin-bottom:10px}.help-inline{display:inline-block;*display:inline;padding-left:5px;vertical-align:middle;*zoom:1}.input-append,.input-prepend{margin-bottom:5px;font-size:0;white-space:nowrap}.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input{position:relative;margin-bottom:0;*margin-left:0;font-size:14px;vertical-align:top;-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.input-append input:focus,.input-prepend input:focus,.input-append select:focus,.input-prepend select:focus,.input-append .uneditable-input:focus,.input-prepend .uneditable-input:focus{z-index:2}.input-append .add-on,.input-prepend .add-on{display:inline-block;width:auto;height:20px;min-width:16px;padding:4px 5px;font-size:14px;font-weight:normal;line-height:20px;text-align:center;text-shadow:0 1px 0 #fff;background-color:#eee;border:1px solid #ccc}.input-append .add-on,.input-prepend .add-on,.input-append .btn,.input-prepend .btn{margin-left:-1px;vertical-align:top;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-append .active,.input-prepend .active{background-color:#a9dba9;border-color:#46a546}.input-prepend .add-on,.input-prepend .btn{margin-right:-1px}.input-prepend .add-on:first-child,.input-prepend .btn:first-child{-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-append input,.input-append select,.input-append .uneditable-input{-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-append .add-on:last-child,.input-append .btn:last-child{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.input-prepend.input-append input,.input-prepend.input-append select,.input-prepend.input-append .uneditable-input{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-prepend.input-append .add-on:first-child,.input-prepend.input-append .btn:first-child{margin-right:-1px;-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-prepend.input-append .add-on:last-child,.input-prepend.input-append .btn:last-child{margin-left:-1px;-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}input.search-query{padding-right:14px;padding-right:4px \9;padding-left:14px;padding-left:4px \9;margin-bottom:0;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.form-search .input-append .search-query,.form-search .input-prepend .search-query{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.form-search .input-append .search-query{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px}.form-search .input-append .btn{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .search-query{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .btn{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px}.form-search input,.form-inline input,.form-horizontal input,.form-search textarea,.form-inline textarea,.form-horizontal textarea,.form-search select,.form-inline select,.form-horizontal select,.form-search .help-inline,.form-inline .help-inline,.form-horizontal .help-inline,.form-search .uneditable-input,.form-inline .uneditable-input,.form-horizontal .uneditable-input,.form-search .input-prepend,.form-inline .input-prepend,.form-horizontal .input-prepend,.form-search .input-append,.form-inline .input-append,.form-horizontal .input-append{display:inline-block;*display:inline;margin-bottom:0;vertical-align:middle;*zoom:1}.form-search .hide,.form-inline .hide,.form-horizontal .hide{display:none}.form-search label,.form-inline label,.form-search .btn-group,.form-inline .btn-group{display:inline-block}.form-search .input-append,.form-inline .input-append,.form-search .input-prepend,.form-inline .input-prepend{margin-bottom:0}.form-search .radio,.form-search .checkbox,.form-inline .radio,.form-inline .checkbox{padding-left:0;margin-bottom:0;vertical-align:middle}.form-search .radio input[type="radio"],.form-search .checkbox input[type="checkbox"],.form-inline .radio input[type="radio"],.form-inline .checkbox input[type="checkbox"]{float:left;margin-right:3px;margin-left:0}.control-group{margin-bottom:10px}legend+.control-group{margin-top:20px;-webkit-margin-top-collapse:separate}.form-horizontal .control-group{margin-bottom:20px;*zoom:1}.form-horizontal .control-group:before,.form-horizontal .control-group:after{display:table;line-height:0;content:""}.form-horizontal .control-group:after{clear:both}.form-horizontal .control-label{float:left;width:140px;padding-top:5px;text-align:right}.form-horizontal .controls{*display:inline-block;*padding-left:20px;margin-left:160px;*margin-left:0}.form-horizontal .controls:first-child{*padding-left:160px}.form-horizontal .help-block{margin-top:10px;margin-bottom:0}.form-horizontal .form-actions{padding-left:160px}table{max-width:100%;background-color:transparent;border-collapse:collapse;border-spacing:0}.table{width:100%;margin-bottom:20px}.table th,.table td{padding:8px;line-height:20px;text-align:left;vertical-align:top;border-top:1px solid #ddd}.table th{font-weight:bold}.table thead th{vertical-align:bottom}.table caption+thead tr:first-child th,.table caption+thead tr:first-child td,.table colgroup+thead tr:first-child th,.table colgroup+thead tr:first-child td,.table thead:first-child tr:first-child th,.table thead:first-child tr:first-child td{border-top:0}.table tbody+tbody{border-top:2px solid #ddd}.table-condensed th,.table-condensed td{padding:4px 5px}.table-bordered{border:1px solid #ddd;border-collapse:separate;*border-collapse:collapse;border-left:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.table-bordered th,.table-bordered td{border-left:1px solid #ddd}.table-bordered caption+thead tr:first-child th,.table-bordered caption+tbody tr:first-child th,.table-bordered caption+tbody tr:first-child td,.table-bordered colgroup+thead tr:first-child th,.table-bordered colgroup+tbody tr:first-child th,.table-bordered colgroup+tbody tr:first-child td,.table-bordered thead:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child td{border-top:0}.table-bordered thead:first-child tr:first-child th:first-child,.table-bordered tbody:first-child tr:first-child td:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered thead:first-child tr:first-child th:last-child,.table-bordered tbody:first-child tr:first-child td:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-radius-topright:4px}.table-bordered thead:last-child tr:last-child th:first-child,.table-bordered tbody:last-child tr:last-child td:first-child,.table-bordered tfoot:last-child tr:last-child td:first-child{-webkit-border-radius:0 0 0 4px;-moz-border-radius:0 0 0 4px;border-radius:0 0 0 4px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px}.table-bordered thead:last-child tr:last-child th:last-child,.table-bordered tbody:last-child tr:last-child td:last-child,.table-bordered tfoot:last-child tr:last-child td:last-child{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px}.table-bordered caption+thead tr:first-child th:first-child,.table-bordered caption+tbody tr:first-child td:first-child,.table-bordered colgroup+thead tr:first-child th:first-child,.table-bordered colgroup+tbody tr:first-child td:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered caption+thead tr:first-child th:last-child,.table-bordered caption+tbody tr:first-child td:last-child,.table-bordered colgroup+thead tr:first-child th:last-child,.table-bordered colgroup+tbody tr:first-child td:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-right-topleft:4px}.table-striped tbody tr:nth-child(odd) td,.table-striped tbody tr:nth-child(odd) th{background-color:#f9f9f9}.table-hover tbody tr:hover td,.table-hover tbody tr:hover th{background-color:#f5f5f5}table [class*=span],.row-fluid table [class*=span]{display:table-cell;float:none;margin-left:0}table .span1{float:none;width:44px;margin-left:0}table .span2{float:none;width:124px;margin-left:0}table .span3{float:none;width:204px;margin-left:0}table .span4{float:none;width:284px;margin-left:0}table .span5{float:none;width:364px;margin-left:0}table .span6{float:none;width:444px;margin-left:0}table .span7{float:none;width:524px;margin-left:0}table .span8{float:none;width:604px;margin-left:0}table .span9{float:none;width:684px;margin-left:0}table .span10{float:none;width:764px;margin-left:0}table .span11{float:none;width:844px;margin-left:0}table .span12{float:none;width:924px;margin-left:0}table .span13{float:none;width:1004px;margin-left:0}table .span14{float:none;width:1084px;margin-left:0}table .span15{float:none;width:1164px;margin-left:0}table .span16{float:none;width:1244px;margin-left:0}table .span17{float:none;width:1324px;margin-left:0}table .span18{float:none;width:1404px;margin-left:0}table .span19{float:none;width:1484px;margin-left:0}table .span20{float:none;width:1564px;margin-left:0}table .span21{float:none;width:1644px;margin-left:0}table .span22{float:none;width:1724px;margin-left:0}table .span23{float:none;width:1804px;margin-left:0}table .span24{float:none;width:1884px;margin-left:0}.table tbody tr.success td{background-color:#dff0d8}.table tbody tr.error td{background-color:#f2dede}.table tbody tr.info td{background-color:#d9edf7}[class^="icon-"],[class*=" icon-"]{display:inline-block;width:14px;height:14px;margin-top:1px;*margin-right:.3em;line-height:14px;vertical-align:text-top;background-image:url("../img/glyphicons-halflings.png");background-position:14px 14px;background-repeat:no-repeat}.icon-white,.nav>.active>a>[class^="icon-"],.nav>.active>a>[class*=" icon-"],.dropdown-menu>li>a:hover>[class^="icon-"],.dropdown-menu>li>a:hover>[class*=" icon-"],.dropdown-menu>.active>a>[class^="icon-"],.dropdown-menu>.active>a>[class*=" icon-"]{background-image:url("../img/glyphicons-halflings-white.png")}.icon-glass{background-position:0 0}.icon-music{background-position:-24px 0}.icon-search{background-position:-48px 0}.icon-envelope{background-position:-72px 0}.icon-heart{background-position:-96px 0}.icon-star{background-position:-120px 0}.icon-star-empty{background-position:-144px 0}.icon-user{background-position:-168px 0}.icon-film{background-position:-192px 0}.icon-th-large{background-position:-216px 0}.icon-th{background-position:-240px 0}.icon-th-list{background-position:-264px 0}.icon-ok{background-position:-288px 0}.icon-remove{background-position:-312px 0}.icon-zoom-in{background-position:-336px 0}.icon-zoom-out{background-position:-360px 0}.icon-off{background-position:-384px 0}.icon-signal{background-position:-408px 0}.icon-cog{background-position:-432px 0}.icon-trash{background-position:-456px 0}.icon-home{background-position:0 -24px}.icon-file{background-position:-24px -24px}.icon-time{background-position:-48px -24px}.icon-road{background-position:-72px -24px}.icon-download-alt{background-position:-96px -24px}.icon-download{background-position:-120px -24px}.icon-upload{background-position:-144px -24px}.icon-inbox{background-position:-168px -24px}.icon-play-circle{background-position:-192px -24px}.icon-repeat{background-position:-216px -24px}.icon-refresh{background-position:-240px -24px}.icon-list-alt{background-position:-264px -24px}.icon-lock{background-position:-287px -24px}.icon-flag{background-position:-312px -24px}.icon-headphones{background-position:-336px -24px}.icon-volume-off{background-position:-360px -24px}.icon-volume-down{background-position:-384px -24px}.icon-volume-up{background-position:-408px -24px}.icon-qrcode{background-position:-432px -24px}.icon-barcode{background-position:-456px -24px}.icon-tag{background-position:0 -48px}.icon-tags{background-position:-25px -48px}.icon-book{background-position:-48px -48px}.icon-bookmark{background-position:-72px -48px}.icon-print{background-position:-96px -48px}.icon-camera{background-position:-120px -48px}.icon-font{background-position:-144px -48px}.icon-bold{background-position:-167px -48px}.icon-italic{background-position:-192px -48px}.icon-text-height{background-position:-216px -48px}.icon-text-width{background-position:-240px -48px}.icon-align-left{background-position:-264px -48px}.icon-align-center{background-position:-288px -48px}.icon-align-right{background-position:-312px -48px}.icon-align-justify{background-position:-336px -48px}.icon-list{background-position:-360px -48px}.icon-indent-left{background-position:-384px -48px}.icon-indent-right{background-position:-408px -48px}.icon-facetime-video{background-position:-432px -48px}.icon-picture{background-position:-456px -48px}.icon-pencil{background-position:0 -72px}.icon-map-marker{background-position:-24px -72px}.icon-adjust{background-position:-48px -72px}.icon-tint{background-position:-72px -72px}.icon-edit{background-position:-96px -72px}.icon-share{background-position:-120px -72px}.icon-check{background-position:-144px -72px}.icon-move{background-position:-168px -72px}.icon-step-backward{background-position:-192px -72px}.icon-fast-backward{background-position:-216px -72px}.icon-backward{background-position:-240px -72px}.icon-play{background-position:-264px -72px}.icon-pause{background-position:-288px -72px}.icon-stop{background-position:-312px -72px}.icon-forward{background-position:-336px -72px}.icon-fast-forward{background-position:-360px -72px}.icon-step-forward{background-position:-384px -72px}.icon-eject{background-position:-408px -72px}.icon-chevron-left{background-position:-432px -72px}.icon-chevron-right{background-position:-456px -72px}.icon-plus-sign{background-position:0 -96px}.icon-minus-sign{background-position:-24px -96px}.icon-remove-sign{background-position:-48px -96px}.icon-ok-sign{background-position:-72px -96px}.icon-question-sign{background-position:-96px -96px}.icon-info-sign{background-position:-120px -96px}.icon-screenshot{background-position:-144px -96px}.icon-remove-circle{background-position:-168px -96px}.icon-ok-circle{background-position:-192px -96px}.icon-ban-circle{background-position:-216px -96px}.icon-arrow-left{background-position:-240px -96px}.icon-arrow-right{background-position:-264px -96px}.icon-arrow-up{background-position:-289px -96px}.icon-arrow-down{background-position:-312px -96px}.icon-share-alt{background-position:-336px -96px}.icon-resize-full{background-position:-360px -96px}.icon-resize-small{background-position:-384px -96px}.icon-plus{background-position:-408px -96px}.icon-minus{background-position:-433px -96px}.icon-asterisk{background-position:-456px -96px}.icon-exclamation-sign{background-position:0 -120px}.icon-gift{background-position:-24px -120px}.icon-leaf{background-position:-48px -120px}.icon-fire{background-position:-72px -120px}.icon-eye-open{background-position:-96px -120px}.icon-eye-close{background-position:-120px -120px}.icon-warning-sign{background-position:-144px -120px}.icon-plane{background-position:-168px -120px}.icon-calendar{background-position:-192px -120px}.icon-random{width:16px;background-position:-216px -120px}.icon-comment{background-position:-240px -120px}.icon-magnet{background-position:-264px -120px}.icon-chevron-up{background-position:-288px -120px}.icon-chevron-down{background-position:-313px -119px}.icon-retweet{background-position:-336px -120px}.icon-shopping-cart{background-position:-360px -120px}.icon-folder-close{background-position:-384px -120px}.icon-folder-open{width:16px;background-position:-408px -120px}.icon-resize-vertical{background-position:-432px -119px}.icon-resize-horizontal{background-position:-456px -118px}.icon-hdd{background-position:0 -144px}.icon-bullhorn{background-position:-24px -144px}.icon-bell{background-position:-48px -144px}.icon-certificate{background-position:-72px -144px}.icon-thumbs-up{background-position:-96px -144px}.icon-thumbs-down{background-position:-120px -144px}.icon-hand-right{background-position:-144px -144px}.icon-hand-left{background-position:-168px -144px}.icon-hand-up{background-position:-192px -144px}.icon-hand-down{background-position:-216px -144px}.icon-circle-arrow-right{background-position:-240px -144px}.icon-circle-arrow-left{background-position:-264px -144px}.icon-circle-arrow-up{background-position:-288px -144px}.icon-circle-arrow-down{background-position:-312px -144px}.icon-globe{background-position:-336px -144px}.icon-wrench{background-position:-360px -144px}.icon-tasks{background-position:-384px -144px}.icon-filter{background-position:-408px -144px}.icon-briefcase{background-position:-432px -144px}.icon-fullscreen{background-position:-456px -144px}.dropup,.dropdown{position:relative}.dropdown-toggle{*margin-bottom:-3px}.dropdown-toggle:active,.open .dropdown-toggle{outline:0}.caret{display:inline-block;width:0;height:0;vertical-align:top;border-top:4px solid #000;border-right:4px solid transparent;border-left:4px solid transparent;content:""}.dropdown .caret{margin-top:8px;margin-left:2px}.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:160px;padding:5px 0;margin:2px 0 0;list-style:none;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);*border-right-width:2px;*border-bottom-width:2px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.dropdown-menu.pull-right{right:0;left:auto}.dropdown-menu .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.dropdown-menu a{display:block;padding:3px 20px;clear:both;font-weight:normal;line-height:20px;color:#333;white-space:nowrap}.dropdown-menu li>a:hover,.dropdown-menu li>a:focus,.dropdown-submenu:hover>a{color:#fff;text-decoration:none;background-color:#0098cc;background-color:#0098cc;background-image:-moz-linear-gradient(top,#0098cc,#0087b3);background-image:-webkit-gradient(linear,0 0,0 100%,from(#0098cc),to(#0087b3));background-image:-webkit-linear-gradient(top,#0098cc,#0087b3);background-image:-o-linear-gradient(top,#0098cc,#0087b3);background-image:linear-gradient(to bottom,#0098cc,#0087b3);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff0098cc',endColorstr='#ff0087b3',GradientType=0)}.dropdown-menu .active>a,.dropdown-menu .active>a:hover{color:#fff;text-decoration:none;background-color:#0098cc;background-color:#0081c2;background-image:linear-gradient(to bottom,#0098cc,#0087b3);background-image:-moz-linear-gradient(top,#0098cc,#0087b3);background-image:-webkit-gradient(linear,0 0,0 100%,from(#0098cc),to(#0087b3));background-image:-webkit-linear-gradient(top,#0098cc,#0087b3);background-image:-o-linear-gradient(top,#0098cc,#0087b3);background-repeat:repeat-x;outline:0;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff0098cc',endColorstr='#ff0087b3',GradientType=0)}.dropdown-menu .disabled>a,.dropdown-menu .disabled>a:hover{color:#999}.dropdown-menu .disabled>a:hover{text-decoration:none;cursor:default;background-color:transparent}.open{*z-index:1000}.open>.dropdown-menu{display:block}.pull-right>.dropdown-menu{right:0;left:auto}.dropup .caret,.navbar-fixed-bottom .dropdown .caret{border-top:0;border-bottom:4px solid #000;content:"\2191"}.dropup .dropdown-menu,.navbar-fixed-bottom .dropdown .dropdown-menu{top:auto;bottom:100%;margin-bottom:1px}.dropdown-submenu{position:relative}.dropdown-submenu>.dropdown-menu{top:0;left:100%;margin-top:-6px;margin-left:-1px;-webkit-border-radius:0 6px 6px 6px;-moz-border-radius:0 6px 6px 6px;border-radius:0 6px 6px 6px}.dropdown-submenu:hover .dropdown-menu{display:block}.dropdown-submenu>a:after{display:block;float:right;width:0;height:0;margin-top:5px;margin-right:-10px;border-color:transparent;border-left-color:#ccc;border-style:solid;border-width:5px 0 5px 5px;content:" "}.dropdown-submenu:hover>a:after{border-left-color:#fff}.dropdown .dropdown-menu .nav-header{padding-right:20px;padding-left:20px}.typeahead{margin-top:2px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.well{min-height:20px;padding:19px;margin-bottom:20px;background-color:#f5f5f5;border:1px solid #e3e3e3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);box-shadow:inset 0 1px 1px rgba(0,0,0,0.05)}.well blockquote{border-color:#ddd;border-color:rgba(0,0,0,0.15)}.well-large{padding:24px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.well-small{padding:9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.fade{opacity:0;-webkit-transition:opacity .15s linear;-moz-transition:opacity .15s linear;-o-transition:opacity .15s linear;transition:opacity .15s linear}.fade.in{opacity:1}.collapse{position:relative;height:0;overflow:hidden;overflow:visible \9;-webkit-transition:height .35s ease;-moz-transition:height .35s ease;-o-transition:height .35s ease;transition:height .35s ease}.collapse.in{height:auto}.close{float:right;font-size:20px;font-weight:bold;line-height:20px;color:#000;text-shadow:0 1px 0 #fff;opacity:.2;filter:alpha(opacity=20)}.close:hover{color:#000;text-decoration:none;cursor:pointer;opacity:.4;filter:alpha(opacity=40)}button.close{padding:0;cursor:pointer;background:transparent;border:0;-webkit-appearance:none}.btn{display:inline-block;*display:inline;padding:4px 14px;margin-bottom:0;*margin-left:.3em;font-size:14px;line-height:20px;*line-height:20px;color:#333;text-align:center;text-shadow:0 1px 1px rgba(255,255,255,0.75);vertical-align:middle;cursor:pointer;background-color:#f5f5f5;*background-color:#e6e6e6;background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#e6e6e6));background-image:-webkit-linear-gradient(top,#fff,#e6e6e6);background-image:-o-linear-gradient(top,#fff,#e6e6e6);background-image:linear-gradient(to bottom,#fff,#e6e6e6);background-image:-moz-linear-gradient(top,#fff,#e6e6e6);background-repeat:repeat-x;border:1px solid #bbb;*border:0;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);border-color:#e6e6e6 #e6e6e6 #bfbfbf;border-bottom-color:#a2a2a2;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffffffff',endColorstr='#ffe6e6e6',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false);*zoom:1;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05)}.btn:hover,.btn:active,.btn.active,.btn.disabled,.btn[disabled]{color:#333;background-color:#e6e6e6;*background-color:#d9d9d9}.btn:active,.btn.active{background-color:#ccc \9}.btn:first-child{*margin-left:0}.btn:hover{color:#333;text-decoration:none;background-color:#e6e6e6;*background-color:#d9d9d9;background-position:0 -15px;-webkit-transition:background-position .1s linear;-moz-transition:background-position .1s linear;-o-transition:background-position .1s linear;transition:background-position .1s linear}.btn:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.btn.active,.btn:active{background-color:#e6e6e6;background-color:#d9d9d9 \9;background-image:none;outline:0;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn.disabled,.btn[disabled]{cursor:default;background-color:#e6e6e6;background-image:none;opacity:.65;filter:alpha(opacity=65);-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-large{padding:9px 14px;font-size:16px;line-height:normal;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px}.btn-large [class^="icon-"]{margin-top:2px}.btn-small{padding:3px 9px;font-size:12px;line-height:18px}.btn-small [class^="icon-"]{margin-top:0}.btn-mini{padding:2px 6px;font-size:11px;line-height:16px}.btn-block{display:block;width:100%;padding-right:0;padding-left:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.btn-block+.btn-block{margin-top:5px}.btn-primary.active,.btn-warning.active,.btn-danger.active,.btn-success.active,.btn-info.active,.btn-inverse.active{color:rgba(255,255,255,0.75)}.btn{border-color:#c5c5c5;border-color:rgba(0,0,0,0.15) rgba(0,0,0,0.15) rgba(0,0,0,0.25)}.btn-primary{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#006dcc;*background-color:#04c;background-image:-webkit-gradient(linear,0 0,0 100%,from(#08c),to(#04c));background-image:-webkit-linear-gradient(top,#08c,#04c);background-image:-o-linear-gradient(top,#08c,#04c);background-image:linear-gradient(to bottom,#08c,#04c);background-image:-moz-linear-gradient(top,#08c,#04c);background-repeat:repeat-x;border-color:#04c #04c #002a80;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0044cc',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-primary:hover,.btn-primary:active,.btn-primary.active,.btn-primary.disabled,.btn-primary[disabled]{color:#fff;background-color:#04c;*background-color:#003bb3}.btn-primary:active,.btn-primary.active{background-color:#039 \9}.btn-warning{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#faa732;*background-color:#f89406;background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-repeat:repeat-x;border-color:#f89406 #f89406 #ad6704;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-warning:hover,.btn-warning:active,.btn-warning.active,.btn-warning.disabled,.btn-warning[disabled]{color:#fff;background-color:#f89406;*background-color:#df8505}.btn-warning:active,.btn-warning.active{background-color:#c67605 \9}.btn-danger{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#da4f49;*background-color:#bd362f;background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#bd362f));background-image:-webkit-linear-gradient(top,#ee5f5b,#bd362f);background-image:-o-linear-gradient(top,#ee5f5b,#bd362f);background-image:linear-gradient(to bottom,#ee5f5b,#bd362f);background-image:-moz-linear-gradient(top,#ee5f5b,#bd362f);background-repeat:repeat-x;border-color:#bd362f #bd362f #802420;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffbd362f',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-danger:hover,.btn-danger:active,.btn-danger.active,.btn-danger.disabled,.btn-danger[disabled]{color:#fff;background-color:#bd362f;*background-color:#a9302a}.btn-danger:active,.btn-danger.active{background-color:#942a25 \9}.btn-success{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#5bb75b;*background-color:#51a351;background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#51a351));background-image:-webkit-linear-gradient(top,#62c462,#51a351);background-image:-o-linear-gradient(top,#62c462,#51a351);background-image:linear-gradient(to bottom,#62c462,#51a351);background-image:-moz-linear-gradient(top,#62c462,#51a351);background-repeat:repeat-x;border-color:#51a351 #51a351 #387038;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff51a351',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-success:hover,.btn-success:active,.btn-success.active,.btn-success.disabled,.btn-success[disabled]{color:#fff;background-color:#51a351;*background-color:#499249}.btn-success:active,.btn-success.active{background-color:#408140 \9}.btn-info{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#49afcd;*background-color:#2f96b4;background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#2f96b4));background-image:-webkit-linear-gradient(top,#5bc0de,#2f96b4);background-image:-o-linear-gradient(top,#5bc0de,#2f96b4);background-image:linear-gradient(to bottom,#5bc0de,#2f96b4);background-image:-moz-linear-gradient(top,#5bc0de,#2f96b4);background-repeat:repeat-x;border-color:#2f96b4 #2f96b4 #1f6377;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff2f96b4',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-info:hover,.btn-info:active,.btn-info.active,.btn-info.disabled,.btn-info[disabled]{color:#fff;background-color:#2f96b4;*background-color:#2a85a0}.btn-info:active,.btn-info.active{background-color:#24748c \9}.btn-inverse{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#363636;*background-color:#222;background-image:-webkit-gradient(linear,0 0,0 100%,from(#444),to(#222));background-image:-webkit-linear-gradient(top,#444,#222);background-image:-o-linear-gradient(top,#444,#222);background-image:linear-gradient(to bottom,#444,#222);background-image:-moz-linear-gradient(top,#444,#222);background-repeat:repeat-x;border-color:#222 #222 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff444444',endColorstr='#ff222222',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-inverse:hover,.btn-inverse:active,.btn-inverse.active,.btn-inverse.disabled,.btn-inverse[disabled]{color:#fff;background-color:#222;*background-color:#151515}.btn-inverse:active,.btn-inverse.active{background-color:#080808 \9}button.btn,input[type="submit"].btn{*padding-top:3px;*padding-bottom:3px}button.btn::-moz-focus-inner,input[type="submit"].btn::-moz-focus-inner{padding:0;border:0}button.btn.btn-large,input[type="submit"].btn.btn-large{*padding-top:7px;*padding-bottom:7px}button.btn.btn-small,input[type="submit"].btn.btn-small{*padding-top:3px;*padding-bottom:3px}button.btn.btn-mini,input[type="submit"].btn.btn-mini{*padding-top:1px;*padding-bottom:1px}.btn-link,.btn-link:active{background-color:transparent;background-image:none;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-link{color:#08c;cursor:pointer;border-color:transparent;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-link:hover{color:#005580;text-decoration:underline;background-color:transparent}.btn-group{position:relative;*margin-left:.3em;font-size:0;white-space:nowrap}.btn-group:first-child{*margin-left:0}.btn-group+.btn-group{margin-left:5px}.btn-toolbar{margin-top:10px;margin-bottom:10px;font-size:0}.btn-toolbar .btn-group{display:inline-block;*display:inline;*zoom:1}.btn-toolbar .btn+.btn,.btn-toolbar .btn-group+.btn,.btn-toolbar .btn+.btn-group{margin-left:5px}.btn-group>.btn{position:relative;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group>.btn+.btn{margin-left:-1px}.btn-group>.btn,.btn-group>.dropdown-menu{font-size:14px}.btn-group>.btn-mini{font-size:11px}.btn-group>.btn-small{font-size:12px}.btn-group>.btn-large{font-size:16px}.btn-group>.btn:first-child{margin-left:0;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-bottomleft:4px;-moz-border-radius-topleft:4px}.btn-group>.btn:last-child,.btn-group>.dropdown-toggle{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-bottomright:4px}.btn-group>.btn.large:first-child{margin-left:0;-webkit-border-bottom-left-radius:6px;border-bottom-left-radius:6px;-webkit-border-top-left-radius:6px;border-top-left-radius:6px;-moz-border-radius-bottomleft:6px;-moz-border-radius-topleft:6px}.btn-group>.btn.large:last-child,.btn-group>.large.dropdown-toggle{-webkit-border-top-right-radius:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;border-bottom-right-radius:6px;-moz-border-radius-topright:6px;-moz-border-radius-bottomright:6px}.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active{z-index:2}.btn-group .dropdown-toggle:active,.btn-group.open .dropdown-toggle{outline:0}.btn-group>.btn+.dropdown-toggle{*padding-top:5px;padding-right:8px;*padding-bottom:5px;padding-left:8px;-webkit-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05)}.btn-group>.btn-mini+.dropdown-toggle{*padding-top:2px;padding-right:5px;*padding-bottom:2px;padding-left:5px}.btn-group>.btn-small+.dropdown-toggle{*padding-top:5px;*padding-bottom:4px}.btn-group>.btn-large+.dropdown-toggle{*padding-top:7px;padding-right:12px;*padding-bottom:7px;padding-left:12px}.btn-group.open .dropdown-toggle{background-image:none;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn-group.open .btn.dropdown-toggle{background-color:#e6e6e6}.btn-group.open .btn-primary.dropdown-toggle{background-color:#04c}.btn-group.open .btn-warning.dropdown-toggle{background-color:#f89406}.btn-group.open .btn-danger.dropdown-toggle{background-color:#bd362f}.btn-group.open .btn-success.dropdown-toggle{background-color:#51a351}.btn-group.open .btn-info.dropdown-toggle{background-color:#2f96b4}.btn-group.open .btn-inverse.dropdown-toggle{background-color:#222}.btn .caret{margin-top:8px;margin-left:0}.btn-mini .caret,.btn-small .caret,.btn-large .caret{margin-top:6px}.btn-large .caret{border-top-width:5px;border-right-width:5px;border-left-width:5px}.dropup .btn-large .caret{border-top:0;border-bottom:5px solid #000}.btn-primary .caret,.btn-warning .caret,.btn-danger .caret,.btn-info .caret,.btn-success .caret,.btn-inverse .caret{border-top-color:#fff;border-bottom-color:#fff}.btn-group-vertical{display:inline-block;*display:inline;*zoom:1}.btn-group-vertical .btn{display:block;float:none;width:100%;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group-vertical .btn+.btn{margin-top:-1px;margin-left:0}.btn-group-vertical .btn:first-child{-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.btn-group-vertical .btn:last-child{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.btn-group-vertical .btn-large:first-child{-webkit-border-radius:6px 6px 0 0;-moz-border-radius:6px 6px 0 0;border-radius:6px 6px 0 0}.btn-group-vertical .btn-large:last-child{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.alert{padding:8px 35px 8px 14px;margin-bottom:20px;color:#c09853;text-shadow:0 1px 0 rgba(255,255,255,0.5);background-color:#fcf8e3;border:1px solid #fbeed5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.alert h4{margin:0}.alert .close{position:relative;top:-2px;right:-21px;line-height:20px}.alert-success{color:#468847;background-color:#dff0d8;border-color:#d6e9c6}.alert-danger,.alert-error{color:#b94a48;background-color:#f2dede;border-color:#eed3d7}.alert-info{color:#3a87ad;background-color:#d9edf7;border-color:#bce8f1}.alert-block{padding-top:14px;padding-bottom:14px}.alert-block>p,.alert-block>ul{margin-bottom:0}.alert-block p+p{margin-top:5px}.nav{margin-bottom:20px;margin-left:0;list-style:none}.nav>li>a{display:block}.nav>li>a:hover{text-decoration:none;background-color:#eee}.nav>.pull-right{float:right}.nav-header{display:block;padding:3px 15px;font-size:11px;font-weight:bold;line-height:20px;color:#999;text-shadow:0 1px 0 rgba(255,255,255,0.5);text-transform:uppercase}.nav li+.nav-header{margin-top:9px}.nav-list{padding-right:15px;padding-left:15px;margin-bottom:0}.nav-list>li>a,.nav-list .nav-header{margin-right:-15px;margin-left:-15px;text-shadow:0 1px 0 rgba(255,255,255,0.5)}.nav-list>li>a{padding:3px 15px}.nav-list>.active>a,.nav-list>.active>a:hover{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.2);background-color:#08c}.nav-list [class^="icon-"]{margin-right:2px}.nav-list .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.nav-tabs,.nav-pills{*zoom:1}.nav-tabs:before,.nav-pills:before,.nav-tabs:after,.nav-pills:after{display:table;line-height:0;content:""}.nav-tabs:after,.nav-pills:after{clear:both}.nav-tabs>li,.nav-pills>li{float:left}.nav-tabs>li>a,.nav-pills>li>a{padding-right:12px;padding-left:12px;margin-right:2px;line-height:14px}.nav-tabs{border-bottom:1px solid #ddd}.nav-tabs>li{margin-bottom:-1px}.nav-tabs>li>a{padding-top:8px;padding-bottom:8px;line-height:20px;border:1px solid transparent;-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.nav-tabs>li>a:hover{border-color:#eee #eee #ddd}.nav-tabs>.active>a,.nav-tabs>.active>a:hover{color:#555;cursor:default;background-color:#fff;border:1px solid #ddd;border-bottom-color:transparent}.nav-pills>li>a{padding-top:8px;padding-bottom:8px;margin-top:2px;margin-bottom:2px;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px}.nav-pills>.active>a,.nav-pills>.active>a:hover{color:#fff;background-color:#08c}.nav-stacked>li{float:none}.nav-stacked>li>a{margin-right:0}.nav-tabs.nav-stacked{border-bottom:0}.nav-tabs.nav-stacked>li>a{border:1px solid #ddd;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.nav-tabs.nav-stacked>li:first-child>a{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-topleft:4px}.nav-tabs.nav-stacked>li:last-child>a{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomright:4px;-moz-border-radius-bottomleft:4px}.nav-tabs.nav-stacked>li>a:hover{z-index:2;border-color:#ddd}.nav-pills.nav-stacked>li>a{margin-bottom:3px}.nav-pills.nav-stacked>li:last-child>a{margin-bottom:1px}.nav-tabs .dropdown-menu{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.nav-pills .dropdown-menu{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.nav .dropdown-toggle .caret{margin-top:6px;border-top-color:#08c;border-bottom-color:#08c}.nav .dropdown-toggle:hover .caret{border-top-color:#005580;border-bottom-color:#005580}.nav-tabs .dropdown-toggle .caret{margin-top:8px}.nav .active .dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.nav-tabs .active .dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.nav>.dropdown.active>a:hover{cursor:pointer}.nav-tabs .open .dropdown-toggle,.nav-pills .open .dropdown-toggle,.nav>li.dropdown.open.active>a:hover{color:#fff;background-color:#999;border-color:#999}.nav li.dropdown.open .caret,.nav li.dropdown.open.active .caret,.nav li.dropdown.open a:hover .caret{border-top-color:#fff;border-bottom-color:#fff;opacity:1;filter:alpha(opacity=100)}.tabs-stacked .open>a:hover{border-color:#999}.tabbable{*zoom:1}.tabbable:before,.tabbable:after{display:table;line-height:0;content:""}.tabbable:after{clear:both}.tab-content{overflow:auto}.tabs-below>.nav-tabs,.tabs-right>.nav-tabs,.tabs-left>.nav-tabs{border-bottom:0}.tab-content>.tab-pane,.pill-content>.pill-pane{display:none}.tab-content>.active,.pill-content>.active{display:block}.tabs-below>.nav-tabs{border-top:1px solid #ddd}.tabs-below>.nav-tabs>li{margin-top:-1px;margin-bottom:0}.tabs-below>.nav-tabs>li>a{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.tabs-below>.nav-tabs>li>a:hover{border-top-color:#ddd;border-bottom-color:transparent}.tabs-below>.nav-tabs>.active>a,.tabs-below>.nav-tabs>.active>a:hover{border-color:transparent #ddd #ddd #ddd}.tabs-left>.nav-tabs>li,.tabs-right>.nav-tabs>li{float:none}.tabs-left>.nav-tabs>li>a,.tabs-right>.nav-tabs>li>a{min-width:74px;margin-right:0;margin-bottom:3px}.tabs-left>.nav-tabs{float:left;margin-right:19px;border-right:1px solid #ddd}.tabs-left>.nav-tabs>li>a{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.tabs-left>.nav-tabs>li>a:hover{border-color:#eee #ddd #eee #eee}.tabs-left>.nav-tabs .active>a,.tabs-left>.nav-tabs .active>a:hover{border-color:#ddd transparent #ddd #ddd;*border-right-color:#fff}.tabs-right>.nav-tabs{float:right;margin-left:19px;border-left:1px solid #ddd}.tabs-right>.nav-tabs>li>a{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.tabs-right>.nav-tabs>li>a:hover{border-color:#eee #eee #eee #ddd}.tabs-right>.nav-tabs .active>a,.tabs-right>.nav-tabs .active>a:hover{border-color:#ddd #ddd #ddd transparent;*border-left-color:#fff}.nav>.disabled>a{color:#999}.nav>.disabled>a:hover{text-decoration:none;cursor:default;background-color:transparent}.navbar{*position:relative;*z-index:2;margin-bottom:20px;overflow:visible;color:#555}.navbar-inner{min-height:40px;padding-right:20px;padding-left:20px;background-color:#fafafa;background-image:-moz-linear-gradient(top,#fff,#e2f2e2);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#e2f2e2));background-image:-webkit-linear-gradient(top,#fff,#e2f2e2);background-image:-o-linear-gradient(top,#fff,#e2f2e2);background-image:linear-gradient(to bottom,#fff,#e2f2e2);background-repeat:repeat-x;border:1px solid #d4d4d4;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffffddff',endColorstr='#ffe2f2e2',GradientType=0);-webkit-box-shadow:0 1px 4px rgba(0,0,0,0.065);-moz-box-shadow:0 1px 4px rgba(0,0,0,0.065);box-shadow:0 1px 4px rgba(0,0,0,0.065)}.navbar .container{width:auto}.nav-collapse.collapse{height:auto}.navbar .brand{display:block;float:left;padding:10px 20px 10px;margin-left:-20px;font-size:20px;font-weight:200;color:#555;text-shadow:0 1px 0 #fff}.navbar .brand:hover{text-decoration:none}.navbar-text{margin-bottom:0;line-height:40px}.navbar-link{color:#555}.navbar-link:hover{color:#333}.navbar .divider-vertical{height:40px;margin:0 9px;border-right:1px solid #fff;border-left:1px solid #f2f2f2}.navbar .btn,.navbar .btn-group{margin-top:6px}.navbar .btn-group .btn{margin:0}.navbar-form{margin-bottom:0;*zoom:1}.navbar-form:before,.navbar-form:after{display:table;line-height:0;content:""}.navbar-form:after{clear:both}.navbar-form input,.navbar-form select,.navbar-form .radio,.navbar-form .checkbox{margin-top:5px}.navbar-form input,.navbar-form select,.navbar-form .btn{display:inline-block;margin-bottom:0}.navbar-form input[type="image"],.navbar-form input[type="checkbox"],.navbar-form input[type="radio"]{margin-top:3px}.navbar-form .input-append,.navbar-form .input-prepend{margin-top:6px;white-space:nowrap}.navbar-form .input-append input,.navbar-form .input-prepend input{margin-top:0}.navbar-search{position:relative;float:left;margin-top:5px;margin-bottom:0}.navbar-search .search-query{padding:4px 14px;margin-bottom:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:13px;font-weight:normal;line-height:1;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.navbar-static-top{position:static;width:100%;margin-bottom:0}.navbar-static-top .navbar-inner{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-fixed-top,.navbar-fixed-bottom{position:fixed;right:0;left:0;z-index:1030;margin-bottom:0}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner,.navbar-static-top .navbar-inner{border:0}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding-right:0;padding-left:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.navbar-fixed-top{top:0}.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.1),0 1px 10px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.1),0 1px 10px rgba(0,0,0,0.1);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.1),0 1px 10px rgba(0,0,0,0.1)}.navbar-fixed-bottom{bottom:0}.navbar-fixed-bottom .navbar-inner{-webkit-box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1);box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1)}.navbar .nav{position:relative;left:0;display:block;float:left;margin:0 10px 0 0}.navbar .nav.pull-right{float:right}.navbar .nav>li{float:left}.navbar .nav>li>a{float:none;padding:10px 15px 10px;color:#555;text-decoration:none;text-shadow:0 1px 0 #fff}.navbar .nav .dropdown-toggle .caret{margin-top:8px}.navbar .nav>li>a:focus,.navbar .nav>li>a:hover{color:#333;text-decoration:none;background-color:transparent}.navbar .nav>.active>a,.navbar .nav>.active>a:hover,.navbar .nav>.active>a:focus{color:#555;text-decoration:none;background-color:#e5e5e5;-webkit-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);-moz-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);box-shadow:inset 0 3px 8px rgba(0,0,0,0.125)}.navbar .btn-navbar{display:none;float:right;padding:7px 10px;margin-right:5px;margin-left:5px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#ededed;*background-color:#e5e5e5;background-image:-webkit-gradient(linear,0 0,0 100%,from(#f2f2f2),to(#e5e5e5));background-image:-webkit-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:-o-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:linear-gradient(to bottom,#f2f2f2,#e5e5e5);background-image:-moz-linear-gradient(top,#f2f2f2,#e5e5e5);background-repeat:repeat-x;border-color:#e5e5e5 #e5e5e5 #bfbfbf;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fff2f2f2',endColorstr='#ffe5e5e5',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075)}.navbar .btn-navbar:hover,.navbar .btn-navbar:active,.navbar .btn-navbar.active,.navbar .btn-navbar.disabled,.navbar .btn-navbar[disabled]{color:#fff;background-color:#e5e5e5;*background-color:#d9d9d9}.navbar .btn-navbar:active,.navbar .btn-navbar.active{background-color:#ccc \9}.navbar .btn-navbar .icon-bar{display:block;width:18px;height:2px;background-color:#f5f5f5;-webkit-border-radius:1px;-moz-border-radius:1px;border-radius:1px;-webkit-box-shadow:0 1px 0 rgba(0,0,0,0.25);-moz-box-shadow:0 1px 0 rgba(0,0,0,0.25);box-shadow:0 1px 0 rgba(0,0,0,0.25)}.btn-navbar .icon-bar+.icon-bar{margin-top:3px}.navbar .nav>li>.dropdown-menu:before{position:absolute;top:-7px;left:9px;display:inline-block;border-right:7px solid transparent;border-bottom:7px solid #ccc;border-left:7px solid transparent;border-bottom-color:rgba(0,0,0,0.2);content:''}.navbar .nav>li>.dropdown-menu:after{position:absolute;top:-6px;left:10px;display:inline-block;border-right:6px solid transparent;border-bottom:6px solid #fff;border-left:6px solid transparent;content:''}.navbar-fixed-bottom .nav>li>.dropdown-menu:before{top:auto;bottom:-7px;border-top:7px solid #ccc;border-bottom:0;border-top-color:rgba(0,0,0,0.2)}.navbar-fixed-bottom .nav>li>.dropdown-menu:after{top:auto;bottom:-6px;border-top:6px solid #fff;border-bottom:0}.navbar .nav li.dropdown.open>.dropdown-toggle,.navbar .nav li.dropdown.active>.dropdown-toggle,.navbar .nav li.dropdown.open.active>.dropdown-toggle{color:#555;background-color:#e5e5e5}.navbar .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .nav li.dropdown.open>.dropdown-toggle .caret,.navbar .nav li.dropdown.active>.dropdown-toggle .caret,.navbar .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .pull-right>li>.dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right{right:0;left:auto}.navbar .pull-right>li>.dropdown-menu:before,.navbar .nav>li>.dropdown-menu.pull-right:before{right:12px;left:auto}.navbar .pull-right>li>.dropdown-menu:after,.navbar .nav>li>.dropdown-menu.pull-right:after{right:13px;left:auto}.navbar .pull-right>li>.dropdown-menu .dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right .dropdown-menu{right:100%;left:auto;margin-right:-1px;margin-left:0;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px}.navbar-inverse{color:#999}.navbar-inverse .navbar-inner{background-color:#1b1b1b;background-image:-moz-linear-gradient(top,#222,#111);background-image:-webkit-gradient(linear,0 0,0 100%,from(#222),to(#111));background-image:-webkit-linear-gradient(top,#222,#111);background-image:-o-linear-gradient(top,#222,#111);background-image:linear-gradient(to bottom,#222,#111);background-repeat:repeat-x;border-color:#252525;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff222222',endColorstr='#ff111111',GradientType=0)}.navbar-inverse .brand,.navbar-inverse .nav>li>a{color:#999;text-shadow:0 -1px 0 rgba(0,0,0,0.25)}.navbar-inverse .brand:hover,.navbar-inverse .nav>li>a:hover{color:#fff}.navbar-inverse .nav>li>a:focus,.navbar-inverse .nav>li>a:hover{color:#fff;background-color:transparent}.navbar-inverse .nav .active>a,.navbar-inverse .nav .active>a:hover,.navbar-inverse .nav .active>a:focus{color:#fff;background-color:#111}.navbar-inverse .navbar-link{color:#999}.navbar-inverse .navbar-link:hover{color:#fff}.navbar-inverse .divider-vertical{border-right-color:#222;border-left-color:#111}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle{color:#fff;background-color:#111}.navbar-inverse .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#999;border-bottom-color:#999}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.navbar-inverse .navbar-search .search-query{color:#fff;background-color:#515151;border-color:#111;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-webkit-transition:none;-moz-transition:none;-o-transition:none;transition:none}.navbar-inverse .navbar-search .search-query:-moz-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:-ms-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query::-webkit-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:focus,.navbar-inverse .navbar-search .search-query.focused{padding:5px 15px;color:#333;text-shadow:0 1px 0 #fff;background-color:#fff;border:0;outline:0;-webkit-box-shadow:0 0 3px rgba(0,0,0,0.15);-moz-box-shadow:0 0 3px rgba(0,0,0,0.15);box-shadow:0 0 3px rgba(0,0,0,0.15)}.navbar-inverse .btn-navbar{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e0e0e;*background-color:#040404;background-image:-webkit-gradient(linear,0 0,0 100%,from(#151515),to(#040404));background-image:-webkit-linear-gradient(top,#151515,#040404);background-image:-o-linear-gradient(top,#151515,#040404);background-image:linear-gradient(to bottom,#151515,#040404);background-image:-moz-linear-gradient(top,#151515,#040404);background-repeat:repeat-x;border-color:#040404 #040404 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff151515',endColorstr='#ff040404',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.navbar-inverse .btn-navbar:hover,.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active,.navbar-inverse .btn-navbar.disabled,.navbar-inverse .btn-navbar[disabled]{color:#fff;background-color:#040404;*background-color:#000}.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active{background-color:#000 \9}.breadcrumb{padding:8px 15px;margin:0 0 20px;list-style:none;background-color:#f5f5f5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.breadcrumb li{display:inline-block;*display:inline;text-shadow:0 1px 0 #fff;*zoom:1}.breadcrumb .divider{padding:0 5px;color:#ccc}.breadcrumb .active{color:#999}.pagination{height:40px;margin:20px 0}.pagination ul{display:inline-block;*display:inline;margin-bottom:0;margin-left:0;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;*zoom:1;-webkit-box-shadow:0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:0 1px 2px rgba(0,0,0,0.05);box-shadow:0 1px 2px rgba(0,0,0,0.05)}.pagination li{display:inline}.pagination a,.pagination span{float:left;padding:0 14px;line-height:38px;text-decoration:none;background-color:#fff;border:1px solid #ddd;border-left-width:0}.pagination a:hover,.pagination .active a,.pagination .active span{background-color:#f5f5f5}.pagination .active a,.pagination .active span{color:#999;cursor:default}.pagination .disabled span,.pagination .disabled a,.pagination .disabled a:hover{color:#999;cursor:default;background-color:transparent}.pagination li:first-child a,.pagination li:first-child span{border-left-width:1px;-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.pagination li:last-child a,.pagination li:last-child span{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.pagination-centered{text-align:center}.pagination-right{text-align:right}.pager{margin:20px 0;text-align:center;list-style:none;*zoom:1}.pager:before,.pager:after{display:table;line-height:0;content:""}.pager:after{clear:both}.pager li{display:inline}.pager a{display:inline-block;padding:5px 14px;background-color:#fff;border:1px solid #ddd;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.pager a:hover{text-decoration:none;background-color:#f5f5f5}.pager .next a{float:right}.pager .previous a{float:left}.pager .disabled a,.pager .disabled a:hover{color:#999;cursor:default;background-color:#fff}.modal-open .dropdown-menu{z-index:2050}.modal-open .dropdown.open{*z-index:2050}.modal-open .popover{z-index:2060}.modal-open .tooltip{z-index:2080}.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000}.modal-backdrop.fade{opacity:0}.modal-backdrop,.modal-backdrop.fade.in{opacity:.8;filter:alpha(opacity=80)}.modal{position:fixed;top:50%;left:50%;z-index:1050;width:560px;margin:-250px 0 0 -280px;overflow:auto;background-color:#fff;border:1px solid #999;border:1px solid rgba(0,0,0,0.3);*border:1px solid #999;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 3px 7px rgba(0,0,0,0.3);-moz-box-shadow:0 3px 7px rgba(0,0,0,0.3);box-shadow:0 3px 7px rgba(0,0,0,0.3);-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box}.modal.fade{top:-25%;-webkit-transition:opacity .3s linear,top .3s ease-out;-moz-transition:opacity .3s linear,top .3s ease-out;-o-transition:opacity .3s linear,top .3s ease-out;transition:opacity .3s linear,top .3s ease-out}.modal.fade.in{top:50%}.modal-header{padding:9px 15px;border-bottom:1px solid #eee}.modal-header .close{margin-top:2px}.modal-header h3{margin:0;line-height:30px}.modal-body{max-height:400px;padding:15px;overflow-y:auto}.modal-form{margin-bottom:0}.modal-footer{padding:14px 15px 15px;margin-bottom:0;text-align:right;background-color:#f5f5f5;border-top:1px solid #ddd;-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;*zoom:1;-webkit-box-shadow:inset 0 1px 0 #fff;-moz-box-shadow:inset 0 1px 0 #fff;box-shadow:inset 0 1px 0 #fff}.modal-footer:before,.modal-footer:after{display:table;line-height:0;content:""}.modal-footer:after{clear:both}.modal-footer .btn+.btn{margin-bottom:0;margin-left:5px}.modal-footer .btn-group .btn+.btn{margin-left:-1px}.tooltip{position:absolute;z-index:1030;display:block;padding:5px;font-size:11px;opacity:0;filter:alpha(opacity=0);visibility:visible}.tooltip.in{opacity:.8;filter:alpha(opacity=80)}.tooltip.top{margin-top:-3px}.tooltip.right{margin-left:3px}.tooltip.bottom{margin-top:3px}.tooltip.left{margin-left:-3px}.tooltip-inner{max-width:200px;padding:3px 8px;color:#fff;text-align:center;text-decoration:none;background-color:#000;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.tooltip-arrow{position:absolute;width:0;height:0;border-color:transparent;border-style:solid}.tooltip.top .tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-top-color:#000;border-width:5px 5px 0}.tooltip.right .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-right-color:#000;border-width:5px 5px 5px 0}.tooltip.left .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-left-color:#000;border-width:5px 0 5px 5px}.tooltip.bottom .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-bottom-color:#000;border-width:0 5px 5px}.popover{position:absolute;top:0;left:0;z-index:1010;display:none;width:236px;padding:1px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.popover.top{margin-bottom:10px}.popover.right{margin-left:10px}.popover.bottom{margin-top:10px}.popover.left{margin-right:10px}.popover-title{padding:8px 14px;margin:0;font-size:14px;font-weight:normal;line-height:18px;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;-webkit-border-radius:5px 5px 0 0;-moz-border-radius:5px 5px 0 0;border-radius:5px 5px 0 0}.popover-content{padding:9px 14px}.popover-content p,.popover-content ul,.popover-content ol{margin-bottom:0}.popover .arrow,.popover .arrow:after{position:absolute;display:inline-block;width:0;height:0;border-color:transparent;border-style:solid}.popover .arrow:after{z-index:-1;content:""}.popover.top .arrow{bottom:-10px;left:50%;margin-left:-10px;border-top-color:#fff;border-width:10px 10px 0}.popover.top .arrow:after{bottom:-1px;left:-11px;border-top-color:rgba(0,0,0,0.25);border-width:11px 11px 0}.popover.right .arrow{top:50%;left:-10px;margin-top:-10px;border-right-color:#fff;border-width:10px 10px 10px 0}.popover.right .arrow:after{bottom:-11px;left:-1px;border-right-color:rgba(0,0,0,0.25);border-width:11px 11px 11px 0}.popover.bottom .arrow{top:-10px;left:50%;margin-left:-10px;border-bottom-color:#fff;border-width:0 10px 10px}.popover.bottom .arrow:after{top:-1px;left:-11px;border-bottom-color:rgba(0,0,0,0.25);border-width:0 11px 11px}.popover.left .arrow{top:50%;right:-10px;margin-top:-10px;border-left-color:#fff;border-width:10px 0 10px 10px}.popover.left .arrow:after{right:-1px;bottom:-11px;border-left-color:rgba(0,0,0,0.25);border-width:11px 0 11px 11px}.thumbnails{margin-left:-20px;list-style:none;*zoom:1}.thumbnails:before,.thumbnails:after{display:table;line-height:0;content:""}.thumbnails:after{clear:both}.row-fluid .thumbnails{margin-left:0}.thumbnails>li{float:left;margin-bottom:20px;margin-left:20px}.thumbnail{display:block;padding:4px;line-height:20px;border:1px solid #ddd;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.055);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.055);box-shadow:0 1px 3px rgba(0,0,0,0.055);-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;-o-transition:all .2s ease-in-out;transition:all .2s ease-in-out}a.thumbnail:hover{border-color:#08c;-webkit-box-shadow:0 1px 4px rgba(0,105,214,0.25);-moz-box-shadow:0 1px 4px rgba(0,105,214,0.25);box-shadow:0 1px 4px rgba(0,105,214,0.25)}.thumbnail>img{display:block;max-width:100%;margin-right:auto;margin-left:auto}.thumbnail .caption{padding:9px;color:#555}.label,.badge{font-size:11.844px;font-weight:bold;line-height:14px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);white-space:nowrap;vertical-align:baseline;background-color:#999}.label{padding:1px 4px 2px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.badge{padding:1px 9px 2px;-webkit-border-radius:9px;-moz-border-radius:9px;border-radius:9px}a.label:hover,a.badge:hover{color:#fff;text-decoration:none;cursor:pointer}.label-important,.badge-important{background-color:#b94a48}.label-important[href],.badge-important[href]{background-color:#953b39}.label-warning,.badge-warning{background-color:#f89406}.label-warning[href],.badge-warning[href]{background-color:#c67605}.label-success,.badge-success{background-color:#468847}.label-success[href],.badge-success[href]{background-color:#356635}.label-info,.badge-info{background-color:#3a87ad}.label-info[href],.badge-info[href]{background-color:#2d6987}.label-inverse,.badge-inverse{background-color:#333}.label-inverse[href],.badge-inverse[href]{background-color:#1a1a1a}.btn .label,.btn .badge{position:relative;top:-1px}.btn-mini .label,.btn-mini .badge{top:0}@-webkit-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-moz-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-ms-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-o-keyframes progress-bar-stripes{from{background-position:0 0}to{background-position:40px 0}}@keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}.progress{height:20px;margin-bottom:20px;overflow:hidden;background-color:#f7f7f7;background-image:-moz-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#f5f5f5),to(#f9f9f9));background-image:-webkit-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-o-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:linear-gradient(to bottom,#f5f5f5,#f9f9f9);background-repeat:repeat-x;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fff5f5f5',endColorstr='#fff9f9f9',GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1)}.progress .bar{float:left;width:0;height:100%;font-size:12px;color:#fff;text-align:center;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e90d2;background-image:-moz-linear-gradient(top,#149bdf,#0480be);background-image:-webkit-gradient(linear,0 0,0 100%,from(#149bdf),to(#0480be));background-image:-webkit-linear-gradient(top,#149bdf,#0480be);background-image:-o-linear-gradient(top,#149bdf,#0480be);background-image:linear-gradient(to bottom,#149bdf,#0480be);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff149bdf',endColorstr='#ff0480be',GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;-webkit-transition:width .6s ease;-moz-transition:width .6s ease;-o-transition:width .6s ease;transition:width .6s ease}.progress .bar+.bar{-webkit-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15)}.progress-striped .bar{background-color:#149bdf;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);-webkit-background-size:40px 40px;-moz-background-size:40px 40px;-o-background-size:40px 40px;background-size:40px 40px}.progress.active .bar{-webkit-animation:progress-bar-stripes 2s linear infinite;-moz-animation:progress-bar-stripes 2s linear infinite;-ms-animation:progress-bar-stripes 2s linear infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite}.progress-danger .bar,.progress .bar-danger{background-color:#dd514c;background-image:-moz-linear-gradient(top,#ee5f5b,#c43c35);background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#c43c35));background-image:-webkit-linear-gradient(top,#ee5f5b,#c43c35);background-image:-o-linear-gradient(top,#ee5f5b,#c43c35);background-image:linear-gradient(to bottom,#ee5f5b,#c43c35);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffc43c35',GradientType=0)}.progress-danger.progress-striped .bar,.progress-striped .bar-danger{background-color:#ee5f5b;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-success .bar,.progress .bar-success{background-color:#5eb95e;background-image:-moz-linear-gradient(top,#62c462,#57a957);background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#57a957));background-image:-webkit-linear-gradient(top,#62c462,#57a957);background-image:-o-linear-gradient(top,#62c462,#57a957);background-image:linear-gradient(to bottom,#62c462,#57a957);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff57a957',GradientType=0)}.progress-success.progress-striped .bar,.progress-striped .bar-success{background-color:#62c462;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-info .bar,.progress .bar-info{background-color:#4bb1cf;background-image:-moz-linear-gradient(top,#5bc0de,#339bb9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#339bb9));background-image:-webkit-linear-gradient(top,#5bc0de,#339bb9);background-image:-o-linear-gradient(top,#5bc0de,#339bb9);background-image:linear-gradient(to bottom,#5bc0de,#339bb9);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff339bb9',GradientType=0)}.progress-info.progress-striped .bar,.progress-striped .bar-info{background-color:#5bc0de;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-warning .bar,.progress .bar-warning{background-color:#faa732;background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0)}.progress-warning.progress-striped .bar,.progress-striped .bar-warning{background-color:#fbb450;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.accordion{margin-bottom:20px}.accordion-group{margin-bottom:2px;border:1px solid #e5e5e5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.accordion-heading{border-bottom:0}.accordion-heading .accordion-toggle{display:block;padding:8px 15px}.accordion-toggle{cursor:pointer}.accordion-inner{padding:9px 15px;border-top:1px solid #e5e5e5}.carousel{position:relative;margin-bottom:20px;line-height:1}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel .item{position:relative;display:none;-webkit-transition:.6s ease-in-out left;-moz-transition:.6s ease-in-out left;-o-transition:.6s ease-in-out left;transition:.6s ease-in-out left}.carousel .item>img{display:block;line-height:1}.carousel .active,.carousel .next,.carousel .prev{display:block}.carousel .active{left:0}.carousel .next,.carousel .prev{position:absolute;top:0;width:100%}.carousel .next{left:100%}.carousel .prev{left:-100%}.carousel .next.left,.carousel .prev.right{left:0}.carousel .active.left{left:-100%}.carousel .active.right{left:100%}.carousel-control{position:absolute;top:40%;left:15px;width:40px;height:40px;margin-top:-20px;font-size:60px;font-weight:100;line-height:30px;color:#fff;text-align:center;background:#222;border:3px solid #fff;-webkit-border-radius:23px;-moz-border-radius:23px;border-radius:23px;opacity:.5;filter:alpha(opacity=50)}.carousel-control.right{right:15px;left:auto}.carousel-control:hover{color:#fff;text-decoration:none;opacity:.9;filter:alpha(opacity=90)}.carousel-caption{position:absolute;right:0;bottom:0;left:0;padding:15px;background:#333;background:rgba(0,0,0,0.75)}.carousel-caption h4,.carousel-caption p{line-height:20px;color:#fff}.carousel-caption h4{margin:0 0 5px}.carousel-caption p{margin-bottom:0}.hero-unit{padding:60px;margin-bottom:30px;background-color:#eee;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.hero-unit h1{margin-bottom:0;font-size:60px;line-height:1;letter-spacing:-1px;color:inherit}.hero-unit p{font-size:18px;font-weight:200;line-height:30px;color:inherit}.pull-right{float:right}.pull-left{float:left}.hide{display:none}.show{display:block}.invisible{visibility:hidden}.affix{position:fixed}
+ */article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}audio:not([controls]){display:none}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}a:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}a:hover,a:active{outline:0}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sup{top:-0.5em}sub{bottom:-0.25em}img{height:auto;max-width:100%;vertical-align:middle;border:0;-ms-interpolation-mode:bicubic}#map_canvas img{max-width:none}button,input,select,textarea{margin:0;font-size:100%;vertical-align:middle}button,input{*overflow:visible;line-height:normal}button::-moz-focus-inner,input::-moz-focus-inner{padding:0;border:0}button,input[type="button"],input[type="reset"],input[type="submit"]{cursor:pointer;-webkit-appearance:button}input[type="search"]{-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;-webkit-appearance:textfield}input[type="search"]::-webkit-search-decoration,input[type="search"]::-webkit-search-cancel-button{-webkit-appearance:none}textarea{overflow:auto;vertical-align:top}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;line-height:0;content:""}.clearfix:after{clear:both}.hide-text{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.input-block-level{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}body{margin:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;line-height:20px;color:#333;background-color:#fff}a{color:#08c;text-decoration:none}a:hover{color:#005580;text-decoration:underline}.img-rounded{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.img-polaroid{padding:4px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.1);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.1);box-shadow:0 1px 3px rgba(0,0,0,0.1)}.img-circle{-webkit-border-radius:500px;-moz-border-radius:500px;border-radius:500px}.row{margin-left:-20px;*zoom:1}.row:before,.row:after{display:table;line-height:0;content:""}.row:after{clear:both}[class*="span"]{float:left;margin-left:20px}.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.span12{width:940px}.span11{width:860px}.span10{width:780px}.span9{width:700px}.span8{width:620px}.span7{width:540px}.span6{width:460px}.span5{width:380px}.span4{width:300px}.span3{width:220px}.span2{width:140px}.span1{width:60px}.offset12{margin-left:980px}.offset11{margin-left:900px}.offset10{margin-left:820px}.offset9{margin-left:740px}.offset8{margin-left:660px}.offset7{margin-left:580px}.offset6{margin-left:500px}.offset5{margin-left:420px}.offset4{margin-left:340px}.offset3{margin-left:260px}.offset2{margin-left:180px}.offset1{margin-left:100px}.row-fluid{width:100%;*zoom:1}.row-fluid:before,.row-fluid:after{display:table;line-height:0;content:""}.row-fluid:after{clear:both}.row-fluid [class*="span"]{display:block;float:left;width:100%;min-height:30px;margin-left:2.127659574468085%;*margin-left:2.074468085106383%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="span"]:first-child{margin-left:0}.row-fluid .span12{width:100%;*width:99.94680851063829%}.row-fluid .span11{width:91.48936170212765%;*width:91.43617021276594%}.row-fluid .span10{width:82.97872340425532%;*width:82.92553191489361%}.row-fluid .span9{width:74.46808510638297%;*width:74.41489361702126%}.row-fluid .span8{width:65.95744680851064%;*width:65.90425531914893%}.row-fluid .span7{width:57.44680851063829%;*width:57.39361702127659%}.row-fluid .span6{width:48.93617021276595%;*width:48.88297872340425%}.row-fluid .span5{width:40.42553191489362%;*width:40.37234042553192%}.row-fluid .span4{width:31.914893617021278%;*width:31.861702127659576%}.row-fluid .span3{width:23.404255319148934%;*width:23.351063829787233%}.row-fluid .span2{width:14.893617021276595%;*width:14.840425531914894%}.row-fluid .span1{width:6.382978723404255%;*width:6.329787234042553%}.row-fluid .offset12{margin-left:104.25531914893617%;*margin-left:104.14893617021275%}.row-fluid .offset12:first-child{margin-left:102.12765957446808%;*margin-left:102.02127659574467%}.row-fluid .offset11{margin-left:95.74468085106382%;*margin-left:95.6382978723404%}.row-fluid .offset11:first-child{margin-left:93.61702127659574%;*margin-left:93.51063829787232%}.row-fluid .offset10{margin-left:87.23404255319149%;*margin-left:87.12765957446807%}.row-fluid .offset10:first-child{margin-left:85.1063829787234%;*margin-left:84.99999999999999%}.row-fluid .offset9{margin-left:78.72340425531914%;*margin-left:78.61702127659572%}.row-fluid .offset9:first-child{margin-left:76.59574468085106%;*margin-left:76.48936170212764%}.row-fluid .offset8{margin-left:70.2127659574468%;*margin-left:70.10638297872339%}.row-fluid .offset8:first-child{margin-left:68.08510638297872%;*margin-left:67.9787234042553%}.row-fluid .offset7{margin-left:61.70212765957446%;*margin-left:61.59574468085106%}.row-fluid .offset7:first-child{margin-left:59.574468085106375%;*margin-left:59.46808510638297%}.row-fluid .offset6{margin-left:53.191489361702125%;*margin-left:53.085106382978715%}.row-fluid .offset6:first-child{margin-left:51.063829787234035%;*margin-left:50.95744680851063%}.row-fluid .offset5{margin-left:44.68085106382979%;*margin-left:44.57446808510638%}.row-fluid .offset5:first-child{margin-left:42.5531914893617%;*margin-left:42.4468085106383%}.row-fluid .offset4{margin-left:36.170212765957444%;*margin-left:36.06382978723405%}.row-fluid .offset4:first-child{margin-left:34.04255319148936%;*margin-left:33.93617021276596%}.row-fluid .offset3{margin-left:27.659574468085104%;*margin-left:27.5531914893617%}.row-fluid .offset3:first-child{margin-left:25.53191489361702%;*margin-left:25.425531914893618%}.row-fluid .offset2{margin-left:19.148936170212764%;*margin-left:19.04255319148936%}.row-fluid .offset2:first-child{margin-left:17.02127659574468%;*margin-left:16.914893617021278%}.row-fluid .offset1{margin-left:10.638297872340425%;*margin-left:10.53191489361702%}.row-fluid .offset1:first-child{margin-left:8.51063829787234%;*margin-left:8.404255319148938%}[class*="span"].hide,.row-fluid [class*="span"].hide{display:none}[class*="span"].pull-right,.row-fluid [class*="span"].pull-right{float:right}.container{margin-right:auto;margin-left:auto;*zoom:1}.container:before,.container:after{display:table;line-height:0;content:""}.container:after{clear:both}.container-fluid{padding-right:20px;padding-left:20px;*zoom:1}.container-fluid:before,.container-fluid:after{display:table;line-height:0;content:""}.container-fluid:after{clear:both}p{margin:0 0 10px}.lead{margin-bottom:20px;font-size:20px;font-weight:200;line-height:30px}small{font-size:85%}strong{font-weight:bold}em{font-style:italic}cite{font-style:normal}.muted{color:#999}h1,h2,h3,h4,h5,h6{margin:10px 0;font-family:inherit;font-weight:bold;line-height:1;color:inherit;text-rendering:optimizelegibility}h1 small,h2 small,h3 small,h4 small,h5 small,h6 small{font-weight:normal;line-height:1;color:#999}h1{font-size:36px;line-height:40px}h2{font-size:30px;line-height:40px}h3{font-size:24px;line-height:40px}h4{font-size:18px;line-height:20px}h5{font-size:14px;line-height:20px}h6{font-size:12px;line-height:20px}h1 small{font-size:24px}h2 small{font-size:18px}h3 small{font-size:14px}h4 small{font-size:14px}.page-header{padding-bottom:9px;margin:20px 0 30px;border-bottom:1px solid #eee}ul,ol{padding:0;margin:0 0 10px 25px}ul ul,ul ol,ol ol,ol ul{margin-bottom:0}li{line-height:20px}ul.unstyled,ol.unstyled{margin-left:0;list-style:none}dl{margin-bottom:20px}dt,dd{line-height:20px}dt{font-weight:bold}dd{margin-left:10px}.dl-horizontal dt{float:left;width:120px;overflow:hidden;clear:left;text-align:right;text-overflow:ellipsis;white-space:nowrap}.dl-horizontal dd{margin-left:130px}hr{margin:20px 0;border:0;border-top:1px solid #eee;border-bottom:1px solid #fff}abbr[title]{cursor:help;border-bottom:1px dotted #999}abbr.initialism{font-size:90%;text-transform:uppercase}blockquote{padding:0 0 0 15px;margin:0 0 20px;border-left:5px solid #eee}blockquote p{margin-bottom:0;font-size:16px;font-weight:300;line-height:25px}blockquote small{display:block;line-height:20px;color:#999}blockquote small:before{content:'\2014 \00A0'}blockquote.pull-right{float:right;padding-right:15px;padding-left:0;border-right:5px solid #eee;border-left:0}blockquote.pull-right p,blockquote.pull-right small{text-align:right}blockquote.pull-right small:before{content:''}blockquote.pull-right small:after{content:'\00A0 \2014'}q:before,q:after,blockquote:before,blockquote:after{content:""}address{display:block;margin-bottom:20px;font-style:normal;line-height:20px}code,pre{padding:0 3px 2px;font-family:Monaco,Menlo,Consolas,"Courier New",monospace;font-size:12px;color:#333;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}code{padding:2px 4px;color:#d14;background-color:#f7f7f9;border:1px solid #e1e1e8}pre{display:block;padding:9.5px;margin:0 0 10px;font-size:13px;line-height:20px;word-break:break-all;word-wrap:break-word;white-space:pre;white-space:pre-wrap;background-color:#f5f5f5;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.15);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}pre.prettyprint{margin-bottom:20px}pre code{padding:0;color:inherit;background-color:transparent;border:0}.pre-scrollable{max-height:340px;overflow-y:scroll}form{margin:0 0 20px}fieldset{padding:0;margin:0;border:0}legend{display:block;width:100%;padding:0;margin-bottom:20px;font-size:21px;line-height:40px;color:#333;border:0;border-bottom:1px solid #e5e5e5}legend small{font-size:15px;color:#999}label,input,button,select,textarea{font-size:14px;font-weight:normal;line-height:20px}input,button,select,textarea{font-family:"Helvetica Neue",Helvetica,Arial,sans-serif}label{display:block;margin-bottom:5px}select,textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{display:inline-block;height:20px;padding:4px 6px;margin-bottom:9px;font-size:14px;line-height:20px;color:#555;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}input,textarea{width:210px}textarea{height:auto}textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{background-color:#fff;border:1px solid #ccc;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-webkit-transition:border linear .2s,box-shadow linear .2s;-moz-transition:border linear .2s,box-shadow linear .2s;-o-transition:border linear .2s,box-shadow linear .2s;transition:border linear .2s,box-shadow linear .2s}textarea:focus,input[type="text"]:focus,input[type="password"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus,.uneditable-input:focus{border-color:rgba(82,168,236,0.8);outline:0;outline:thin dotted \9;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6)}input[type="radio"],input[type="checkbox"]{margin:4px 0 0;margin-top:1px \9;*margin-top:0;line-height:normal;cursor:pointer}input[type="file"],input[type="image"],input[type="submit"],input[type="reset"],input[type="button"],input[type="radio"],input[type="checkbox"]{width:auto}select,input[type="file"]{height:30px;*margin-top:4px;line-height:30px}select{width:220px;background-color:#fff;border:1px solid #bbb}select[multiple],select[size]{height:auto}select:focus,input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.uneditable-input,.uneditable-textarea{color:#999;cursor:not-allowed;background-color:#fcfcfc;border-color:#ccc;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);box-shadow:inset 0 1px 2px rgba(0,0,0,0.025)}.uneditable-input{overflow:hidden;white-space:nowrap}.uneditable-textarea{width:auto;height:auto}input:-moz-placeholder,textarea:-moz-placeholder{color:#999}input:-ms-input-placeholder,textarea:-ms-input-placeholder{color:#999}input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#999}.radio,.checkbox{min-height:18px;padding-left:18px}.radio input[type="radio"],.checkbox input[type="checkbox"]{float:left;margin-left:-18px}.controls>.radio:first-child,.controls>.checkbox:first-child{padding-top:5px}.radio.inline,.checkbox.inline{display:inline-block;padding-top:5px;margin-bottom:0;vertical-align:middle}.radio.inline+.radio.inline,.checkbox.inline+.checkbox.inline{margin-left:10px}.input-mini{width:60px}.input-small{width:90px}.input-medium{width:150px}.input-large{width:210px}.input-xlarge{width:270px}.input-xxlarge{width:530px}input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"]{float:none;margin-left:0}.input-append input[class*="span"],.input-append .uneditable-input[class*="span"],.input-prepend input[class*="span"],.input-prepend .uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"],.row-fluid .input-prepend [class*="span"],.row-fluid .input-append [class*="span"]{display:inline-block}input,textarea,.uneditable-input{margin-left:0}.controls-row [class*="span"]+[class*="span"]{margin-left:20px}input.span12,textarea.span12,.uneditable-input.span12{width:926px}input.span11,textarea.span11,.uneditable-input.span11{width:846px}input.span10,textarea.span10,.uneditable-input.span10{width:766px}input.span9,textarea.span9,.uneditable-input.span9{width:686px}input.span8,textarea.span8,.uneditable-input.span8{width:606px}input.span7,textarea.span7,.uneditable-input.span7{width:526px}input.span6,textarea.span6,.uneditable-input.span6{width:446px}input.span5,textarea.span5,.uneditable-input.span5{width:366px}input.span4,textarea.span4,.uneditable-input.span4{width:286px}input.span3,textarea.span3,.uneditable-input.span3{width:206px}input.span2,textarea.span2,.uneditable-input.span2{width:126px}input.span1,textarea.span1,.uneditable-input.span1{width:46px}.controls-row{*zoom:1}.controls-row:before,.controls-row:after{display:table;line-height:0;content:""}.controls-row:after{clear:both}.controls-row [class*="span"]{float:left}input[disabled],select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#eee}input[type="radio"][disabled],input[type="checkbox"][disabled],input[type="radio"][readonly],input[type="checkbox"][readonly]{background-color:transparent}.control-group.warning>label,.control-group.warning .help-block,.control-group.warning .help-inline{color:#c09853}.control-group.warning .checkbox,.control-group.warning .radio,.control-group.warning input,.control-group.warning select,.control-group.warning textarea{color:#c09853;border-color:#c09853;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.warning .checkbox:focus,.control-group.warning .radio:focus,.control-group.warning input:focus,.control-group.warning select:focus,.control-group.warning textarea:focus{border-color:#a47e3c;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e}.control-group.warning .input-prepend .add-on,.control-group.warning .input-append .add-on{color:#c09853;background-color:#fcf8e3;border-color:#c09853}.control-group.error>label,.control-group.error .help-block,.control-group.error .help-inline{color:#b94a48}.control-group.error .checkbox,.control-group.error .radio,.control-group.error input,.control-group.error select,.control-group.error textarea{color:#b94a48;border-color:#b94a48;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.error .checkbox:focus,.control-group.error .radio:focus,.control-group.error input:focus,.control-group.error select:focus,.control-group.error textarea:focus{border-color:#953b39;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392}.control-group.error .input-prepend .add-on,.control-group.error .input-append .add-on{color:#b94a48;background-color:#f2dede;border-color:#b94a48}.control-group.success>label,.control-group.success .help-block,.control-group.success .help-inline{color:#468847}.control-group.success .checkbox,.control-group.success .radio,.control-group.success input,.control-group.success select,.control-group.success textarea{color:#468847;border-color:#468847;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.success .checkbox:focus,.control-group.success .radio:focus,.control-group.success input:focus,.control-group.success select:focus,.control-group.success textarea:focus{border-color:#356635;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b}.control-group.success .input-prepend .add-on,.control-group.success .input-append .add-on{color:#468847;background-color:#dff0d8;border-color:#468847}input:focus:required:invalid,textarea:focus:required:invalid,select:focus:required:invalid{color:#b94a48;border-color:#ee5f5b}input:focus:required:invalid:focus,textarea:focus:required:invalid:focus,select:focus:required:invalid:focus{border-color:#e9322d;-webkit-box-shadow:0 0 6px #f8b9b7;-moz-box-shadow:0 0 6px #f8b9b7;box-shadow:0 0 6px #f8b9b7}.form-actions{padding:19px 20px 20px;margin-top:20px;margin-bottom:20px;background-color:#f5f5f5;border-top:1px solid #e5e5e5;*zoom:1}.form-actions:before,.form-actions:after{display:table;line-height:0;content:""}.form-actions:after{clear:both}.help-block,.help-inline{color:#595959}.help-block{display:block;margin-bottom:10px}.help-inline{display:inline-block;*display:inline;padding-left:5px;vertical-align:middle;*zoom:1}.input-append,.input-prepend{margin-bottom:5px;font-size:0;white-space:nowrap}.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input{position:relative;margin-bottom:0;*margin-left:0;font-size:14px;vertical-align:top;-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.input-append input:focus,.input-prepend input:focus,.input-append select:focus,.input-prepend select:focus,.input-append .uneditable-input:focus,.input-prepend .uneditable-input:focus{z-index:2}.input-append .add-on,.input-prepend .add-on{display:inline-block;width:auto;height:20px;min-width:16px;padding:4px 5px;font-size:14px;font-weight:normal;line-height:20px;text-align:center;text-shadow:0 1px 0 #fff;background-color:#eee;border:1px solid #ccc}.input-append .add-on,.input-prepend .add-on,.input-append .btn,.input-prepend .btn{margin-left:-1px;vertical-align:top;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-append .active,.input-prepend .active{background-color:#a9dba9;border-color:#46a546}.input-prepend .add-on,.input-prepend .btn{margin-right:-1px}.input-prepend .add-on:first-child,.input-prepend .btn:first-child{-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-append input,.input-append select,.input-append .uneditable-input{-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-append .add-on:last-child,.input-append .btn:last-child{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.input-prepend.input-append input,.input-prepend.input-append select,.input-prepend.input-append .uneditable-input{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-prepend.input-append .add-on:first-child,.input-prepend.input-append .btn:first-child{margin-right:-1px;-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-prepend.input-append .add-on:last-child,.input-prepend.input-append .btn:last-child{margin-left:-1px;-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}input.search-query{padding-right:14px;padding-right:4px \9;padding-left:14px;padding-left:4px \9;margin-bottom:0;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.form-search .input-append .search-query,.form-search .input-prepend .search-query{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.form-search .input-append .search-query{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px}.form-search .input-append .btn{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .search-query{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .btn{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px}.form-search input,.form-inline input,.form-horizontal input,.form-search textarea,.form-inline textarea,.form-horizontal textarea,.form-search select,.form-inline select,.form-horizontal select,.form-search .help-inline,.form-inline .help-inline,.form-horizontal .help-inline,.form-search .uneditable-input,.form-inline .uneditable-input,.form-horizontal .uneditable-input,.form-search .input-prepend,.form-inline .input-prepend,.form-horizontal .input-prepend,.form-search .input-append,.form-inline .input-append,.form-horizontal .input-append{display:inline-block;*display:inline;margin-bottom:0;vertical-align:middle;*zoom:1}.form-search .hide,.form-inline .hide,.form-horizontal .hide{display:none}.form-search label,.form-inline label,.form-search .btn-group,.form-inline .btn-group{display:inline-block}.form-search .input-append,.form-inline .input-append,.form-search .input-prepend,.form-inline .input-prepend{margin-bottom:0}.form-search .radio,.form-search .checkbox,.form-inline .radio,.form-inline .checkbox{padding-left:0;margin-bottom:0;vertical-align:middle}.form-search .radio input[type="radio"],.form-search .checkbox input[type="checkbox"],.form-inline .radio input[type="radio"],.form-inline .checkbox input[type="checkbox"]{float:left;margin-right:3px;margin-left:0}.control-group{margin-bottom:10px}legend+.control-group{margin-top:20px;-webkit-margin-top-collapse:separate}.form-horizontal .control-group{margin-bottom:20px;*zoom:1}.form-horizontal .control-group:before,.form-horizontal .control-group:after{display:table;line-height:0;content:""}.form-horizontal .control-group:after{clear:both}.form-horizontal .control-label{float:left;width:140px;padding-top:5px;text-align:right}.form-horizontal .controls{*display:inline-block;*padding-left:20px;margin-left:160px;*margin-left:0}.form-horizontal .controls:first-child{*padding-left:160px}.form-horizontal .help-block{margin-top:10px;margin-bottom:0}.form-horizontal .form-actions{padding-left:160px}table{max-width:100%;background-color:transparent;border-collapse:collapse;border-spacing:0}.table{width:100%;margin-bottom:20px}.table th,.table td{padding:8px;line-height:20px;text-align:left;vertical-align:top;border-top:1px solid #ddd}.table th{font-weight:bold}.table thead th{vertical-align:bottom}.table caption+thead tr:first-child th,.table caption+thead tr:first-child td,.table colgroup+thead tr:first-child th,.table colgroup+thead tr:first-child td,.table thead:first-child tr:first-child th,.table thead:first-child tr:first-child td{border-top:0}.table tbody+tbody{border-top:2px solid #ddd}.table-condensed th,.table-condensed td{padding:4px 5px}.table-bordered{border:1px solid #ddd;border-collapse:separate;*border-collapse:collapse;border-left:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.table-bordered th,.table-bordered td{border-left:1px solid #ddd}.table-bordered caption+thead tr:first-child th,.table-bordered caption+tbody tr:first-child th,.table-bordered caption+tbody tr:first-child td,.table-bordered colgroup+thead tr:first-child th,.table-bordered colgroup+tbody tr:first-child th,.table-bordered colgroup+tbody tr:first-child td,.table-bordered thead:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child td{border-top:0}.table-bordered thead:first-child tr:first-child th:first-child,.table-bordered tbody:first-child tr:first-child td:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered thead:first-child tr:first-child th:last-child,.table-bordered tbody:first-child tr:first-child td:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-radius-topright:4px}.table-bordered thead:last-child tr:last-child th:first-child,.table-bordered tbody:last-child tr:last-child td:first-child,.table-bordered tfoot:last-child tr:last-child td:first-child{-webkit-border-radius:0 0 0 4px;-moz-border-radius:0 0 0 4px;border-radius:0 0 0 4px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px}.table-bordered thead:last-child tr:last-child th:last-child,.table-bordered tbody:last-child tr:last-child td:last-child,.table-bordered tfoot:last-child tr:last-child td:last-child{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px}.table-bordered caption+thead tr:first-child th:first-child,.table-bordered caption+tbody tr:first-child td:first-child,.table-bordered colgroup+thead tr:first-child th:first-child,.table-bordered colgroup+tbody tr:first-child td:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered caption+thead tr:first-child th:last-child,.table-bordered caption+tbody tr:first-child td:last-child,.table-bordered colgroup+thead tr:first-child th:last-child,.table-bordered colgroup+tbody tr:first-child td:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-right-topleft:4px}.table-striped tbody tr:nth-child(odd) td,.table-striped tbody tr:nth-child(odd) th{background-color:#f9f9f9}.table-hover tbody tr:hover td,.table-hover tbody tr:hover th{background-color:#f5f5f5}table [class*=span],.row-fluid table [class*=span]{display:table-cell;float:none;margin-left:0}table .span1{float:none;width:44px;margin-left:0}table .span2{float:none;width:124px;margin-left:0}table .span3{float:none;width:204px;margin-left:0}table .span4{float:none;width:284px;margin-left:0}table .span5{float:none;width:364px;margin-left:0}table .span6{float:none;width:444px;margin-left:0}table .span7{float:none;width:524px;margin-left:0}table .span8{float:none;width:604px;margin-left:0}table .span9{float:none;width:684px;margin-left:0}table .span10{float:none;width:764px;margin-left:0}table .span11{float:none;width:844px;margin-left:0}table .span12{float:none;width:924px;margin-left:0}table .span13{float:none;width:1004px;margin-left:0}table .span14{float:none;width:1084px;margin-left:0}table .span15{float:none;width:1164px;margin-left:0}table .span16{float:none;width:1244px;margin-left:0}table .span17{float:none;width:1324px;margin-left:0}table .span18{float:none;width:1404px;margin-left:0}table .span19{float:none;width:1484px;margin-left:0}table .span20{float:none;width:1564px;margin-left:0}table .span21{float:none;width:1644px;margin-left:0}table .span22{float:none;width:1724px;margin-left:0}table .span23{float:none;width:1804px;margin-left:0}table .span24{float:none;width:1884px;margin-left:0}.table tbody tr.success td{background-color:#dff0d8}.table tbody tr.error td{background-color:#f2dede}.table tbody tr.info td{background-color:#d9edf7}[class^="icon-"],[class*=" icon-"]{display:inline-block;width:14px;height:14px;margin-top:1px;*margin-right:.3em;line-height:14px;vertical-align:text-top;background-image:url("../img/glyphicons-halflings.png");background-position:14px 14px;background-repeat:no-repeat}.icon-white,.nav>.active>a>[class^="icon-"],.nav>.active>a>[class*=" icon-"],.dropdown-menu>li>a:hover>[class^="icon-"],.dropdown-menu>li>a:hover>[class*=" icon-"],.dropdown-menu>.active>a>[class^="icon-"],.dropdown-menu>.active>a>[class*=" icon-"]{background-image:url("../img/glyphicons-halflings-white.png")}.icon-glass{background-position:0 0}.icon-music{background-position:-24px 0}.icon-search{background-position:-48px 0}.icon-envelope{background-position:-72px 0}.icon-heart{background-position:-96px 0}.icon-star{background-position:-120px 0}.icon-star-empty{background-position:-144px 0}.icon-user{background-position:-168px 0}.icon-film{background-position:-192px 0}.icon-th-large{background-position:-216px 0}.icon-th{background-position:-240px 0}.icon-th-list{background-position:-264px 0}.icon-ok{background-position:-288px 0}.icon-remove{background-position:-312px 0}.icon-zoom-in{background-position:-336px 0}.icon-zoom-out{background-position:-360px 0}.icon-off{background-position:-384px 0}.icon-signal{background-position:-408px 0}.icon-cog{background-position:-432px 0}.icon-trash{background-position:-456px 0}.icon-home{background-position:0 -24px}.icon-file{background-position:-24px -24px}.icon-time{background-position:-48px -24px}.icon-road{background-position:-72px -24px}.icon-download-alt{background-position:-96px -24px}.icon-download{background-position:-120px -24px}.icon-upload{background-position:-144px -24px}.icon-inbox{background-position:-168px -24px}.icon-play-circle{background-position:-192px -24px}.icon-repeat{background-position:-216px -24px}.icon-refresh{background-position:-240px -24px}.icon-list-alt{background-position:-264px -24px}.icon-lock{background-position:-287px -24px}.icon-flag{background-position:-312px -24px}.icon-headphones{background-position:-336px -24px}.icon-volume-off{background-position:-360px -24px}.icon-volume-down{background-position:-384px -24px}.icon-volume-up{background-position:-408px -24px}.icon-qrcode{background-position:-432px -24px}.icon-barcode{background-position:-456px -24px}.icon-tag{background-position:0 -48px}.icon-tags{background-position:-25px -48px}.icon-book{background-position:-48px -48px}.icon-bookmark{background-position:-72px -48px}.icon-print{background-position:-96px -48px}.icon-camera{background-position:-120px -48px}.icon-font{background-position:-144px -48px}.icon-bold{background-position:-167px -48px}.icon-italic{background-position:-192px -48px}.icon-text-height{background-position:-216px -48px}.icon-text-width{background-position:-240px -48px}.icon-align-left{background-position:-264px -48px}.icon-align-center{background-position:-288px -48px}.icon-align-right{background-position:-312px -48px}.icon-align-justify{background-position:-336px -48px}.icon-list{background-position:-360px -48px}.icon-indent-left{background-position:-384px -48px}.icon-indent-right{background-position:-408px -48px}.icon-facetime-video{background-position:-432px -48px}.icon-picture{background-position:-456px -48px}.icon-pencil{background-position:0 -72px}.icon-map-marker{background-position:-24px -72px}.icon-adjust{background-position:-48px -72px}.icon-tint{background-position:-72px -72px}.icon-edit{background-position:-96px -72px}.icon-share{background-position:-120px -72px}.icon-check{background-position:-144px -72px}.icon-move{background-position:-168px -72px}.icon-step-backward{background-position:-192px -72px}.icon-fast-backward{background-position:-216px -72px}.icon-backward{background-position:-240px -72px}.icon-play{background-position:-264px -72px}.icon-pause{background-position:-288px -72px}.icon-stop{background-position:-312px -72px}.icon-forward{background-position:-336px -72px}.icon-fast-forward{background-position:-360px -72px}.icon-step-forward{background-position:-384px -72px}.icon-eject{background-position:-408px -72px}.icon-chevron-left{background-position:-432px -72px}.icon-chevron-right{background-position:-456px -72px}.icon-plus-sign{background-position:0 -96px}.icon-minus-sign{background-position:-24px -96px}.icon-remove-sign{background-position:-48px -96px}.icon-ok-sign{background-position:-72px -96px}.icon-question-sign{background-position:-96px -96px}.icon-info-sign{background-position:-120px -96px}.icon-screenshot{background-position:-144px -96px}.icon-remove-circle{background-position:-168px -96px}.icon-ok-circle{background-position:-192px -96px}.icon-ban-circle{background-position:-216px -96px}.icon-arrow-left{background-position:-240px -96px}.icon-arrow-right{background-position:-264px -96px}.icon-arrow-up{background-position:-289px -96px}.icon-arrow-down{background-position:-312px -96px}.icon-share-alt{background-position:-336px -96px}.icon-resize-full{background-position:-360px -96px}.icon-resize-small{background-position:-384px -96px}.icon-plus{background-position:-408px -96px}.icon-minus{background-position:-433px -96px}.icon-asterisk{background-position:-456px -96px}.icon-exclamation-sign{background-position:0 -120px}.icon-gift{background-position:-24px -120px}.icon-leaf{background-position:-48px -120px}.icon-fire{background-position:-72px -120px}.icon-eye-open{background-position:-96px -120px}.icon-eye-close{background-position:-120px -120px}.icon-warning-sign{background-position:-144px -120px}.icon-plane{background-position:-168px -120px}.icon-calendar{background-position:-192px -120px}.icon-random{width:16px;background-position:-216px -120px}.icon-comment{background-position:-240px -120px}.icon-magnet{background-position:-264px -120px}.icon-chevron-up{background-position:-288px -120px}.icon-chevron-down{background-position:-313px -119px}.icon-retweet{background-position:-336px -120px}.icon-shopping-cart{background-position:-360px -120px}.icon-folder-close{background-position:-384px -120px}.icon-folder-open{width:16px;background-position:-408px -120px}.icon-resize-vertical{background-position:-432px -119px}.icon-resize-horizontal{background-position:-456px -118px}.icon-hdd{background-position:0 -144px}.icon-bullhorn{background-position:-24px -144px}.icon-bell{background-position:-48px -144px}.icon-certificate{background-position:-72px -144px}.icon-thumbs-up{background-position:-96px -144px}.icon-thumbs-down{background-position:-120px -144px}.icon-hand-right{background-position:-144px -144px}.icon-hand-left{background-position:-168px -144px}.icon-hand-up{background-position:-192px -144px}.icon-hand-down{background-position:-216px -144px}.icon-circle-arrow-right{background-position:-240px -144px}.icon-circle-arrow-left{background-position:-264px -144px}.icon-circle-arrow-up{background-position:-288px -144px}.icon-circle-arrow-down{background-position:-312px -144px}.icon-globe{background-position:-336px -144px}.icon-wrench{background-position:-360px -144px}.icon-tasks{background-position:-384px -144px}.icon-filter{background-position:-408px -144px}.icon-briefcase{background-position:-432px -144px}.icon-fullscreen{background-position:-456px -144px}.dropup,.dropdown{position:relative}.dropdown-toggle{*margin-bottom:-3px}.dropdown-toggle:active,.open .dropdown-toggle{outline:0}.caret{display:inline-block;width:0;height:0;vertical-align:top;border-top:4px solid #000;border-right:4px solid transparent;border-left:4px solid transparent;content:""}.dropdown .caret{margin-top:8px;margin-left:2px}.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:160px;padding:5px 0;margin:2px 0 0;list-style:none;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);*border-right-width:2px;*border-bottom-width:2px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.dropdown-menu.pull-right{right:0;left:auto}.dropdown-menu .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.dropdown-menu a{display:block;padding:3px 20px;clear:both;font-weight:normal;line-height:20px;color:#333;white-space:nowrap}.dropdown-menu li>a:hover,.dropdown-menu li>a:focus,.dropdown-submenu:hover>a{color:#fff;text-decoration:none;background-color:#0088cc;background-color:#0088cc;background-image:-moz-linear-gradient(top,#0088cc,#0087b3);background-image:-webkit-gradient(linear,0 0,0 100%,from(#0088cc),to(#0087b3));background-image:-webkit-linear-gradient(top,#0088cc,#0087b3);background-image:-o-linear-gradient(top,#0088cc,#0087b3);background-image:linear-gradient(to bottom,#0088cc,#0087b3);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0087b3',GradientType=0)}.dropdown-menu .active>a,.dropdown-menu .active>a:hover{color:#fff;text-decoration:none;background-color:#0088cc;background-color:#0081c2;background-image:linear-gradient(to bottom,#0088cc,#0087b3);background-image:-moz-linear-gradient(top,#0088cc,#0087b3);background-image:-webkit-gradient(linear,0 0,0 100%,from(#0088cc),to(#0087b3));background-image:-webkit-linear-gradient(top,#0088cc,#0087b3);background-image:-o-linear-gradient(top,#0088cc,#0087b3);background-repeat:repeat-x;outline:0;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0087b3',GradientType=0)}.dropdown-menu .disabled>a,.dropdown-menu .disabled>a:hover{color:#999}.dropdown-menu .disabled>a:hover{text-decoration:none;cursor:default;background-color:transparent}.open{*z-index:1000}.open>.dropdown-menu{display:block}.pull-right>.dropdown-menu{right:0;left:auto}.dropup .caret,.navbar-fixed-bottom .dropdown .caret{border-top:0;border-bottom:4px solid #000;content:"\2191"}.dropup .dropdown-menu,.navbar-fixed-bottom .dropdown .dropdown-menu{top:auto;bottom:100%;margin-bottom:1px}.dropdown-submenu{position:relative}.dropdown-submenu>.dropdown-menu{top:0;left:100%;margin-top:-6px;margin-left:-1px;-webkit-border-radius:0 6px 6px 6px;-moz-border-radius:0 6px 6px 6px;border-radius:0 6px 6px 6px}.dropdown-submenu:hover .dropdown-menu{display:block}.dropdown-submenu>a:after{display:block;float:right;width:0;height:0;margin-top:5px;margin-right:-10px;border-color:transparent;border-left-color:#ccc;border-style:solid;border-width:5px 0 5px 5px;content:" "}.dropdown-submenu:hover>a:after{border-left-color:#fff}.dropdown .dropdown-menu .nav-header{padding-right:20px;padding-left:20px}.typeahead{margin-top:2px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.well{min-height:20px;padding:19px;margin-bottom:20px;background-color:#f5f5f5;border:1px solid #e3e3e3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);box-shadow:inset 0 1px 1px rgba(0,0,0,0.05)}.well blockquote{border-color:#ddd;border-color:rgba(0,0,0,0.15)}.well-large{padding:24px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.well-small{padding:9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.fade{opacity:0;-webkit-transition:opacity .15s linear;-moz-transition:opacity .15s linear;-o-transition:opacity .15s linear;transition:opacity .15s linear}.fade.in{opacity:1}.collapse{position:relative;height:0;overflow:hidden;overflow:visible \9;-webkit-transition:height .35s ease;-moz-transition:height .35s ease;-o-transition:height .35s ease;transition:height .35s ease}.collapse.in{height:auto}.close{float:right;font-size:20px;font-weight:bold;line-height:20px;color:#000;text-shadow:0 1px 0 #fff;opacity:.2;filter:alpha(opacity=20)}.close:hover{color:#000;text-decoration:none;cursor:pointer;opacity:.4;filter:alpha(opacity=40)}button.close{padding:0;cursor:pointer;background:transparent;border:0;-webkit-appearance:none}.btn{display:inline-block;*display:inline;padding:4px 14px;margin-bottom:0;*margin-left:.3em;font-size:14px;line-height:20px;*line-height:20px;color:#333;text-align:center;text-shadow:0 1px 1px rgba(255,255,255,0.75);vertical-align:middle;cursor:pointer;background-color:#f5f5f5;*background-color:#e6e6e6;background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#e6e6e6));background-image:-webkit-linear-gradient(top,#fff,#e6e6e6);background-image:-o-linear-gradient(top,#fff,#e6e6e6);background-image:linear-gradient(to bottom,#fff,#e6e6e6);background-image:-moz-linear-gradient(top,#fff,#e6e6e6);background-repeat:repeat-x;border:1px solid #bbb;*border:0;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);border-color:#e6e6e6 #e6e6e6 #bfbfbf;border-bottom-color:#a2a2a2;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffffffff',endColorstr='#ffe6e6e6',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false);*zoom:1;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05)}.btn:hover,.btn:active,.btn.active,.btn.disabled,.btn[disabled]{color:#333;background-color:#e6e6e6;*background-color:#d9d9d9}.btn:active,.btn.active{background-color:#ccc \9}.btn:first-child{*margin-left:0}.btn:hover{color:#333;text-decoration:none;background-color:#e6e6e6;*background-color:#d9d9d9;background-position:0 -15px;-webkit-transition:background-position .1s linear;-moz-transition:background-position .1s linear;-o-transition:background-position .1s linear;transition:background-position .1s linear}.btn:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.btn.active,.btn:active{background-color:#e6e6e6;background-color:#d9d9d9 \9;background-image:none;outline:0;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn.disabled,.btn[disabled]{cursor:default;background-color:#e6e6e6;background-image:none;opacity:.65;filter:alpha(opacity=65);-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-large{padding:9px 14px;font-size:16px;line-height:normal;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px}.btn-large [class^="icon-"]{margin-top:2px}.btn-small{padding:3px 9px;font-size:12px;line-height:18px}.btn-small [class^="icon-"]{margin-top:0}.btn-mini{padding:2px 6px;font-size:11px;line-height:16px}.btn-block{display:block;width:100%;padding-right:0;padding-left:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.btn-block+.btn-block{margin-top:5px}.btn-primary.active,.btn-warning.active,.btn-danger.active,.btn-success.active,.btn-info.active,.btn-inverse.active{color:rgba(255,255,255,0.75)}.btn{border-color:#c5c5c5;border-color:rgba(0,0,0,0.15) rgba(0,0,0,0.15) rgba(0,0,0,0.25)}.btn-primary{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#006dcc;*background-color:#04c;background-image:-webkit-gradient(linear,0 0,0 100%,from(#08c),to(#04c));background-image:-webkit-linear-gradient(top,#08c,#04c);background-image:-o-linear-gradient(top,#08c,#04c);background-image:linear-gradient(to bottom,#08c,#04c);background-image:-moz-linear-gradient(top,#08c,#04c);background-repeat:repeat-x;border-color:#04c #04c #002a80;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0044cc',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-primary:hover,.btn-primary:active,.btn-primary.active,.btn-primary.disabled,.btn-primary[disabled]{color:#fff;background-color:#04c;*background-color:#003bb3}.btn-primary:active,.btn-primary.active{background-color:#039 \9}.btn-warning{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#faa732;*background-color:#f89406;background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-repeat:repeat-x;border-color:#f89406 #f89406 #ad6704;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-warning:hover,.btn-warning:active,.btn-warning.active,.btn-warning.disabled,.btn-warning[disabled]{color:#fff;background-color:#f89406;*background-color:#df8505}.btn-warning:active,.btn-warning.active{background-color:#c67605 \9}.btn-danger{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#da4f49;*background-color:#bd362f;background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#bd362f));background-image:-webkit-linear-gradient(top,#ee5f5b,#bd362f);background-image:-o-linear-gradient(top,#ee5f5b,#bd362f);background-image:linear-gradient(to bottom,#ee5f5b,#bd362f);background-image:-moz-linear-gradient(top,#ee5f5b,#bd362f);background-repeat:repeat-x;border-color:#bd362f #bd362f #802420;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffbd362f',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-danger:hover,.btn-danger:active,.btn-danger.active,.btn-danger.disabled,.btn-danger[disabled]{color:#fff;background-color:#bd362f;*background-color:#a9302a}.btn-danger:active,.btn-danger.active{background-color:#942a25 \9}.btn-success{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#5bb75b;*background-color:#51a351;background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#51a351));background-image:-webkit-linear-gradient(top,#62c462,#51a351);background-image:-o-linear-gradient(top,#62c462,#51a351);background-image:linear-gradient(to bottom,#62c462,#51a351);background-image:-moz-linear-gradient(top,#62c462,#51a351);background-repeat:repeat-x;border-color:#51a351 #51a351 #387038;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff51a351',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-success:hover,.btn-success:active,.btn-success.active,.btn-success.disabled,.btn-success[disabled]{color:#fff;background-color:#51a351;*background-color:#499249}.btn-success:active,.btn-success.active{background-color:#408140 \9}.btn-info{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#49afcd;*background-color:#2f96b4;background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#2f96b4));background-image:-webkit-linear-gradient(top,#5bc0de,#2f96b4);background-image:-o-linear-gradient(top,#5bc0de,#2f96b4);background-image:linear-gradient(to bottom,#5bc0de,#2f96b4);background-image:-moz-linear-gradient(top,#5bc0de,#2f96b4);background-repeat:repeat-x;border-color:#2f96b4 #2f96b4 #1f6377;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff2f96b4',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-info:hover,.btn-info:active,.btn-info.active,.btn-info.disabled,.btn-info[disabled]{color:#fff;background-color:#2f96b4;*background-color:#2a85a0}.btn-info:active,.btn-info.active{background-color:#24748c \9}.btn-inverse{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#363636;*background-color:#222;background-image:-webkit-gradient(linear,0 0,0 100%,from(#444),to(#222));background-image:-webkit-linear-gradient(top,#444,#222);background-image:-o-linear-gradient(top,#444,#222);background-image:linear-gradient(to bottom,#444,#222);background-image:-moz-linear-gradient(top,#444,#222);background-repeat:repeat-x;border-color:#222 #222 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff444444',endColorstr='#ff222222',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-inverse:hover,.btn-inverse:active,.btn-inverse.active,.btn-inverse.disabled,.btn-inverse[disabled]{color:#fff;background-color:#222;*background-color:#151515}.btn-inverse:active,.btn-inverse.active{background-color:#080808 \9}button.btn,input[type="submit"].btn{*padding-top:3px;*padding-bottom:3px}button.btn::-moz-focus-inner,input[type="submit"].btn::-moz-focus-inner{padding:0;border:0}button.btn.btn-large,input[type="submit"].btn.btn-large{*padding-top:7px;*padding-bottom:7px}button.btn.btn-small,input[type="submit"].btn.btn-small{*padding-top:3px;*padding-bottom:3px}button.btn.btn-mini,input[type="submit"].btn.btn-mini{*padding-top:1px;*padding-bottom:1px}.btn-link,.btn-link:active{background-color:transparent;background-image:none;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-link{color:#08c;cursor:pointer;border-color:transparent;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-link:hover{color:#005580;text-decoration:underline;background-color:transparent}.btn-group{position:relative;*margin-left:.3em;font-size:0;white-space:nowrap}.btn-group:first-child{*margin-left:0}.btn-group+.btn-group{margin-left:5px}.btn-toolbar{margin-top:10px;margin-bottom:10px;font-size:0}.btn-toolbar .btn-group{display:inline-block;*display:inline;*zoom:1}.btn-toolbar .btn+.btn,.btn-toolbar .btn-group+.btn,.btn-toolbar .btn+.btn-group{margin-left:5px}.btn-group>.btn{position:relative;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group>.btn+.btn{margin-left:-1px}.btn-group>.btn,.btn-group>.dropdown-menu{font-size:14px}.btn-group>.btn-mini{font-size:11px}.btn-group>.btn-small{font-size:12px}.btn-group>.btn-large{font-size:16px}.btn-group>.btn:first-child{margin-left:0;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-bottomleft:4px;-moz-border-radius-topleft:4px}.btn-group>.btn:last-child,.btn-group>.dropdown-toggle{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-bottomright:4px}.btn-group>.btn.large:first-child{margin-left:0;-webkit-border-bottom-left-radius:6px;border-bottom-left-radius:6px;-webkit-border-top-left-radius:6px;border-top-left-radius:6px;-moz-border-radius-bottomleft:6px;-moz-border-radius-topleft:6px}.btn-group>.btn.large:last-child,.btn-group>.large.dropdown-toggle{-webkit-border-top-right-radius:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;border-bottom-right-radius:6px;-moz-border-radius-topright:6px;-moz-border-radius-bottomright:6px}.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active{z-index:2}.btn-group .dropdown-toggle:active,.btn-group.open .dropdown-toggle{outline:0}.btn-group>.btn+.dropdown-toggle{*padding-top:5px;padding-right:8px;*padding-bottom:5px;padding-left:8px;-webkit-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05)}.btn-group>.btn-mini+.dropdown-toggle{*padding-top:2px;padding-right:5px;*padding-bottom:2px;padding-left:5px}.btn-group>.btn-small+.dropdown-toggle{*padding-top:5px;*padding-bottom:4px}.btn-group>.btn-large+.dropdown-toggle{*padding-top:7px;padding-right:12px;*padding-bottom:7px;padding-left:12px}.btn-group.open .dropdown-toggle{background-image:none;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn-group.open .btn.dropdown-toggle{background-color:#e6e6e6}.btn-group.open .btn-primary.dropdown-toggle{background-color:#04c}.btn-group.open .btn-warning.dropdown-toggle{background-color:#f89406}.btn-group.open .btn-danger.dropdown-toggle{background-color:#bd362f}.btn-group.open .btn-success.dropdown-toggle{background-color:#51a351}.btn-group.open .btn-info.dropdown-toggle{background-color:#2f96b4}.btn-group.open .btn-inverse.dropdown-toggle{background-color:#222}.btn .caret{margin-top:8px;margin-left:0}.btn-mini .caret,.btn-small .caret,.btn-large .caret{margin-top:6px}.btn-large .caret{border-top-width:5px;border-right-width:5px;border-left-width:5px}.dropup .btn-large .caret{border-top:0;border-bottom:5px solid #000}.btn-primary .caret,.btn-warning .caret,.btn-danger .caret,.btn-info .caret,.btn-success .caret,.btn-inverse .caret{border-top-color:#fff;border-bottom-color:#fff}.btn-group-vertical{display:inline-block;*display:inline;*zoom:1}.btn-group-vertical .btn{display:block;float:none;width:100%;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group-vertical .btn+.btn{margin-top:-1px;margin-left:0}.btn-group-vertical .btn:first-child{-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.btn-group-vertical .btn:last-child{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.btn-group-vertical .btn-large:first-child{-webkit-border-radius:6px 6px 0 0;-moz-border-radius:6px 6px 0 0;border-radius:6px 6px 0 0}.btn-group-vertical .btn-large:last-child{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.alert{padding:8px 35px 8px 14px;margin-bottom:20px;color:#c09853;text-shadow:0 1px 0 rgba(255,255,255,0.5);background-color:#fcf8e3;border:1px solid #fbeed5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.alert h4{margin:0}.alert .close{position:relative;top:-2px;right:-21px;line-height:20px}.alert-success{color:#468847;background-color:#dff0d8;border-color:#d6e9c6}.alert-danger,.alert-error{color:#b94a48;background-color:#f2dede;border-color:#eed3d7}.alert-info{color:#3a87ad;background-color:#d9edf7;border-color:#bce8f1}.alert-block{padding-top:14px;padding-bottom:14px}.alert-block>p,.alert-block>ul{margin-bottom:0}.alert-block p+p{margin-top:5px}.nav{margin-bottom:20px;margin-left:0;list-style:none}.nav>li>a{display:block}.nav>li>a:hover{text-decoration:none;background-color:#eee}.nav>.pull-right{float:right}.nav-header{display:block;padding:3px 15px;font-size:11px;font-weight:bold;line-height:20px;color:#999;text-shadow:0 1px 0 rgba(255,255,255,0.5);text-transform:uppercase}.nav li+.nav-header{margin-top:9px}.nav-list{padding-right:15px;padding-left:15px;margin-bottom:0}.nav-list>li>a,.nav-list .nav-header{margin-right:-15px;margin-left:-15px;text-shadow:0 1px 0 rgba(255,255,255,0.5)}.nav-list>li>a{padding:3px 15px}.nav-list>.active>a,.nav-list>.active>a:hover{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.2);background-color:#08c}.nav-list [class^="icon-"]{margin-right:2px}.nav-list .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.nav-tabs,.nav-pills{*zoom:1}.nav-tabs:before,.nav-pills:before,.nav-tabs:after,.nav-pills:after{display:table;line-height:0;content:""}.nav-tabs:after,.nav-pills:after{clear:both}.nav-tabs>li,.nav-pills>li{float:left}.nav-tabs>li>a,.nav-pills>li>a{padding-right:12px;padding-left:12px;margin-right:2px;line-height:14px}.nav-tabs{border-bottom:1px solid #ddd}.nav-tabs>li{margin-bottom:-1px}.nav-tabs>li>a{padding-top:8px;padding-bottom:8px;line-height:20px;border:1px solid transparent;-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.nav-tabs>li>a:hover{border-color:#eee #eee #ddd}.nav-tabs>.active>a,.nav-tabs>.active>a:hover{color:#555;cursor:default;background-color:#fff;border:1px solid #ddd;border-bottom-color:transparent}.nav-pills>li>a{padding-top:8px;padding-bottom:8px;margin-top:2px;margin-bottom:2px;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px}.nav-pills>.active>a,.nav-pills>.active>a:hover{color:#fff;background-color:#08c}.nav-stacked>li{float:none}.nav-stacked>li>a{margin-right:0}.nav-tabs.nav-stacked{border-bottom:0}.nav-tabs.nav-stacked>li>a{border:1px solid #ddd;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.nav-tabs.nav-stacked>li:first-child>a{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-topleft:4px}.nav-tabs.nav-stacked>li:last-child>a{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomright:4px;-moz-border-radius-bottomleft:4px}.nav-tabs.nav-stacked>li>a:hover{z-index:2;border-color:#ddd}.nav-pills.nav-stacked>li>a{margin-bottom:3px}.nav-pills.nav-stacked>li:last-child>a{margin-bottom:1px}.nav-tabs .dropdown-menu{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.nav-pills .dropdown-menu{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.nav .dropdown-toggle .caret{margin-top:6px;border-top-color:#08c;border-bottom-color:#08c}.nav .dropdown-toggle:hover .caret{border-top-color:#005580;border-bottom-color:#005580}.nav-tabs .dropdown-toggle .caret{margin-top:8px}.nav .active .dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.nav-tabs .active .dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.nav>.dropdown.active>a:hover{cursor:pointer}.nav-tabs .open .dropdown-toggle,.nav-pills .open .dropdown-toggle,.nav>li.dropdown.open.active>a:hover{color:#fff;background-color:#999;border-color:#999}.nav li.dropdown.open .caret,.nav li.dropdown.open.active .caret,.nav li.dropdown.open a:hover .caret{border-top-color:#fff;border-bottom-color:#fff;opacity:1;filter:alpha(opacity=100)}.tabs-stacked .open>a:hover{border-color:#999}.tabbable{*zoom:1}.tabbable:before,.tabbable:after{display:table;line-height:0;content:""}.tabbable:after{clear:both}.tab-content{overflow:auto}.tabs-below>.nav-tabs,.tabs-right>.nav-tabs,.tabs-left>.nav-tabs{border-bottom:0}.tab-content>.tab-pane,.pill-content>.pill-pane{display:none}.tab-content>.active,.pill-content>.active{display:block}.tabs-below>.nav-tabs{border-top:1px solid #ddd}.tabs-below>.nav-tabs>li{margin-top:-1px;margin-bottom:0}.tabs-below>.nav-tabs>li>a{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.tabs-below>.nav-tabs>li>a:hover{border-top-color:#ddd;border-bottom-color:transparent}.tabs-below>.nav-tabs>.active>a,.tabs-below>.nav-tabs>.active>a:hover{border-color:transparent #ddd #ddd #ddd}.tabs-left>.nav-tabs>li,.tabs-right>.nav-tabs>li{float:none}.tabs-left>.nav-tabs>li>a,.tabs-right>.nav-tabs>li>a{min-width:74px;margin-right:0;margin-bottom:3px}.tabs-left>.nav-tabs{float:left;margin-right:19px;border-right:1px solid #ddd}.tabs-left>.nav-tabs>li>a{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.tabs-left>.nav-tabs>li>a:hover{border-color:#eee #ddd #eee #eee}.tabs-left>.nav-tabs .active>a,.tabs-left>.nav-tabs .active>a:hover{border-color:#ddd transparent #ddd #ddd;*border-right-color:#fff}.tabs-right>.nav-tabs{float:right;margin-left:19px;border-left:1px solid #ddd}.tabs-right>.nav-tabs>li>a{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.tabs-right>.nav-tabs>li>a:hover{border-color:#eee #eee #eee #ddd}.tabs-right>.nav-tabs .active>a,.tabs-right>.nav-tabs .active>a:hover{border-color:#ddd #ddd #ddd transparent;*border-left-color:#fff}.nav>.disabled>a{color:#999}.nav>.disabled>a:hover{text-decoration:none;cursor:default;background-color:transparent}.navbar{*position:relative;*z-index:2;margin-bottom:20px;overflow:visible;color:#555}.navbar-inner{min-height:40px;padding-right:20px;padding-left:20px;background-color:#fafafa;background-image:-moz-linear-gradient(top,#fff,#d6f2e2);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#d6f2e2));background-image:-webkit-linear-gradient(top,#fff,#d2e2f2);background-image:-o-linear-gradient(top,#fff,#d2e2f2);background-image:linear-gradient(to bottom,#fff,#d2e2f2);background-repeat:repeat-x;border:1px solid #d4d4d4;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffffffff',endColorstr='#ffd2e2f2',GradientType=0);-webkit-box-shadow:0 1px 4px rgba(0,0,0,0.065);-moz-box-shadow:0 1px 4px rgba(0,0,0,0.065);box-shadow:0 1px 4px rgba(0,0,0,0.065)}.navbar .container{width:auto}.nav-collapse.collapse{height:auto}.navbar .brand{display:block;float:left;padding:10px 20px 10px;margin-left:-20px;font-size:20px;font-weight:200;color:#555;text-shadow:0 1px 0 #fff}.navbar .brand:hover{text-decoration:none}.navbar-text{margin-bottom:0;line-height:40px}.navbar-link{color:#555}.navbar-link:hover{color:#333}.navbar .divider-vertical{height:40px;margin:0 9px;border-right:1px solid #fff;border-left:1px solid #f2f2f2}.navbar .btn,.navbar .btn-group{margin-top:6px}.navbar .btn-group .btn{margin:0}.navbar-form{margin-bottom:0;*zoom:1}.navbar-form:before,.navbar-form:after{display:table;line-height:0;content:""}.navbar-form:after{clear:both}.navbar-form input,.navbar-form select,.navbar-form .radio,.navbar-form .checkbox{margin-top:5px}.navbar-form input,.navbar-form select,.navbar-form .btn{display:inline-block;margin-bottom:0}.navbar-form input[type="image"],.navbar-form input[type="checkbox"],.navbar-form input[type="radio"]{margin-top:3px}.navbar-form .input-append,.navbar-form .input-prepend{margin-top:6px;white-space:nowrap}.navbar-form .input-append input,.navbar-form .input-prepend input{margin-top:0}.navbar-search{position:relative;float:left;margin-top:5px;margin-bottom:0}.navbar-search .search-query{padding:4px 14px;margin-bottom:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:13px;font-weight:normal;line-height:1;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.navbar-static-top{position:static;width:100%;margin-bottom:0}.navbar-static-top .navbar-inner{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-fixed-top,.navbar-fixed-bottom{position:fixed;right:0;left:0;z-index:1030;margin-bottom:0}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner,.navbar-static-top .navbar-inner{border:0}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding-right:0;padding-left:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.navbar-fixed-top{top:0}.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.1),0 1px 10px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.1),0 1px 10px rgba(0,0,0,0.1);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.1),0 1px 10px rgba(0,0,0,0.1)}.navbar-fixed-bottom{bottom:0}.navbar-fixed-bottom .navbar-inner{-webkit-box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1);box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1)}.navbar .nav{position:relative;left:0;display:block;float:left;margin:0 10px 0 0}.navbar .nav.pull-right{float:right}.navbar .nav>li{float:left}.navbar .nav>li>a{float:none;padding:10px 15px 10px;color:#555;text-decoration:none;text-shadow:0 1px 0 #fff}.navbar .nav .dropdown-toggle .caret{margin-top:8px}.navbar .nav>li>a:focus,.navbar .nav>li>a:hover{color:#333;text-decoration:none;background-color:transparent}.navbar .nav>.active>a,.navbar .nav>.active>a:hover,.navbar .nav>.active>a:focus{color:#555;text-decoration:none;background-color:#e5e5e5;-webkit-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);-moz-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);box-shadow:inset 0 3px 8px rgba(0,0,0,0.125)}.navbar .btn-navbar{display:none;float:right;padding:7px 10px;margin-right:5px;margin-left:5px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#ededed;*background-color:#e5e5e5;background-image:-webkit-gradient(linear,0 0,0 100%,from(#f2f2f2),to(#e5e5e5));background-image:-webkit-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:-o-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:linear-gradient(to bottom,#f2f2f2,#e5e5e5);background-image:-moz-linear-gradient(top,#f2f2f2,#e5e5e5);background-repeat:repeat-x;border-color:#e5e5e5 #e5e5e5 #bfbfbf;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fff2f2f2',endColorstr='#ffe5e5e5',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075)}.navbar .btn-navbar:hover,.navbar .btn-navbar:active,.navbar .btn-navbar.active,.navbar .btn-navbar.disabled,.navbar .btn-navbar[disabled]{color:#fff;background-color:#e5e5e5;*background-color:#d9d9d9}.navbar .btn-navbar:active,.navbar .btn-navbar.active{background-color:#ccc \9}.navbar .btn-navbar .icon-bar{display:block;width:18px;height:2px;background-color:#f5f5f5;-webkit-border-radius:1px;-moz-border-radius:1px;border-radius:1px;-webkit-box-shadow:0 1px 0 rgba(0,0,0,0.25);-moz-box-shadow:0 1px 0 rgba(0,0,0,0.25);box-shadow:0 1px 0 rgba(0,0,0,0.25)}.btn-navbar .icon-bar+.icon-bar{margin-top:3px}.navbar .nav>li>.dropdown-menu:before{position:absolute;top:-7px;left:9px;display:inline-block;border-right:7px solid transparent;border-bottom:7px solid #ccc;border-left:7px solid transparent;border-bottom-color:rgba(0,0,0,0.2);content:''}.navbar .nav>li>.dropdown-menu:after{position:absolute;top:-6px;left:10px;display:inline-block;border-right:6px solid transparent;border-bottom:6px solid #fff;border-left:6px solid transparent;content:''}.navbar-fixed-bottom .nav>li>.dropdown-menu:before{top:auto;bottom:-7px;border-top:7px solid #ccc;border-bottom:0;border-top-color:rgba(0,0,0,0.2)}.navbar-fixed-bottom .nav>li>.dropdown-menu:after{top:auto;bottom:-6px;border-top:6px solid #fff;border-bottom:0}.navbar .nav li.dropdown.open>.dropdown-toggle,.navbar .nav li.dropdown.active>.dropdown-toggle,.navbar .nav li.dropdown.open.active>.dropdown-toggle{color:#555;background-color:#e5e5e5}.navbar .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .nav li.dropdown.open>.dropdown-toggle .caret,.navbar .nav li.dropdown.active>.dropdown-toggle .caret,.navbar .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .pull-right>li>.dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right{right:0;left:auto}.navbar .pull-right>li>.dropdown-menu:before,.navbar .nav>li>.dropdown-menu.pull-right:before{right:12px;left:auto}.navbar .pull-right>li>.dropdown-menu:after,.navbar .nav>li>.dropdown-menu.pull-right:after{right:13px;left:auto}.navbar .pull-right>li>.dropdown-menu .dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right .dropdown-menu{right:100%;left:auto;margin-right:-1px;margin-left:0;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px}.navbar-inverse{color:#999}.navbar-inverse .navbar-inner{background-color:#1b1b1b;background-image:-moz-linear-gradient(top,#222,#111);background-image:-webkit-gradient(linear,0 0,0 100%,from(#222),to(#111));background-image:-webkit-linear-gradient(top,#222,#111);background-image:-o-linear-gradient(top,#222,#111);background-image:linear-gradient(to bottom,#222,#111);background-repeat:repeat-x;border-color:#252525;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff222222',endColorstr='#ff111111',GradientType=0)}.navbar-inverse .brand,.navbar-inverse .nav>li>a{color:#999;text-shadow:0 -1px 0 rgba(0,0,0,0.25)}.navbar-inverse .brand:hover,.navbar-inverse .nav>li>a:hover{color:#fff}.navbar-inverse .nav>li>a:focus,.navbar-inverse .nav>li>a:hover{color:#fff;background-color:transparent}.navbar-inverse .nav .active>a,.navbar-inverse .nav .active>a:hover,.navbar-inverse .nav .active>a:focus{color:#fff;background-color:#111}.navbar-inverse .navbar-link{color:#999}.navbar-inverse .navbar-link:hover{color:#fff}.navbar-inverse .divider-vertical{border-right-color:#222;border-left-color:#111}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle{color:#fff;background-color:#111}.navbar-inverse .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#999;border-bottom-color:#999}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.navbar-inverse .navbar-search .search-query{color:#fff;background-color:#515151;border-color:#111;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-webkit-transition:none;-moz-transition:none;-o-transition:none;transition:none}.navbar-inverse .navbar-search .search-query:-moz-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:-ms-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query::-webkit-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:focus,.navbar-inverse .navbar-search .search-query.focused{padding:5px 15px;color:#333;text-shadow:0 1px 0 #fff;background-color:#fff;border:0;outline:0;-webkit-box-shadow:0 0 3px rgba(0,0,0,0.15);-moz-box-shadow:0 0 3px rgba(0,0,0,0.15);box-shadow:0 0 3px rgba(0,0,0,0.15)}.navbar-inverse .btn-navbar{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e0e0e;*background-color:#040404;background-image:-webkit-gradient(linear,0 0,0 100%,from(#151515),to(#040404));background-image:-webkit-linear-gradient(top,#151515,#040404);background-image:-o-linear-gradient(top,#151515,#040404);background-image:linear-gradient(to bottom,#151515,#040404);background-image:-moz-linear-gradient(top,#151515,#040404);background-repeat:repeat-x;border-color:#040404 #040404 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff151515',endColorstr='#ff040404',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.navbar-inverse .btn-navbar:hover,.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active,.navbar-inverse .btn-navbar.disabled,.navbar-inverse .btn-navbar[disabled]{color:#fff;background-color:#040404;*background-color:#000}.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active{background-color:#000 \9}.breadcrumb{padding:8px 15px;margin:0 0 20px;list-style:none;background-color:#f5f5f5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.breadcrumb li{display:inline-block;*display:inline;text-shadow:0 1px 0 #fff;*zoom:1}.breadcrumb .divider{padding:0 5px;color:#ccc}.breadcrumb .active{color:#999}.pagination{height:40px;margin:20px 0}.pagination ul{display:inline-block;*display:inline;margin-bottom:0;margin-left:0;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;*zoom:1;-webkit-box-shadow:0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:0 1px 2px rgba(0,0,0,0.05);box-shadow:0 1px 2px rgba(0,0,0,0.05)}.pagination li{display:inline}.pagination a,.pagination span{float:left;padding:0 14px;line-height:38px;text-decoration:none;background-color:#fff;border:1px solid #ddd;border-left-width:0}.pagination a:hover,.pagination .active a,.pagination .active span{background-color:#f5f5f5}.pagination .active a,.pagination .active span{color:#999;cursor:default}.pagination .disabled span,.pagination .disabled a,.pagination .disabled a:hover{color:#999;cursor:default;background-color:transparent}.pagination li:first-child a,.pagination li:first-child span{border-left-width:1px;-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.pagination li:last-child a,.pagination li:last-child span{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.pagination-centered{text-align:center}.pagination-right{text-align:right}.pager{margin:20px 0;text-align:center;list-style:none;*zoom:1}.pager:before,.pager:after{display:table;line-height:0;content:""}.pager:after{clear:both}.pager li{display:inline}.pager a{display:inline-block;padding:5px 14px;background-color:#fff;border:1px solid #ddd;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.pager a:hover{text-decoration:none;background-color:#f5f5f5}.pager .next a{float:right}.pager .previous a{float:left}.pager .disabled a,.pager .disabled a:hover{color:#999;cursor:default;background-color:#fff}.modal-open .dropdown-menu{z-index:2050}.modal-open .dropdown.open{*z-index:2050}.modal-open .popover{z-index:2060}.modal-open .tooltip{z-index:2080}.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000}.modal-backdrop.fade{opacity:0}.modal-backdrop,.modal-backdrop.fade.in{opacity:.8;filter:alpha(opacity=80)}.modal{position:fixed;top:50%;left:50%;z-index:1050;width:560px;margin:-250px 0 0 -280px;overflow:auto;background-color:#fff;border:1px solid #999;border:1px solid rgba(0,0,0,0.3);*border:1px solid #999;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 3px 7px rgba(0,0,0,0.3);-moz-box-shadow:0 3px 7px rgba(0,0,0,0.3);box-shadow:0 3px 7px rgba(0,0,0,0.3);-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box}.modal.fade{top:-25%;-webkit-transition:opacity .3s linear,top .3s ease-out;-moz-transition:opacity .3s linear,top .3s ease-out;-o-transition:opacity .3s linear,top .3s ease-out;transition:opacity .3s linear,top .3s ease-out}.modal.fade.in{top:50%}.modal-header{padding:9px 15px;border-bottom:1px solid #eee}.modal-header .close{margin-top:2px}.modal-header h3{margin:0;line-height:30px}.modal-body{max-height:400px;padding:15px;overflow-y:auto}.modal-form{margin-bottom:0}.modal-footer{padding:14px 15px 15px;margin-bottom:0;text-align:right;background-color:#f5f5f5;border-top:1px solid #ddd;-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;*zoom:1;-webkit-box-shadow:inset 0 1px 0 #fff;-moz-box-shadow:inset 0 1px 0 #fff;box-shadow:inset 0 1px 0 #fff}.modal-footer:before,.modal-footer:after{display:table;line-height:0;content:""}.modal-footer:after{clear:both}.modal-footer .btn+.btn{margin-bottom:0;margin-left:5px}.modal-footer .btn-group .btn+.btn{margin-left:-1px}.tooltip{position:absolute;z-index:1030;display:block;padding:5px;font-size:11px;opacity:0;filter:alpha(opacity=0);visibility:visible}.tooltip.in{opacity:.8;filter:alpha(opacity=80)}.tooltip.top{margin-top:-3px}.tooltip.right{margin-left:3px}.tooltip.bottom{margin-top:3px}.tooltip.left{margin-left:-3px}.tooltip-inner{max-width:200px;padding:3px 8px;color:#fff;text-align:center;text-decoration:none;background-color:#000;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.tooltip-arrow{position:absolute;width:0;height:0;border-color:transparent;border-style:solid}.tooltip.top .tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-top-color:#000;border-width:5px 5px 0}.tooltip.right .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-right-color:#000;border-width:5px 5px 5px 0}.tooltip.left .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-left-color:#000;border-width:5px 0 5px 5px}.tooltip.bottom .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-bottom-color:#000;border-width:0 5px 5px}.popover{position:absolute;top:0;left:0;z-index:1010;display:none;width:236px;padding:1px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.popover.top{margin-bottom:10px}.popover.right{margin-left:10px}.popover.bottom{margin-top:10px}.popover.left{margin-right:10px}.popover-title{padding:8px 14px;margin:0;font-size:14px;font-weight:normal;line-height:18px;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;-webkit-border-radius:5px 5px 0 0;-moz-border-radius:5px 5px 0 0;border-radius:5px 5px 0 0}.popover-content{padding:9px 14px}.popover-content p,.popover-content ul,.popover-content ol{margin-bottom:0}.popover .arrow,.popover .arrow:after{position:absolute;display:inline-block;width:0;height:0;border-color:transparent;border-style:solid}.popover .arrow:after{z-index:-1;content:""}.popover.top .arrow{bottom:-10px;left:50%;margin-left:-10px;border-top-color:#fff;border-width:10px 10px 0}.popover.top .arrow:after{bottom:-1px;left:-11px;border-top-color:rgba(0,0,0,0.25);border-width:11px 11px 0}.popover.right .arrow{top:50%;left:-10px;margin-top:-10px;border-right-color:#fff;border-width:10px 10px 10px 0}.popover.right .arrow:after{bottom:-11px;left:-1px;border-right-color:rgba(0,0,0,0.25);border-width:11px 11px 11px 0}.popover.bottom .arrow{top:-10px;left:50%;margin-left:-10px;border-bottom-color:#fff;border-width:0 10px 10px}.popover.bottom .arrow:after{top:-1px;left:-11px;border-bottom-color:rgba(0,0,0,0.25);border-width:0 11px 11px}.popover.left .arrow{top:50%;right:-10px;margin-top:-10px;border-left-color:#fff;border-width:10px 0 10px 10px}.popover.left .arrow:after{right:-1px;bottom:-11px;border-left-color:rgba(0,0,0,0.25);border-width:11px 0 11px 11px}.thumbnails{margin-left:-20px;list-style:none;*zoom:1}.thumbnails:before,.thumbnails:after{display:table;line-height:0;content:""}.thumbnails:after{clear:both}.row-fluid .thumbnails{margin-left:0}.thumbnails>li{float:left;margin-bottom:20px;margin-left:20px}.thumbnail{display:block;padding:4px;line-height:20px;border:1px solid #ddd;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.055);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.055);box-shadow:0 1px 3px rgba(0,0,0,0.055);-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;-o-transition:all .2s ease-in-out;transition:all .2s ease-in-out}a.thumbnail:hover{border-color:#08c;-webkit-box-shadow:0 1px 4px rgba(0,105,214,0.25);-moz-box-shadow:0 1px 4px rgba(0,105,214,0.25);box-shadow:0 1px 4px rgba(0,105,214,0.25)}.thumbnail>img{display:block;max-width:100%;margin-right:auto;margin-left:auto}.thumbnail .caption{padding:9px;color:#555}.label,.badge{font-size:11.844px;font-weight:bold;line-height:14px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);white-space:nowrap;vertical-align:baseline;background-color:#999}.label{padding:1px 4px 2px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.badge{padding:1px 9px 2px;-webkit-border-radius:9px;-moz-border-radius:9px;border-radius:9px}a.label:hover,a.badge:hover{color:#fff;text-decoration:none;cursor:pointer}.label-important,.badge-important{background-color:#b94a48}.label-important[href],.badge-important[href]{background-color:#953b39}.label-warning,.badge-warning{background-color:#f89406}.label-warning[href],.badge-warning[href]{background-color:#c67605}.label-success,.badge-success{background-color:#468847}.label-success[href],.badge-success[href]{background-color:#356635}.label-info,.badge-info{background-color:#3a87ad}.label-info[href],.badge-info[href]{background-color:#2d6987}.label-inverse,.badge-inverse{background-color:#333}.label-inverse[href],.badge-inverse[href]{background-color:#1a1a1a}.btn .label,.btn .badge{position:relative;top:-1px}.btn-mini .label,.btn-mini .badge{top:0}@-webkit-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-moz-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-ms-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-o-keyframes progress-bar-stripes{from{background-position:0 0}to{background-position:40px 0}}@keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}.progress{height:20px;margin-bottom:20px;overflow:hidden;background-color:#f7f7f7;background-image:-moz-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#f5f5f5),to(#f9f9f9));background-image:-webkit-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-o-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:linear-gradient(to bottom,#f5f5f5,#f9f9f9);background-repeat:repeat-x;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fff5f5f5',endColorstr='#fff9f9f9',GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1)}.progress .bar{float:left;width:0;height:100%;font-size:12px;color:#fff;text-align:center;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e90d2;background-image:-moz-linear-gradient(top,#149bdf,#0480be);background-image:-webkit-gradient(linear,0 0,0 100%,from(#149bdf),to(#0480be));background-image:-webkit-linear-gradient(top,#149bdf,#0480be);background-image:-o-linear-gradient(top,#149bdf,#0480be);background-image:linear-gradient(to bottom,#149bdf,#0480be);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff149bdf',endColorstr='#ff0480be',GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;-webkit-transition:width .6s ease;-moz-transition:width .6s ease;-o-transition:width .6s ease;transition:width .6s ease}.progress .bar+.bar{-webkit-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15)}.progress-striped .bar{background-color:#149bdf;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);-webkit-background-size:40px 40px;-moz-background-size:40px 40px;-o-background-size:40px 40px;background-size:40px 40px}.progress.active .bar{-webkit-animation:progress-bar-stripes 2s linear infinite;-moz-animation:progress-bar-stripes 2s linear infinite;-ms-animation:progress-bar-stripes 2s linear infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite}.progress-danger .bar,.progress .bar-danger{background-color:#dd514c;background-image:-moz-linear-gradient(top,#ee5f5b,#c43c35);background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#c43c35));background-image:-webkit-linear-gradient(top,#ee5f5b,#c43c35);background-image:-o-linear-gradient(top,#ee5f5b,#c43c35);background-image:linear-gradient(to bottom,#ee5f5b,#c43c35);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffc43c35',GradientType=0)}.progress-danger.progress-striped .bar,.progress-striped .bar-danger{background-color:#ee5f5b;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-success .bar,.progress .bar-success{background-color:#5eb95e;background-image:-moz-linear-gradient(top,#62c462,#57a957);background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#57a957));background-image:-webkit-linear-gradient(top,#62c462,#57a957);background-image:-o-linear-gradient(top,#62c462,#57a957);background-image:linear-gradient(to bottom,#62c462,#57a957);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff57a957',GradientType=0)}.progress-success.progress-striped .bar,.progress-striped .bar-success{background-color:#62c462;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-info .bar,.progress .bar-info{background-color:#4bb1cf;background-image:-moz-linear-gradient(top,#5bc0de,#339bb9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#339bb9));background-image:-webkit-linear-gradient(top,#5bc0de,#339bb9);background-image:-o-linear-gradient(top,#5bc0de,#339bb9);background-image:linear-gradient(to bottom,#5bc0de,#339bb9);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff339bb9',GradientType=0)}.progress-info.progress-striped .bar,.progress-striped .bar-info{background-color:#5bc0de;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-warning .bar,.progress .bar-warning{background-color:#faa732;background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0)}.progress-warning.progress-striped .bar,.progress-striped .bar-warning{background-color:#fbb450;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.accordion{margin-bottom:20px}.accordion-group{margin-bottom:2px;border:1px solid #e5e5e5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.accordion-heading{border-bottom:0}.accordion-heading .accordion-toggle{display:block;padding:8px 15px}.accordion-toggle{cursor:pointer}.accordion-inner{padding:9px 15px;border-top:1px solid #e5e5e5}.carousel{position:relative;margin-bottom:20px;line-height:1}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel .item{position:relative;display:none;-webkit-transition:.6s ease-in-out left;-moz-transition:.6s ease-in-out left;-o-transition:.6s ease-in-out left;transition:.6s ease-in-out left}.carousel .item>img{display:block;line-height:1}.carousel .active,.carousel .next,.carousel .prev{display:block}.carousel .active{left:0}.carousel .next,.carousel .prev{position:absolute;top:0;width:100%}.carousel .next{left:100%}.carousel .prev{left:-100%}.carousel .next.left,.carousel .prev.right{left:0}.carousel .active.left{left:-100%}.carousel .active.right{left:100%}.carousel-control{position:absolute;top:40%;left:15px;width:40px;height:40px;margin-top:-20px;font-size:60px;font-weight:100;line-height:30px;color:#fff;text-align:center;background:#222;border:3px solid #fff;-webkit-border-radius:23px;-moz-border-radius:23px;border-radius:23px;opacity:.5;filter:alpha(opacity=50)}.carousel-control.right{right:15px;left:auto}.carousel-control:hover{color:#fff;text-decoration:none;opacity:.9;filter:alpha(opacity=90)}.carousel-caption{position:absolute;right:0;bottom:0;left:0;padding:15px;background:#333;background:rgba(0,0,0,0.75)}.carousel-caption h4,.carousel-caption p{line-height:20px;color:#fff}.carousel-caption h4{margin:0 0 5px}.carousel-caption p{margin-bottom:0}.hero-unit{padding:60px;margin-bottom:30px;background-color:#eee;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.hero-unit h1{margin-bottom:0;font-size:60px;line-height:1;letter-spacing:-1px;color:inherit}.hero-unit p{font-size:18px;font-weight:200;line-height:30px;color:inherit}.pull-right{float:right}.pull-left{float:left}.hide{display:none}.show{display:block}.invisible{visibility:hidden}.affix{position:fixed}
diff --git a/docs/ec2-scripts.md b/docs/ec2-scripts.md
index eab8a0ff20..da0c06e2a6 100644
--- a/docs/ec2-scripts.md
+++ b/docs/ec2-scripts.md
@@ -4,10 +4,11 @@ title: Running Spark on EC2
---
The `spark-ec2` script, located in Spark's `ec2` directory, allows you
-to launch, manage and shut down Spark clusters on Amazon EC2. It automatically sets up Mesos, Spark and HDFS
-on the cluster for you.
-This guide describes how to use `spark-ec2` to launch clusters, how to run jobs on them, and how to shut them down.
-It assumes you've already signed up for an EC2 account on the [Amazon Web Services site](http://aws.amazon.com/).
+to launch, manage and shut down Spark clusters on Amazon EC2. It automatically
+sets up Spark, Shark and HDFS on the cluster for you. This guide describes
+how to use `spark-ec2` to launch clusters, how to run jobs on them, and how
+to shut them down. It assumes you've already signed up for an EC2 account
+on the [Amazon Web Services site](http://aws.amazon.com/).
`spark-ec2` is designed to manage multiple named clusters. You can
launch a new cluster (telling the script its size and giving it a name),
@@ -59,18 +60,22 @@ RAM). Refer to the Amazon pages about [EC2 instance
types](http://aws.amazon.com/ec2/instance-types) and [EC2
pricing](http://aws.amazon.com/ec2/#pricing) for information about other
instance types.
+- `--region=<EC2_REGION>` specifies an EC2 region in which to launch
+instances. The default region is `us-east-1`.
- `--zone=<EC2_ZONE>` can be used to specify an EC2 availability zone
to launch instances in. Sometimes, you will get an error because there
is not enough capacity in one zone, and you should try to launch in
-another. This happens mostly with the `m1.large` instance types;
-extra-large (both `m1.xlarge` and `c1.xlarge`) instances tend to be more
-available.
+another.
- `--ebs-vol-size=GB` will attach an EBS volume with a given amount
of space to each node so that you can have a persistent HDFS cluster
on your nodes across cluster restarts (see below).
- `--spot-price=PRICE` will launch the worker nodes as
[Spot Instances](http://aws.amazon.com/ec2/spot-instances/),
bidding for the given maximum price (in dollars).
+- `--spark-version=VERSION` will pre-load the cluster with the
+ specified version of Spark. VERSION can be a version number
+ (e.g. "0.7.3") or a specific git hash. By default, a recent
+ version will be used.
- If one of your launches fails due to e.g. not having the right
permissions on your private key file, you can run `launch` with the
`--resume` option to restart the setup process on an existing cluster.
@@ -99,9 +104,8 @@ permissions on your private key file, you can run `launch` with the
`spark-ec2` to attach a persistent EBS volume to each node for
storing the persistent HDFS.
- Finally, if you get errors while running your jobs, look at the slave's logs
- for that job inside of the Mesos work directory (/mnt/mesos-work). You can
- also view the status of the cluster using the Mesos web UI
- (`http://<master-hostname>:8080`).
+ for that job inside of the scheduler work directory (/root/spark/work). You can
+ also view the status of the cluster using the web UI: `http://<master-hostname>:8080`.
# Configuration
@@ -140,22 +144,14 @@ section.
# Limitations
-- `spark-ec2` currently only launches machines in the US-East region of EC2.
- It should not be hard to make it launch VMs in other zones, but you will need
- to create your own AMIs in them.
- Support for "cluster compute" nodes is limited -- there's no way to specify a
locality group. However, you can launch slave nodes in your
`<clusterName>-slaves` group manually and then use `spark-ec2 launch
--resume` to start a cluster with them.
-- Support for spot instances is limited.
If you have a patch or suggestion for one of these limitations, feel free to
[contribute](contributing-to-spark.html) it!
-# Using a Newer Spark Version
-
-The Spark EC2 machine images may not come with the latest version of Spark. To use a newer version, you can run `git pull` to pull in `/root/spark` to pull in the latest version of Spark from `git`, and build it using `sbt/sbt compile`. You will also need to copy it to all the other nodes in the cluster using `~/spark-ec2/copy-dir /root/spark`.
-
# Accessing Data in S3
Spark's file interface allows it to process data in Amazon S3 using the same URI formats that are supported for Hadoop. You can specify a path in S3 as input through a URI of the form `s3n://<bucket>/path`. You will also need to set your Amazon security credentials, either by setting the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` before your program or through `SparkContext.hadoopConfiguration`. Full instructions on S3 access using the Hadoop input libraries can be found on the [Hadoop S3 page](http://wiki.apache.org/hadoop/AmazonS3).
diff --git a/docs/hardware-provisioning.md b/docs/hardware-provisioning.md
new file mode 100644
index 0000000000..e5f054cb14
--- /dev/null
+++ b/docs/hardware-provisioning.md
@@ -0,0 +1,69 @@
+---
+layout: global
+title: Hardware Provisioning
+---
+
+A common question received by Spark developers is how to configure hardware for it. While the right
+hardware will depend on the situation, we make the following recommendations.
+
+# Storage Systems
+
+Because most Spark jobs will likely have to read input data from an external storage system (e.g.
+the Hadoop File System, or HBase), it is important to place it **as close to this system as
+possible**. We recommend the following:
+
+* If at all possible, run Spark on the same nodes as HDFS. The simplest way is to set up a Spark
+[standalone mode cluster](spark-standalone.html) on the same nodes, and configure Spark and
+Hadoop's memory and CPU usage to avoid interference (for Hadoop, the relevant options are
+`mapred.child.java.opts` for the per-task memory and `mapred.tasktracker.map.tasks.maximum`
+and `mapred.tasktracker.reduce.tasks.maximum` for number of tasks). Alternatively, you can run
+Hadoop and Spark on a common cluster manager like [Mesos](running-on-mesos.html) or
+[Hadoop YARN](running-on-yarn.html).
+
+* If this is not possible, run Spark on different nodes in the same local-area network as HDFS.
+
+* For low-latency data stores like HBase, it may be preferrable to run computing jobs on different
+nodes than the storage system to avoid interference.
+
+# Local Disks
+
+While Spark can perform a lot of its computation in memory, it still uses local disks to store
+data that doesn't fit in RAM, as well as to preserve intermediate output between stages. We
+recommend having **4-8 disks** per node, configured _without_ RAID (just as separate mount points).
+In Linux, mount the disks with the [`noatime` option](http://www.centos.org/docs/5/html/Global_File_System/s2-manage-mountnoatime.html)
+to reduce unnecessary writes. In Spark, [configure](configuration.html) the `spark.local.dir`
+variable to be a comma-separated list of the local disks. If you are running HDFS, it's fine to
+use the same disks as HDFS.
+
+# Memory
+
+In general, Spark can run well with anywhere from **8 GB to hundreds of gigabytes** of memory per
+machine. In all cases, we recommend allocating only at most 75% of the memory for Spark; leave the
+rest for the operating system and buffer cache.
+
+How much memory you will need will depend on your application. To determine how much your
+application uses for a certain dataset size, load part of your dataset in a Spark RDD and use the
+Storage tab of Spark's monitoring UI (`http://<driver-node>:3030`) to see its size in memory.
+Note that memory usage is greatly affected by storage level and serialization format -- see
+the [tuning guide](tuning.html) for tips on how to reduce it.
+
+Finally, note that the Java VM does not always behave well with more than 200 GB of RAM. If you
+purchase machines with more RAM than this, you can run _multiple worker JVMs per node_. In
+Spark's [standalone mode](spark-standalone.html), you can set the number of workers per node
+with the `SPARK_WORKER_INSTANCES` variable in `conf/spark-env.sh`, and the number of cores
+per worker with `SPARK_WORKER_CORES`.
+
+# Network
+
+In our experience, when the data is in memory, a lot of Spark applications are network-bound.
+Using a **10 Gigabit** or higher network is the best way to make these applications faster.
+This is especially true for "distributed reduce" applications such as group-bys, reduce-bys, and
+SQL joins. In any given application, you can see how much data Spark shuffles across the network
+from the application's monitoring UI (`http://<driver-node>:3030`).
+
+# CPU Cores
+
+Spark scales well to tens of CPU cores per machine because it performes minimal sharing between
+threads. You should likely provision at least **8-16 cores** per machine. Depending on the CPU
+cost of your workload, you may also need more: once data is in memory, most applications are
+either CPU- or network-bound.
diff --git a/docs/img/incubator-logo.png b/docs/img/incubator-logo.png
new file mode 100644
index 0000000000..33ca7f6227
--- /dev/null
+++ b/docs/img/incubator-logo.png
Binary files differ
diff --git a/docs/index.md b/docs/index.md
index 0c4add45dc..3cf9cc1c64 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -3,50 +3,52 @@ layout: global
title: Spark Overview
---
-Spark is a MapReduce-like cluster computing framework designed for low-latency iterative jobs and interactive use from an interpreter.
-It provides clean, language-integrated APIs in [Scala](scala-programming-guide.html), [Java](java-programming-guide.html), and [Python](python-programming-guide.html), with a rich array of parallel operators.
+Apache Spark is a fast and general-purpose cluster computing system.
+It provides high-level APIs in [Scala](scala-programming-guide.html), [Java](java-programming-guide.html), and [Python](python-programming-guide.html) that make parallel jobs easy to write, and an optimized engine that supports general computation graphs.
Spark can run on the Apache Mesos cluster manager, Hadoop YARN, Amazon EC2, or without an independent resource manager ("standalone mode").
# Downloading
-Get Spark by visiting the [downloads page](http://spark-project.org/downloads.html) of the Spark website. This documentation is for Spark version {{site.SPARK_VERSION}}.
+Get Spark by visiting the [downloads page](http://spark.incubator.apache.org/downloads.html) of the Apache Spark site. This documentation is for Spark version {{site.SPARK_VERSION}}.
# Building
-Spark requires [Scala {{site.SCALA_VERSION}}](http://www.scala-lang.org/). You will need to have Scala's `bin` directory in your `PATH`,
-or you will need to set the `SCALA_HOME` environment variable to point
-to where you've installed Scala. Scala must also be accessible through one
-of these methods on slave nodes on your cluster.
-
Spark uses [Simple Build Tool](http://www.scala-sbt.org), which is bundled with it. To compile the code, go into the top-level Spark directory and run
- sbt/sbt package
+ sbt/sbt assembly
-Spark also supports building using Maven. If you would like to build using Maven, see the [instructions for building Spark with Maven](building-with-maven.html).
+For its Scala API, Spark {{site.SPARK_VERSION}} depends on Scala {{site.SCALA_VERSION}}. If you write applications in Scala, you will need to use this same version of Scala in your own program -- newer major versions may not work. You can get the right version of Scala from [scala-lang.org](http://www.scala-lang.org/download/).
# Testing the Build
-Spark comes with a number of sample programs in the `examples` directory.
-To run one of the samples, use `./run <class> <params>` in the top-level Spark directory
-(the `run` script sets up the appropriate paths and launches that program).
-For example, `./run spark.examples.SparkPi` will run a sample program that estimates Pi. Each of the
-examples prints usage help if no params are given.
+Spark comes with several sample programs in the `examples` directory.
+To run one of the samples, use `./run-example <class> <params>` in the top-level Spark directory
+(the `run-example` script sets up the appropriate paths and launches that program).
+For example, try `./run-example org.apache.spark.examples.SparkPi local`.
+Each example prints usage help when run with no parameters.
Note that all of the sample programs take a `<master>` parameter specifying the cluster URL
to connect to. This can be a [URL for a distributed cluster](scala-programming-guide.html#master-urls),
or `local` to run locally with one thread, or `local[N]` to run locally with N threads. You should start by using
`local` for testing.
-Finally, Spark can be used interactively from a modified version of the Scala interpreter that you can start through
-`./spark-shell`. This is a great way to learn Spark.
+Finally, Spark can be used interactively through modified versions of the Scala shell (`./spark-shell`) or
+Python interpreter (`./pyspark`). These are a great way to learn Spark.
# A Note About Hadoop Versions
-Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
+Spark uses the Hadoop-client library to talk to HDFS and other Hadoop-supported
storage systems. Because the HDFS protocol has changed in different versions of
-Hadoop, you must build Spark against the same version that your cluster runs.
-You can change the version by setting the `HADOOP_VERSION` variable at the top
-of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`).
+Hadoop, you must build Spark against the same version that your cluster uses.
+By default, Spark links to Hadoop 1.0.4. You can change this by setting the
+`SPARK_HADOOP_VERSION` variable when compiling:
+
+ SPARK_HADOOP_VERSION=1.2.1 sbt/sbt assembly
+
+In addition, if you wish to run Spark on [YARN](running-on-yarn.md), set
+`SPARK_YARN` to `true`:
+
+ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly
# Where to Go from Here
@@ -54,15 +56,20 @@ of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`).
* [Quick Start](quick-start.html): a quick introduction to the Spark API; start here!
* [Spark Programming Guide](scala-programming-guide.html): an overview of Spark concepts, and details on the Scala API
-* [Java Programming Guide](java-programming-guide.html): using Spark from Java
-* [Python Programming Guide](python-programming-guide.html): using Spark from Python
-* [Spark Streaming Guide](streaming-programming-guide.html): using the alpha release of Spark Streaming
+ * [Java Programming Guide](java-programming-guide.html): using Spark from Java
+ * [Python Programming Guide](python-programming-guide.html): using Spark from Python
+* [Spark Streaming](streaming-programming-guide.html): using the alpha release of Spark Streaming
+* [MLlib (Machine Learning)](mllib-guide.html): Spark's built-in machine learning library
+* [Bagel (Pregel on Spark)](bagel-programming-guide.html): simple graph processing model
**API Docs:**
-* [Spark Java/Scala (Scaladoc)](api/core/index.html)
-* [Spark Python (Epydoc)](api/pyspark/index.html)
-* [Spark Streaming Java/Scala (Scaladoc)](api/streaming/index.html)
+* [Spark for Java/Scala (Scaladoc)](api/core/index.html)
+* [Spark for Python (Epydoc)](api/pyspark/index.html)
+* [Spark Streaming for Java/Scala (Scaladoc)](api/streaming/index.html)
+* [MLlib (Machine Learning) for Java/Scala (Scaladoc)](api/mllib/index.html)
+* [Bagel (Pregel on Spark) for Scala (Scaladoc)](api/bagel/index.html)
+
**Deployment guides:**
@@ -74,27 +81,27 @@ of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`).
**Other documents:**
-* [Building Spark With Maven](building-with-maven.html): Build Spark using the Maven build tool
* [Configuration](configuration.html): customize Spark via its configuration system
* [Tuning Guide](tuning.html): best practices to optimize performance and memory use
-* [Bagel](bagel-programming-guide.html): an implementation of Google's Pregel on Spark
+* [Hardware Provisioning](hardware-provisioning.html): recommendations for cluster hardware
+* [Building Spark with Maven](building-with-maven.html): Build Spark using the Maven build tool
* [Contributing to Spark](contributing-to-spark.html)
**External resources:**
-* [Spark Homepage](http://www.spark-project.org)
-* [Mailing List](http://groups.google.com/group/spark-users): ask questions about Spark here
-* [AMP Camp](http://ampcamp.berkeley.edu/): a two-day training camp at UC Berkeley that featured talks and exercises
- about Spark, Shark, Mesos, and more. [Videos](http://ampcamp.berkeley.edu/agenda-2012),
+* [Spark Homepage](http://spark.incubator.apache.org)
+* [Mailing Lists](http://spark.incubator.apache.org/mailing-lists.html): ask questions about Spark here
+* [AMP Camps](http://ampcamp.berkeley.edu/): a series of training camps at UC Berkeley that featured talks and
+ exercises about Spark, Shark, Mesos, and more. [Videos](http://ampcamp.berkeley.edu/agenda-2012),
[slides](http://ampcamp.berkeley.edu/agenda-2012) and [exercises](http://ampcamp.berkeley.edu/exercises-2012) are
available online for free.
-* [Code Examples](http://spark-project.org/examples.html): more are also available in the [examples subfolder](https://github.com/mesos/spark/tree/master/examples/src/main/scala/spark/examples) of Spark
+* [Code Examples](http://spark.incubator.apache.org/examples.html): more are also available in the [examples subfolder](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/) of Spark
* [Paper Describing Spark](http://www.cs.berkeley.edu/~matei/papers/2012/nsdi_spark.pdf)
* [Paper Describing Spark Streaming](http://www.eecs.berkeley.edu/Pubs/TechRpts/2012/EECS-2012-259.pdf)
# Community
-To get help using Spark or keep up with Spark development, sign up for the [spark-users mailing list](http://groups.google.com/group/spark-users).
+To get help using Spark or keep up with Spark development, sign up for the [user mailing list](http://spark.incubator.apache.org/mailing-lists.html).
If you're in the San Francisco Bay Area, there's a regular [Spark meetup](http://www.meetup.com/spark-users/) every few weeks. Come by to meet the developers and other users.
diff --git a/docs/java-programming-guide.md b/docs/java-programming-guide.md
index ae8257b539..53085cc671 100644
--- a/docs/java-programming-guide.md
+++ b/docs/java-programming-guide.md
@@ -10,9 +10,9 @@ easy to follow even if you don't know Scala.
This guide will show how to use the Spark features described there in Java.
The Spark Java API is defined in the
-[`spark.api.java`](api/core/index.html#spark.api.java.package) package, and includes
-a [`JavaSparkContext`](api/core/index.html#spark.api.java.JavaSparkContext) for
-initializing Spark and [`JavaRDD`](api/core/index.html#spark.api.java.JavaRDD) classes,
+[`org.apache.spark.api.java`](api/core/index.html#org.apache.spark.api.java.package) package, and includes
+a [`JavaSparkContext`](api/core/index.html#org.apache.spark.api.java.JavaSparkContext) for
+initializing Spark and [`JavaRDD`](api/core/index.html#org.apache.spark.api.java.JavaRDD) classes,
which support the same methods as their Scala counterparts but take Java functions and return
Java data and collection types. The main differences have to do with passing functions to RDD
operations (e.g. map) and handling RDDs of different types, as discussed next.
@@ -23,12 +23,12 @@ There are a few key differences between the Java and Scala APIs:
* Java does not support anonymous or first-class functions, so functions must
be implemented by extending the
- [`spark.api.java.function.Function`](api/core/index.html#spark.api.java.function.Function),
- [`Function2`](api/core/index.html#spark.api.java.function.Function2), etc.
+ [`org.apache.spark.api.java.function.Function`](api/core/index.html#org.apache.spark.api.java.function.Function),
+ [`Function2`](api/core/index.html#org.apache.spark.api.java.function.Function2), etc.
classes.
* To maintain type safety, the Java API defines specialized Function and RDD
classes for key-value pairs and doubles. For example,
- [`JavaPairRDD`](api/core/index.html#spark.api.java.JavaPairRDD)
+ [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD)
stores key-value pairs.
* RDD methods like `collect()` and `countByKey()` return Java collections types,
such as `java.util.List` and `java.util.Map`.
@@ -44,8 +44,8 @@ In the Scala API, these methods are automatically added using Scala's
[implicit conversions](http://www.scala-lang.org/node/130) mechanism.
In the Java API, the extra methods are defined in the
-[`JavaPairRDD`](api/core/index.html#spark.api.java.JavaPairRDD)
-and [`JavaDoubleRDD`](api/core/index.html#spark.api.java.JavaDoubleRDD)
+[`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD)
+and [`JavaDoubleRDD`](api/core/index.html#org.apache.spark.api.java.JavaDoubleRDD)
classes. RDD methods like `map` are overloaded by specialized `PairFunction`
and `DoubleFunction` classes, allowing them to return RDDs of the appropriate
types. Common methods like `filter` and `sample` are implemented by
@@ -75,7 +75,7 @@ class has a single abstract method, `call()`, that must be implemented.
## Storage Levels
RDD [storage level](scala-programming-guide.html#rdd-persistence) constants, such as `MEMORY_AND_DISK`, are
-declared in the [spark.api.java.StorageLevels](api/core/index.html#spark.api.java.StorageLevels) class. To
+declared in the [org.apache.spark.api.java.StorageLevels](api/core/index.html#org.apache.spark.api.java.StorageLevels) class. To
define your own storage level, you can use StorageLevels.create(...).
@@ -92,8 +92,8 @@ The Java API supports other Spark features, including
As an example, we will implement word count using the Java API.
{% highlight java %}
-import spark.api.java.*;
-import spark.api.java.function.*;
+import org.apache.spark.api.java.*;
+import org.apache.spark.api.java.function.*;
JavaSparkContext sc = new JavaSparkContext(...);
JavaRDD<String> lines = ctx.textFile("hdfs://...");
@@ -179,7 +179,7 @@ just a matter of style.
# Javadoc
We currently provide documentation for the Java API as Scaladoc, in the
-[`spark.api.java` package](api/core/index.html#spark.api.java.package), because
+[`org.apache.spark.api.java` package](api/core/index.html#org.apache.spark.api.java.package), because
some of the classes are implemented in Scala. The main downside is that the types and function
definitions show Scala syntax (for example, `def reduce(func: Function2[T, T]): T` instead of
`T reduce(Function2<T, T> func)`).
@@ -189,7 +189,10 @@ We hope to generate documentation with Java-style syntax in the future.
# Where to Go from Here
Spark includes several sample programs using the Java API in
-[`examples/src/main/java`](https://github.com/mesos/spark/tree/master/examples/src/main/java/spark/examples). You can run them by passing the class name to the
-`run` script included in Spark -- for example, `./run
-spark.examples.JavaWordCount`. Each example program prints usage help when run
+[`examples/src/main/java`](https://github.com/apache/incubator-spark/tree/master/examples/src/main/java/org/apache/spark/examples). You can run them by passing the class name to the
+`run-example` script included in Spark; for example:
+
+ ./run-example org.apache.spark.examples.JavaWordCount
+
+Each example program prints usage help when run
without any arguments.
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
new file mode 100644
index 0000000000..c897f8b36c
--- /dev/null
+++ b/docs/mllib-guide.md
@@ -0,0 +1,6 @@
+---
+layout: global
+title: Machine Learning Library (MLlib)
+---
+
+Coming soon.
diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md
index e8aaac74d0..8a539fe774 100644
--- a/docs/python-programming-guide.md
+++ b/docs/python-programming-guide.md
@@ -10,14 +10,15 @@ To learn the basics of Spark, we recommend reading through the
easy to follow even if you don't know Scala.
This guide will show how to use the Spark features described there in Python.
+
# Key Differences in the Python API
There are a few key differences between the Python and Scala APIs:
* Python is dynamically typed, so RDDs can hold objects of different types.
* PySpark does not currently support the following Spark features:
- - Special functions on RDDs of doubles, such as `mean` and `stdev`
- - `lookup`, `sample` and `sort`
+ - `lookup`
+ - `sort`
- `persist` at storage levels other than `MEMORY_ONLY`
- Execution on Windows -- this is slated for a future release
@@ -50,6 +51,7 @@ PySpark will automatically ship these functions to workers, along with any objec
Instances of classes will be serialized and shipped to workers by PySpark, but classes themselves cannot be automatically distributed to workers.
The [Standalone Use](#standalone-use) section describes how to ship code dependencies to workers.
+
# Installing and Configuring PySpark
PySpark requires Python 2.6 or higher.
@@ -68,7 +70,7 @@ The script automatically adds the `pyspark` package to the `PYTHONPATH`.
The `pyspark` script launches a Python interpreter that is configured to run PySpark jobs. To use `pyspark` interactively, first build Spark, then launch it directly from the command line without any options:
{% highlight bash %}
-$ sbt/sbt package
+$ sbt/sbt assembly
$ ./pyspark
{% endhighlight %}
@@ -81,16 +83,41 @@ The Python shell can be used explore data interactively and is a simple way to l
>>> help(pyspark) # Show all pyspark functions
{% endhighlight %}
-By default, the `pyspark` shell creates SparkContext that runs jobs locally.
-To connect to a non-local cluster, set the `MASTER` environment variable.
+By default, the `pyspark` shell creates SparkContext that runs jobs locally on a single core.
+To connect to a non-local cluster, or use multiple cores, set the `MASTER` environment variable.
For example, to use the `pyspark` shell with a [standalone Spark cluster](spark-standalone.html):
{% highlight bash %}
$ MASTER=spark://IP:PORT ./pyspark
{% endhighlight %}
+Or, to use four cores on the local machine:
+
+{% highlight bash %}
+$ MASTER=local[4] ./pyspark
+{% endhighlight %}
+
+
+## IPython
+
+It is also possible to launch PySpark in [IPython](http://ipython.org), the enhanced Python interpreter.
+To do this, simply set the `IPYTHON` variable to `1` when running `pyspark`:
+
+{% highlight bash %}
+$ IPYTHON=1 ./pyspark
+{% endhighlight %}
+
+Alternatively, you can customize the `ipython` command by setting `IPYTHON_OPTS`. For example, to launch
+the [IPython Notebook](http://ipython.org/notebook.html) with PyLab graphing support:
+
+{% highlight bash %}
+$ IPYTHON_OPTS="notebook --pylab inline" ./pyspark
+{% endhighlight %}
+
+IPython also works on a cluster or on multiple cores if you set the `MASTER` environment variable.
+
-# Standalone Use
+# Standalone Programs
PySpark can also be used from standalone Python scripts by creating a SparkContext in your script and running the script using `pyspark`.
The Quick Start guide includes a [complete example](quick-start.html#a-standalone-job-in-python) of a standalone Python job.
@@ -105,10 +132,14 @@ sc = SparkContext("local", "Job Name", pyFiles=['MyFile.py', 'lib.zip', 'app.egg
Files listed here will be added to the `PYTHONPATH` and shipped to remote worker machines.
Code dependencies can be added to an existing SparkContext using its `addPyFile()` method.
+
# Where to Go from Here
-PySpark includes several sample programs in the [`python/examples` folder](https://github.com/mesos/spark/tree/master/python/examples).
-You can run them by passing the files to the `pyspark` script -- for example `./pyspark python/examples/wordcount.py`.
+PySpark includes several sample programs in the [`python/examples` folder](https://github.com/apache/incubator-spark/tree/master/python/examples).
+You can run them by passing the files to the `pyspark` script; e.g.:
+
+ ./pyspark python/examples/wordcount.py
+
Each program prints usage help when run without arguments.
We currently provide [API documentation](api/pyspark/index.html) for the Python API as Epydoc.
diff --git a/docs/quick-start.md b/docs/quick-start.md
index 335643536a..70c3df8095 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -12,7 +12,7 @@ See the [programming guide](scala-programming-guide.html) for a more complete re
To follow along with this guide, you only need to have successfully built Spark on one machine. Simply go into your Spark directory and run:
{% highlight bash %}
-$ sbt/sbt package
+$ sbt/sbt assembly
{% endhighlight %}
# Interactive Analysis with the Spark Shell
@@ -53,7 +53,7 @@ scala> textFile.filter(line => line.contains("Spark")).count() // How many lines
res3: Long = 15
{% endhighlight %}
-## More On RDD Operations
+## More on RDD Operations
RDD actions and transformations can be used for more complex computations. Let's say we want to find the line with the most words:
{% highlight scala %}
@@ -108,8 +108,8 @@ We'll create a very simple Spark job in Scala. So simple, in fact, that it's nam
{% highlight scala %}
/*** SimpleJob.scala ***/
-import spark.SparkContext
-import SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
object SimpleJob {
def main(args: Array[String]) {
@@ -126,7 +126,7 @@ object SimpleJob {
This job simply counts the number of lines containing 'a' and the number containing 'b' in the Spark README. Note that you'll need to replace $YOUR_SPARK_HOME with the location where Spark is installed. Unlike the earlier examples with the Spark shell, which initializes its own SparkContext, we initialize a SparkContext as part of the job. We pass the SparkContext constructor four arguments, the type of scheduler we want to use (in this case, a local scheduler), a name for the job, the directory where Spark is installed, and a name for the jar file containing the job's sources. The final two arguments are needed in a distributed setting, where Spark is running across several nodes, so we include them for completeness. Spark will automatically ship the jar files you list to slave nodes.
-This file depends on the Spark API, so we'll also include an sbt configuration file, `simple.sbt` which explains that Spark is a dependency. This file also adds two repositories which host Spark dependencies:
+This file depends on the Spark API, so we'll also include an sbt configuration file, `simple.sbt` which explains that Spark is a dependency. This file also adds a repository that Spark depends on:
{% highlight scala %}
name := "Simple Project"
@@ -135,14 +135,18 @@ version := "1.0"
scalaVersion := "{{site.SCALA_VERSION}}"
-libraryDependencies += "org.spark-project" %% "spark-core" % "{{site.SPARK_VERSION}}"
+libraryDependencies += "org.apache.spark" %% "spark-core" % "{{site.SPARK_VERSION}}"
-resolvers ++= Seq(
- "Akka Repository" at "http://repo.akka.io/releases/",
- "Spray Repository" at "http://repo.spray.cc/")
+resolvers += "Akka Repository" at "http://repo.akka.io/releases/"
{% endhighlight %}
-Of course, for sbt to work correctly, we'll need to layout `SimpleJob.scala` and `simple.sbt` according to the typical directory structure. Once that is in place, we can create a JAR package containing the job's code, then use `sbt run` to execute our example job.
+If you also wish to read data from Hadoop's HDFS, you will also need to add a dependency on `hadoop-client` for your version of HDFS:
+
+{% highlight scala %}
+libraryDependencies += "org.apache.hadoop" % "hadoop-client" % "<your-hdfs-version>"
+{% endhighlight %}
+
+Finally, for sbt to work correctly, we'll need to layout `SimpleJob.scala` and `simple.sbt` according to the typical directory structure. Once that is in place, we can create a JAR package containing the job's code, then use `sbt run` to execute our example job.
{% highlight bash %}
$ find .
@@ -159,8 +163,6 @@ $ sbt run
Lines with a: 46, Lines with b: 23
{% endhighlight %}
-This example only runs the job locally; for a tutorial on running jobs across several machines, see the [Standalone Mode](spark-standalone.html) documentation, and consider using a distributed input source, such as HDFS.
-
# A Standalone Job In Java
Now say we wanted to write a standalone job using the Java API. We will walk through doing this with Maven. If you are using other build systems, consider using the Spark assembly JAR described in the developer guide.
@@ -168,8 +170,8 @@ We'll create a very simple Spark job, `SimpleJob.java`:
{% highlight java %}
/*** SimpleJob.java ***/
-import spark.api.java.*;
-import spark.api.java.function.Function;
+import org.apache.spark.api.java.*;
+import org.apache.spark.api.java.function.Function;
public class SimpleJob {
public static void main(String[] args) {
@@ -205,17 +207,13 @@ To build the job, we also write a Maven `pom.xml` file that lists Spark as a dep
<version>1.0</version>
<repositories>
<repository>
- <id>Spray.cc repository</id>
- <url>http://repo.spray.cc</url>
- </repository>
- <repository>
<id>Akka repository</id>
<url>http://repo.akka.io/releases</url>
</repository>
</repositories>
<dependencies>
<dependency> <!-- Spark dependency -->
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-core_{{site.SCALA_VERSION}}</artifactId>
<version>{{site.SPARK_VERSION}}</version>
</dependency>
@@ -223,6 +221,16 @@ To build the job, we also write a Maven `pom.xml` file that lists Spark as a dep
</project>
{% endhighlight %}
+If you also wish to read data from Hadoop's HDFS, you will also need to add a dependency on `hadoop-client` for your version of HDFS:
+
+{% highlight xml %}
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>...</version>
+ </dependency>
+{% endhighlight %}
+
We lay out these files according to the canonical Maven directory structure:
{% highlight bash %}
$ find .
@@ -242,8 +250,6 @@ $ mvn exec:java -Dexec.mainClass="SimpleJob"
Lines with a: 46, Lines with b: 23
{% endhighlight %}
-This example only runs the job locally; for a tutorial on running jobs across several machines, see the [Standalone Mode](spark-standalone.html) documentation, and consider using a distributed input source, such as HDFS.
-
# A Standalone Job In Python
Now we will show how to write a standalone job using the Python API (PySpark).
@@ -280,4 +286,33 @@ $ ./pyspark SimpleJob.py
Lines with a: 46, Lines with b: 23
{% endhighlight python %}
-This example only runs the job locally; for a tutorial on running jobs across several machines, see the [Standalone Mode](spark-standalone.html) documentation, and consider using a distributed input source, such as HDFS.
+# Running Jobs on a Cluster
+
+There are a few additional considerations when running jobs on a
+[Spark](spark-standalone.html), [YARN](running-on-yarn.html), or
+[Mesos](running-on-mesos.html) cluster.
+
+### Including Your Dependencies
+If your code depends on other projects, you will need to ensure they are also
+present on the slave nodes. A popular approach is to create an
+assembly jar (or "uber" jar) containing your code and its dependencies. Both
+[sbt](https://github.com/sbt/sbt-assembly) and
+[Maven](http://maven.apache.org/plugins/maven-assembly-plugin/)
+have assembly plugins. When creating assembly jars, list Spark
+itself as a `provided` dependency; it need not be bundled since it is
+already present on the slaves. Once you have an assembled jar,
+add it to the SparkContext as shown here. It is also possible to submit
+your dependent jars one-by-one when creating a SparkContext.
+
+### Setting Configuration Options
+Spark includes several configuration options which influence the behavior
+of your job. These should be set as
+[JVM system properties](configuration.html#system-properties) in your
+program. The options will be captured and shipped to all slave nodes.
+
+### Accessing Hadoop Filesystems
+
+The examples here access a local file. To read data from a distributed
+filesystem, such as HDFS, include
+[Hadoop version information](index.html#a-note-about-hadoop-versions)
+in your build file. By default, Spark builds against HDFS 1.0.4.
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md
index f4a3eb667c..b31f78e8bf 100644
--- a/docs/running-on-mesos.md
+++ b/docs/running-on-mesos.md
@@ -9,9 +9,8 @@ Spark can run on private clusters managed by the [Apache Mesos](http://incubator
2. Download Mesos {{site.MESOS_VERSION}} from a [mirror](http://www.apache.org/dyn/closer.cgi/incubator/mesos/mesos-{{site.MESOS_VERSION}}/).
3. Configure Mesos using the `configure` script, passing the location of your `JAVA_HOME` using `--with-java-home`. Mesos comes with "template" configure scripts for different platforms, such as `configure.macosx`, that you can run. See the README file in Mesos for other options. **Note:** If you want to run Mesos without installing it into the default paths on your system (e.g. if you don't have administrative privileges to install it), you should also pass the `--prefix` option to `configure` to tell it where to install. For example, pass `--prefix=/home/user/mesos`. By default the prefix is `/usr/local`.
4. Build Mesos using `make`, and then install it using `make install`.
-5. Create a file called `spark-env.sh` in Spark's `conf` directory, by copying `conf/spark-env.sh.template`, and add the following lines in it:
+5. Create a file called `spark-env.sh` in Spark's `conf` directory, by copying `conf/spark-env.sh.template`, and add the following lines it:
* `export MESOS_NATIVE_LIBRARY=<path to libmesos.so>`. This path is usually `<prefix>/lib/libmesos.so` (where the prefix is `/usr/local` by default). Also, on Mac OS X, the library is called `libmesos.dylib` instead of `.so`.
- * `export SCALA_HOME=<path to Scala directory>`.
6. Copy Spark and Mesos to the _same_ paths on all the nodes in the cluster (or, for Mesos, `make install` on every node).
7. Configure Mesos for deployment:
* On your master node, edit `<prefix>/var/mesos/deploy/masters` to list your master and `<prefix>/var/mesos/deploy/slaves` to list the slaves, where `<prefix>` is the prefix where you installed Mesos (`/usr/local` by default).
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 66fb8d73e8..fe5334ffdc 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -3,50 +3,33 @@ layout: global
title: Launching Spark on YARN
---
-Experimental support for running over a [YARN (Hadoop
+Support for running on [YARN (Hadoop
NextGen)](http://hadoop.apache.org/docs/r2.0.2-alpha/hadoop-yarn/hadoop-yarn-site/YARN.html)
-cluster was added to Spark in version 0.6.0. This was merged into master as part of 0.7 effort.
-To build spark core with YARN support, please use the hadoop2-yarn profile.
-Ex: mvn -Phadoop2-yarn clean install
+was added to Spark in version 0.6.0, and improved in 0.7.0 and 0.8.0.
-# Building spark core consolidated jar.
+# Building a YARN-Enabled Assembly JAR
-We need a consolidated spark core jar (which bundles all the required dependencies) to run Spark jobs on a yarn cluster.
-This can be built either through sbt or via maven.
+We need a consolidated Spark JAR (which bundles all the required dependencies) to run Spark jobs on a YARN cluster.
+This can be built by setting the Hadoop version and `SPARK_YARN` environment variable, as follows:
-- Building spark assembled jar via sbt.
- It is a manual process of enabling it in project/SparkBuild.scala.
-Please comment out the
- HADOOP_VERSION, HADOOP_MAJOR_VERSION and HADOOP_YARN
-variables before the line 'For Hadoop 2 YARN support'
-Next, uncomment the subsequent 3 variable declaration lines (for these three variables) which enable hadoop yarn support.
+ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt/sbt assembly
-Assembly of the jar Ex:
+The assembled JAR will be something like this:
+`./assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly_{{site.SPARK_VERSION}}-hadoop2.0.5.jar`.
- ./sbt/sbt clean assembly
-
-The assembled jar would typically be something like :
-`./core/target/spark-core-assembly-0.8.0-SNAPSHOT.jar`
-
-
-- Building spark assembled jar via Maven.
- Use the hadoop2-yarn profile and execute the package target.
-
-Something like this. Ex:
-
- mvn -Phadoop2-yarn clean package -DskipTests=true
+# Preparations
-This will build the shaded (consolidated) jar. Typically something like :
-`./repl-bin/target/spark-repl-bin-<VERSION>-shaded-hadoop2-yarn.jar`
+- Building a YARN-enabled assembly (see above).
+- Your application code must be packaged into a separate JAR file.
+If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt assembly`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different.
-# Preparations
+# Configuration
-- Building spark core assembled jar (see above).
-- Your application code must be packaged into a separate JAR file.
+Most of the configs are the same for Spark on YARN as other deploys. See the Configuration page for more information on those. These are configs that are specific to SPARK on YARN.
-If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt package`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different.
+* `SPARK_YARN_USER_ENV`, to add environment variables to the Spark processes launched on YARN. This can be a comma separated list of environment variables, e.g. `SPARK_YARN_USER_ENV="JAVA_HOME=/jdk64,FOO=bar"`.
# Launching Spark on YARN
@@ -55,7 +38,7 @@ This would be used to connect to the cluster, write to the dfs and submit jobs t
The command to launch the YARN Client is as follows:
- SPARK_JAR=<SPARK_YAR_FILE> ./run spark.deploy.yarn.Client \
+ SPARK_JAR=<SPARK_YARN_JAR_FILE> ./spark-class spark.deploy.yarn.Client \
--jar <YOUR_APP_JAR_FILE> \
--class <APP_MAIN_CLASS> \
--args <APP_MAIN_ARGUMENTS> \
@@ -63,12 +46,11 @@ The command to launch the YARN Client is as follows:
--master-memory <MEMORY_FOR_MASTER> \
--worker-memory <MEMORY_PER_WORKER> \
--worker-cores <CORES_PER_WORKER> \
- --user <hadoop_user> \
--queue <queue_name>
For example:
- SPARK_JAR=./core/target/spark-core-assembly-{{site.SPARK_VERSION}}.jar ./run spark.deploy.yarn.Client \
+ SPARK_JAR=./yarn/target/spark-yarn-assembly-{{site.SPARK_VERSION}}.jar ./spark-class spark.deploy.yarn.Client \
--jar examples/target/scala-{{site.SCALA_VERSION}}/spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}.jar \
--class spark.examples.SparkPi \
--args yarn-standalone \
@@ -83,5 +65,4 @@ The above starts a YARN Client programs which periodically polls the Application
- When your application instantiates a Spark context it must use a special "yarn-standalone" master url. This starts the scheduler without forcing it to connect to a cluster. A good way to handle this is to pass "yarn-standalone" as an argument to your program, as shown in the example above.
- We do not requesting container resources based on the number of cores. Thus the numbers of cores given via command line arguments cannot be guaranteed.
-- Currently, we have not yet integrated with hadoop security. If --user is present, the hadoop_user specified will be used to run the tasks on the cluster. If unspecified, current user will be used (which should be valid in cluster).
- Once hadoop security support is added, and if hadoop cluster is enabled with security, additional restrictions would apply via delegation tokens passed.
+- The local directories used for spark will be the local directories configured for YARN (Hadoop Yarn config yarn.nodemanager.local-dirs). If the user specifies spark.local.dir, it will be ignored.
diff --git a/docs/scala-programming-guide.md b/docs/scala-programming-guide.md
index e9cf9ef36f..f7768e55fc 100644
--- a/docs/scala-programming-guide.md
+++ b/docs/scala-programming-guide.md
@@ -17,19 +17,27 @@ This guide shows each of these features and walks through some samples. It assum
# Linking with Spark
-To write a Spark application, you will need to add both Spark and its dependencies to your CLASSPATH. If you use sbt or Maven, Spark is available through Maven Central at:
+Spark {{site.SPARK_VERSION}} uses Scala {{site.SCALA_VERSION}}. If you write applications in Scala, you'll need to use this same version of Scala in your program -- newer major versions may not work.
- groupId = org.spark-project
+To write a Spark application, you need to add a dependency on Spark. If you use SBT or Maven, Spark is available through Maven Central at:
+
+ groupId = org.apache.spark
artifactId = spark-core_{{site.SCALA_VERSION}}
version = {{site.SPARK_VERSION}}
-For other build systems or environments, you can run `sbt/sbt assembly` to build both Spark and its dependencies into one JAR (`core/target/spark-core-assembly-0.6.0.jar`), then add this to your CLASSPATH.
+In addition, if you wish to access an HDFS cluster, you need to add a dependency on `hadoop-client` for your version of HDFS:
+
+ groupId = org.apache.hadoop
+ artifactId = hadoop-client
+ version = <your-hdfs-version>
+
+For other build systems, you can run `sbt/sbt assembly` to pack Spark and its dependencies into one JAR (`assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop*.jar`), then add this to your CLASSPATH. Set the HDFS version as described [here](index.html#a-note-about-hadoop-versions).
-In addition, you'll need to import some Spark classes and implicit conversions. Add the following lines at the top of your program:
+Finally, you need to import some Spark classes and implicit conversions into your program. Add the following lines:
{% highlight scala %}
-import spark.SparkContext
-import SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
{% endhighlight %}
# Initializing Spark
@@ -134,7 +142,7 @@ All transformations in Spark are <i>lazy</i>, in that they do not compute their
By default, each transformed RDD is recomputed each time you run an action on it. However, you may also *persist* an RDD in memory using the `persist` (or `cache`) method, in which case Spark will keep the elements around on the cluster for much faster access the next time you query it. There is also support for persisting datasets on disk, or replicated across the cluster. The next section in this document describes these options.
-The following tables list the transformations and actions currently supported (see also the [RDD API doc](api/core/index.html#spark.RDD) for details):
+The following tables list the transformations and actions currently supported (see also the [RDD API doc](api/core/index.html#org.apache.spark.RDD) for details):
### Transformations
@@ -203,7 +211,7 @@ The following tables list the transformations and actions currently supported (s
</tr>
</table>
-A complete list of transformations is available in the [RDD API doc](api/core/index.html#spark.RDD).
+A complete list of transformations is available in the [RDD API doc](api/core/index.html#org.apache.spark.RDD).
### Actions
@@ -251,7 +259,7 @@ A complete list of transformations is available in the [RDD API doc](api/core/in
</tr>
</table>
-A complete list of actions is available in the [RDD API doc](api/core/index.html#spark.RDD).
+A complete list of actions is available in the [RDD API doc](api/core/index.html#org.apache.spark.RDD).
## RDD Persistence
@@ -259,7 +267,7 @@ One of the most important capabilities in Spark is *persisting* (or *caching*) a
You can mark an RDD to be persisted using the `persist()` or `cache()` methods on it. The first time it is computed in an action, it will be kept in memory on the nodes. The cache is fault-tolerant -- if any partition of an RDD is lost, it will automatically be recomputed using the transformations that originally created it.
-In addition, each RDD can be stored using a different *storage level*, allowing you, for example, to persist the dataset on disk, or persist it in memory but as serialized Java objects (to save space), or even replicate it across nodes. These levels are chosen by passing a [`spark.storage.StorageLevel`](api/core/index.html#spark.storage.StorageLevel) object to `persist()`. The `cache()` method is a shorthand for using the default storage level, which is `StorageLevel.MEMORY_ONLY` (store deserialized objects in memory). The complete set of available storage levels is:
+In addition, each RDD can be stored using a different *storage level*, allowing you, for example, to persist the dataset on disk, or persist it in memory but as serialized Java objects (to save space), or even replicate it across nodes. These levels are chosen by passing a [`org.apache.spark.storage.StorageLevel`](api/core/index.html#org.apache.spark.storage.StorageLevel) object to `persist()`. The `cache()` method is a shorthand for using the default storage level, which is `StorageLevel.MEMORY_ONLY` (store deserialized objects in memory). The complete set of available storage levels is:
<table class="table">
<tr><th style="width:23%">Storage Level</th><th>Meaning</th></tr>
@@ -310,7 +318,7 @@ We recommend going through the following process to select one:
application). *All* the storage levels provide full fault tolerance by recomputing lost data, but the replicated ones
let you continue running tasks on the RDD without waiting to recompute a lost partition.
-If you want to define your own storage level (say, with replication factor of 3 instead of 2), then use the function factor method `apply()` of the [`StorageLevel`](api/core/index.html#spark.storage.StorageLevel$) singleton object.
+If you want to define your own storage level (say, with replication factor of 3 instead of 2), then use the function factor method `apply()` of the [`StorageLevel`](api/core/index.html#org.apache.spark.storage.StorageLevel$) singleton object.
# Shared Variables
@@ -356,7 +364,11 @@ res2: Int = 10
# Where to Go from Here
You can see some [example Spark programs](http://www.spark-project.org/examples.html) on the Spark website.
-In addition, Spark includes several sample programs in `examples/src/main/scala`. Some of them have both Spark versions and local (non-parallel) versions, allowing you to see what had to be changed to make the program run on a cluster. You can run them using by passing the class name to the `run` script included in Spark -- for example, `./run spark.examples.SparkPi`. Each example program prints usage help when run without any arguments.
+In addition, Spark includes several samples in `examples/src/main/scala`. Some of them have both Spark versions and local (non-parallel) versions, allowing you to see what had to be changed to make the program run on a cluster. You can run them using by passing the class name to the `run-example` script included in Spark; for example:
+
+ ./run-example org.apache.spark.examples.SparkPi
+
+Each example program prints usage help when run without any arguments.
For help on optimizing your program, the [configuration](configuration.html) and
[tuning](tuning.html) guides provide information on best practices. They are especially important for
diff --git a/docs/spark-debugger.md b/docs/spark-debugger.md
index f6f0988858..d6315d97f4 100644
--- a/docs/spark-debugger.md
+++ b/docs/spark-debugger.md
@@ -2,7 +2,7 @@
layout: global
title: The Spark Debugger
---
-**Summary:** The Spark debugger provides replay debugging for deterministic (logic) errors in Spark programs. It's currently in development, but you can try it out in the [arthur branch](https://github.com/mesos/spark/tree/arthur).
+**Summary:** The Spark debugger provides replay debugging for deterministic (logic) errors in Spark programs. It's currently in development, but you can try it out in the [arthur branch](https://github.com/apache/incubator-spark/tree/arthur).
## Introduction
@@ -19,7 +19,7 @@ For deterministic errors, debugging a Spark program is now as easy as debugging
## Approach
-As your Spark program runs, the slaves report key events back to the master -- for example, RDD creations, RDD contents, and uncaught exceptions. (A full list of event types is in [EventLogging.scala](https://github.com/mesos/spark/blob/arthur/core/src/main/scala/spark/EventLogging.scala).) The master logs those events, and you can load the event log into the debugger after your program is done running.
+As your Spark program runs, the slaves report key events back to the master -- for example, RDD creations, RDD contents, and uncaught exceptions. (A full list of event types is in [EventLogging.scala](https://github.com/apache/incubator-spark/blob/arthur/core/src/main/scala/spark/EventLogging.scala).) The master logs those events, and you can load the event log into the debugger after your program is done running.
_A note on nondeterminism:_ For fault recovery, Spark requires RDD transformations (for example, the function passed to `RDD.map`) to be deterministic. The Spark debugger also relies on this property, and it can also warn you if your transformation is nondeterministic. This works by checksumming the contents of each RDD and comparing the checksums from the original execution to the checksums after recomputing the RDD in the debugger.
diff --git a/docs/spark-simple-tutorial.md b/docs/spark-simple-tutorial.md
deleted file mode 100644
index 9875de62bd..0000000000
--- a/docs/spark-simple-tutorial.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-layout: global
-title: Tutorial - Running a Simple Spark Application
----
-
-1. Create directory for spark demo:
-
- ~$ mkdir SparkTest
-
-2. Copy the sbt files in ~/spark/sbt directory:
-
- ~/SparkTest$ cp -r ../spark/sbt .
-
-3. Edit the ~/SparkTest/sbt/sbt file to look like this:
-
- #!/bin/bash
- java -Xmx800M -XX:MaxPermSize=150m -jar $(dirname $0)/sbt-launch-*.jar "$@"
-
-4. To build a Spark application, you need Spark and its dependencies in a single Java archive (JAR) file. Create this JAR in Spark's main directory with sbt as:
-
- ~/spark$ sbt/sbt assembly
-
-5. create a source file in ~/SparkTest/src/main/scala directory:
-
- ~/SparkTest/src/main/scala$ vi Test1.scala
-
-6. Make the contain of the Test1.scala file like this:
-
- import spark.SparkContext
- import spark.SparkContext._
- object Test1 {
- def main(args: Array[String]) {
- val sc = new SparkContext("local", "SparkTest")
- println(sc.parallelize(1 to 10).reduce(_ + _))
- System.exit(0)
- }
- }
-
-7. Run the Test1.scala file:
-
- ~/SparkTest$ sbt/sbt run
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 3986c0c79d..9ab6ba0830 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -3,32 +3,21 @@ layout: global
title: Spark Standalone Mode
---
-{% comment %}
-TODO(andyk):
- - Add a table of contents
- - Move configuration towards the end so that it doesn't come first
- - Say the scripts will guess the resource amounts (i.e. # cores) automatically
-{% endcomment %}
-
-In addition to running on top of [Mesos](https://github.com/mesos/mesos), Spark also supports a standalone mode, consisting of one Spark master and several Spark worker processes. You can run the Spark standalone mode either locally (for testing) or on a cluster. If you wish to run on a cluster, we have provided [a set of deploy scripts](#cluster-launch-scripts) to launch a whole cluster.
-
-# Getting Started
-
-Compile Spark with `sbt package` as described in the [Getting Started Guide](index.html). You do not need to install Mesos on your machine if you are using the standalone mode.
+In addition to running on the Mesos or YARN cluster managers, Spark also provides a simple standalone deploy mode. You can launch a standalone cluster either manually, by starting a master and workers by hand, or use our provided [deploy scripts](#cluster-launch-scripts). It is also possible to run these daemons on a single machine for testing.
# Starting a Cluster Manually
You can start a standalone master server by executing:
- ./run spark.deploy.master.Master
+ ./spark-class spark.deploy.master.Master
-Once started, the master will print out a `spark://IP:PORT` URL for itself, which you can use to connect workers to it,
-or pass as the "master" argument to `SparkContext` to connect a job to the cluster. You can also find this URL on
+Once started, the master will print out a `spark://HOST:PORT` URL for itself, which you can use to connect workers to it,
+or pass as the "master" argument to `SparkContext`. You can also find this URL on
the master's web UI, which is [http://localhost:8080](http://localhost:8080) by default.
Similarly, you can start one or more workers and connect them to the master via:
- ./run spark.deploy.worker.Worker spark://IP:PORT
+ ./spark-class spark.deploy.worker.Worker spark://IP:PORT
Once you have started a worker, look at the master's web UI ([http://localhost:8080](http://localhost:8080) by default).
You should see the new node listed there, along with its number of CPUs and memory (minus one gigabyte left for the OS).
@@ -43,7 +32,7 @@ Finally, the following configuration options can be passed to the master and wor
</tr>
<tr>
<td><code>-p PORT</code>, <code>--port PORT</code></td>
- <td>IP address or DNS name to listen on (default: 7077 for master, random for worker)</td>
+ <td>Port for service to listen on (default: 7077 for master, random for worker)</td>
</tr>
<tr>
<td><code>--webui-port PORT</code></td>
@@ -68,7 +57,7 @@ Finally, the following configuration options can be passed to the master and wor
To launch a Spark standalone cluster with the deploy scripts, you need to create a file called `conf/slaves` in your Spark directory, which should contain the hostnames of all the machines where you would like to start Spark workers, one per line. The master machine must be able to access each of the slave machines via password-less `ssh` (using a private key). For testing, you can just put `localhost` in this file.
-Once you've set up this fine, you can launch or stop your cluster with the following shell scripts, based on Hadoop's deploy scripts, and available in `SPARK_HOME/bin`:
+Once you've set up this file, you can launch or stop your cluster with the following shell scripts, based on Hadoop's deploy scripts, and available in `SPARK_HOME/bin`:
- `bin/start-master.sh` - Starts a master instance on the machine the script is executed on.
- `bin/start-slaves.sh` - Starts a slave instance on each machine specified in the `conf/slaves` file.
@@ -85,47 +74,56 @@ You can optionally configure the cluster further by setting environment variable
<tr><th style="width:21%">Environment Variable</th><th>Meaning</th></tr>
<tr>
<td><code>SPARK_MASTER_IP</code></td>
- <td>Bind the master to a specific IP address, for example a public one</td>
+ <td>Bind the master to a specific IP address, for example a public one.</td>
</tr>
<tr>
<td><code>SPARK_MASTER_PORT</code></td>
- <td>Start the master on a different port (default: 7077)</td>
+ <td>Start the master on a different port (default: 7077).</td>
</tr>
<tr>
<td><code>SPARK_MASTER_WEBUI_PORT</code></td>
- <td>Port for the master web UI (default: 8080)</td>
+ <td>Port for the master web UI (default: 8080).</td>
</tr>
<tr>
<td><code>SPARK_WORKER_PORT</code></td>
- <td>Start the Spark worker on a specific port (default: random)</td>
+ <td>Start the Spark worker on a specific port (default: random).</td>
</tr>
<tr>
<td><code>SPARK_WORKER_DIR</code></td>
- <td>Directory to run jobs in, which will include both logs and scratch space (default: SPARK_HOME/work)</td>
+ <td>Directory to run jobs in, which will include both logs and scratch space (default: SPARK_HOME/work).</td>
</tr>
<tr>
<td><code>SPARK_WORKER_CORES</code></td>
- <td>Total number of cores to allow Spark jobs to use on the machine (default: all available cores)</td>
+ <td>Total number of cores to allow Spark jobs to use on the machine (default: all available cores).</td>
</tr>
<tr>
<td><code>SPARK_WORKER_MEMORY</code></td>
- <td>Total amount of memory to allow Spark jobs to use on the machine, e.g. 1000M, 2G (default: total memory minus 1 GB); note that each job's <i>individual</i> memory is configured using <code>SPARK_MEM</code></td>
+ <td>Total amount of memory to allow Spark jobs to use on the machine, e.g. <code>1000m</code>, <code>2g</code> (default: total memory minus 1 GB); note that each job's <i>individual</i> memory is configured using its <code>spark.executor.memory</code> property.</td>
</tr>
<tr>
<td><code>SPARK_WORKER_WEBUI_PORT</code></td>
- <td>Port for the worker web UI (default: 8081)</td>
+ <td>Port for the worker web UI (default: 8081).</td>
+ </tr>
+ <tr>
+ <td><code>SPARK_WORKER_INSTANCES</code></td>
+ <td>
+ Number of worker instances to run on each machine (default: 1). You can make this more than 1 if
+ you have have very large machines and would like multiple Spark worker processes. If you do set
+ this, make sure to also set <code>SPARK_WORKER_CORES</code> explicitly to limit the cores per worker,
+ or else each worker will try to use all the cores.
+ </td>
</tr>
<tr>
<td><code>SPARK_DAEMON_MEMORY</code></td>
- <td>Memory to allocate to the Spark master and worker daemons themselves (default: 512m)</td>
+ <td>Memory to allocate to the Spark master and worker daemons themselves (default: 512m).</td>
</tr>
<tr>
<td><code>SPARK_DAEMON_JAVA_OPTS</code></td>
- <td>JVM options for the Spark master and worker daemons themselves (default: none)</td>
+ <td>JVM options for the Spark master and worker daemons themselves (default: none).</td>
</tr>
</table>
-
+**Note:** The launch scripts do not currently support Windows. To run a Spark cluster on Windows, start the master and workers by hand.
# Connecting a Job to the Cluster
@@ -155,5 +153,5 @@ In addition, detailed log output for each job is also written to the work direct
# Running Alongside Hadoop
-You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the machines. To access Hadoop data from Spark, just use a hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in the same local area network (e.g. you place a few Spark machines on each rack that you have Hadoop on).
+You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the same machines. To access Hadoop data from Spark, just use a hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in the same local area network (e.g. you place a few Spark machines on each rack that you have Hadoop on).
diff --git a/docs/streaming-custom-receivers.md b/docs/streaming-custom-receivers.md
index 5476c00d02..4e27d6559c 100644
--- a/docs/streaming-custom-receivers.md
+++ b/docs/streaming-custom-receivers.md
@@ -1,23 +1,53 @@
---
layout: global
-title: Tutorial - Spark Streaming, Plugging in a custom receiver.
+title: Spark Streaming Custom Receivers
---
A "Spark Streaming" receiver can be a simple network stream, streams of messages from a message queue, files etc. A receiver can also assume roles more than just receiving data like filtering, preprocessing, to name a few of the possibilities. The api to plug-in any user defined custom receiver is thus provided to encourage development of receivers which may be well suited to ones specific need.
This guide shows the programming model and features by walking through a simple sample receiver and corresponding Spark Streaming application.
+### Writing a Simple Receiver
-## A quick and naive walk-through
+This starts with implementing [NetworkReceiver](api/streaming/index.html#org.apache.spark.streaming.dstream.NetworkReceiver).
-### Write a simple receiver
+The following is a simple socket text-stream receiver.
+
+{% highlight scala %}
+ class SocketTextStreamReceiver(host: String, port: Int(
+ extends NetworkReceiver[String]
+ {
+ protected lazy val blocksGenerator: BlockGenerator =
+ new BlockGenerator(StorageLevel.MEMORY_ONLY_SER_2)
+
+ protected def onStart() = {
+ blocksGenerator.start()
+ val socket = new Socket(host, port)
+ val dataInputStream = new BufferedReader(new InputStreamReader(socket.getInputStream(), "UTF-8"))
+ var data: String = dataInputStream.readLine()
+ while (data != null) {
+ blocksGenerator += data
+ data = dataInputStream.readLine()
+ }
+ }
+
+ protected def onStop() {
+ blocksGenerator.stop()
+ }
+ }
+{% endhighlight %}
+
+
+All we did here is extended NetworkReceiver and called blockGenerator's API method (i.e. +=) to push our blocks of data. Please refer to scala-docs of NetworkReceiver for more details.
+
+
+### An Actor as Receiver
This starts with implementing [Actor](#References)
Following is a simple socket text-stream receiver, which is appearently overly simplified using Akka's socket.io api.
{% highlight scala %}
-
class SocketTextStreamReceiver (host:String,
port:Int,
bytesToString: ByteString => String) extends Actor with Receiver {
@@ -29,43 +59,41 @@ Following is a simple socket text-stream receiver, which is appearently overly s
}
}
-
-
{% endhighlight %}
All we did here is mixed in trait Receiver and called pushBlock api method to push our blocks of data. Please refer to scala-docs of Receiver for more details.
-### A sample spark application
+### A Sample Spark Application
* First create a Spark streaming context with master url and batchduration.
{% highlight scala %}
-
val ssc = new StreamingContext(master, "WordCountCustomStreamSource",
Seconds(batchDuration))
-
{% endhighlight %}
-* Plug-in the actor configuration into the spark streaming context and create a DStream.
+* Plug-in the custom receiver into the spark streaming context and create a DStream.
{% highlight scala %}
+ val lines = ssc.networkStream[String](new SocketTextStreamReceiver(
+ "localhost", 8445))
+{% endhighlight %}
+
+* OR Plug-in the actor as receiver into the spark streaming context and create a DStream.
+{% highlight scala %}
val lines = ssc.actorStream[String](Props(new SocketTextStreamReceiver(
"localhost",8445, z => z.utf8String)),"SocketReceiver")
-
{% endhighlight %}
* Process it.
{% highlight scala %}
-
val words = lines.flatMap(_.split(" "))
val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
wordCounts.print()
ssc.start()
-
-
{% endhighlight %}
* After processing it, stream can be tested using the netcat utility.
@@ -75,12 +103,11 @@ All we did here is mixed in trait Receiver and called pushBlock api method to pu
hello hello
-## Multiple homogeneous/heterogeneous receivers.
+## Multiple Homogeneous/Heterogeneous Receivers.
A DStream union operation is provided for taking union on multiple input streams.
{% highlight scala %}
-
val lines = ssc.actorStream[String](Props(new SocketTextStreamReceiver(
"localhost",8445, z => z.utf8String)),"SocketReceiver")
@@ -89,7 +116,6 @@ A DStream union operation is provided for taking union on multiple input streams
"localhost",8446, z => z.utf8String)),"SocketReceiver")
val union = lines.union(lines2)
-
{% endhighlight %}
Above stream can be easily process as described earlier.
@@ -99,3 +125,4 @@ _A more comprehensive example is provided in the spark streaming examples_
## References
1.[Akka Actor documentation](http://doc.akka.io/docs/akka/2.0.5/scala/actors.html)
+2.[NetworkReceiver](api/streaming/index.html#org.apache.spark.streaming.dstream.NetworkReceiver)
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 8cd1b0cd66..c7df172024 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -13,6 +13,14 @@ A Spark Streaming application is very similar to a Spark application; it consist
This guide shows some how to start programming with DStreams.
+# Linking with Spark Streaming
+
+Add the following SBT or Maven dependency to your project to use Spark Streaming:
+
+ groupId = org.apache.spark
+ artifactId = spark-streaming_{{site.SCALA_VERSION}}
+ version = {{site.SPARK_VERSION}}
+
# Initializing Spark Streaming
The first thing a Spark Streaming program must do is create a `StreamingContext` object, which tells Spark how to access a cluster. A `StreamingContext` can be created by using
@@ -34,7 +42,7 @@ ssc.textFileStream(directory) // Creates a stream by monitoring and process
ssc.socketStream(hostname, port) // Creates a stream that uses a TCP socket to read data from hostname:port
{% endhighlight %}
-We also provide a input streams for Kafka, Flume, Akka actor, etc. For a complete list of input streams, take a look at the [StreamingContext API documentation](api/streaming/index.html#spark.streaming.StreamingContext).
+We also provide a input streams for Kafka, Flume, Akka actor, etc. For a complete list of input streams, take a look at the [StreamingContext API documentation](api/streaming/index.html#org.apache.spark.streaming.StreamingContext).
@@ -156,7 +164,7 @@ Spark Streaming features windowed computations, which allow you to apply transfo
</table>
-A complete list of DStream operations is available in the API documentation of [DStream](api/streaming/index.html#spark.streaming.DStream) and [PairDStreamFunctions](api/streaming/index.html#spark.streaming.PairDStreamFunctions).
+A complete list of DStream operations is available in the API documentation of [DStream](api/streaming/index.html#org.apache.spark.streaming.DStream) and [PairDStreamFunctions](api/streaming/index.html#org.apache.spark.streaming.PairDStreamFunctions).
## Output Operations
When an output operator is called, it triggers the computation of a stream. Currently the following output operators are defined:
@@ -203,11 +211,11 @@ ssc.stop()
{% endhighlight %}
# Example
-A simple example to start off is the [NetworkWordCount](https://github.com/mesos/spark/tree/master/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala). This example counts the words received from a network server every second. Given below is the relevant sections of the source code. You can find the full source code in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/NetworkWordCount.scala` .
+A simple example to start off is the [NetworkWordCount](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala). This example counts the words received from a network server every second. Given below is the relevant sections of the source code. You can find the full source code in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/NetworkWordCount.scala` .
{% highlight scala %}
-import spark.streaming.{Seconds, StreamingContext}
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import StreamingContext._
...
// Create the context and set up a network input stream to receive from a host:port
@@ -234,7 +242,7 @@ $ nc -lk 9999
Then, in a different terminal, you can start NetworkWordCount by using
{% highlight bash %}
-$ ./run spark.streaming.examples.NetworkWordCount local[2] localhost 9999
+$ ./run-example org.apache.spark.streaming.examples.NetworkWordCount local[2] localhost 9999
{% endhighlight %}
This will make NetworkWordCount connect to the netcat server. Any lines typed in the terminal running the netcat server will be counted and printed on screen.
@@ -272,7 +280,7 @@ Time: 1357008430000 ms
</td>
</table>
-You can find more examples in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/`. They can be run in the similar manner using `./run spark.streaming.examples....` . Executing without any parameter would give the required parameter list. Further explanation to run them can be found in comments in the files.
+You can find more examples in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/`. They can be run in the similar manner using `./run-example org.apache.spark.streaming.examples....` . Executing without any parameter would give the required parameter list. Further explanation to run them can be found in comments in the files.
# DStream Persistence
Similar to RDDs, DStreams also allow developers to persist the stream's data in memory. That is, using `persist()` method on a DStream would automatically persist every RDD of that DStream in memory. This is useful if the data in the DStream will be computed multiple times (e.g., multiple operations on the same data). For window-based operations like `reduceByWindow` and `reduceByKeyAndWindow` and state-based operations like `updateStateByKey`, this is implicitly true. Hence, DStreams generated by window-based operations are automatically persisted in memory, without the developer calling `persist()`.
@@ -301,6 +309,9 @@ dstream.checkpoint(checkpointInterval) // checkpointInterval must be a multiple
For DStreams that must be checkpointed (that is, DStreams created by `updateStateByKey` and `reduceByKeyAndWindow` with inverse function), the checkpoint interval of the DStream is by default set to a multiple of the DStream's sliding interval such that its at least 10 seconds.
+## Custom Receivers
+Spark comes with a built in support for most common usage scenarios where input stream source can be either a network socket stream to support for a few message queues. Apart from that it is also possible to supply your own custom receiver via a convenient API. Find more details at [Custom Receiver Guide](streaming-custom-receivers.html).
+
# Performance Tuning
Getting the best performance of a Spark Streaming application on a cluster requires a bit of tuning. This section explains a number of the parameters and configurations that can tuned to improve the performance of you application. At a high level, you need to consider two things:
<ol>
@@ -312,7 +323,7 @@ Getting the best performance of a Spark Streaming application on a cluster requi
There are a number of optimizations that can be done in Spark to minimize the processing time of each batch. These have been discussed in detail in [Tuning Guide](tuning.html). This section highlights some of the most important ones.
### Level of Parallelism
-Cluster resources maybe under-utilized if the number of parallel tasks used in any stage of the computation is not high enough. For example, for distributed reduce operations like `reduceByKey` and `reduceByKeyAndWindow`, the default number of parallel tasks is 8. You can pass the level of parallelism as an argument (see the [`spark.PairDStreamFunctions`](api/streaming/index.html#spark.PairDStreamFunctions) documentation), or set the system property `spark.default.parallelism` to change the default.
+Cluster resources maybe under-utilized if the number of parallel tasks used in any stage of the computation is not high enough. For example, for distributed reduce operations like `reduceByKey` and `reduceByKeyAndWindow`, the default number of parallel tasks is 8. You can pass the level of parallelism as an argument (see the [`PairDStreamFunctions`](api/streaming/index.html#org.apache.spark.PairDStreamFunctions) documentation), or set the system property `spark.default.parallelism` to change the default.
### Data Serialization
The overhead of data serialization can be significant, especially when sub-second batch sizes are to be achieved. There are two aspects to it.
@@ -342,7 +353,7 @@ This value is closely tied with any window operation that is being used. Any win
## Memory Tuning
Tuning the memory usage and GC behavior of Spark applications have been discussed in great detail in the [Tuning Guide](tuning.html). It is recommended that you read that. In this section, we highlight a few customizations that are strongly recommended to minimize GC related pauses in Spark Streaming applications and achieving more consistent batch processing times.
-* **Default persistence level of DStreams**: Unlike RDDs, the default persistence level of DStreams serializes the data in memory (that is, [StorageLevel.MEMORY_ONLY_SER](api/core/index.html#spark.storage.StorageLevel$) for DStream compared to [StorageLevel.MEMORY_ONLY](api/core/index.html#spark.storage.StorageLevel$) for RDDs). Even though keeping the data serialized incurs a higher serialization overheads, it significantly reduces GC pauses.
+* **Default persistence level of DStreams**: Unlike RDDs, the default persistence level of DStreams serializes the data in memory (that is, [StorageLevel.MEMORY_ONLY_SER](api/core/index.html#org.apache.spark.storage.StorageLevel$) for DStream compared to [StorageLevel.MEMORY_ONLY](api/core/index.html#org.apache.spark.storage.StorageLevel$) for RDDs). Even though keeping the data serialized incurs a higher serialization overheads, it significantly reduces GC pauses.
* **Concurrent garbage collector**: Using the concurrent mark-and-sweep GC further minimizes the variability of GC pauses. Even though concurrent GC is known to reduce the overall processing throughput of the system, its use is still recommended to achieve more consistent batch processing times.
@@ -464,10 +475,10 @@ If the driver had crashed in the middle of the processing of time 3, then it wil
# Java API
-Similar to [Spark's Java API](java-programming-guide.html), we also provide a Java API for Spark Streaming which allows all its features to be accessible from a Java program. This is defined in [spark.streaming.api.java] (api/streaming/index.html#spark.streaming.api.java.package) package and includes [JavaStreamingContext](api/streaming/index.html#spark.streaming.api.java.JavaStreamingContext) and [JavaDStream](api/streaming/index.html#spark.streaming.api.java.JavaDStream) classes that provide the same methods as their Scala counterparts, but take Java functions (that is, Function, and Function2) and return Java data and collection types. Some of the key points to note are:
+Similar to [Spark's Java API](java-programming-guide.html), we also provide a Java API for Spark Streaming which allows all its features to be accessible from a Java program. This is defined in [org.apache.spark.streaming.api.java] (api/streaming/index.html#org.apache.spark.streaming.api.java.package) package and includes [JavaStreamingContext](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaStreamingContext) and [JavaDStream](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaDStream) classes that provide the same methods as their Scala counterparts, but take Java functions (that is, Function, and Function2) and return Java data and collection types. Some of the key points to note are:
-1. Functions for transformations must be implemented as subclasses of [Function](api/core/index.html#spark.api.java.function.Function) and [Function2](api/core/index.html#spark.api.java.function.Function2)
-1. Unlike the Scala API, the Java API handles DStreams for key-value pairs using a separate [JavaPairDStream](api/streaming/index.html#spark.streaming.api.java.JavaPairDStream) class(similar to [JavaRDD and JavaPairRDD](java-programming-guide.html#rdd-classes). DStream functions like `map` and `filter` are implemented separately by JavaDStreams and JavaPairDStream to return DStreams of appropriate types.
+1. Functions for transformations must be implemented as subclasses of [Function](api/core/index.html#org.apache.spark.api.java.function.Function) and [Function2](api/core/index.html#org.apache.spark.api.java.function.Function2)
+1. Unlike the Scala API, the Java API handles DStreams for key-value pairs using a separate [JavaPairDStream](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaPairDStream) class(similar to [JavaRDD and JavaPairRDD](java-programming-guide.html#rdd-classes). DStream functions like `map` and `filter` are implemented separately by JavaDStreams and JavaPairDStream to return DStreams of appropriate types.
Spark's [Java Programming Guide](java-programming-guide.html) gives more ideas about using the Java API. To extends the ideas presented for the RDDs to DStreams, we present parts of the Java version of the same NetworkWordCount example presented above. The full source code is given at `<spark repo>/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java`
@@ -479,7 +490,7 @@ JavaDStream<String> lines = ssc.socketTextStream(ip, port);
{% endhighlight %}
-Then the `lines` are split into words by using the `flatMap` function and [FlatMapFunction](api/core/index.html#spark.api.java.function.FlatMapFunction).
+Then the `lines` are split into words by using the `flatMap` function and [FlatMapFunction](api/core/index.html#org.apache.spark.api.java.function.FlatMapFunction).
{% highlight java %}
JavaDStream<String> words = lines.flatMap(
@@ -491,7 +502,7 @@ JavaDStream<String> words = lines.flatMap(
});
{% endhighlight %}
-The `words` is then mapped to a [JavaPairDStream](api/streaming/index.html#spark.streaming.api.java.JavaPairDStream) of `(word, 1)` pairs using `map` and [PairFunction](api/core/index.html#spark.api.java.function.PairFunction). This is reduced by using `reduceByKey` and [Function2](api/core/index.html#spark.api.java.function.Function2).
+The `words` is then mapped to a [JavaPairDStream](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaPairDStream) of `(word, 1)` pairs using `map` and [PairFunction](api/core/index.html#org.apache.spark.api.java.function.PairFunction). This is reduced by using `reduceByKey` and [Function2](api/core/index.html#org.apache.spark.api.java.function.Function2).
{% highlight java %}
JavaPairDStream<String, Integer> wordCounts = words.map(
@@ -510,8 +521,8 @@ JavaPairDStream<String, Integer> wordCounts = words.map(
{% endhighlight %}
-
# Where to Go from Here
-* API docs - [Scala](api/streaming/index.html#spark.streaming.package) and [Java](api/streaming/index.html#spark.streaming.api.java.package)
-* More examples - [Scala](https://github.com/mesos/spark/tree/master/examples/src/main/scala/spark/streaming/examples) and [Java](https://github.com/mesos/spark/tree/master/examples/src/main/java/spark/streaming/examples)
+
+* API docs - [Scala](api/streaming/index.html#org.apache.spark.streaming.package) and [Java](api/streaming/index.html#org.apache.spark.streaming.api.java.package)
+* More examples - [Scala](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/spark/streaming/examples) and [Java](https://github.com/apache/incubator-spark/tree/master/examples/src/main/java/spark/streaming/examples)
* [Paper describing Spark Streaming](http://www.eecs.berkeley.edu/Pubs/TechRpts/2012/EECS-2012-259.pdf)
diff --git a/docs/tuning.md b/docs/tuning.md
index 5ffca54481..28d88a2659 100644
--- a/docs/tuning.md
+++ b/docs/tuning.md
@@ -32,24 +32,25 @@ in your operations) and performance. It provides two serialization libraries:
[`java.io.Externalizable`](http://docs.oracle.com/javase/6/docs/api/java/io/Externalizable.html).
Java serialization is flexible but often quite slow, and leads to large
serialized formats for many classes.
-* [Kryo serialization](http://code.google.com/p/kryo/wiki/V1Documentation): Spark can also use
+* [Kryo serialization](http://code.google.com/p/kryo/): Spark can also use
the Kryo library (version 2) to serialize objects more quickly. Kryo is significantly
faster and more compact than Java serialization (often as much as 10x), but does not support all
`Serializable` types and requires you to *register* the classes you'll use in the program in advance
for best performance.
-You can switch to using Kryo by calling `System.setProperty("spark.serializer", "spark.KryoSerializer")`
+You can switch to using Kryo by calling `System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")`
*before* creating your SparkContext. The only reason it is not the default is because of the custom
registration requirement, but we recommend trying it in any network-intensive application.
Finally, to register your classes with Kryo, create a public class that extends
-[`spark.KryoRegistrator`](api/core/index.html#spark.KryoRegistrator) and set the
+[`org.apache.spark.serializer.KryoRegistrator`](api/core/index.html#org.apache.spark.serializer.KryoRegistrator) and set the
`spark.kryo.registrator` system property to point to it, as follows:
{% highlight scala %}
import com.esotericsoftware.kryo.Kryo
+import org.apache.spark.serializer.KryoRegistrator
-class MyRegistrator extends spark.KryoRegistrator {
+class MyRegistrator extends KryoRegistrator {
override def registerClasses(kryo: Kryo) {
kryo.register(classOf[MyClass1])
kryo.register(classOf[MyClass2])
@@ -57,7 +58,7 @@ class MyRegistrator extends spark.KryoRegistrator {
}
// Make sure to set these properties *before* creating a SparkContext!
-System.setProperty("spark.serializer", "spark.KryoSerializer")
+System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
System.setProperty("spark.kryo.registrator", "mypackage.MyRegistrator")
val sc = new SparkContext(...)
{% endhighlight %}
@@ -216,7 +217,7 @@ enough. Spark automatically sets the number of "map" tasks to run on each file a
(though you can control it through optional parameters to `SparkContext.textFile`, etc), and for
distributed "reduce" operations, such as `groupByKey` and `reduceByKey`, it uses the largest
parent RDD's number of partitions. You can pass the level of parallelism as a second argument
-(see the [`spark.PairRDDFunctions`](api/core/index.html#spark.PairRDDFunctions) documentation),
+(see the [`spark.PairRDDFunctions`](api/core/index.html#org.apache.spark.rdd.PairRDDFunctions) documentation),
or set the system property `spark.default.parallelism` to change the default.
In general, we recommend 2-3 tasks per CPU core in your cluster.
diff --git a/ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh b/ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh
deleted file mode 100644
index 50ecf83404..0000000000
--- a/ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-# These variables are automatically filled in by the mesos-ec2 script.
-export MESOS_MASTERS="{{master_list}}"
-export MESOS_SLAVES="{{slave_list}}"
-export MESOS_ZOO_LIST="{{zoo_list}}"
-export MESOS_HDFS_DATA_DIRS="{{hdfs_data_dirs}}"
-export MESOS_MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}"
-export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}"
diff --git a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
index 166a884c88..675429c57e 100644
--- a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
+++ b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
@@ -1,11 +1,13 @@
-#!/bin/bash
+#!/usr/bin/env bash
-# These variables are automatically filled in by the mesos-ec2 script.
-export MESOS_MASTERS="{{master_list}}"
-export MESOS_SLAVES="{{slave_list}}"
-export MESOS_ZOO_LIST="{{zoo_list}}"
-export MESOS_HDFS_DATA_DIRS="{{hdfs_data_dirs}}"
-export MESOS_MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}"
-export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}"
+# These variables are automatically filled in by the spark-ec2 script.
+export MASTERS="{{master_list}}"
+export SLAVES="{{slave_list}}"
+export HDFS_DATA_DIRS="{{hdfs_data_dirs}}"
+export MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}"
+export SPARK_LOCAL_DIRS="{{spark_local_dirs}}"
export MODULES="{{modules}}"
+export SPARK_VERSION="{{spark_version}}"
+export SHARK_VERSION="{{shark_version}}"
+export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}"
export SWAP_MB="{{swap}}"
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 2ec3c007fb..932e70db96 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -9,9 +9,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -36,9 +36,8 @@ import boto
from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType
from boto import ec2
-# A static URL from which to figure out the latest Mesos EC2 AMI
-LATEST_AMI_URL = "https://s3.amazonaws.com/mesos-images/ids/latest-spark-0.7"
-
+# A URL prefix from which to fetch AMI information
+AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list"
# Configure and parse our command-line arguments
def parse_args():
@@ -53,7 +52,7 @@ def parse_args():
help="Seconds to wait for nodes to start (default: 120)")
parser.add_option("-k", "--key-pair",
help="Key pair to use on instances")
- parser.add_option("-i", "--identity-file",
+ parser.add_option("-i", "--identity-file",
help="SSH private key file to use for logging into instances")
parser.add_option("-t", "--instance-type", default="m1.large",
help="Type of instance to launch (default: m1.large). " +
@@ -66,9 +65,14 @@ def parse_args():
help="Availability zone to launch instances in, or 'all' to spread " +
"slaves across multiple (an additional $0.01/Gb for bandwidth" +
"between zones applies)")
- parser.add_option("-a", "--ami", default="latest",
- help="Amazon Machine Image ID to use, or 'latest' to use latest " +
- "available AMI (default: latest)")
+ parser.add_option("-a", "--ami", help="Amazon Machine Image ID to use")
+ parser.add_option("-v", "--spark-version", default="0.7.3",
+ help="Version of Spark to use: 'X.Y.Z' or a specific git hash")
+ parser.add_option("--spark-git-repo",
+ default="https://github.com/mesos/spark",
+ help="Github repo from which to checkout supplied commit hash")
+ parser.add_option("--hadoop-major-version", default="1",
+ help="Major version of Hadoop (default: 1)")
parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port",
help="Use SSH dynamic port forwarding to create a SOCKS proxy at " +
"the given local address (for use with login)")
@@ -84,22 +88,16 @@ def parse_args():
parser.add_option("--spot-price", metavar="PRICE", type="float",
help="If specified, launch slaves as spot instances with the given " +
"maximum price (in dollars)")
- parser.add_option("--cluster-type", type="choice", metavar="TYPE",
- choices=["mesos", "standalone"], default="standalone",
- help="'mesos' for a Mesos cluster, 'standalone' for a standalone " +
- "Spark cluster (default: standalone)")
parser.add_option("--ganglia", action="store_true", default=True,
help="Setup Ganglia monitoring on cluster (default: on). NOTE: " +
"the Ganglia page will be publicly accessible")
parser.add_option("--no-ganglia", action="store_false", dest="ganglia",
help="Disable Ganglia monitoring for the cluster")
- parser.add_option("--old-scripts", action="store_true", default=False,
- help="Use old mesos-ec2 scripts, for Spark <= 0.6 AMIs")
parser.add_option("-u", "--user", default="root",
help="The SSH user you want to connect as (default: root)")
parser.add_option("--delete-groups", action="store_true", default=False,
help="When destroying a cluster, delete the security groups that were created")
-
+
(opts, args) = parser.parse_args()
if len(args) != 2:
parser.print_help()
@@ -109,9 +107,6 @@ def parse_args():
print >> stderr, ("ERROR: The -i or --identity-file argument is " +
"required for " + action)
sys.exit(1)
- if opts.cluster_type not in ["mesos", "standalone"] and action == "launch":
- print >> stderr, ("ERROR: Invalid cluster type: " + opts.cluster_type)
- sys.exit(1)
# Boto config check
# http://boto.cloudhackers.com/en/latest/boto_config_tut.html
@@ -158,66 +153,96 @@ def wait_for_instances(conn, instances):
def is_active(instance):
return (instance.state in ['pending', 'running', 'stopping', 'stopped'])
+# Return correct versions of Spark and Shark, given the supplied Spark version
+def get_spark_shark_version(opts):
+ spark_shark_map = {"0.7.3": "0.7.0"}
+ version = opts.spark_version.replace("v", "")
+ if version not in spark_shark_map:
+ print >> stderr, "Don't know about Spark version: %s" % version
+ sys.exit(1)
+ return (version, spark_shark_map[version])
+
+# Attempt to resolve an appropriate AMI given the architecture and
+# region of the request.
+def get_spark_ami(opts):
+ instance_types = {
+ "m1.small": "pvm",
+ "m1.medium": "pvm",
+ "m1.large": "pvm",
+ "m1.xlarge": "pvm",
+ "t1.micro": "pvm",
+ "c1.medium": "pvm",
+ "c1.xlarge": "pvm",
+ "m2.xlarge": "pvm",
+ "m2.2xlarge": "pvm",
+ "m2.4xlarge": "pvm",
+ "cc1.4xlarge": "hvm",
+ "cc2.8xlarge": "hvm",
+ "cg1.4xlarge": "hvm",
+ "hs1.8xlarge": "hvm",
+ "hi1.4xlarge": "hvm",
+ "m3.xlarge": "hvm",
+ "m3.2xlarge": "hvm",
+ "cr1.8xlarge": "hvm"
+ }
+ if opts.instance_type in instance_types:
+ instance_type = instance_types[opts.instance_type]
+ else:
+ instance_type = "pvm"
+ print >> stderr,\
+ "Don't recognize %s, assuming type is pvm" % opts.instance_type
+
+ ami_path = "%s/%s/%s" % (AMI_PREFIX, opts.region, instance_type)
+ try:
+ ami = urllib2.urlopen(ami_path).read().strip()
+ print "Spark AMI: " + ami
+ except:
+ print >> stderr, "Could not resolve AMI at: " + ami_path
+ sys.exit(1)
+
+ return ami
# Launch a cluster of the given name, by setting up its security groups,
# and then starting new instances in them.
-# Returns a tuple of EC2 reservation objects for the master, slave
-# and zookeeper instances (in that order).
+# Returns a tuple of EC2 reservation objects for the master and slaves
# Fails if there already instances running in the cluster's groups.
def launch_cluster(conn, opts, cluster_name):
print "Setting up security groups..."
master_group = get_or_make_group(conn, cluster_name + "-master")
slave_group = get_or_make_group(conn, cluster_name + "-slaves")
- zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
if master_group.rules == []: # Group was just now created
master_group.authorize(src_group=master_group)
master_group.authorize(src_group=slave_group)
- master_group.authorize(src_group=zoo_group)
master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
+ master_group.authorize('tcp', 33000, 33000, '0.0.0.0/0')
master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
- if opts.cluster_type == "mesos":
- master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
+ master_group.authorize('tcp', 3030, 3035, '0.0.0.0/0')
if opts.ganglia:
master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
if slave_group.rules == []: # Group was just now created
slave_group.authorize(src_group=master_group)
slave_group.authorize(src_group=slave_group)
- slave_group.authorize(src_group=zoo_group)
slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
- if zoo_group.rules == []: # Group was just now created
- zoo_group.authorize(src_group=master_group)
- zoo_group.authorize(src_group=slave_group)
- zoo_group.authorize(src_group=zoo_group)
- zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
- zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
- zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
- zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')
# Check if instances are already running in our groups
active_nodes = get_existing_cluster(conn, opts, cluster_name,
die_on_error=False)
if any(active_nodes):
print >> stderr, ("ERROR: There are already instances running in " +
- "group %s, %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
+ "group %s or %s" % (master_group.name, slave_group.name))
sys.exit(1)
- # Figure out the latest AMI from our static URL
- if opts.ami == "latest":
- try:
- opts.ami = urllib2.urlopen(LATEST_AMI_URL).read().strip()
- print "Latest Spark AMI: " + opts.ami
- except:
- print >> stderr, "Could not read " + LATEST_AMI_URL
- sys.exit(1)
-
+ # Figure out Spark AMI
+ if opts.ami is None:
+ opts.ami = get_spark_ami(opts)
print "Launching instances..."
try:
@@ -257,7 +282,7 @@ def launch_cluster(conn, opts, cluster_name):
block_device_map = block_map)
my_req_ids += [req.id for req in slave_reqs]
i += 1
-
+
print "Waiting for spot instances to be granted..."
try:
while True:
@@ -284,9 +309,9 @@ def launch_cluster(conn, opts, cluster_name):
print "Canceling spot instance requests"
conn.cancel_spot_instance_requests(my_req_ids)
# Log a warning if any of these requests actually launched instances:
- (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(
+ (master_nodes, slave_nodes) = get_existing_cluster(
conn, opts, cluster_name, die_on_error=False)
- running = len(master_nodes) + len(slave_nodes) + len(zoo_nodes)
+ running = len(master_nodes) + len(slave_nodes)
if running:
print >> stderr, ("WARNING: %d instances are still running" % running)
sys.exit(0)
@@ -327,21 +352,17 @@ def launch_cluster(conn, opts, cluster_name):
master_nodes = master_res.instances
print "Launched master in %s, regid = %s" % (zone, master_res.id)
- zoo_nodes = []
-
# Return all the instances
- return (master_nodes, slave_nodes, zoo_nodes)
+ return (master_nodes, slave_nodes)
# Get the EC2 instances in an existing cluster if available.
-# Returns a tuple of lists of EC2 instance objects for the masters,
-# slaves and zookeeper nodes (in that order).
+# Returns a tuple of lists of EC2 instance objects for the masters and slaves
def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
print "Searching for existing cluster " + cluster_name + "..."
reservations = conn.get_all_instances()
master_nodes = []
slave_nodes = []
- zoo_nodes = []
for res in reservations:
active = [i for i in res.instances if is_active(i)]
if len(active) > 0:
@@ -350,13 +371,11 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
master_nodes += res.instances
elif group_names == [cluster_name + "-slaves"]:
slave_nodes += res.instances
- elif group_names == [cluster_name + "-zoo"]:
- zoo_nodes += res.instances
- if any((master_nodes, slave_nodes, zoo_nodes)):
- print ("Found %d master(s), %d slaves, %d ZooKeeper nodes" %
- (len(master_nodes), len(slave_nodes), len(zoo_nodes)))
+ if any((master_nodes, slave_nodes)):
+ print ("Found %d master(s), %d slaves" %
+ (len(master_nodes), len(slave_nodes)))
if (master_nodes != [] and slave_nodes != []) or not die_on_error:
- return (master_nodes, slave_nodes, zoo_nodes)
+ return (master_nodes, slave_nodes)
else:
if master_nodes == [] and slave_nodes != []:
print "ERROR: Could not find master in group " + cluster_name + "-master"
@@ -369,7 +388,7 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
# Deploy configuration files and run setup scripts on a newly launched
# or started EC2 cluster.
-def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_key):
+def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
master = master_nodes[0].public_dns_name
if deploy_ssh_key:
print "Copying SSH key %s to master..." % opts.identity_file
@@ -377,63 +396,46 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k
scp(master, opts, opts.identity_file, '~/.ssh/id_rsa')
ssh(master, opts, 'chmod 600 ~/.ssh/id_rsa')
- if opts.cluster_type == "mesos":
- modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mesos']
- elif opts.cluster_type == "standalone":
- modules = ['ephemeral-hdfs', 'persistent-hdfs', 'spark-standalone']
+ modules = ['spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs',
+ 'mapreduce', 'spark-standalone']
+
+ if opts.hadoop_major_version == "1":
+ modules = filter(lambda x: x != "mapreduce", modules)
if opts.ganglia:
modules.append('ganglia')
- if not opts.old_scripts:
- # NOTE: We should clone the repository before running deploy_files to
- # prevent ec2-variables.sh from being overwritten
- ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git")
+ # NOTE: We should clone the repository before running deploy_files to
+ # prevent ec2-variables.sh from being overwritten
+ ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v2")
print "Deploying files to master..."
- deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes,
- zoo_nodes, modules)
+ deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
print "Running setup on master..."
- if opts.old_scripts:
- if opts.cluster_type == "mesos":
- setup_mesos_cluster(master, opts)
- elif opts.cluster_type == "standalone":
- setup_standalone_cluster(master, slave_nodes, opts)
- else:
- setup_spark_cluster(master, opts)
+ setup_spark_cluster(master, opts)
print "Done!"
-def setup_mesos_cluster(master, opts):
- ssh(master, opts, "chmod u+x mesos-ec2/setup")
- ssh(master, opts, "mesos-ec2/setup %s %s %s %s" %
- ("generic", "none", "master", opts.swap))
-
def setup_standalone_cluster(master, slave_nodes, opts):
slave_ips = '\n'.join([i.public_dns_name for i in slave_nodes])
ssh(master, opts, "echo \"%s\" > spark/conf/slaves" % (slave_ips))
ssh(master, opts, "/root/spark/bin/start-all.sh")
-
+
def setup_spark_cluster(master, opts):
ssh(master, opts, "chmod u+x spark-ec2/setup.sh")
ssh(master, opts, "spark-ec2/setup.sh")
- if opts.cluster_type == "mesos":
- print "Mesos cluster started at http://%s:8080" % master
- elif opts.cluster_type == "standalone":
- print "Spark standalone cluster started at http://%s:8080" % master
+ print "Spark standalone cluster started at http://%s:8080" % master
if opts.ganglia:
print "Ganglia started at http://%s:5080/ganglia" % master
# Wait for a whole cluster (masters, slaves and ZooKeeper) to start up
-def wait_for_cluster(conn, wait_secs, master_nodes, slave_nodes, zoo_nodes):
+def wait_for_cluster(conn, wait_secs, master_nodes, slave_nodes):
print "Waiting for instances to start up..."
time.sleep(5)
wait_for_instances(conn, master_nodes)
wait_for_instances(conn, slave_nodes)
- if zoo_nodes != []:
- wait_for_instances(conn, zoo_nodes)
print "Waiting %d more seconds..." % wait_secs
time.sleep(wait_secs)
@@ -454,7 +456,12 @@ def get_num_disks(instance_type):
"m2.4xlarge": 2,
"cc1.4xlarge": 2,
"cc2.8xlarge": 4,
- "cg1.4xlarge": 2
+ "cg1.4xlarge": 2,
+ "hs1.8xlarge": 24,
+ "cr1.8xlarge": 2,
+ "hi1.4xlarge": 2,
+ "m3.xlarge": 0,
+ "m3.2xlarge": 0
}
if instance_type in disks_by_instance:
return disks_by_instance[instance_type]
@@ -469,8 +476,7 @@ def get_num_disks(instance_type):
# cluster (e.g. lists of masters and slaves). Files are only deployed to
# the first master instance in the cluster, and we expect the setup
# script to be run on that instance to copy them to other nodes.
-def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes,
- modules):
+def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
active_master = master_nodes[0].public_dns_name
num_disks = get_num_disks(opts.instance_type)
@@ -483,28 +489,30 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes,
mapred_local_dirs += ",/mnt%d/hadoop/mrlocal" % i
spark_local_dirs += ",/mnt%d/spark" % i
- if zoo_nodes != []:
- zoo_list = '\n'.join([i.public_dns_name for i in zoo_nodes])
- cluster_url = "zoo://" + ",".join(
- ["%s:2181/mesos" % i.public_dns_name for i in zoo_nodes])
- elif opts.cluster_type == "mesos":
- zoo_list = "NONE"
- cluster_url = "%s:5050" % active_master
- elif opts.cluster_type == "standalone":
- zoo_list = "NONE"
- cluster_url = "%s:7077" % active_master
+ cluster_url = "%s:7077" % active_master
+
+ if "." in opts.spark_version:
+ # Pre-built spark & shark deploy
+ (spark_v, shark_v) = get_spark_shark_version(opts)
+ else:
+ # Spark-only custom deploy
+ spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version)
+ shark_v = ""
+ modules = filter(lambda x: x != "shark", modules)
template_vars = {
"master_list": '\n'.join([i.public_dns_name for i in master_nodes]),
"active_master": active_master,
"slave_list": '\n'.join([i.public_dns_name for i in slave_nodes]),
- "zoo_list": zoo_list,
"cluster_url": cluster_url,
"hdfs_data_dirs": hdfs_data_dirs,
"mapred_local_dirs": mapred_local_dirs,
"spark_local_dirs": spark_local_dirs,
"swap": str(opts.swap),
- "modules": '\n'.join(modules)
+ "modules": '\n'.join(modules),
+ "spark_version": spark_v,
+ "shark_version": shark_v,
+ "hadoop_major_version": opts.hadoop_major_version
}
# Create a temp directory in which we will place all the files to be
@@ -528,7 +536,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes,
dest.write(text)
dest.close()
# rsync the whole directory over to the master machine
- command = (("rsync -rv -e 'ssh -o StrictHostKeyChecking=no -i %s' " +
+ command = (("rsync -rv -e 'ssh -o StrictHostKeyChecking=no -i %s' " +
"'%s/' '%s@%s:/'") % (opts.identity_file, tmp_dir, opts.user, active_master))
subprocess.check_call(command, shell=True)
# Remove the temp directory we created above
@@ -554,12 +562,12 @@ def ssh(host, opts, command):
except subprocess.CalledProcessError as e:
if (tries > 2):
raise e
- print "Error connecting to host {0}, sleeping 30".format(e)
+ print "Couldn't connect to host {0}, waiting 30 seconds".format(e)
time.sleep(30)
tries = tries + 1
-
-
-
+
+
+
# Gets a list of zones to launch instances in
@@ -593,20 +601,20 @@ def main():
if action == "launch":
if opts.resume:
- (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(
+ (master_nodes, slave_nodes) = get_existing_cluster(
conn, opts, cluster_name)
else:
- (master_nodes, slave_nodes, zoo_nodes) = launch_cluster(
+ (master_nodes, slave_nodes) = launch_cluster(
conn, opts, cluster_name)
- wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes, zoo_nodes)
- setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, True)
+ wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes)
+ setup_cluster(conn, master_nodes, slave_nodes, opts, True)
elif action == "destroy":
response = raw_input("Are you sure you want to destroy the cluster " +
cluster_name + "?\nALL DATA ON ALL NODES WILL BE LOST!!\n" +
"Destroy cluster " + cluster_name + " (y/N): ")
if response == "y":
- (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(
+ (master_nodes, slave_nodes) = get_existing_cluster(
conn, opts, cluster_name, die_on_error=False)
print "Terminating master..."
for inst in master_nodes:
@@ -614,15 +622,11 @@ def main():
print "Terminating slaves..."
for inst in slave_nodes:
inst.terminate()
- if zoo_nodes != []:
- print "Terminating zoo..."
- for inst in zoo_nodes:
- inst.terminate()
# Delete security groups as well
if opts.delete_groups:
print "Deleting security groups (this will take some time)..."
- group_names = [cluster_name + "-master", cluster_name + "-slaves", cluster_name + "-zoo"]
+ group_names = [cluster_name + "-master", cluster_name + "-slaves"]
attempt = 1;
while attempt <= 3:
@@ -639,7 +643,7 @@ def main():
from_port=rule.from_port,
to_port=rule.to_port,
src_group=grant)
-
+
# Sleep for AWS eventual-consistency to catch up, and for instances
# to terminate
time.sleep(30) # Yes, it does have to be this long :-(
@@ -650,19 +654,19 @@ def main():
except boto.exception.EC2ResponseError:
success = False;
print "Failed to delete security group " + group.name
-
+
# Unfortunately, group.revoke() returns True even if a rule was not
# deleted, so this needs to be rerun if something fails
if success: break;
-
+
attempt += 1
-
+
if not success:
print "Failed to delete all security groups after 3 tries."
print "Try re-running in a few minutes."
elif action == "login":
- (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(
+ (master_nodes, slave_nodes) = get_existing_cluster(
conn, opts, cluster_name)
master = master_nodes[0].public_dns_name
print "Logging into master " + master + "..."
@@ -673,17 +677,17 @@ def main():
(opts.identity_file, proxy_opt, opts.user, master), shell=True)
elif action == "get-master":
- (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(conn, opts, cluster_name)
+ (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name)
print master_nodes[0].public_dns_name
elif action == "stop":
response = raw_input("Are you sure you want to stop the cluster " +
cluster_name + "?\nDATA ON EPHEMERAL DISKS WILL BE LOST, " +
- "BUT THE CLUSTER WILL KEEP USING SPACE ON\n" +
+ "BUT THE CLUSTER WILL KEEP USING SPACE ON\n" +
"AMAZON EBS IF IT IS EBS-BACKED!!\n" +
"Stop cluster " + cluster_name + " (y/N): ")
if response == "y":
- (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(
+ (master_nodes, slave_nodes) = get_existing_cluster(
conn, opts, cluster_name, die_on_error=False)
print "Stopping master..."
for inst in master_nodes:
@@ -693,15 +697,9 @@ def main():
for inst in slave_nodes:
if inst.state not in ["shutting-down", "terminated"]:
inst.stop()
- if zoo_nodes != []:
- print "Stopping zoo..."
- for inst in zoo_nodes:
- if inst.state not in ["shutting-down", "terminated"]:
- inst.stop()
elif action == "start":
- (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(
- conn, opts, cluster_name)
+ (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name)
print "Starting slaves..."
for inst in slave_nodes:
if inst.state not in ["shutting-down", "terminated"]:
@@ -710,13 +708,8 @@ def main():
for inst in master_nodes:
if inst.state not in ["shutting-down", "terminated"]:
inst.start()
- if zoo_nodes != []:
- print "Starting zoo..."
- for inst in zoo_nodes:
- if inst.state not in ["shutting-down", "terminated"]:
- inst.start()
- wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes, zoo_nodes)
- setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, False)
+ wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes)
+ setup_cluster(conn, master_nodes, slave_nodes, opts, False)
else:
print >> stderr, "Invalid action: %s" % action
diff --git a/examples/pom.xml b/examples/pom.xml
index 7a8d08fade..224cf6c96c 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -19,22 +19,57 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-examples</artifactId>
<packaging>jar</packaging>
<name>Spark Project Examples</name>
- <url>http://spark-project.org/</url>
+ <url>http://spark.incubator.apache.org/</url>
<dependencies>
<dependency>
- <groupId>org.scala-lang</groupId>
- <artifactId>scala-library</artifactId>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-streaming</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-mllib</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-bagel</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>0.94.6</version>
+ <exclusions>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
@@ -55,182 +90,100 @@
<artifactId>scalacheck_${scala.version}</artifactId>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.apache.cassandra</groupId>
- <artifactId>cassandra-all</artifactId>
- <version>1.2.5</version>
- <exclusions>
- <exclusion>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.googlecode.concurrentlinkedhashmap</groupId>
- <artifactId>concurrentlinkedhashmap-lru</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.ning</groupId>
- <artifactId>compress-lzf</artifactId>
- </exclusion>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- </exclusion>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.cassandra.deps</groupId>
- <artifactId>avro</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
+ <dependency>
+ <groupId>org.apache.cassandra</groupId>
+ <artifactId>cassandra-all</artifactId>
+ <version>1.2.5</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.googlecode.concurrentlinkedhashmap</groupId>
+ <artifactId>concurrentlinkedhashmap-lru</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.ning</groupId>
+ <artifactId>compress-lzf</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.cassandra.deps</groupId>
+ <artifactId>avro</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
</dependencies>
- <build>
- <outputDirectory>target/scala-${scala.version}/classes</outputDirectory>
- <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
- <plugins>
- <plugin>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest-maven-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
<profiles>
<profile>
- <id>hadoop1</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-streaming</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.94.6</version>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop1</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>hadoop2</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-streaming</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.94.6</version>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
<id>hadoop2-yarn</id>
<dependencies>
<dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-streaming</artifactId>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-yarn</artifactId>
<version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
<scope>provided</scope>
</dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.94.6</version>
- </dependency>
</dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2-yarn</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
</profile>
</profiles>
+
+ <build>
+ <outputDirectory>target/scala-${scala.version}/classes</outputDirectory>
+ <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <configuration>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <outputFile>${project.build.directory}/scala-${scala.version}/${project.artifactId}-assembly-${project.version}.jar</outputFile>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>reference.conf</resource>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
</project>
diff --git a/examples/src/main/java/spark/examples/JavaHdfsLR.java b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java
index 9485e0cfa9..be0d38589c 100644
--- a/examples/src/main/java/spark/examples/JavaHdfsLR.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.examples;
+package org.apache.spark.examples;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.Function;
-import spark.api.java.function.Function2;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
import java.io.Serializable;
import java.util.Arrays;
diff --git a/examples/src/main/java/spark/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java
index 2d34776177..5a6afe7eae 100644
--- a/examples/src/main/java/spark/examples/JavaKMeans.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java
@@ -15,15 +15,15 @@
* limitations under the License.
*/
-package spark.examples;
+package org.apache.spark.examples;
import scala.Tuple2;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.Function;
-import spark.api.java.function.PairFunction;
-import spark.util.Vector;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.util.Vector;
import java.util.List;
import java.util.Map;
diff --git a/examples/src/main/java/spark/examples/JavaLogQuery.java b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
index d22684d980..152f029213 100644
--- a/examples/src/main/java/spark/examples/JavaLogQuery.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
@@ -15,16 +15,16 @@
* limitations under the License.
*/
-package spark.examples;
+package org.apache.spark.examples;
import com.google.common.collect.Lists;
import scala.Tuple2;
import scala.Tuple3;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.Function2;
-import spark.api.java.function.PairFunction;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
import java.io.Serializable;
import java.util.Collections;
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
new file mode 100644
index 0000000000..c5603a639b
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import scala.Tuple2;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFlatMapFunction;
+import org.apache.spark.api.java.function.PairFunction;
+
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * Computes the PageRank of URLs from an input file. Input file should
+ * be in format of:
+ * URL neighbor URL
+ * URL neighbor URL
+ * URL neighbor URL
+ * ...
+ * where URL and their neighbors are separated by space(s).
+ */
+public class JavaPageRank {
+ private static class Sum extends Function2<Double, Double, Double> {
+ @Override
+ public Double call(Double a, Double b) {
+ return a + b;
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (args.length < 3) {
+ System.err.println("Usage: JavaPageRank <master> <file> <number_of_iterations>");
+ System.exit(1);
+ }
+
+ JavaSparkContext ctx = new JavaSparkContext(args[0], "JavaPageRank",
+ System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+
+ // Loads in input file. It should be in format of:
+ // URL neighbor URL
+ // URL neighbor URL
+ // URL neighbor URL
+ // ...
+ JavaRDD<String> lines = ctx.textFile(args[1], 1);
+
+ // Loads all URLs from input file and initialize their neighbors.
+ JavaPairRDD<String, List<String>> links = lines.map(new PairFunction<String, String, String>() {
+ @Override
+ public Tuple2<String, String> call(String s) {
+ String[] parts = s.split("\\s+");
+ return new Tuple2<String, String>(parts[0], parts[1]);
+ }
+ }).distinct().groupByKey().cache();
+
+ // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
+ JavaPairRDD<String, Double> ranks = links.mapValues(new Function<List<String>, Double>() {
+ @Override
+ public Double call(List<String> rs) throws Exception {
+ return 1.0;
+ }
+ });
+
+ // Calculates and updates URL ranks continuously using PageRank algorithm.
+ for (int current = 0; current < Integer.parseInt(args[2]); current++) {
+ // Calculates URL contributions to the rank of other URLs.
+ JavaPairRDD<String, Double> contribs = links.join(ranks).values()
+ .flatMap(new PairFlatMapFunction<Tuple2<List<String>, Double>, String, Double>() {
+ @Override
+ public Iterable<Tuple2<String, Double>> call(Tuple2<List<String>, Double> s) {
+ List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
+ for (String n : s._1) {
+ results.add(new Tuple2<String, Double>(n, s._2 / s._1.size()));
+ }
+ return results;
+ }
+ });
+
+ // Re-calculates URL ranks based on neighbor contributions.
+ ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
+ @Override
+ public Double call(Double sum) throws Exception {
+ return 0.15 + sum * 0.85;
+ }
+ });
+ }
+
+ // Collects all URL ranks and dump them to console.
+ List<Tuple2<String, Double>> output = ranks.collect();
+ for (Tuple2 tuple : output) {
+ System.out.println(tuple._1 + " has rank: " + tuple._2 + ".");
+ }
+
+ System.exit(0);
+ }
+}
diff --git a/examples/src/main/java/spark/examples/JavaSparkPi.java b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
index d5f42fbb38..4a2380caf5 100644
--- a/examples/src/main/java/spark/examples/JavaSparkPi.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.examples;
+package org.apache.spark.examples;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.Function;
-import spark.api.java.function.Function2;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
import java.util.ArrayList;
import java.util.List;
diff --git a/examples/src/main/java/spark/examples/JavaTC.java b/examples/src/main/java/org/apache/spark/examples/JavaTC.java
index 559d7f9e53..17f21f6b77 100644
--- a/examples/src/main/java/spark/examples/JavaTC.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaTC.java
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.examples;
+package org.apache.spark.examples;
import scala.Tuple2;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.PairFunction;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.PairFunction;
import java.util.ArrayList;
import java.util.HashSet;
diff --git a/examples/src/main/java/spark/examples/JavaWordCount.java b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java
index 1af370c1c3..07d32ad659 100644
--- a/examples/src/main/java/spark/examples/JavaWordCount.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java
@@ -15,15 +15,15 @@
* limitations under the License.
*/
-package spark.examples;
+package org.apache.spark.examples;
import scala.Tuple2;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.FlatMapFunction;
-import spark.api.java.function.Function2;
-import spark.api.java.function.PairFunction;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
import java.util.Arrays;
import java.util.List;
diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java
new file mode 100644
index 0000000000..628cb892b6
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.recommendation.ALS;
+import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
+import org.apache.spark.mllib.recommendation.Rating;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+import scala.Tuple2;
+
+/**
+ * Example using MLLib ALS from Java.
+ */
+public class JavaALS {
+
+ static class ParseRating extends Function<String, Rating> {
+ public Rating call(String line) {
+ StringTokenizer tok = new StringTokenizer(line, ",");
+ int x = Integer.parseInt(tok.nextToken());
+ int y = Integer.parseInt(tok.nextToken());
+ double rating = Double.parseDouble(tok.nextToken());
+ return new Rating(x, y, rating);
+ }
+ }
+
+ static class FeaturesToString extends Function<Tuple2<Object, double[]>, String> {
+ public String call(Tuple2<Object, double[]> element) {
+ return element._1().toString() + "," + Arrays.toString(element._2());
+ }
+ }
+
+ public static void main(String[] args) {
+
+ if (args.length != 5 && args.length != 6) {
+ System.err.println(
+ "Usage: JavaALS <master> <ratings_file> <rank> <iterations> <output_dir> [<blocks>]");
+ System.exit(1);
+ }
+
+ int rank = Integer.parseInt(args[2]);
+ int iterations = Integer.parseInt(args[3]);
+ String outputDir = args[4];
+ int blocks = -1;
+ if (args.length == 6) {
+ blocks = Integer.parseInt(args[5]);
+ }
+
+ JavaSparkContext sc = new JavaSparkContext(args[0], "JavaALS",
+ System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+ JavaRDD<String> lines = sc.textFile(args[1]);
+
+ JavaRDD<Rating> ratings = lines.map(new ParseRating());
+
+ MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);
+
+ model.userFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
+ outputDir + "/userFeatures");
+ model.productFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
+ outputDir + "/productFeatures");
+ System.out.println("Final user/product features written to " + outputDir);
+
+ System.exit(0);
+ }
+}
diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java
new file mode 100644
index 0000000000..cd59a139b9
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.clustering.KMeans;
+import org.apache.spark.mllib.clustering.KMeansModel;
+
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+/**
+ * Example using MLLib KMeans from Java.
+ */
+public class JavaKMeans {
+
+ static class ParsePoint extends Function<String, double[]> {
+ public double[] call(String line) {
+ StringTokenizer tok = new StringTokenizer(line, " ");
+ int numTokens = tok.countTokens();
+ double[] point = new double[numTokens];
+ for (int i = 0; i < numTokens; ++i) {
+ point[i] = Double.parseDouble(tok.nextToken());
+ }
+ return point;
+ }
+ }
+
+ public static void main(String[] args) {
+
+ if (args.length < 4) {
+ System.err.println(
+ "Usage: JavaKMeans <master> <input_file> <k> <max_iterations> [<runs>]");
+ System.exit(1);
+ }
+
+ String inputFile = args[1];
+ int k = Integer.parseInt(args[2]);
+ int iterations = Integer.parseInt(args[3]);
+ int runs = 1;
+
+ if (args.length >= 5) {
+ runs = Integer.parseInt(args[4]);
+ }
+
+ JavaSparkContext sc = new JavaSparkContext(args[0], "JavaKMeans",
+ System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+ JavaRDD<String> lines = sc.textFile(args[1]);
+
+ JavaRDD<double[]> points = lines.map(new ParsePoint());
+
+ KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs);
+
+ System.out.println("Cluster centers:");
+ for (double[] center : model.clusterCenters()) {
+ System.out.println(" " + Arrays.toString(center));
+ }
+ double cost = model.computeCost(points.rdd());
+ System.out.println("Cost: " + cost);
+
+ System.exit(0);
+ }
+}
diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java
new file mode 100644
index 0000000000..258061c8e6
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.examples;
+
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.classification.LogisticRegressionWithSGD;
+import org.apache.spark.mllib.classification.LogisticRegressionModel;
+import org.apache.spark.mllib.regression.LabeledPoint;
+
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+/**
+ * Logistic regression based classification using ML Lib.
+ */
+public class JavaLR {
+
+ static class ParsePoint extends Function<String, LabeledPoint> {
+ public LabeledPoint call(String line) {
+ String[] parts = line.split(",");
+ double y = Double.parseDouble(parts[0]);
+ StringTokenizer tok = new StringTokenizer(parts[1], " ");
+ int numTokens = tok.countTokens();
+ double[] x = new double[numTokens];
+ for (int i = 0; i < numTokens; ++i) {
+ x[i] = Double.parseDouble(tok.nextToken());
+ }
+ return new LabeledPoint(y, x);
+ }
+ }
+
+ public static void printWeights(double[] a) {
+ System.out.println(Arrays.toString(a));
+ }
+
+ public static void main(String[] args) {
+ if (args.length != 4) {
+ System.err.println("Usage: JavaLR <master> <input_dir> <step_size> <niters>");
+ System.exit(1);
+ }
+
+ JavaSparkContext sc = new JavaSparkContext(args[0], "JavaLR",
+ System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+ JavaRDD<String> lines = sc.textFile(args[1]);
+ JavaRDD<LabeledPoint> points = lines.map(new ParsePoint()).cache();
+ double stepSize = Double.parseDouble(args[2]);
+ int iterations = Integer.parseInt(args[3]);
+
+ // Another way to configure LogisticRegression
+ //
+ // LogisticRegressionWithSGD lr = new LogisticRegressionWithSGD();
+ // lr.optimizer().setNumIterations(iterations)
+ // .setStepSize(stepSize)
+ // .setMiniBatchFraction(1.0);
+ // lr.setIntercept(true);
+ // LogisticRegressionModel model = lr.train(points.rdd());
+
+ LogisticRegressionModel model = LogisticRegressionWithSGD.train(points.rdd(),
+ iterations, stepSize);
+
+ System.out.print("Final w: ");
+ printWeights(model.weights());
+
+ System.exit(0);
+ }
+}
diff --git a/examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
index 096a9ae219..261813bf2f 100644
--- a/examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.streaming.examples;
+package org.apache.spark.streaming.examples;
-import spark.api.java.function.Function;
-import spark.streaming.*;
-import spark.streaming.api.java.*;
-import spark.streaming.dstream.SparkFlumeEvent;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.streaming.*;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.dstream.SparkFlumeEvent;
/**
* Produces a count of events received from Flume.
diff --git a/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java
index c54d3f3d59..def87c199b 100644
--- a/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java
@@ -15,17 +15,17 @@
* limitations under the License.
*/
-package spark.streaming.examples;
+package org.apache.spark.streaming.examples;
import com.google.common.collect.Lists;
import scala.Tuple2;
-import spark.api.java.function.FlatMapFunction;
-import spark.api.java.function.Function2;
-import spark.api.java.function.PairFunction;
-import spark.streaming.Duration;
-import spark.streaming.api.java.JavaDStream;
-import spark.streaming.api.java.JavaPairDStream;
-import spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
/**
* Counts words in UTF8 encoded, '\n' delimited text received from the network every second.
diff --git a/examples/src/main/java/spark/streaming/examples/JavaQueueStream.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
index 1f4a991542..c8c7389dd1 100644
--- a/examples/src/main/java/spark/streaming/examples/JavaQueueStream.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
@@ -15,17 +15,17 @@
* limitations under the License.
*/
-package spark.streaming.examples;
+package org.apache.spark.streaming.examples;
import com.google.common.collect.Lists;
import scala.Tuple2;
-import spark.api.java.JavaRDD;
-import spark.api.java.function.Function2;
-import spark.api.java.function.PairFunction;
-import spark.streaming.Duration;
-import spark.streaming.api.java.JavaDStream;
-import spark.streaming.api.java.JavaPairDStream;
-import spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
import java.util.LinkedList;
import java.util.List;
diff --git a/examples/src/main/scala/spark/examples/BroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
index 911490cb6c..868ff81f67 100644
--- a/examples/src/main/scala/spark/examples/BroadcastTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark.SparkContext
+import org.apache.spark.SparkContext
object BroadcastTest {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/examples/CassandraTest.scala b/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala
index 104bfd5204..33bf7151a7 100644
--- a/examples/src/main/scala/spark/examples/CassandraTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala
@@ -15,15 +15,15 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import org.apache.hadoop.mapreduce.Job
import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat
import org.apache.cassandra.hadoop.ConfigHelper
import org.apache.cassandra.hadoop.ColumnFamilyInputFormat
import org.apache.cassandra.thrift._
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import java.nio.ByteBuffer
import java.util.SortedMap
import org.apache.cassandra.db.IColumn
diff --git a/examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala b/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala
index 67ddaec8d2..92eb96bd8e 100644
--- a/examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark.SparkContext
+import org.apache.spark.SparkContext
object ExceptionHandlingTest {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/examples/GroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala
index 5cee413615..42c2e0e8e1 100644
--- a/examples/src/main/scala/spark/examples/GroupByTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import java.util.Random
object GroupByTest {
diff --git a/examples/src/main/scala/spark/examples/HBaseTest.scala b/examples/src/main/scala/org/apache/spark/examples/HBaseTest.scala
index 4dd6c243ac..efe2e93b0d 100644
--- a/examples/src/main/scala/spark/examples/HBaseTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/HBaseTest.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark._
-import spark.rdd.NewHadoopRDD
+import org.apache.spark._
+import org.apache.spark.rdd.NewHadoopRDD
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor}
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
diff --git a/examples/src/main/scala/spark/examples/HdfsTest.scala b/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala
index 23258336e2..d6a88d3032 100644
--- a/examples/src/main/scala/spark/examples/HdfsTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark._
+import org.apache.spark._
object HdfsTest {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/examples/LocalALS.scala b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
index 7a449a9d72..4af45b2b4a 100644
--- a/examples/src/main/scala/spark/examples/LocalALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import scala.math.sqrt
import cern.jet.math._
diff --git a/examples/src/main/scala/spark/examples/LocalFileLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
index c1f8d32aa8..fb130ea198 100644
--- a/examples/src/main/scala/spark/examples/LocalFileLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import java.util.Random
-import spark.util.Vector
+import org.apache.spark.util.Vector
object LocalFileLR {
val D = 10 // Numer of dimensions
diff --git a/examples/src/main/scala/spark/examples/LocalKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
index 0a0bc6f476..f90ea35cd4 100644
--- a/examples/src/main/scala/spark/examples/LocalKMeans.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import java.util.Random
-import spark.util.Vector
-import spark.SparkContext._
+import org.apache.spark.util.Vector
+import org.apache.spark.SparkContext._
import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet
diff --git a/examples/src/main/scala/spark/examples/LocalLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
index ab99bf1fbe..cd4e9f1af0 100644
--- a/examples/src/main/scala/spark/examples/LocalLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import java.util.Random
-import spark.util.Vector
+import org.apache.spark.util.Vector
/**
* Logistic regression based classification.
diff --git a/examples/src/main/scala/spark/examples/LocalPi.scala b/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala
index ccd69695df..bb7f22ec8d 100644
--- a/examples/src/main/scala/spark/examples/LocalPi.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import scala.math.random
-import spark._
+import org.apache.spark._
import SparkContext._
object LocalPi {
diff --git a/examples/src/main/scala/spark/examples/LogQuery.scala b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala
index e815ececf7..17ff3ce764 100644
--- a/examples/src/main/scala/spark/examples/LogQuery.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
/**
* Executes a roll up-style query against Apache logs.
*/
diff --git a/examples/src/main/scala/spark/examples/MultiBroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
index d0b1cf06e5..f79f0142b8 100644
--- a/examples/src/main/scala/spark/examples/MultiBroadcastTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark.SparkContext
+import org.apache.spark.SparkContext
object MultiBroadcastTest {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/examples/SimpleSkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala
index d197bbaf7c..37ddfb5db7 100644
--- a/examples/src/main/scala/spark/examples/SimpleSkewedGroupByTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import java.util.Random
object SimpleSkewedGroupByTest {
diff --git a/examples/src/main/scala/spark/examples/SkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
index 4641b82444..9c954b2b5b 100644
--- a/examples/src/main/scala/spark/examples/SkewedGroupByTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import java.util.Random
object SkewedGroupByTest {
diff --git a/examples/src/main/scala/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
index ba0dfd8f9b..814944ba1c 100644
--- a/examples/src/main/scala/spark/examples/SparkALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import scala.math.sqrt
import cern.jet.math._
import cern.colt.matrix._
import cern.colt.matrix.linalg._
-import spark._
+import org.apache.spark._
/**
* Alternating least squares matrix factorization.
diff --git a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
index ef6e09a8e8..646682878f 100644
--- a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
@@ -15,14 +15,13 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import java.util.Random
import scala.math.exp
-import spark.util.Vector
-import spark._
-import spark.deploy.SparkHadoopUtil
-import spark.scheduler.InputFormatInfo
+import org.apache.spark.util.Vector
+import org.apache.spark._
+import org.apache.spark.scheduler.InputFormatInfo
/**
* Logistic regression based classification.
@@ -52,7 +51,7 @@ object SparkHdfsLR {
System.exit(1)
}
val inputPath = args(1)
- val conf = SparkHadoopUtil.newConfiguration()
+ val conf = SparkEnv.get.hadoop.newConfiguration()
val sc = new SparkContext(args(0), "SparkHdfsLR",
System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")), Map(),
InputFormatInfo.computePreferredLocations(
diff --git a/examples/src/main/scala/spark/examples/SparkKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
index 38ed3b149a..f7bf75b4e5 100644
--- a/examples/src/main/scala/spark/examples/SparkKMeans.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import java.util.Random
-import spark.SparkContext
-import spark.util.Vector
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.util.Vector
+import org.apache.spark.SparkContext._
import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet
diff --git a/examples/src/main/scala/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
index 52a0d69744..9ed9fe4d76 100644
--- a/examples/src/main/scala/spark/examples/SparkLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import java.util.Random
import scala.math.exp
-import spark.util.Vector
-import spark._
+import org.apache.spark.util.Vector
+import org.apache.spark._
/**
* Logistic regression based classification.
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
new file mode 100644
index 0000000000..2721caf08b
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
@@ -0,0 +1,46 @@
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext._
+import org.apache.spark.SparkContext
+
+
+/**
+ * Computes the PageRank of URLs from an input file. Input file should
+ * be in format of:
+ * URL neighbor URL
+ * URL neighbor URL
+ * URL neighbor URL
+ * ...
+ * where URL and their neighbors are separated by space(s).
+ */
+object SparkPageRank {
+ def main(args: Array[String]) {
+ if (args.length < 3) {
+ System.err.println("Usage: PageRank <master> <file> <number_of_iterations>")
+ System.exit(1)
+ }
+ var iters = args(2).toInt
+ val ctx = new SparkContext(args(0), "PageRank",
+ System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+ val lines = ctx.textFile(args(1), 1)
+ val links = lines.map{ s =>
+ val parts = s.split("\\s+")
+ (parts(0), parts(1))
+ }.distinct().groupByKey().cache()
+ var ranks = links.mapValues(v => 1.0)
+
+ for (i <- 1 to iters) {
+ val contribs = links.join(ranks).values.flatMap{ case (urls, rank) =>
+ val size = urls.size
+ urls.map(url => (url, rank / size))
+ }
+ ranks = contribs.reduceByKey(_ + _).mapValues(0.15 + 0.85 * _)
+ }
+
+ val output = ranks.collect()
+ output.foreach(tup => println(tup._1 + " has rank: " + tup._2 + "."))
+
+ System.exit(0)
+ }
+}
+
diff --git a/examples/src/main/scala/spark/examples/SparkPi.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala
index 00560ac9d1..5a2bc9b0d0 100644
--- a/examples/src/main/scala/spark/examples/SparkPi.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
import scala.math.random
-import spark._
+import org.apache.spark._
import SparkContext._
/** Computes an approximation to pi */
diff --git a/examples/src/main/scala/spark/examples/SparkTC.scala b/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala
index bf988a953b..5a7a9d1bd8 100644
--- a/examples/src/main/scala/spark/examples/SparkTC.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.examples
+package org.apache.spark.examples
-import spark._
+import org.apache.spark._
import SparkContext._
import scala.util.Random
import scala.collection.mutable
diff --git a/bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala
index de65e27fe0..cfafbaf23e 100644
--- a/bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala
@@ -15,13 +15,14 @@
* limitations under the License.
*/
-package spark.bagel.examples
+package org.apache.spark.examples.bagel
-import spark._
-import spark.SparkContext._
+import org.apache.spark._
+import org.apache.spark.SparkContext._
+import org.apache.spark.serializer.KryoRegistrator
-import spark.bagel._
-import spark.bagel.Bagel._
+import org.apache.spark.bagel._
+import org.apache.spark.bagel.Bagel._
import scala.collection.mutable.ArrayBuffer
diff --git a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala
index a0c5ac9c18..72b5c7b88e 100644
--- a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.bagel.examples
+package org.apache.spark.examples.bagel
-import spark._
-import spark.SparkContext._
+import org.apache.spark._
+import org.apache.spark.SparkContext._
-import spark.bagel._
-import spark.bagel.Bagel._
+import org.apache.spark.bagel._
+import org.apache.spark.bagel.Bagel._
import scala.xml.{XML,NodeSeq}
@@ -37,7 +37,7 @@ object WikipediaPageRank {
System.exit(-1)
}
- System.setProperty("spark.serializer", "spark.KryoSerializer")
+ System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
System.setProperty("spark.kryo.registrator", classOf[PRKryoRegistrator].getName)
val inputFile = args(0)
diff --git a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala
index 3c54a85f42..ddf6855325 100644
--- a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala
@@ -15,21 +15,18 @@
* limitations under the License.
*/
-package spark.bagel.examples
+package org.apache.spark.examples.bagel
-import spark._
-import serializer.{DeserializationStream, SerializationStream, SerializerInstance}
-import spark.SparkContext._
-
-import spark.bagel._
-import spark.bagel.Bagel._
-
-import scala.xml.{XML,NodeSeq}
+import java.io.{InputStream, OutputStream, DataInputStream, DataOutputStream}
+import java.nio.ByteBuffer
import scala.collection.mutable.ArrayBuffer
+import scala.xml.{XML, NodeSeq}
-import java.io.{InputStream, OutputStream, DataInputStream, DataOutputStream}
-import java.nio.ByteBuffer
+import org.apache.spark._
+import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance}
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.RDD
object WikipediaPageRankStandalone {
def main(args: Array[String]) {
@@ -131,7 +128,7 @@ object WikipediaPageRankStandalone {
}
}
-class WPRSerializer extends spark.serializer.Serializer {
+class WPRSerializer extends org.apache.spark.serializer.Serializer {
def newInstance(): SerializerInstance = new WPRSerializerInstance()
}
diff --git a/examples/src/main/scala/spark/streaming/examples/ActorWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
index f97174aeae..cd3423a07b 100644
--- a/examples/src/main/scala/spark/streaming/examples/ActorWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
import scala.collection.mutable.LinkedList
import scala.util.Random
@@ -25,11 +25,11 @@ import akka.actor.ActorRef
import akka.actor.Props
import akka.actor.actorRef2Scala
-import spark.streaming.Seconds
-import spark.streaming.StreamingContext
-import spark.streaming.StreamingContext.toPairDStreamFunctions
-import spark.streaming.receivers.Receiver
-import spark.util.AkkaUtils
+import org.apache.spark.streaming.Seconds
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions
+import org.apache.spark.streaming.receivers.Receiver
+import org.apache.spark.util.AkkaUtils
case class SubscribeReceiver(receiverActor: ActorRef)
case class UnsubscribeReceiver(receiverActor: ActorRef)
@@ -80,7 +80,7 @@ class FeederActor extends Actor {
* goes and subscribe to a typical publisher/feeder actor and receives
* data.
*
- * @see [[spark.streaming.examples.FeederActor]]
+ * @see [[org.apache.spark.streaming.examples.FeederActor]]
*/
class SampleActorReceiver[T: ClassManifest](urlOfPublisher: String)
extends Actor with Receiver {
@@ -132,9 +132,9 @@ object FeederActor {
* <hostname> and <port> describe the AkkaSystem that Spark Sample feeder is running on.
*
* To run this example locally, you may run Feeder Actor as
- * `$ ./run spark.streaming.examples.FeederActor 127.0.1.1 9999`
+ * `$ ./run-example spark.streaming.examples.FeederActor 127.0.1.1 9999`
* and then run the example
- * `$ ./run spark.streaming.examples.ActorWordCount local[2] 127.0.1.1 9999`
+ * `$ ./run-example spark.streaming.examples.ActorWordCount local[2] 127.0.1.1 9999`
*/
object ActorWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/FlumeEventCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala
index 3ab4fc2c37..9f6e163454 100644
--- a/examples/src/main/scala/spark/streaming/examples/FlumeEventCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
-import spark.util.IntParam
-import spark.storage.StorageLevel
-import spark.streaming._
+import org.apache.spark.util.IntParam
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming._
/**
* Produces a count of events received from Flume.
diff --git a/examples/src/main/scala/spark/streaming/examples/HdfsWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala
index f5baec242d..bc8564b3ba 100644
--- a/examples/src/main/scala/spark/streaming/examples/HdfsWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.StreamingContext._
/**
@@ -28,7 +28,7 @@ import spark.streaming.StreamingContext._
* <directory> is the directory that Spark Streaming will use to find and read new text files.
*
* To run this on your local machine on directory `localdir`, run this example
- * `$ ./run spark.streaming.examples.HdfsWordCount local[2] localdir`
+ * `$ ./run-example spark.streaming.examples.HdfsWordCount local[2] localdir`
* Then create a text file in `localdir` and the words in the file will get counted.
*/
object HdfsWordCount {
diff --git a/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala
index 4929703ba2..12f939d5a7 100644
--- a/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala
@@ -15,17 +15,17 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
import java.util.Properties
import kafka.message.Message
import kafka.producer.SyncProducerConfig
import kafka.producer._
-import spark.SparkContext
-import spark.streaming._
-import spark.streaming.StreamingContext._
-import spark.storage.StorageLevel
-import spark.streaming.util.RawTextHelper._
+import org.apache.spark.SparkContext
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.StreamingContext._
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.util.RawTextHelper._
/**
* Consumes messages from one or more topics in Kafka and does wordcount.
@@ -37,7 +37,7 @@ import spark.streaming.util.RawTextHelper._
* <numThreads> is the number of threads the kafka consumer should use
*
* Example:
- * `./run spark.streaming.examples.KafkaWordCount local[2] zoo01,zoo02,zoo03 my-consumer-group topic1,topic2 1`
+ * `./run-example spark.streaming.examples.KafkaWordCount local[2] zoo01,zoo02,zoo03 my-consumer-group topic1,topic2 1`
*/
object KafkaWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
index 150fb5eb9c..e2487dca5f 100644
--- a/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.StreamingContext._
/**
* Counts words in UTF8 encoded, '\n' delimited text received from the network every second.
@@ -29,7 +29,7 @@ import spark.streaming.StreamingContext._
* To run this on your local machine, you need to first run a Netcat server
* `$ nc -lk 9999`
* and then run the example
- * `$ ./run spark.streaming.examples.NetworkWordCount local[2] localhost 9999`
+ * `$ ./run-example spark.streaming.examples.NetworkWordCount local[2] localhost 9999`
*/
object NetworkWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/QueueStream.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/QueueStream.scala
index da36c8c23c..fad512eeba 100644
--- a/examples/src/main/scala/spark/streaming/examples/QueueStream.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/QueueStream.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
-import spark.RDD
-import spark.streaming.{Seconds, StreamingContext}
-import spark.streaming.StreamingContext._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.StreamingContext._
import scala.collection.mutable.SynchronizedQueue
diff --git a/examples/src/main/scala/spark/streaming/examples/RawNetworkGrep.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala
index 7fb680bcc3..0b45c30d20 100644
--- a/examples/src/main/scala/spark/streaming/examples/RawNetworkGrep.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala
@@ -15,20 +15,20 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
-import spark.util.IntParam
-import spark.storage.StorageLevel
+import org.apache.spark.util.IntParam
+import org.apache.spark.storage.StorageLevel
-import spark.streaming._
-import spark.streaming.util.RawTextHelper
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.util.RawTextHelper
/**
* Receives text from multiple rawNetworkStreams and counts how many '\n' delimited
* lines have the word 'the' in them. This is useful for benchmarking purposes. This
* will only work with spark.streaming.util.RawTextSender running on all worker nodes
* and with Spark using Kryo serialization (set Java property "spark.serializer" to
- * "spark.KryoSerializer").
+ * "org.apache.spark.serializer.KryoSerializer").
* Usage: RawNetworkGrep <master> <numStreams> <host> <port> <batchMillis>
* <master> is the Spark master URL
* <numStream> is the number rawNetworkStreams, which should be same as number
diff --git a/examples/src/main/scala/spark/streaming/examples/StatefulNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala
index 33ab324732..cb30c4edb3 100644
--- a/examples/src/main/scala/spark/streaming/examples/StatefulNetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
-import spark.streaming._
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.StreamingContext._
/**
* Counts words cumulatively in UTF8 encoded, '\n' delimited text received from the network every second.
@@ -29,7 +29,7 @@ import spark.streaming.StreamingContext._
* To run this on your local machine, you need to first run a Netcat server
* `$ nc -lk 9999`
* and then run the example
- * `$ ./run spark.streaming.examples.StatefulNetworkWordCount local[2] localhost 9999`
+ * `$ ./run-example spark.streaming.examples.StatefulNetworkWordCount local[2] localhost 9999`
*/
object StatefulNetworkWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdCMS.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala
index 8770abd57e..35b6329ab3 100644
--- a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdCMS.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
-import spark.storage.StorageLevel
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.storage.StorageLevel
import com.twitter.algebird._
-import spark.streaming.StreamingContext._
-import spark.SparkContext._
+import org.apache.spark.streaming.StreamingContext._
+import org.apache.spark.SparkContext._
/**
* Illustrates the use of the Count-Min Sketch, from Twitter's Algebird library, to compute
diff --git a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdHLL.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala
index cba5c986be..8bfde2a829 100644
--- a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdHLL.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
-import spark.storage.StorageLevel
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.storage.StorageLevel
import com.twitter.algebird.HyperLogLog._
import com.twitter.algebird.HyperLogLogMonoid
-import spark.streaming.dstream.TwitterInputDStream
+import org.apache.spark.streaming.dstream.TwitterInputDStream
/**
* Illustrates the use of the HyperLogLog algorithm, from Twitter's Algebird library, to compute
diff --git a/examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala
index 682b99f75e..27aa6b14bf 100644
--- a/examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.{Seconds, StreamingContext}
import StreamingContext._
-import spark.SparkContext._
+import org.apache.spark.SparkContext._
/**
* Calculates popular hashtags (topics) over sliding 10 and 60 second windows from a Twitter
diff --git a/examples/src/main/scala/spark/streaming/examples/ZeroMQWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala
index e264fae609..c8743b9e25 100644
--- a/examples/src/main/scala/spark/streaming/examples/ZeroMQWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.streaming.examples
+package org.apache.spark.streaming.examples
import akka.actor.ActorSystem
import akka.actor.actorRef2Scala
import akka.zeromq._
-import spark.streaming.{ Seconds, StreamingContext }
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.{ Seconds, StreamingContext }
+import org.apache.spark.streaming.StreamingContext._
import akka.zeromq.Subscribe
/**
@@ -60,9 +60,9 @@ object SimpleZeroMQPublisher {
* <zeroMQurl> and <topic> describe where zeroMq publisher is running.
*
* To run this example locally, you may run publisher as
- * `$ ./run spark.streaming.examples.SimpleZeroMQPublisher tcp://127.0.1.1:1234 foo.bar`
+ * `$ ./run-example spark.streaming.examples.SimpleZeroMQPublisher tcp://127.0.1.1:1234 foo.bar`
* and run the example as
- * `$ ./run spark.streaming.examples.ZeroMQWordCount local[2] tcp://127.0.1.1:1234 foo`
+ * `$ ./run-example spark.streaming.examples.ZeroMQWordCount local[2] tcp://127.0.1.1:1234 foo`
*/
object ZeroMQWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewGenerator.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewGenerator.scala
index 375d5c9d22..884d6d6f34 100644
--- a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewGenerator.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewGenerator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.streaming.examples.clickstream
+package org.apache.spark.streaming.examples.clickstream
import java.net.{InetAddress,ServerSocket,Socket,SocketException}
import java.io.{InputStreamReader, BufferedReader, PrintWriter}
@@ -37,8 +37,8 @@ object PageView {
/** Generates streaming events to simulate page views on a website.
*
* This should be used in tandem with PageViewStream.scala. Example:
- * $ ./run spark.streaming.examples.clickstream.PageViewGenerator 44444 10
- * $ ./run spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444
+ * $ ./run-example spark.streaming.examples.clickstream.PageViewGenerator 44444 10
+ * $ ./run-example spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444
* */
object PageViewGenerator {
val pages = Map("http://foo.com/" -> .7,
diff --git a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewStream.scala
index a24342bebf..8282cc9269 100644
--- a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewStream.scala
@@ -15,19 +15,19 @@
* limitations under the License.
*/
-package spark.streaming.examples.clickstream
+package org.apache.spark.streaming.examples.clickstream
-import spark.streaming.{Seconds, StreamingContext}
-import spark.streaming.StreamingContext._
-import spark.SparkContext._
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.StreamingContext._
+import org.apache.spark.SparkContext._
/** Analyses a streaming dataset of web page views. This class demonstrates several types of
* operators available in Spark streaming.
*
* This should be used in tandem with PageViewStream.scala. Example:
- * $ ./run spark.streaming.examples.clickstream.PageViewGenerator 44444 10
- * $ ./run spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444
- * */
+ * $ ./run-example spark.streaming.examples.clickstream.PageViewGenerator 44444 10
+ * $ ./run-example spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444
+ */
object PageViewStream {
def main(args: Array[String]) {
if (args.length != 3) {
diff --git a/make-distribution.sh b/make-distribution.sh
index 0116215163..91f6278491 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,6 +24,11 @@
# so it is completely self contained.
# It does not contain source or *.class files.
#
+# Optional Arguments
+# --tgz: Additionally creates spark-$VERSION-bin.tar.gz
+# --hadoop VERSION: Builds against specified version of Hadoop.
+# --with-yarn: Enables support for Hadoop YARN.
+#
# Recommended deploy/testing procedure (standalone mode):
# 1) Rsync / deploy the dist/ dir to one host
# 2) cd to deploy dir; ./bin/start-master.sh
@@ -38,21 +43,70 @@ DISTDIR="$FWDIR/dist"
# Get version from SBT
export TERM=dumb # Prevents color codes in SBT output
-VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2)
-echo "Making distribution for Spark $VERSION in $DISTDIR..."
+VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
+
+# Initialize defaults
+SPARK_HADOOP_VERSION=1.0.4
+SPARK_YARN=false
+MAKE_TGZ=false
+
+# Parse arguments
+while (( "$#" )); do
+ case $1 in
+ --hadoop)
+ SPARK_HADOOP_VERSION="$2"
+ shift
+ ;;
+ --with-yarn)
+ SPARK_YARN=true
+ ;;
+ --tgz)
+ MAKE_TGZ=true
+ ;;
+ esac
+ shift
+done
+
+if [ "$MAKE_TGZ" == "true" ]; then
+ echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz"
+else
+ echo "Making distribution for Spark $VERSION in $DISTDIR..."
+fi
+
+echo "Hadoop version set to $SPARK_HADOOP_VERSION"
+if [ "$SPARK_YARN" == "true" ]; then
+ echo "YARN enabled"
+else
+ echo "YARN disabled"
+fi
# Build fat JAR
-$FWDIR/sbt/sbt "repl/assembly"
+export SPARK_HADOOP_VERSION
+export SPARK_YARN
+"$FWDIR/sbt/sbt" "assembly/assembly"
# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/jars"
-echo "$VERSION" >$DISTDIR/RELEASE
+echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
# Copy jars
-cp $FWDIR/repl/target/*.jar "$DISTDIR/jars/"
+cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/jars/"
# Copy other things
+mkdir "$DISTDIR"/conf
+cp -r "$FWDIR/conf/*.template" "$DISTDIR"
cp -r "$FWDIR/bin" "$DISTDIR"
-cp -r "$FWDIR/conf" "$DISTDIR"
-cp "$FWDIR/run" "$FWDIR/spark-shell" "$DISTDIR"
+cp -r "$FWDIR/python" "$DISTDIR"
+cp "$FWDIR/spark-class" "$DISTDIR"
+cp "$FWDIR/spark-shell" "$DISTDIR"
+cp "$FWDIR/spark-executor" "$DISTDIR"
+cp "$FWDIR/pyspark" "$DISTDIR"
+
+
+if [ "$MAKE_TGZ" == "true" ]; then
+ TARDIR="$FWDIR/spark-$VERSION"
+ cp -r "$DISTDIR" "$TARDIR"
+ tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION"
+ rm -rf "$TARDIR"
+fi
diff --git a/mllib/pom.xml b/mllib/pom.xml
new file mode 100644
index 0000000000..966caf6835
--- /dev/null
+++ b/mllib/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-parent</artifactId>
+ <version>0.8.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-mllib</artifactId>
+ <packaging>jar</packaging>
+ <name>Spark Project ML Library</name>
+ <url>http://spark.incubator.apache.org/</url>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-server</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.jblas</groupId>
+ <artifactId>jblas</artifactId>
+ <version>1.2.3</version>
+ </dependency>
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_${scala.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.scalacheck</groupId>
+ <artifactId>scalacheck_${scala.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.novocode</groupId>
+ <artifactId>junit-interface</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <outputDirectory>target/scala-${scala.version}/classes</outputDirectory>
+ <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
+ <plugins>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
new file mode 100644
index 0000000000..60cb44ce89
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -0,0 +1,21 @@
+package org.apache.spark.mllib.classification
+
+import org.apache.spark.rdd.RDD
+
+trait ClassificationModel extends Serializable {
+ /**
+ * Predict values for the given data set using the model trained.
+ *
+ * @param testData RDD representing data points to be predicted
+ * @return RDD[Int] where each entry contains the corresponding prediction
+ */
+ def predict(testData: RDD[Array[Double]]): RDD[Double]
+
+ /**
+ * Predict values for a single data point using the model trained.
+ *
+ * @param testData array representing a single data point
+ * @return Int prediction from the trained model
+ */
+ def predict(testData: Array[Double]): Double
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
new file mode 100644
index 0000000000..50aede9c07
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.classification
+
+import scala.math.round
+
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.optimization._
+import org.apache.spark.mllib.regression._
+import org.apache.spark.mllib.util.MLUtils
+import org.apache.spark.mllib.util.DataValidators
+
+import org.jblas.DoubleMatrix
+
+/**
+ * Classification model trained using Logistic Regression.
+ *
+ * @param weights Weights computed for every feature.
+ * @param intercept Intercept computed for this model.
+ */
+class LogisticRegressionModel(
+ override val weights: Array[Double],
+ override val intercept: Double)
+ extends GeneralizedLinearModel(weights, intercept)
+ with ClassificationModel with Serializable {
+
+ override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix,
+ intercept: Double) = {
+ val margin = dataMatrix.mmul(weightMatrix).get(0) + intercept
+ round(1.0/ (1.0 + math.exp(margin * -1)))
+ }
+}
+
+/**
+ * Train a classification model for Logistic Regression using Stochastic Gradient Descent.
+ * NOTE: Labels used in Logistic Regression should be {0, 1}
+ */
+class LogisticRegressionWithSGD private (
+ var stepSize: Double,
+ var numIterations: Int,
+ var regParam: Double,
+ var miniBatchFraction: Double)
+ extends GeneralizedLinearAlgorithm[LogisticRegressionModel]
+ with Serializable {
+
+ val gradient = new LogisticGradient()
+ val updater = new SimpleUpdater()
+ override val optimizer = new GradientDescent(gradient, updater)
+ .setStepSize(stepSize)
+ .setNumIterations(numIterations)
+ .setRegParam(regParam)
+ .setMiniBatchFraction(miniBatchFraction)
+ override val validators = List(DataValidators.classificationLabels)
+
+ /**
+ * Construct a LogisticRegression object with default parameters
+ */
+ def this() = this(1.0, 100, 0.0, 1.0)
+
+ def createModel(weights: Array[Double], intercept: Double) = {
+ new LogisticRegressionModel(weights, intercept)
+ }
+}
+
+/**
+ * Top-level methods for calling Logistic Regression.
+ * NOTE: Labels used in Logistic Regression should be {0, 1}
+ */
+object LogisticRegressionWithSGD {
+ // NOTE(shivaram): We use multiple train methods instead of default arguments to support
+ // Java programs.
+
+ /**
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
+ * gradient descent are initialized using the initial weights provided.
+ * NOTE: Labels used in Logistic Regression should be {0, 1}
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ * @param initialWeights Initial set of weights to be used. Array should be equal in size to
+ * the number of features in the data.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ miniBatchFraction: Double,
+ initialWeights: Array[Double])
+ : LogisticRegressionModel =
+ {
+ new LogisticRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction).run(
+ input, initialWeights)
+ }
+
+ /**
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient.
+ * NOTE: Labels used in Logistic Regression should be {0, 1}
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ miniBatchFraction: Double)
+ : LogisticRegressionModel =
+ {
+ new LogisticRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction).run(
+ input)
+ }
+
+ /**
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using the specified step size. We use the entire data
+ * set to update the gradient in each iteration.
+ * NOTE: Labels used in Logistic Regression should be {0, 1}
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param stepSize Step size to be used for each iteration of Gradient Descent.
+
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a LogisticRegressionModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double)
+ : LogisticRegressionModel =
+ {
+ train(input, numIterations, stepSize, 1.0)
+ }
+
+ /**
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using a step size of 1.0. We use the entire data set
+ * to update the gradient in each iteration.
+ * NOTE: Labels used in Logistic Regression should be {0, 1}
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a LogisticRegressionModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int)
+ : LogisticRegressionModel =
+ {
+ train(input, numIterations, 1.0, 1.0)
+ }
+
+ def main(args: Array[String]) {
+ if (args.length != 4) {
+ println("Usage: LogisticRegression <master> <input_dir> <step_size> " +
+ "<niters>")
+ System.exit(1)
+ }
+ val sc = new SparkContext(args(0), "LogisticRegression")
+ val data = MLUtils.loadLabeledData(sc, args(1))
+ val model = LogisticRegressionWithSGD.train(data, args(3).toInt, args(2).toDouble)
+
+ sc.stop()
+ }
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
new file mode 100644
index 0000000000..3511e24bce
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.classification
+
+import scala.math.signum
+
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.optimization._
+import org.apache.spark.mllib.regression._
+import org.apache.spark.mllib.util.MLUtils
+import org.apache.spark.mllib.util.DataValidators
+
+import org.jblas.DoubleMatrix
+
+/**
+ * Model built using SVM.
+ *
+ * @param weights Weights computed for every feature.
+ * @param intercept Intercept computed for this model.
+ */
+class SVMModel(
+ override val weights: Array[Double],
+ override val intercept: Double)
+ extends GeneralizedLinearModel(weights, intercept)
+ with ClassificationModel with Serializable {
+
+ override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix,
+ intercept: Double) = {
+ val margin = dataMatrix.dot(weightMatrix) + intercept
+ if (margin < 0) 0.0 else 1.0
+ }
+}
+
+/**
+ * Train an SVM using Stochastic Gradient Descent.
+ * NOTE: Labels used in SVM should be {0, 1}
+ */
+class SVMWithSGD private (
+ var stepSize: Double,
+ var numIterations: Int,
+ var regParam: Double,
+ var miniBatchFraction: Double)
+ extends GeneralizedLinearAlgorithm[SVMModel] with Serializable {
+
+ val gradient = new HingeGradient()
+ val updater = new SquaredL2Updater()
+ override val optimizer = new GradientDescent(gradient, updater)
+ .setStepSize(stepSize)
+ .setNumIterations(numIterations)
+ .setRegParam(regParam)
+ .setMiniBatchFraction(miniBatchFraction)
+
+ override val validators = List(DataValidators.classificationLabels)
+
+ /**
+ * Construct a SVM object with default parameters
+ */
+ def this() = this(1.0, 100, 1.0, 1.0)
+
+ def createModel(weights: Array[Double], intercept: Double) = {
+ new SVMModel(weights, intercept)
+ }
+}
+
+/**
+ * Top-level methods for calling SVM. NOTE: Labels used in SVM should be {0, 1}
+ */
+object SVMWithSGD {
+
+ /**
+ * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
+ * gradient descent are initialized using the initial weights provided.
+ * NOTE: Labels used in SVM should be {0, 1}
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param regParam Regularization parameter.
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ * @param initialWeights Initial set of weights to be used. Array should be equal in size to
+ * the number of features in the data.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ regParam: Double,
+ miniBatchFraction: Double,
+ initialWeights: Array[Double])
+ : SVMModel =
+ {
+ new SVMWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input,
+ initialWeights)
+ }
+
+ /**
+ * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient.
+ * NOTE: Labels used in SVM should be {0, 1}
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param regParam Regularization parameter.
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ regParam: Double,
+ miniBatchFraction: Double)
+ : SVMModel =
+ {
+ new SVMWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input)
+ }
+
+ /**
+ * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. We use the entire data set to
+ * update the gradient in each iteration.
+ * NOTE: Labels used in SVM should be {0, 1}
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param stepSize Step size to be used for each iteration of Gradient Descent.
+ * @param regParam Regularization parameter.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a SVMModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ regParam: Double)
+ : SVMModel =
+ {
+ train(input, numIterations, stepSize, regParam, 1.0)
+ }
+
+ /**
+ * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using a step size of 1.0. We use the entire data set to
+ * update the gradient in each iteration.
+ * NOTE: Labels used in SVM should be {0, 1}
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a SVMModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int)
+ : SVMModel =
+ {
+ train(input, numIterations, 1.0, 1.0, 1.0)
+ }
+
+ def main(args: Array[String]) {
+ if (args.length != 5) {
+ println("Usage: SVM <master> <input_dir> <step_size> <regularization_parameter> <niters>")
+ System.exit(1)
+ }
+ val sc = new SparkContext(args(0), "SVM")
+ val data = MLUtils.loadLabeledData(sc, args(1))
+ val model = SVMWithSGD.train(data, args(4).toInt, args(2).toDouble, args(3).toDouble)
+
+ sc.stop()
+ }
+}
diff --git a/mllib/src/main/scala/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index d875d6de50..edbf77dbcc 100644
--- a/mllib/src/main/scala/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -15,15 +15,16 @@
* limitations under the License.
*/
-package spark.mllib.clustering
+package org.apache.spark.mllib.clustering
import scala.collection.mutable.ArrayBuffer
import scala.util.Random
-import spark.{SparkContext, RDD}
-import spark.SparkContext._
-import spark.Logging
-import spark.mllib.util.MLUtils
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.Logging
+import org.apache.spark.mllib.util.MLUtils
import org.jblas.DoubleMatrix
@@ -112,7 +113,7 @@ class KMeans private (
* Train a K-means model on the given set of points; `data` should be cached for high
* performance, because this is an iterative algorithm.
*/
- def train(data: RDD[Array[Double]]): KMeansModel = {
+ def run(data: RDD[Array[Double]]): KMeansModel = {
// TODO: check whether data is persistent; this needs RDD.storageLevel to be publicly readable
val sc = data.sparkContext
@@ -194,8 +195,8 @@ class KMeans private (
*/
private def initRandom(data: RDD[Array[Double]]): Array[ClusterCenters] = {
// Sample all the cluster centers in one pass to avoid repeated scans
- val sample = data.takeSample(true, runs * k, new Random().nextInt())
- Array.tabulate(runs)(r => sample.slice(r * k, (r + 1) * k))
+ val sample = data.takeSample(true, runs * k, new Random().nextInt()).toSeq
+ Array.tabulate(runs)(r => sample.slice(r * k, (r + 1) * k).toArray)
}
/**
@@ -210,7 +211,7 @@ class KMeans private (
private def initKMeansParallel(data: RDD[Array[Double]]): Array[ClusterCenters] = {
// Initialize each run's center to a random point
val seed = new Random().nextInt()
- val sample = data.takeSample(true, runs, seed)
+ val sample = data.takeSample(true, runs, seed).toSeq
val centers = Array.tabulate(runs)(r => ArrayBuffer(sample(r)))
// On each step, sample 2 * k points on average for each run with probability proportional
@@ -271,7 +272,7 @@ object KMeans {
.setMaxIterations(maxIterations)
.setRuns(runs)
.setInitializationMode(initializationMode)
- .train(data)
+ .run(data)
}
def train(data: RDD[Array[Double]], k: Int, maxIterations: Int, runs: Int): KMeansModel = {
@@ -315,14 +316,15 @@ object KMeans {
}
def main(args: Array[String]) {
- if (args.length != 4) {
- println("Usage: KMeans <master> <input_file> <k> <max_iterations>")
+ if (args.length < 4) {
+ println("Usage: KMeans <master> <input_file> <k> <max_iterations> [<runs>]")
System.exit(1)
}
val (master, inputFile, k, iters) = (args(0), args(1), args(2).toInt, args(3).toInt)
+ val runs = if (args.length >= 5) args(4).toInt else 1
val sc = new SparkContext(master, "KMeans")
- val data = sc.textFile(inputFile).map(line => line.split(' ').map(_.toDouble))
- val model = KMeans.train(data, k, iters)
+ val data = sc.textFile(inputFile).map(line => line.split(' ').map(_.toDouble)).cache()
+ val model = KMeans.train(data, k, iters, runs)
val cost = model.computeCost(data)
println("Cluster centers:")
for (c <- model.clusterCenters) {
diff --git a/mllib/src/main/scala/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index b8f80e80cd..cfc81c985a 100644
--- a/mllib/src/main/scala/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.mllib.clustering
+package org.apache.spark.mllib.clustering
-import spark.RDD
-import spark.SparkContext._
-import spark.mllib.util.MLUtils
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext._
+import org.apache.spark.mllib.util.MLUtils
/**
diff --git a/mllib/src/main/scala/spark/mllib/clustering/LocalKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala
index 89fe7d7e85..baf8251d8f 100644
--- a/mllib/src/main/scala/spark/mllib/clustering/LocalKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.mllib.clustering
+package org.apache.spark.mllib.clustering
import scala.util.Random
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
new file mode 100644
index 0000000000..749e7364f4
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.optimization
+
+import org.jblas.DoubleMatrix
+
+/**
+ * Class used to compute the gradient for a loss function, given a single data point.
+ */
+abstract class Gradient extends Serializable {
+ /**
+ * Compute the gradient and loss given features of a single data point.
+ *
+ * @param data - Feature values for one data point. Column matrix of size nx1
+ * where n is the number of features.
+ * @param label - Label for this data item.
+ * @param weights - Column matrix containing weights for every feature.
+ *
+ * @return A tuple of 2 elements. The first element is a column matrix containing the computed
+ * gradient and the second element is the loss computed at this data point.
+ *
+ */
+ def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix):
+ (DoubleMatrix, Double)
+}
+
+/**
+ * Compute gradient and loss for a logistic loss function.
+ */
+class LogisticGradient extends Gradient {
+ override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix):
+ (DoubleMatrix, Double) = {
+ val margin: Double = -1.0 * data.dot(weights)
+ val gradientMultiplier = (1.0 / (1.0 + math.exp(margin))) - label
+
+ val gradient = data.mul(gradientMultiplier)
+ val loss =
+ if (margin > 0) {
+ math.log(1 + math.exp(0 - margin))
+ } else {
+ math.log(1 + math.exp(margin)) - margin
+ }
+
+ (gradient, loss)
+ }
+}
+
+/**
+ * Compute gradient and loss for a Least-squared loss function.
+ */
+class SquaredGradient extends Gradient {
+ override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix):
+ (DoubleMatrix, Double) = {
+ val diff: Double = data.dot(weights) - label
+
+ val loss = 0.5 * diff * diff
+ val gradient = data.mul(diff)
+
+ (gradient, loss)
+ }
+}
+
+/**
+ * Compute gradient and loss for a Hinge loss function.
+ * NOTE: This assumes that the labels are {0,1}
+ */
+class HingeGradient extends Gradient {
+ override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix):
+ (DoubleMatrix, Double) = {
+
+ val dotProduct = data.dot(weights)
+
+ // Our loss function with {0, 1} labels is max(0, 1 - (2y – 1) (f_w(x)))
+ // Therefore the gradient is -(2y - 1)*x
+ val labelScaled = 2 * label - 1.0
+
+ if (1.0 > labelScaled * dotProduct) {
+ (data.mul(-labelScaled), 1.0 - labelScaled * dotProduct)
+ } else {
+ (DoubleMatrix.zeros(1, weights.length), 0.0)
+ }
+ }
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
new file mode 100644
index 0000000000..b77364e08d
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.optimization
+
+import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext._
+
+import org.jblas.DoubleMatrix
+
+import scala.collection.mutable.ArrayBuffer
+
+/**
+ * Class used to solve an optimization problem using Gradient Descent.
+ * @param gradient Gradient function to be used.
+ * @param updater Updater to be used to update weights after every iteration.
+ */
+class GradientDescent(var gradient: Gradient, var updater: Updater)
+ extends Optimizer with Logging
+{
+ private var stepSize: Double = 1.0
+ private var numIterations: Int = 100
+ private var regParam: Double = 0.0
+ private var miniBatchFraction: Double = 1.0
+
+ /**
+ * Set the step size per-iteration of SGD. Default 1.0.
+ */
+ def setStepSize(step: Double): this.type = {
+ this.stepSize = step
+ this
+ }
+
+ /**
+ * Set fraction of data to be used for each SGD iteration. Default 1.0.
+ */
+ def setMiniBatchFraction(fraction: Double): this.type = {
+ this.miniBatchFraction = fraction
+ this
+ }
+
+ /**
+ * Set the number of iterations for SGD. Default 100.
+ */
+ def setNumIterations(iters: Int): this.type = {
+ this.numIterations = iters
+ this
+ }
+
+ /**
+ * Set the regularization parameter used for SGD. Default 0.0.
+ */
+ def setRegParam(regParam: Double): this.type = {
+ this.regParam = regParam
+ this
+ }
+
+ /**
+ * Set the gradient function to be used for SGD.
+ */
+ def setGradient(gradient: Gradient): this.type = {
+ this.gradient = gradient
+ this
+ }
+
+
+ /**
+ * Set the updater function to be used for SGD.
+ */
+ def setUpdater(updater: Updater): this.type = {
+ this.updater = updater
+ this
+ }
+
+ def optimize(data: RDD[(Double, Array[Double])], initialWeights: Array[Double])
+ : Array[Double] = {
+
+ val (weights, stochasticLossHistory) = GradientDescent.runMiniBatchSGD(
+ data,
+ gradient,
+ updater,
+ stepSize,
+ numIterations,
+ regParam,
+ miniBatchFraction,
+ initialWeights)
+ weights
+ }
+
+}
+
+// Top-level method to run gradient descent.
+object GradientDescent extends Logging {
+ /**
+ * Run gradient descent in parallel using mini batches.
+ *
+ * @param data - Input data for SGD. RDD of form (label, [feature values]).
+ * @param gradient - Gradient object that will be used to compute the gradient.
+ * @param updater - Updater object that will be used to update the model.
+ * @param stepSize - stepSize to be used during update.
+ * @param numIterations - number of iterations that SGD should be run.
+ * @param regParam - regularization parameter
+ * @param miniBatchFraction - fraction of the input data set that should be used for
+ * one iteration of SGD. Default value 1.0.
+ *
+ * @return A tuple containing two elements. The first element is a column matrix containing
+ * weights for every feature, and the second element is an array containing the stochastic
+ * loss computed for every iteration.
+ */
+ def runMiniBatchSGD(
+ data: RDD[(Double, Array[Double])],
+ gradient: Gradient,
+ updater: Updater,
+ stepSize: Double,
+ numIterations: Int,
+ regParam: Double,
+ miniBatchFraction: Double,
+ initialWeights: Array[Double]) : (Array[Double], Array[Double]) = {
+
+ val stochasticLossHistory = new ArrayBuffer[Double](numIterations)
+
+ val nexamples: Long = data.count()
+ val miniBatchSize = nexamples * miniBatchFraction
+
+ // Initialize weights as a column vector
+ var weights = new DoubleMatrix(initialWeights.length, 1, initialWeights:_*)
+ var regVal = 0.0
+
+ for (i <- 1 to numIterations) {
+ val (gradientSum, lossSum) = data.sample(false, miniBatchFraction, 42+i).map {
+ case (y, features) =>
+ val featuresCol = new DoubleMatrix(features.length, 1, features:_*)
+ val (grad, loss) = gradient.compute(featuresCol, y, weights)
+ (grad, loss)
+ }.reduce((a, b) => (a._1.addi(b._1), a._2 + b._2))
+
+ /**
+ * NOTE(Xinghao): lossSum is computed using the weights from the previous iteration
+ * and regVal is the regularization value computed in the previous iteration as well.
+ */
+ stochasticLossHistory.append(lossSum / miniBatchSize + regVal)
+ val update = updater.compute(
+ weights, gradientSum.div(miniBatchSize), stepSize, i, regParam)
+ weights = update._1
+ regVal = update._2
+ }
+
+ logInfo("GradientDescent finished. Last 10 stochastic losses %s".format(
+ stochasticLossHistory.takeRight(10).mkString(", ")))
+
+ (weights.toArray, stochasticLossHistory.toArray)
+ }
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala
new file mode 100644
index 0000000000..94d30b56f2
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.optimization
+
+import org.apache.spark.rdd.RDD
+
+trait Optimizer {
+
+ /**
+ * Solve the provided convex optimization problem.
+ */
+ def optimize(data: RDD[(Double, Array[Double])], initialWeights: Array[Double]): Array[Double]
+
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
new file mode 100644
index 0000000000..4c51f4f881
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.optimization
+
+import scala.math._
+import org.jblas.DoubleMatrix
+
+/**
+ * Class used to update weights used in Gradient Descent.
+ */
+abstract class Updater extends Serializable {
+ /**
+ * Compute an updated value for weights given the gradient, stepSize, iteration number and
+ * regularization parameter. Also returns the regularization value computed using the
+ * *updated* weights.
+ *
+ * @param weightsOld - Column matrix of size nx1 where n is the number of features.
+ * @param gradient - Column matrix of size nx1 where n is the number of features.
+ * @param stepSize - step size across iterations
+ * @param iter - Iteration number
+ * @param regParam - Regularization parameter
+ *
+ * @return A tuple of 2 elements. The first element is a column matrix containing updated weights,
+ * and the second element is the regularization value computed using updated weights.
+ */
+ def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, stepSize: Double, iter: Int,
+ regParam: Double): (DoubleMatrix, Double)
+}
+
+/**
+ * A simple updater that adaptively adjusts the learning rate the
+ * square root of the number of iterations. Does not perform any regularization.
+ */
+class SimpleUpdater extends Updater {
+ override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix,
+ stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = {
+ val thisIterStepSize = stepSize / math.sqrt(iter)
+ val normGradient = gradient.mul(thisIterStepSize)
+ (weightsOld.sub(normGradient), 0)
+ }
+}
+
+/**
+ * Updater that adjusts learning rate and performs L1 regularization.
+ *
+ * The corresponding proximal operator used is the soft-thresholding function.
+ * That is, each weight component is shrunk towards 0 by shrinkageVal.
+ *
+ * If w > shrinkageVal, set weight component to w-shrinkageVal.
+ * If w < -shrinkageVal, set weight component to w+shrinkageVal.
+ * If -shrinkageVal < w < shrinkageVal, set weight component to 0.
+ *
+ * Equivalently, set weight component to signum(w) * max(0.0, abs(w) - shrinkageVal)
+ */
+class L1Updater extends Updater {
+ override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix,
+ stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = {
+ val thisIterStepSize = stepSize / math.sqrt(iter)
+ val normGradient = gradient.mul(thisIterStepSize)
+ // Take gradient step
+ val newWeights = weightsOld.sub(normGradient)
+ // Soft thresholding
+ val shrinkageVal = regParam * thisIterStepSize
+ (0 until newWeights.length).foreach { i =>
+ val wi = newWeights.get(i)
+ newWeights.put(i, signum(wi) * max(0.0, abs(wi) - shrinkageVal))
+ }
+ (newWeights, newWeights.norm1 * regParam)
+ }
+}
+
+/**
+ * Updater that adjusts the learning rate and performs L2 regularization
+ */
+class SquaredL2Updater extends Updater {
+ override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix,
+ stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = {
+ val thisIterStepSize = stepSize / math.sqrt(iter)
+ val normGradient = gradient.mul(thisIterStepSize)
+ val newWeights = weightsOld.sub(normGradient).div(2.0 * thisIterStepSize * regParam + 1.0)
+ (newWeights, pow(newWeights.norm2, 2.0) * regParam)
+ }
+}
+
diff --git a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 7da96397a6..be002d02bc 100644
--- a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -15,16 +15,17 @@
* limitations under the License.
*/
-package spark.mllib.recommendation
+package org.apache.spark.mllib.recommendation
import scala.collection.mutable.{ArrayBuffer, BitSet}
import scala.util.Random
import scala.util.Sorting
-import spark.{HashPartitioner, Partitioner, SparkContext, RDD}
-import spark.storage.StorageLevel
-import spark.KryoRegistrator
-import spark.SparkContext._
+import org.apache.spark.{HashPartitioner, Partitioner, SparkContext}
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.rdd.RDD
+import org.apache.spark.serializer.KryoRegistrator
+import org.apache.spark.SparkContext._
import com.esotericsoftware.kryo.Kryo
import org.jblas.{DoubleMatrix, SimpleBlas, Solve}
@@ -35,8 +36,7 @@ import org.jblas.{DoubleMatrix, SimpleBlas, Solve}
* of the elements within this block, and the list of destination blocks that each user or
* product will need to send its feature vector to.
*/
-private[recommendation] case class OutLinkBlock(
- elementIds: Array[Int], shouldSend: Array[BitSet])
+private[recommendation] case class OutLinkBlock(elementIds: Array[Int], shouldSend: Array[BitSet])
/**
@@ -56,8 +56,7 @@ private[recommendation] case class InLinkBlock(
/**
* A more compact class to represent a rating than Tuple3[Int, Int, Double].
*/
-private[recommendation] case class Rating(user: Int, product: Int, rating: Double)
-
+case class Rating(val user: Int, val product: Int, val rating: Double)
/**
* Alternating Least Squares matrix factorization.
@@ -105,10 +104,10 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
}
/**
- * Run ALS with the configured parmeters on an input RDD of (user, product, rating) triples.
+ * Run ALS with the configured parameters on an input RDD of (user, product, rating) triples.
* Returns a MatrixFactorizationModel with feature vectors for each user and product.
*/
- def train(ratings: RDD[(Int, Int, Double)]): MatrixFactorizationModel = {
+ def run(ratings: RDD[Rating]): MatrixFactorizationModel = {
val numBlocks = if (this.numBlocks == -1) {
math.max(ratings.context.defaultParallelism, ratings.partitions.size / 2)
} else {
@@ -117,16 +116,36 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
val partitioner = new HashPartitioner(numBlocks)
- val ratingsByUserBlock = ratings.map{ case (u, p, r) => (u % numBlocks, Rating(u, p, r)) }
- val ratingsByProductBlock = ratings.map{ case (u, p, r) => (p % numBlocks, Rating(p, u, r)) }
+ val ratingsByUserBlock = ratings.map{ rating => (rating.user % numBlocks, rating) }
+ val ratingsByProductBlock = ratings.map{ rating =>
+ (rating.product % numBlocks, Rating(rating.product, rating.user, rating.rating))
+ }
val (userInLinks, userOutLinks) = makeLinkRDDs(numBlocks, ratingsByUserBlock)
val (productInLinks, productOutLinks) = makeLinkRDDs(numBlocks, ratingsByProductBlock)
- // Initialize user and product factors randomly
- val seed = new Random().nextInt()
- var users = userOutLinks.mapValues(_.elementIds.map(u => randomFactor(rank, seed ^ u)))
- var products = productOutLinks.mapValues(_.elementIds.map(p => randomFactor(rank, seed ^ ~p)))
+ // Initialize user and product factors randomly, but use a deterministic seed for each partition
+ // so that fault recovery works
+ val seedGen = new Random()
+ val seed1 = seedGen.nextInt()
+ val seed2 = seedGen.nextInt()
+ // Hash an integer to propagate random bits at all positions, similar to java.util.HashTable
+ def hash(x: Int): Int = {
+ val r = x ^ (x >>> 20) ^ (x >>> 12)
+ r ^ (r >>> 7) ^ (r >>> 4)
+ }
+ var users = userOutLinks.mapPartitionsWithIndex { (index, itr) =>
+ val rand = new Random(hash(seed1 ^ index))
+ itr.map { case (x, y) =>
+ (x, y.elementIds.map(_ => randomFactor(rank, rand)))
+ }
+ }
+ var products = productOutLinks.mapPartitionsWithIndex { (index, itr) =>
+ val rand = new Random(hash(seed2 ^ index))
+ itr.map { case (x, y) =>
+ (x, y.elementIds.map(_ => randomFactor(rank, rand)))
+ }
+ }
for (iter <- 0 until iterations) {
// perform ALS update
@@ -213,11 +232,9 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
}
/**
- * Make a random factor vector with the given seed.
- * TODO: Initialize things using mapPartitionsWithIndex to make it faster?
+ * Make a random factor vector with the given random.
*/
- private def randomFactor(rank: Int, seed: Int): Array[Double] = {
- val rand = new Random(seed)
+ private def randomFactor(rank: Int, rand: Random): Array[Double] = {
Array.fill(rank)(rand.nextDouble)
}
@@ -357,14 +374,14 @@ object ALS {
* @param blocks level of parallelism to split computation into
*/
def train(
- ratings: RDD[(Int, Int, Double)],
+ ratings: RDD[Rating],
rank: Int,
iterations: Int,
lambda: Double,
blocks: Int)
: MatrixFactorizationModel =
{
- new ALS(blocks, rank, iterations, lambda).train(ratings)
+ new ALS(blocks, rank, iterations, lambda).run(ratings)
}
/**
@@ -379,7 +396,7 @@ object ALS {
* @param iterations number of iterations of ALS (recommended: 10-20)
* @param lambda regularization factor (recommended: 0.01)
*/
- def train(ratings: RDD[(Int, Int, Double)], rank: Int, iterations: Int, lambda: Double)
+ def train(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double)
: MatrixFactorizationModel =
{
train(ratings, rank, iterations, lambda, -1)
@@ -396,7 +413,7 @@ object ALS {
* @param rank number of features to use
* @param iterations number of iterations of ALS (recommended: 10-20)
*/
- def train(ratings: RDD[(Int, Int, Double)], rank: Int, iterations: Int)
+ def train(ratings: RDD[Rating], rank: Int, iterations: Int)
: MatrixFactorizationModel =
{
train(ratings, rank, iterations, 0.01, -1)
@@ -416,14 +433,15 @@ object ALS {
val (master, ratingsFile, rank, iters, outputDir) =
(args(0), args(1), args(2).toInt, args(3).toInt, args(4))
val blocks = if (args.length == 6) args(5).toInt else -1
- System.setProperty("spark.serializer", "spark.KryoSerializer")
+ System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
System.setProperty("spark.kryo.registrator", classOf[ALSRegistrator].getName)
System.setProperty("spark.kryo.referenceTracking", "false")
+ System.setProperty("spark.kryoserializer.buffer.mb", "8")
System.setProperty("spark.locality.wait", "10000")
val sc = new SparkContext(master, "ALS")
val ratings = sc.textFile(ratingsFile).map { line =>
val fields = line.split(',')
- (fields(0).toInt, fields(1).toInt, fields(2).toDouble)
+ Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble)
}
val model = ALS.train(ratings, rank, iters, 0.01, blocks)
model.userFeatures.map{ case (id, vec) => id + "," + vec.mkString(" ") }
diff --git a/mllib/src/main/scala/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
index 38637b3dd1..af43d89c70 100644
--- a/mllib/src/main/scala/spark/mllib/recommendation/MatrixFactorizationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
@@ -15,13 +15,22 @@
* limitations under the License.
*/
-package spark.mllib.recommendation
+package org.apache.spark.mllib.recommendation
-import spark.RDD
-import spark.SparkContext._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext._
import org.jblas._
+/**
+ * Model representing the result of matrix factorization.
+ *
+ * @param rank Rank for the features in this model.
+ * @param userFeatures RDD of tuples where each tuple represents the userId and
+ * the features computed for this user.
+ * @param productFeatures RDD of tuples where each tuple represents the productId
+ * and the features computed for this product.
+ */
class MatrixFactorizationModel(
val rank: Int,
val userFeatures: RDD[(Int, Array[Double])],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
new file mode 100644
index 0000000000..f98b0b536d
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression
+
+import org.apache.spark.{Logging, SparkException}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.optimization._
+
+import org.jblas.DoubleMatrix
+
+/**
+ * GeneralizedLinearModel (GLM) represents a model trained using
+ * GeneralizedLinearAlgorithm. GLMs consist of a weight vector and
+ * an intercept.
+ *
+ * @param weights Weights computed for every feature.
+ * @param intercept Intercept computed for this model.
+ */
+abstract class GeneralizedLinearModel(val weights: Array[Double], val intercept: Double)
+ extends Serializable {
+
+ // Create a column vector that can be used for predictions
+ private val weightsMatrix = new DoubleMatrix(weights.length, 1, weights:_*)
+
+ /**
+ * Predict the result given a data point and the weights learned.
+ *
+ * @param dataMatrix Row vector containing the features for this data point
+ * @param weightMatrix Column vector containing the weights of the model
+ * @param intercept Intercept of the model.
+ */
+ def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix,
+ intercept: Double): Double
+
+ /**
+ * Predict values for the given data set using the model trained.
+ *
+ * @param testData RDD representing data points to be predicted
+ * @return RDD[Double] where each entry contains the corresponding prediction
+ */
+ def predict(testData: RDD[Array[Double]]): RDD[Double] = {
+ // A small optimization to avoid serializing the entire model. Only the weightsMatrix
+ // and intercept is needed.
+ val localWeights = weightsMatrix
+ val localIntercept = intercept
+
+ testData.map { x =>
+ val dataMatrix = new DoubleMatrix(1, x.length, x:_*)
+ predictPoint(dataMatrix, localWeights, localIntercept)
+ }
+ }
+
+ /**
+ * Predict values for a single data point using the model trained.
+ *
+ * @param testData array representing a single data point
+ * @return Double prediction from the trained model
+ */
+ def predict(testData: Array[Double]): Double = {
+ val dataMat = new DoubleMatrix(1, testData.length, testData:_*)
+ predictPoint(dataMat, weightsMatrix, intercept)
+ }
+}
+
+/**
+ * GeneralizedLinearAlgorithm implements methods to train a Genearalized Linear Model (GLM).
+ * This class should be extended with an Optimizer to create a new GLM.
+ */
+abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
+ extends Logging with Serializable {
+
+ protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List()
+
+ val optimizer: Optimizer
+
+ protected var addIntercept: Boolean = true
+
+ protected var validateData: Boolean = true
+
+ /**
+ * Create a model given the weights and intercept
+ */
+ protected def createModel(weights: Array[Double], intercept: Double): M
+
+ /**
+ * Set if the algorithm should add an intercept. Default true.
+ */
+ def setIntercept(addIntercept: Boolean): this.type = {
+ this.addIntercept = addIntercept
+ this
+ }
+
+ /**
+ * Set if the algorithm should validate data before training. Default true.
+ */
+ def setValidateData(validateData: Boolean): this.type = {
+ this.validateData = validateData
+ this
+ }
+
+ /**
+ * Run the algorithm with the configured parameters on an input
+ * RDD of LabeledPoint entries.
+ */
+ def run(input: RDD[LabeledPoint]) : M = {
+ val nfeatures: Int = input.first().features.length
+ val initialWeights = Array.fill(nfeatures)(1.0)
+ run(input, initialWeights)
+ }
+
+ /**
+ * Run the algorithm with the configured parameters on an input RDD
+ * of LabeledPoint entries starting from the initial weights provided.
+ */
+ def run(input: RDD[LabeledPoint], initialWeights: Array[Double]) : M = {
+
+ // Check the data properties before running the optimizer
+ if (validateData && !validators.forall(func => func(input))) {
+ throw new SparkException("Input validation failed.")
+ }
+
+ // Add a extra variable consisting of all 1.0's for the intercept.
+ val data = if (addIntercept) {
+ input.map(labeledPoint => (labeledPoint.label, Array(1.0, labeledPoint.features:_*)))
+ } else {
+ input.map(labeledPoint => (labeledPoint.label, labeledPoint.features))
+ }
+
+ val initialWeightsWithIntercept = if (addIntercept) {
+ Array(1.0, initialWeights:_*)
+ } else {
+ initialWeights
+ }
+
+ val weights = optimizer.optimize(data, initialWeightsWithIntercept)
+ val intercept = weights(0)
+ val weightsScaled = weights.tail
+
+ val model = createModel(weightsScaled, intercept)
+
+ logInfo("Final model weights " + model.weights.mkString(","))
+ logInfo("Final model intercept " + model.intercept)
+ model
+ }
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
new file mode 100644
index 0000000000..63240e24dc
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression
+
+/**
+ * Class that represents the features and labels of a data point.
+ *
+ * @param label Label for this data point.
+ * @param features List of features for this data point.
+ */
+case class LabeledPoint(val label: Double, val features: Array[Double])
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
new file mode 100644
index 0000000000..d959695325
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression
+
+import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.optimization._
+import org.apache.spark.mllib.util.MLUtils
+
+import org.jblas.DoubleMatrix
+
+/**
+ * Regression model trained using Lasso.
+ *
+ * @param weights Weights computed for every feature.
+ * @param intercept Intercept computed for this model.
+ */
+class LassoModel(
+ override val weights: Array[Double],
+ override val intercept: Double)
+ extends GeneralizedLinearModel(weights, intercept)
+ with RegressionModel with Serializable {
+
+ override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix,
+ intercept: Double) = {
+ dataMatrix.dot(weightMatrix) + intercept
+ }
+}
+
+/**
+ * Train a regression model with L1-regularization using Stochastic Gradient Descent.
+ */
+class LassoWithSGD private (
+ var stepSize: Double,
+ var numIterations: Int,
+ var regParam: Double,
+ var miniBatchFraction: Double)
+ extends GeneralizedLinearAlgorithm[LassoModel]
+ with Serializable {
+
+ val gradient = new SquaredGradient()
+ val updater = new L1Updater()
+ @transient val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize)
+ .setNumIterations(numIterations)
+ .setRegParam(regParam)
+ .setMiniBatchFraction(miniBatchFraction)
+
+ // We don't want to penalize the intercept, so set this to false.
+ setIntercept(false)
+
+ var yMean = 0.0
+ var xColMean: DoubleMatrix = _
+ var xColSd: DoubleMatrix = _
+
+ /**
+ * Construct a Lasso object with default parameters
+ */
+ def this() = this(1.0, 100, 1.0, 1.0)
+
+ def createModel(weights: Array[Double], intercept: Double) = {
+ val weightsMat = new DoubleMatrix(weights.length + 1, 1, (Array(intercept) ++ weights):_*)
+ val weightsScaled = weightsMat.div(xColSd)
+ val interceptScaled = yMean - (weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0))
+
+ new LassoModel(weightsScaled.data, interceptScaled)
+ }
+
+ override def run(
+ input: RDD[LabeledPoint],
+ initialWeights: Array[Double])
+ : LassoModel =
+ {
+ val nfeatures: Int = input.first.features.length
+ val nexamples: Long = input.count()
+
+ // To avoid penalizing the intercept, we center and scale the data.
+ val stats = MLUtils.computeStats(input, nfeatures, nexamples)
+ yMean = stats._1
+ xColMean = stats._2
+ xColSd = stats._3
+
+ val normalizedData = input.map { point =>
+ val yNormalized = point.label - yMean
+ val featuresMat = new DoubleMatrix(nfeatures, 1, point.features:_*)
+ val featuresNormalized = featuresMat.sub(xColMean).divi(xColSd)
+ LabeledPoint(yNormalized, featuresNormalized.toArray)
+ }
+
+ super.run(normalizedData, initialWeights)
+ }
+}
+
+/**
+ * Top-level methods for calling Lasso.
+ */
+object LassoWithSGD {
+
+ /**
+ * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
+ * gradient descent are initialized using the initial weights provided.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param regParam Regularization parameter.
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ * @param initialWeights Initial set of weights to be used. Array should be equal in size to
+ * the number of features in the data.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ regParam: Double,
+ miniBatchFraction: Double,
+ initialWeights: Array[Double])
+ : LassoModel =
+ {
+ new LassoWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input,
+ initialWeights)
+ }
+
+ /**
+ * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param regParam Regularization parameter.
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ regParam: Double,
+ miniBatchFraction: Double)
+ : LassoModel =
+ {
+ new LassoWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input)
+ }
+
+ /**
+ * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. We use the entire data set to
+ * update the gradient in each iteration.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param stepSize Step size to be used for each iteration of Gradient Descent.
+ * @param regParam Regularization parameter.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a LassoModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ regParam: Double)
+ : LassoModel =
+ {
+ train(input, numIterations, stepSize, regParam, 1.0)
+ }
+
+ /**
+ * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using a step size of 1.0. We use the entire data set to
+ * update the gradient in each iteration.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a LassoModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int)
+ : LassoModel =
+ {
+ train(input, numIterations, 1.0, 1.0, 1.0)
+ }
+
+ def main(args: Array[String]) {
+ if (args.length != 5) {
+ println("Usage: Lasso <master> <input_dir> <step_size> <regularization_parameter> <niters>")
+ System.exit(1)
+ }
+ val sc = new SparkContext(args(0), "Lasso")
+ val data = MLUtils.loadLabeledData(sc, args(1))
+ val model = LassoWithSGD.train(data, args(4).toInt, args(2).toDouble, args(3).toDouble)
+
+ sc.stop()
+ }
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
new file mode 100644
index 0000000000..ae95ea24fc
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression
+
+import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.optimization._
+import org.apache.spark.mllib.util.MLUtils
+
+import org.jblas.DoubleMatrix
+
+/**
+ * Regression model trained using LinearRegression.
+ *
+ * @param weights Weights computed for every feature.
+ * @param intercept Intercept computed for this model.
+ */
+class LinearRegressionModel(
+ override val weights: Array[Double],
+ override val intercept: Double)
+ extends GeneralizedLinearModel(weights, intercept)
+ with RegressionModel with Serializable {
+
+ override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix,
+ intercept: Double) = {
+ dataMatrix.dot(weightMatrix) + intercept
+ }
+}
+
+/**
+ * Train a regression model with no regularization using Stochastic Gradient Descent.
+ */
+class LinearRegressionWithSGD private (
+ var stepSize: Double,
+ var numIterations: Int,
+ var miniBatchFraction: Double)
+ extends GeneralizedLinearAlgorithm[LinearRegressionModel]
+ with Serializable {
+
+ val gradient = new SquaredGradient()
+ val updater = new SimpleUpdater()
+ val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize)
+ .setNumIterations(numIterations)
+ .setMiniBatchFraction(miniBatchFraction)
+
+ /**
+ * Construct a LinearRegression object with default parameters
+ */
+ def this() = this(1.0, 100, 1.0)
+
+ def createModel(weights: Array[Double], intercept: Double) = {
+ new LinearRegressionModel(weights, intercept)
+ }
+}
+
+/**
+ * Top-level methods for calling LinearRegression.
+ */
+object LinearRegressionWithSGD {
+
+ /**
+ * Train a Linear Regression model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
+ * gradient descent are initialized using the initial weights provided.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ * @param initialWeights Initial set of weights to be used. Array should be equal in size to
+ * the number of features in the data.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ miniBatchFraction: Double,
+ initialWeights: Array[Double])
+ : LinearRegressionModel =
+ {
+ new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction).run(input,
+ initialWeights)
+ }
+
+ /**
+ * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ miniBatchFraction: Double)
+ : LinearRegressionModel =
+ {
+ new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction).run(input)
+ }
+
+ /**
+ * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. We use the entire data set to
+ * update the gradient in each iteration.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param stepSize Step size to be used for each iteration of Gradient Descent.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a LinearRegressionModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double)
+ : LinearRegressionModel =
+ {
+ train(input, numIterations, stepSize, 1.0)
+ }
+
+ /**
+ * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using a step size of 1.0. We use the entire data set to
+ * update the gradient in each iteration.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a LinearRegressionModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int)
+ : LinearRegressionModel =
+ {
+ train(input, numIterations, 1.0, 1.0)
+ }
+
+ def main(args: Array[String]) {
+ if (args.length != 5) {
+ println("Usage: LinearRegression <master> <input_dir> <step_size> <niters>")
+ System.exit(1)
+ }
+ val sc = new SparkContext(args(0), "LinearRegression")
+ val data = MLUtils.loadLabeledData(sc, args(1))
+ val model = LinearRegressionWithSGD.train(data, args(3).toInt, args(2).toDouble)
+
+ sc.stop()
+ }
+}
diff --git a/mllib/src/main/scala/spark/mllib/regression/Regression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
index 645204ddf3..423afc32d6 100644
--- a/mllib/src/main/scala/spark/mllib/regression/Regression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.mllib.regression
+package org.apache.spark.mllib.regression
-import spark.RDD
+import org.apache.spark.rdd.RDD
-trait RegressionModel {
+trait RegressionModel extends Serializable {
/**
* Predict values for the given data set using the model trained.
*
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
new file mode 100644
index 0000000000..b29508d2b9
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression
+
+import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.optimization._
+import org.apache.spark.mllib.util.MLUtils
+
+import org.jblas.DoubleMatrix
+
+/**
+ * Regression model trained using RidgeRegression.
+ *
+ * @param weights Weights computed for every feature.
+ * @param intercept Intercept computed for this model.
+ */
+class RidgeRegressionModel(
+ override val weights: Array[Double],
+ override val intercept: Double)
+ extends GeneralizedLinearModel(weights, intercept)
+ with RegressionModel with Serializable {
+
+ override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix,
+ intercept: Double) = {
+ dataMatrix.dot(weightMatrix) + intercept
+ }
+}
+
+/**
+ * Train a regression model with L2-regularization using Stochastic Gradient Descent.
+ */
+class RidgeRegressionWithSGD private (
+ var stepSize: Double,
+ var numIterations: Int,
+ var regParam: Double,
+ var miniBatchFraction: Double)
+ extends GeneralizedLinearAlgorithm[RidgeRegressionModel]
+ with Serializable {
+
+ val gradient = new SquaredGradient()
+ val updater = new SquaredL2Updater()
+
+ @transient val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize)
+ .setNumIterations(numIterations)
+ .setRegParam(regParam)
+ .setMiniBatchFraction(miniBatchFraction)
+
+ // We don't want to penalize the intercept in RidgeRegression, so set this to false.
+ setIntercept(false)
+
+ var yMean = 0.0
+ var xColMean: DoubleMatrix = _
+ var xColSd: DoubleMatrix = _
+
+ /**
+ * Construct a RidgeRegression object with default parameters
+ */
+ def this() = this(1.0, 100, 1.0, 1.0)
+
+ def createModel(weights: Array[Double], intercept: Double) = {
+ val weightsMat = new DoubleMatrix(weights.length + 1, 1, (Array(intercept) ++ weights):_*)
+ val weightsScaled = weightsMat.div(xColSd)
+ val interceptScaled = yMean - (weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0))
+
+ new RidgeRegressionModel(weightsScaled.data, interceptScaled)
+ }
+
+ override def run(
+ input: RDD[LabeledPoint],
+ initialWeights: Array[Double])
+ : RidgeRegressionModel =
+ {
+ val nfeatures: Int = input.first.features.length
+ val nexamples: Long = input.count()
+
+ // To avoid penalizing the intercept, we center and scale the data.
+ val stats = MLUtils.computeStats(input, nfeatures, nexamples)
+ yMean = stats._1
+ xColMean = stats._2
+ xColSd = stats._3
+
+ val normalizedData = input.map { point =>
+ val yNormalized = point.label - yMean
+ val featuresMat = new DoubleMatrix(nfeatures, 1, point.features:_*)
+ val featuresNormalized = featuresMat.sub(xColMean).divi(xColSd)
+ LabeledPoint(yNormalized, featuresNormalized.toArray)
+ }
+
+ super.run(normalizedData, initialWeights)
+ }
+}
+
+/**
+ * Top-level methods for calling RidgeRegression.
+ */
+object RidgeRegressionWithSGD {
+
+ /**
+ * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
+ * gradient descent are initialized using the initial weights provided.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param regParam Regularization parameter.
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ * @param initialWeights Initial set of weights to be used. Array should be equal in size to
+ * the number of features in the data.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ regParam: Double,
+ miniBatchFraction: Double,
+ initialWeights: Array[Double])
+ : RidgeRegressionModel =
+ {
+ new RidgeRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(
+ input, initialWeights)
+ }
+
+ /**
+ * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. Each iteration uses
+ * `miniBatchFraction` fraction of the data to calculate the gradient.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @param stepSize Step size to be used for each iteration of gradient descent.
+ * @param regParam Regularization parameter.
+ * @param miniBatchFraction Fraction of data to be used per iteration.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ regParam: Double,
+ miniBatchFraction: Double)
+ : RidgeRegressionModel =
+ {
+ new RidgeRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input)
+ }
+
+ /**
+ * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using the specified step size. We use the entire data set to
+ * update the gradient in each iteration.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param stepSize Step size to be used for each iteration of Gradient Descent.
+ * @param regParam Regularization parameter.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a RidgeRegressionModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int,
+ stepSize: Double,
+ regParam: Double)
+ : RidgeRegressionModel =
+ {
+ train(input, numIterations, stepSize, regParam, 1.0)
+ }
+
+ /**
+ * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number
+ * of iterations of gradient descent using a step size of 1.0. We use the entire data set to
+ * update the gradient in each iteration.
+ *
+ * @param input RDD of (label, array of features) pairs.
+ * @param numIterations Number of iterations of gradient descent to run.
+ * @return a RidgeRegressionModel which has the weights and offset from training.
+ */
+ def train(
+ input: RDD[LabeledPoint],
+ numIterations: Int)
+ : RidgeRegressionModel =
+ {
+ train(input, numIterations, 1.0, 1.0, 1.0)
+ }
+
+ def main(args: Array[String]) {
+ if (args.length != 5) {
+ println("Usage: RidgeRegression <master> <input_dir> <step_size> <regularization_parameter>" +
+ " <niters>")
+ System.exit(1)
+ }
+ val sc = new SparkContext(args(0), "RidgeRegression")
+ val data = MLUtils.loadLabeledData(sc, args(1))
+ val model = RidgeRegressionWithSGD.train(data, args(4).toInt, args(2).toDouble,
+ args(3).toDouble)
+
+ sc.stop()
+ }
+}
diff --git a/core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala
index df55be1254..8b55bce7c4 100644
--- a/core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala
@@ -15,30 +15,29 @@
* limitations under the License.
*/
-package spark.deploy
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.mapred.JobConf
+package org.apache.spark.mllib.util
+import org.apache.spark.Logging
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.regression.LabeledPoint
/**
- * Contains util methods to interact with Hadoop from spark.
+ * A collection of methods used to validate data before applying ML algorithms.
*/
-object SparkHadoopUtil {
-
- def getUserNameFromEnvironment(): String = {
- // defaulting to -D ...
- System.getProperty("user.name")
- }
-
- def runAsUser(func: (Product) => Unit, args: Product) {
-
- // Add support, if exists - for now, simply run func !
- func(args)
+object DataValidators extends Logging {
+
+ /**
+ * Function to check if labels used for classification are either zero or one.
+ *
+ * @param data - input data set that needs to be checked
+ *
+ * @return True if labels are all zero or one, false otherwise.
+ */
+ val classificationLabels: RDD[LabeledPoint] => Boolean = { data =>
+ val numInvalid = data.filter(x => x.label != 1.0 && x.label != 0.0).count()
+ if (numInvalid != 0) {
+ logError("Classification labels should be 0 or 1. Found " + numInvalid + " invalid labels")
+ }
+ numInvalid == 0
}
-
- // Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems
- def newConfiguration(): Configuration = new Configuration()
-
- // add any user credentials to the job conf which are necessary for running on a secure Hadoop cluster
- def addCredentials(conf: JobConf) {}
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala
new file mode 100644
index 0000000000..9109189dff
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.util
+
+import scala.util.Random
+
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+
+/**
+ * Generate test data for KMeans. This class first chooses k cluster centers
+ * from a d-dimensional Gaussian distribution scaled by factor r and then creates a Gaussian
+ * cluster with scale 1 around each center.
+ */
+
+object KMeansDataGenerator {
+
+ /**
+ * Generate an RDD containing test data for KMeans.
+ *
+ * @param sc SparkContext to use for creating the RDD
+ * @param numPoints Number of points that will be contained in the RDD
+ * @param k Number of clusters
+ * @param d Number of dimensions
+ * @param r Scaling factor for the distribution of the initial centers
+ * @param numPartitions Number of partitions of the generated RDD; default 2
+ */
+ def generateKMeansRDD(
+ sc: SparkContext,
+ numPoints: Int,
+ k: Int,
+ d: Int,
+ r: Double,
+ numPartitions: Int = 2)
+ : RDD[Array[Double]] =
+ {
+ // First, generate some centers
+ val rand = new Random(42)
+ val centers = Array.fill(k)(Array.fill(d)(rand.nextGaussian() * r))
+ // Then generate points around each center
+ sc.parallelize(0 until numPoints, numPartitions).map { idx =>
+ val center = centers(idx % k)
+ val rand2 = new Random(42 + idx)
+ Array.tabulate(d)(i => center(i) + rand2.nextGaussian())
+ }
+ }
+
+ def main(args: Array[String]) {
+ if (args.length < 6) {
+ println("Usage: KMeansGenerator " +
+ "<master> <output_dir> <num_points> <k> <d> <r> [<num_partitions>]")
+ System.exit(1)
+ }
+
+ val sparkMaster = args(0)
+ val outputPath = args(1)
+ val numPoints = args(2).toInt
+ val k = args(3).toInt
+ val d = args(4).toInt
+ val r = args(5).toDouble
+ val parts = if (args.length >= 7) args(6).toInt else 2
+
+ val sc = new SparkContext(sparkMaster, "KMeansDataGenerator")
+ val data = generateKMeansRDD(sc, numPoints, k, d, r, parts)
+ data.map(_.mkString(" ")).saveAsTextFile(outputPath)
+
+ System.exit(0)
+ }
+}
+
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
new file mode 100644
index 0000000000..bc5045fb05
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.util
+
+import scala.collection.JavaConversions._
+import scala.util.Random
+
+import org.jblas.DoubleMatrix
+
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.regression.LabeledPoint
+
+/**
+ * Generate sample data used for Linear Data. This class generates
+ * uniformly random values for every feature and adds Gaussian noise with mean `eps` to the
+ * response variable `Y`.
+ */
+object LinearDataGenerator {
+
+ /**
+ * Return a Java List of synthetic data randomly generated according to a multi
+ * collinear model.
+ * @param intercept Data intercept
+ * @param weights Weights to be applied.
+ * @param nPoints Number of points in sample.
+ * @param seed Random seed
+ * @return Java List of input.
+ */
+ def generateLinearInputAsList(
+ intercept: Double,
+ weights: Array[Double],
+ nPoints: Int,
+ seed: Int,
+ eps: Double): java.util.List[LabeledPoint] = {
+ seqAsJavaList(generateLinearInput(intercept, weights, nPoints, seed, eps))
+ }
+
+ /**
+ *
+ * @param intercept Data intercept
+ * @param weights Weights to be applied.
+ * @param nPoints Number of points in sample.
+ * @param seed Random seed
+ * @param eps Epsilon scaling factor.
+ * @return
+ */
+ def generateLinearInput(
+ intercept: Double,
+ weights: Array[Double],
+ nPoints: Int,
+ seed: Int,
+ eps: Double = 0.1): Seq[LabeledPoint] = {
+
+ val rnd = new Random(seed)
+ val weightsMat = new DoubleMatrix(1, weights.length, weights:_*)
+ val x = Array.fill[Array[Double]](nPoints)(
+ Array.fill[Double](weights.length)(2 * rnd.nextDouble - 1.0))
+ val y = x.map { xi =>
+ (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) + intercept + eps * rnd.nextGaussian()
+ }
+ y.zip(x).map(p => LabeledPoint(p._1, p._2))
+ }
+
+ /**
+ * Generate an RDD containing sample data for Linear Regression models - including Ridge, Lasso,
+ * and uregularized variants.
+ *
+ * @param sc SparkContext to be used for generating the RDD.
+ * @param nexamples Number of examples that will be contained in the RDD.
+ * @param nfeatures Number of features to generate for each example.
+ * @param eps Epsilon factor by which examples are scaled.
+ * @param weights Weights associated with the first weights.length features.
+ * @param nparts Number of partitions in the RDD. Default value is 2.
+ *
+ * @return RDD of LabeledPoint containing sample data.
+ */
+ def generateLinearRDD(
+ sc: SparkContext,
+ nexamples: Int,
+ nfeatures: Int,
+ eps: Double,
+ nparts: Int = 2,
+ intercept: Double = 0.0) : RDD[LabeledPoint] = {
+ org.jblas.util.Random.seed(42)
+ // Random values distributed uniformly in [-0.5, 0.5]
+ val w = DoubleMatrix.rand(nfeatures, 1).subi(0.5)
+
+ val data: RDD[LabeledPoint] = sc.parallelize(0 until nparts, nparts).flatMap { p =>
+ val seed = 42 + p
+ val examplesInPartition = nexamples / nparts
+ generateLinearInput(intercept, w.toArray, examplesInPartition, seed, eps)
+ }
+ data
+ }
+
+ def main(args: Array[String]) {
+ if (args.length < 2) {
+ println("Usage: LinearDataGenerator " +
+ "<master> <output_dir> [num_examples] [num_features] [num_partitions]")
+ System.exit(1)
+ }
+
+ val sparkMaster: String = args(0)
+ val outputPath: String = args(1)
+ val nexamples: Int = if (args.length > 2) args(2).toInt else 1000
+ val nfeatures: Int = if (args.length > 3) args(3).toInt else 100
+ val parts: Int = if (args.length > 4) args(4).toInt else 2
+ val eps = 10
+
+ val sc = new SparkContext(sparkMaster, "LinearDataGenerator")
+ val data = generateLinearRDD(sc, nexamples, nfeatures, eps, nparts = parts)
+
+ MLUtils.saveLabeledData(data, outputPath)
+ sc.stop()
+ }
+}
diff --git a/mllib/src/main/scala/spark/mllib/regression/LogisticRegressionGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala
index 8094d22405..52c4a71d62 100644
--- a/mllib/src/main/scala/spark/mllib/regression/LogisticRegressionGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala
@@ -15,16 +15,49 @@
* limitations under the License.
*/
-package spark.mllib.regression
+package org.apache.spark.mllib.util
import scala.util.Random
-import org.jblas.DoubleMatrix
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.regression.LabeledPoint
-import spark.{RDD, SparkContext}
-import spark.mllib.util.MLUtils
+/**
+ * Generate test data for LogisticRegression. This class chooses positive labels
+ * with probability `probOne` and scales features for positive examples by `eps`.
+ */
+
+object LogisticRegressionDataGenerator {
+
+ /**
+ * Generate an RDD containing test data for LogisticRegression.
+ *
+ * @param sc SparkContext to use for creating the RDD.
+ * @param nexamples Number of examples that will be contained in the RDD.
+ * @param nfeatures Number of features to generate for each example.
+ * @param eps Epsilon factor by which positive examples are scaled.
+ * @param nparts Number of partitions of the generated RDD. Default value is 2.
+ * @param probOne Probability that a label is 1 (and not 0). Default value is 0.5.
+ */
+ def generateLogisticRDD(
+ sc: SparkContext,
+ nexamples: Int,
+ nfeatures: Int,
+ eps: Double,
+ nparts: Int = 2,
+ probOne: Double = 0.5): RDD[LabeledPoint] = {
+ val data = sc.parallelize(0 until nexamples, nparts).map { idx =>
+ val rnd = new Random(42 + idx)
-object LogisticRegressionGenerator {
+ val y = if (idx % 2 == 0) 0.0 else 1.0
+ val x = Array.fill[Double](nfeatures) {
+ rnd.nextGaussian() + (y * eps)
+ }
+ LabeledPoint(y, x)
+ }
+ data
+ }
def main(args: Array[String]) {
if (args.length != 5) {
@@ -40,17 +73,8 @@ object LogisticRegressionGenerator {
val parts: Int = if (args.length > 4) args(4).toInt else 2
val eps = 3
- val sc = new SparkContext(sparkMaster, "LogisticRegressionGenerator")
-
- val data: RDD[(Double, Array[Double])] = sc.parallelize(0 until nexamples, parts).map { idx =>
- val rnd = new Random(42 + idx)
-
- val y = if (idx % 2 == 0) 0 else 1
- val x = Array.fill[Double](nfeatures) {
- rnd.nextGaussian() + (y * eps)
- }
- (y, x)
- }
+ val sc = new SparkContext(sparkMaster, "LogisticRegressionDataGenerator")
+ val data = generateLogisticRDD(sc, nexamples, nfeatures, eps, parts)
MLUtils.saveLabeledData(data, outputPath)
sc.stop()
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
new file mode 100644
index 0000000000..5aec867257
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.recommendation
+
+import scala.util.Random
+
+import org.jblas.DoubleMatrix
+
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.util.MLUtils
+
+/**
+* Generate RDD(s) containing data for Matrix Factorization.
+*
+* This method samples training entries according to the oversampling factor
+* 'trainSampFact', which is a multiplicative factor of the number of
+* degrees of freedom of the matrix: rank*(m+n-rank).
+*
+* It optionally samples entries for a testing matrix using
+* 'testSampFact', the percentage of the number of training entries
+* to use for testing.
+*
+* This method takes the following inputs:
+* sparkMaster (String) The master URL.
+* outputPath (String) Directory to save output.
+* m (Int) Number of rows in data matrix.
+* n (Int) Number of columns in data matrix.
+* rank (Int) Underlying rank of data matrix.
+* trainSampFact (Double) Oversampling factor.
+* noise (Boolean) Whether to add gaussian noise to training data.
+* sigma (Double) Standard deviation of added gaussian noise.
+* test (Boolean) Whether to create testing RDD.
+* testSampFact (Double) Percentage of training data to use as test data.
+*/
+
+object MFDataGenerator{
+
+ def main(args: Array[String]) {
+ if (args.length < 2) {
+ println("Usage: MFDataGenerator " +
+ "<master> <outputDir> [m] [n] [rank] [trainSampFact] [noise] [sigma] [test] [testSampFact]")
+ System.exit(1)
+ }
+
+ val sparkMaster: String = args(0)
+ val outputPath: String = args(1)
+ val m: Int = if (args.length > 2) args(2).toInt else 100
+ val n: Int = if (args.length > 3) args(3).toInt else 100
+ val rank: Int = if (args.length > 4) args(4).toInt else 10
+ val trainSampFact: Double = if (args.length > 5) args(5).toDouble else 1.0
+ val noise: Boolean = if (args.length > 6) args(6).toBoolean else false
+ val sigma: Double = if (args.length > 7) args(7).toDouble else 0.1
+ val test: Boolean = if (args.length > 8) args(8).toBoolean else false
+ val testSampFact: Double = if (args.length > 9) args(9).toDouble else 0.1
+
+ val sc = new SparkContext(sparkMaster, "MFDataGenerator")
+
+ val A = DoubleMatrix.randn(m, rank)
+ val B = DoubleMatrix.randn(rank, n)
+ val z = 1 / (scala.math.sqrt(scala.math.sqrt(rank)))
+ A.mmuli(z)
+ B.mmuli(z)
+ val fullData = A.mmul(B)
+
+ val df = rank * (m + n - rank)
+ val sampSize = scala.math.min(scala.math.round(trainSampFact * df),
+ scala.math.round(.99 * m * n)).toInt
+ val rand = new Random()
+ val mn = m * n
+ val shuffled = rand.shuffle(1 to mn toIterable)
+
+ val omega = shuffled.slice(0, sampSize)
+ val ordered = omega.sortWith(_ < _).toArray
+ val trainData: RDD[(Int, Int, Double)] = sc.parallelize(ordered)
+ .map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1)))
+
+ // optionally add gaussian noise
+ if (noise) {
+ trainData.map(x => (x._1, x._2, x._3 + rand.nextGaussian * sigma))
+ }
+
+ trainData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath)
+
+ // optionally generate testing data
+ if (test) {
+ val testSampSize = scala.math
+ .min(scala.math.round(sampSize * testSampFact),scala.math.round(mn - sampSize)).toInt
+ val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
+ val testOrdered = testOmega.sortWith(_ < _).toArray
+ val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)
+ .map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1)))
+ testData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath)
+ }
+
+ sc.stop()
+
+ }
+}
diff --git a/mllib/src/main/scala/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index b5e564df6d..d91b74c3ac 100644
--- a/mllib/src/main/scala/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -15,38 +15,49 @@
* limitations under the License.
*/
-package spark.mllib.util
+package org.apache.spark.mllib.util
-import spark.{RDD, SparkContext}
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext._
import org.jblas.DoubleMatrix
+import org.apache.spark.mllib.regression.LabeledPoint
/**
- * Helper methods to load and save data
- * Data format:
- * <l>, <f1> <f2> ...
- * where <f1>, <f2> are feature values in Double and <l> is the corresponding label as Double.
+ * Helper methods to load, save and pre-process data used in ML Lib.
*/
object MLUtils {
/**
+ * Load labeled data from a file. The data format used here is
+ * <L>, <f1> <f2> ...
+ * where <f1>, <f2> are feature values in Double and <L> is the corresponding label as Double.
+ *
* @param sc SparkContext
* @param dir Directory to the input data files.
- * @return An RDD of tuples. For each tuple, the first element is the label, and the second
- * element represents the feature values (an array of Double).
+ * @return An RDD of LabeledPoint. Each labeled point has two elements: the first element is
+ * the label, and the second element represents the feature values (an array of Double).
*/
- def loadLabeledData(sc: SparkContext, dir: String): RDD[(Double, Array[Double])] = {
+ def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = {
sc.textFile(dir).map { line =>
- val parts = line.split(",")
+ val parts = line.split(',')
val label = parts(0).toDouble
- val features = parts(1).trim().split(" ").map(_.toDouble)
- (label, features)
+ val features = parts(1).trim().split(' ').map(_.toDouble)
+ LabeledPoint(label, features)
}
}
- def saveLabeledData(data: RDD[(Double, Array[Double])], dir: String) {
- val dataStr = data.map(x => x._1 + "," + x._2.mkString(" "))
+ /**
+ * Save labeled data to a file. The data format used here is
+ * <L>, <f1> <f2> ...
+ * where <f1>, <f2> are feature values in Double and <L> is the corresponding label as Double.
+ *
+ * @param data An RDD of LabeledPoints containing data to be saved.
+ * @param dir Directory to save the data.
+ */
+ def saveLabeledData(data: RDD[LabeledPoint], dir: String) {
+ val dataStr = data.map(x => x.label + "," + x.features.mkString(" "))
dataStr.saveAsTextFile(dir)
}
@@ -62,16 +73,16 @@ object MLUtils {
* xColMean - Row vector with mean for every column (or feature) of the input data
* xColSd - Row vector standard deviation for every column (or feature) of the input data.
*/
- def computeStats(data: RDD[(Double, Array[Double])], nfeatures: Int, nexamples: Long):
+ def computeStats(data: RDD[LabeledPoint], nfeatures: Int, nexamples: Long):
(Double, DoubleMatrix, DoubleMatrix) = {
- val yMean: Double = data.map { case (y, features) => y }.reduce(_ + _) / nexamples
+ val yMean: Double = data.map { labeledPoint => labeledPoint.label }.reduce(_ + _) / nexamples
// NOTE: We shuffle X by column here to compute column sum and sum of squares.
- val xColSumSq: RDD[(Int, (Double, Double))] = data.flatMap { case(y, features) =>
- val nCols = features.length
+ val xColSumSq: RDD[(Int, (Double, Double))] = data.flatMap { labeledPoint =>
+ val nCols = labeledPoint.features.length
// Traverse over every column and emit (col, value, value^2)
Iterator.tabulate(nCols) { i =>
- (i, (features(i), features(i)*features(i)))
+ (i, (labeledPoint.features(i), labeledPoint.features(i)*labeledPoint.features(i)))
}
}.reduceByKey { case(x1, x2) =>
(x1._1 + x2._1, x1._2 + x2._2)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
new file mode 100644
index 0000000000..6e9f667635
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
@@ -0,0 +1,51 @@
+package org.apache.spark.mllib.util
+
+import scala.util.Random
+
+import org.jblas.DoubleMatrix
+
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.mllib.regression.LabeledPoint
+
+/**
+ * Generate sample data used for SVM. This class generates uniform random values
+ * for the features and adds Gaussian noise with weight 0.1 to generate labels.
+ */
+object SVMDataGenerator {
+
+ def main(args: Array[String]) {
+ if (args.length < 2) {
+ println("Usage: SVMGenerator " +
+ "<master> <output_dir> [num_examples] [num_features] [num_partitions]")
+ System.exit(1)
+ }
+
+ val sparkMaster: String = args(0)
+ val outputPath: String = args(1)
+ val nexamples: Int = if (args.length > 2) args(2).toInt else 1000
+ val nfeatures: Int = if (args.length > 3) args(3).toInt else 2
+ val parts: Int = if (args.length > 4) args(4).toInt else 2
+
+ val sc = new SparkContext(sparkMaster, "SVMGenerator")
+
+ val globalRnd = new Random(94720)
+ val trueWeights = new DoubleMatrix(1, nfeatures + 1,
+ Array.fill[Double](nfeatures + 1)(globalRnd.nextGaussian()):_*)
+
+ val data: RDD[LabeledPoint] = sc.parallelize(0 until nexamples, parts).map { idx =>
+ val rnd = new Random(42 + idx)
+
+ val x = Array.fill[Double](nfeatures) {
+ rnd.nextDouble() * 2.0 - 1.0
+ }
+ val yD = (new DoubleMatrix(1, x.length, x:_*)).dot(trueWeights) + rnd.nextGaussian() * 0.1
+ val y = if (yD < 0) 0.0 else 1.0
+ LabeledPoint(y, x)
+ }
+
+ MLUtils.saveLabeledData(data, outputPath)
+
+ sc.stop()
+ }
+}
diff --git a/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala
deleted file mode 100644
index 2fb0c8136f..0000000000
--- a/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.mllib.optimization
-
-import org.jblas.DoubleMatrix
-
-abstract class Gradient extends Serializable {
- /**
- * Compute the gradient for a given row of data.
- *
- * @param data - One row of data. Row matrix of size 1xn where n is the number of features.
- * @param label - Label for this data item.
- * @param weights - Column matrix containing weights for every feature.
- */
- def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix):
- (DoubleMatrix, Double)
-}
-
-class LogisticGradient extends Gradient {
- override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix):
- (DoubleMatrix, Double) = {
- val margin: Double = -1.0 * data.dot(weights)
- val gradientMultiplier = (1.0 / (1.0 + math.exp(margin))) - label
-
- val gradient = data.mul(gradientMultiplier)
- val loss =
- if (margin > 0) {
- math.log(1 + math.exp(0 - margin))
- } else {
- math.log(1 + math.exp(margin)) - margin
- }
-
- (gradient, loss)
- }
-}
diff --git a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
deleted file mode 100644
index e1b73bc25e..0000000000
--- a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.mllib.optimization
-
-import spark.{Logging, RDD, SparkContext}
-import spark.SparkContext._
-
-import org.jblas.DoubleMatrix
-
-import scala.collection.mutable.ArrayBuffer
-
-
-object GradientDescent {
-
- /**
- * Run gradient descent in parallel using mini batches.
- * Based on Matlab code written by John Duchi.
- *
- * @param data - Input data for SGD. RDD of form (label, [feature values]).
- * @param gradient - Gradient object that will be used to compute the gradient.
- * @param updater - Updater object that will be used to update the model.
- * @param stepSize - stepSize to be used during update.
- * @param numIters - number of iterations that SGD should be run.
- * @param miniBatchFraction - fraction of the input data set that should be used for
- * one iteration of SGD. Default value 1.0.
- *
- * @return weights - Column matrix containing weights for every feature.
- * @return lossHistory - Array containing the loss computed for every iteration.
- */
- def runMiniBatchSGD(
- data: RDD[(Double, Array[Double])],
- gradient: Gradient,
- updater: Updater,
- stepSize: Double,
- numIters: Int,
- miniBatchFraction: Double=1.0) : (DoubleMatrix, Array[Double]) = {
-
- val lossHistory = new ArrayBuffer[Double](numIters)
-
- val nfeatures: Int = data.take(1)(0)._2.length
- val nexamples: Long = data.count()
- val miniBatchSize = nexamples * miniBatchFraction
-
- // Initialize weights as a column matrix
- var weights = DoubleMatrix.ones(nfeatures)
- var reg_val = 0.0
-
- for (i <- 1 to numIters) {
- val (gradientSum, lossSum) = data.sample(false, miniBatchFraction, 42+i).map {
- case (y, features) =>
- val featuresRow = new DoubleMatrix(features.length, 1, features:_*)
- val (grad, loss) = gradient.compute(featuresRow, y, weights)
- (grad, loss)
- }.reduce((a, b) => (a._1.addi(b._1), a._2 + b._2))
-
- lossHistory.append(lossSum / miniBatchSize + reg_val)
- val update = updater.compute(weights, gradientSum.div(miniBatchSize), stepSize, i)
- weights = update._1
- reg_val = update._2
- }
-
- (weights, lossHistory.toArray)
- }
-}
diff --git a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
deleted file mode 100644
index b864fd4634..0000000000
--- a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.mllib.optimization
-
-import org.jblas.DoubleMatrix
-
-abstract class Updater extends Serializable {
- /**
- * Compute an updated value for weights given the gradient, stepSize and iteration number.
- *
- * @param weightsOld - Column matrix of size nx1 where n is the number of features.
- * @param gradient - Column matrix of size nx1 where n is the number of features.
- * @param stepSize - step size across iterations
- * @param iter - Iteration number
- *
- * @return weightsNew - Column matrix containing updated weights
- * @return reg_val - regularization value
- */
- def compute(weightsOlds: DoubleMatrix, gradient: DoubleMatrix, stepSize: Double, iter: Int):
- (DoubleMatrix, Double)
-}
-
-class SimpleUpdater extends Updater {
- override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix,
- stepSize: Double, iter: Int): (DoubleMatrix, Double) = {
- val normGradient = gradient.mul(stepSize / math.sqrt(iter))
- (weightsOld.sub(normGradient), 0)
- }
-}
diff --git a/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala b/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala
deleted file mode 100644
index bb294c2257..0000000000
--- a/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.mllib.regression
-
-import spark.{Logging, RDD, SparkContext}
-import spark.mllib.optimization._
-import spark.mllib.util.MLUtils
-
-import org.jblas.DoubleMatrix
-
-/**
- * Logistic Regression using Stochastic Gradient Descent.
- * Based on Matlab code written by John Duchi.
- */
-class LogisticRegressionModel(
- val weights: DoubleMatrix,
- val intercept: Double,
- val losses: Array[Double]) extends RegressionModel {
-
- override def predict(testData: spark.RDD[Array[Double]]) = {
- testData.map { x =>
- val margin = new DoubleMatrix(1, x.length, x:_*).mmul(this.weights).get(0) + this.intercept
- 1.0/ (1.0 + math.exp(margin * -1))
- }
- }
-
- override def predict(testData: Array[Double]): Double = {
- val dataMat = new DoubleMatrix(1, testData.length, testData:_*)
- val margin = dataMat.mmul(this.weights).get(0) + this.intercept
- 1.0/ (1.0 + math.exp(margin * -1))
- }
-}
-
-class LogisticRegression private (var stepSize: Double, var miniBatchFraction: Double,
- var numIters: Int)
- extends Logging {
-
- /**
- * Construct a LogisticRegression object with default parameters
- */
- def this() = this(1.0, 1.0, 100)
-
- /**
- * Set the step size per-iteration of SGD. Default 1.0.
- */
- def setStepSize(step: Double) = {
- this.stepSize = step
- this
- }
-
- /**
- * Set fraction of data to be used for each SGD iteration. Default 1.0.
- */
- def setMiniBatchFraction(fraction: Double) = {
- this.miniBatchFraction = fraction
- this
- }
-
- /**
- * Set the number of iterations for SGD. Default 100.
- */
- def setNumIterations(iters: Int) = {
- this.numIters = iters
- this
- }
-
- def train(input: RDD[(Double, Array[Double])]): LogisticRegressionModel = {
- // Add a extra variable consisting of all 1.0's for the intercept.
- val data = input.map { case (y, features) =>
- (y, Array(1.0, features:_*))
- }
-
- val (weights, losses) = GradientDescent.runMiniBatchSGD(
- data, new LogisticGradient(), new SimpleUpdater(), stepSize, numIters, miniBatchFraction)
-
- val weightsScaled = weights.getRange(1, weights.length)
- val intercept = weights.get(0)
-
- val model = new LogisticRegressionModel(weightsScaled, intercept, losses)
-
- logInfo("Final model weights " + model.weights)
- logInfo("Final model intercept " + model.intercept)
- logInfo("Last 10 losses " + model.losses.takeRight(10).mkString(", "))
- model
- }
-}
-
-/**
- * Top-level methods for calling Logistic Regression.
- */
-object LogisticRegression {
-
- /**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. Each iteration uses
- * `miniBatchFraction` fraction of the data to calculate the gradient.
- *
- * @param input RDD of (label, array of features) pairs.
- * @param numIterations Number of iterations of gradient descent to run.
- * @param stepSize Step size to be used for each iteration of gradient descent.
- * @param miniBatchFraction Fraction of data to be used per iteration.
- */
- def train(
- input: RDD[(Double, Array[Double])],
- numIterations: Int,
- stepSize: Double,
- miniBatchFraction: Double)
- : LogisticRegressionModel =
- {
- new LogisticRegression(stepSize, miniBatchFraction, numIterations).train(input)
- }
-
- /**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. We use the entire data set to update
- * the gradient in each iteration.
- *
- * @param input RDD of (label, array of features) pairs.
- * @param stepSize Step size to be used for each iteration of Gradient Descent.
- * @param numIterations Number of iterations of gradient descent to run.
- * @return a LogisticRegressionModel which has the weights and offset from training.
- */
- def train(
- input: RDD[(Double, Array[Double])],
- numIterations: Int,
- stepSize: Double)
- : LogisticRegressionModel =
- {
- train(input, numIterations, stepSize, 1.0)
- }
-
- /**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using a step size of 1.0. We use the entire data set to update
- * the gradient in each iteration.
- *
- * @param input RDD of (label, array of features) pairs.
- * @param numIterations Number of iterations of gradient descent to run.
- * @return a LogisticRegressionModel which has the weights and offset from training.
- */
- def train(
- input: RDD[(Double, Array[Double])],
- numIterations: Int)
- : LogisticRegressionModel =
- {
- train(input, numIterations, 1.0, 1.0)
- }
-
- def main(args: Array[String]) {
- if (args.length != 4) {
- println("Usage: LogisticRegression <master> <input_dir> <step_size> <niters>")
- System.exit(1)
- }
- val sc = new SparkContext(args(0), "LogisticRegression")
- val data = MLUtils.loadLabeledData(sc, args(1))
- val model = LogisticRegression.train(data, args(3).toInt, args(2).toDouble)
-
- sc.stop()
- }
-}
diff --git a/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala
deleted file mode 100644
index 7c7f912b43..0000000000
--- a/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.mllib.regression
-
-import spark.{Logging, RDD, SparkContext}
-import spark.mllib.util.MLUtils
-
-import org.jblas.DoubleMatrix
-import org.jblas.Solve
-
-import scala.annotation.tailrec
-import scala.collection.mutable
-
-/**
- * Ridge Regression from Joseph Gonzalez's implementation in MLBase
- */
-class RidgeRegressionModel(
- val weights: DoubleMatrix,
- val intercept: Double,
- val lambdaOpt: Double,
- val lambdas: Seq[(Double, Double, DoubleMatrix)])
- extends RegressionModel {
-
- override def predict(testData: RDD[Array[Double]]): RDD[Double] = {
- testData.map { x =>
- (new DoubleMatrix(1, x.length, x:_*).mmul(this.weights)).get(0) + this.intercept
- }
- }
-
- override def predict(testData: Array[Double]): Double = {
- (new DoubleMatrix(1, testData.length, testData:_*).mmul(this.weights)).get(0) + this.intercept
- }
-}
-
-class RidgeRegression private (var lambdaLow: Double, var lambdaHigh: Double)
- extends Logging {
-
- def this() = this(0.0, 100.0)
-
- /**
- * Set the lower bound on binary search for lambda's. Default is 0.
- */
- def setLowLambda(low: Double) = {
- this.lambdaLow = low
- this
- }
-
- /**
- * Set the upper bound on binary search for lambda's. Default is 100.0.
- */
- def setHighLambda(hi: Double) = {
- this.lambdaHigh = hi
- this
- }
-
- def train(input: RDD[(Double, Array[Double])]): RidgeRegressionModel = {
- val nfeatures: Int = input.take(1)(0)._2.length
- val nexamples: Long = input.count()
-
- val (yMean, xColMean, xColSd) = MLUtils.computeStats(input, nfeatures, nexamples)
-
- val data = input.map { case(y, features) =>
- val yNormalized = y - yMean
- val featuresMat = new DoubleMatrix(nfeatures, 1, features:_*)
- val featuresNormalized = featuresMat.sub(xColMean).divi(xColSd)
- (yNormalized, featuresNormalized.toArray)
- }
-
- // Compute XtX - Size of XtX is nfeatures by nfeatures
- val XtX: DoubleMatrix = data.map { case (y, features) =>
- val x = new DoubleMatrix(1, features.length, features:_*)
- x.transpose().mmul(x)
- }.reduce(_.addi(_))
-
- // Compute Xt*y - Size of Xty is nfeatures by 1
- val Xty: DoubleMatrix = data.map { case (y, features) =>
- new DoubleMatrix(features.length, 1, features:_*).mul(y)
- }.reduce(_.addi(_))
-
- // Define a function to compute the leave one out cross validation error
- // for a single example
- def crossValidate(lambda: Double): (Double, Double, DoubleMatrix) = {
- // Compute the MLE ridge regression parameter value
-
- // Ridge Regression parameter = inv(XtX + \lambda*I) * Xty
- val XtXlambda = DoubleMatrix.eye(nfeatures).muli(lambda).addi(XtX)
- val w = Solve.solveSymmetric(XtXlambda, Xty)
-
- val invXtX = Solve.solveSymmetric(XtXlambda, DoubleMatrix.eye(nfeatures))
-
- // compute the generalized cross validation score
- val cverror = data.map {
- case (y, features) =>
- val x = new DoubleMatrix(features.length, 1, features:_*)
- val yhat = w.transpose().mmul(x).get(0)
- val H_ii = x.transpose().mmul(invXtX).mmul(x).get(0)
- val residual = (y - yhat) / (1.0 - H_ii)
- residual * residual
- }.reduce(_ + _) / nexamples
-
- (lambda, cverror, w)
- }
-
- // Binary search for the best assignment to lambda.
- def binSearch(low: Double, high: Double): Seq[(Double, Double, DoubleMatrix)] = {
- val buffer = mutable.ListBuffer.empty[(Double, Double, DoubleMatrix)]
-
- @tailrec
- def loop(low: Double, high: Double): Seq[(Double, Double, DoubleMatrix)] = {
- val mid = (high - low) / 2 + low
- val lowValue = crossValidate((mid - low) / 2 + low)
- val highValue = crossValidate((high - mid) / 2 + mid)
- val (newLow, newHigh) = if (lowValue._2 < highValue._2) {
- (low, mid + (high-low)/4)
- } else {
- (mid - (high-low)/4, high)
- }
- if (newHigh - newLow > 1.0E-7) {
- buffer += lowValue += highValue
- loop(newLow, newHigh)
- } else {
- buffer += lowValue += highValue
- buffer.result()
- }
- }
-
- loop(low, high)
- }
-
- // Actually compute the best lambda
- val lambdas = binSearch(lambdaLow, lambdaHigh).sortBy(_._1)
-
- // Find the best parameter set by taking the lowest cverror.
- val (lambdaOpt, cverror, weights) = lambdas.reduce((a, b) => if (a._2 < b._2) a else b)
-
- // Return the model which contains the solution
- val weightsScaled = weights.div(xColSd)
- val intercept = yMean - (weights.transpose().mmul(xColMean.div(xColSd)).get(0))
- val model = new RidgeRegressionModel(weightsScaled, intercept, lambdaOpt, lambdas)
-
- logInfo("RidgeRegression: optimal lambda " + model.lambdaOpt)
- logInfo("RidgeRegression: optimal weights " + model.weights)
- logInfo("RidgeRegression: optimal intercept " + model.intercept)
- logInfo("RidgeRegression: cross-validation error " + cverror)
-
- model
- }
-}
-
-/**
- * Top-level methods for calling Ridge Regression.
- */
-object RidgeRegression {
-
- /**
- * Train a ridge regression model given an RDD of (response, features) pairs.
- * We use the closed form solution to compute the cross-validation score for
- * a given lambda. The optimal lambda is computed by performing binary search
- * between the provided bounds of lambda.
- *
- * @param input RDD of (response, array of features) pairs.
- * @param lambdaLow lower bound used in binary search for lambda
- * @param lambdaHigh upper bound used in binary search for lambda
- */
- def train(
- input: RDD[(Double, Array[Double])],
- lambdaLow: Double,
- lambdaHigh: Double)
- : RidgeRegressionModel =
- {
- new RidgeRegression(lambdaLow, lambdaHigh).train(input)
- }
-
- /**
- * Train a ridge regression model given an RDD of (response, features) pairs.
- * We use the closed form solution to compute the cross-validation score for
- * a given lambda. The optimal lambda is computed by performing binary search
- * between lambda values of 0 and 100.
- *
- * @param input RDD of (response, array of features) pairs.
- */
- def train(input: RDD[(Double, Array[Double])]) : RidgeRegressionModel = {
- train(input, 0.0, 100.0)
- }
-
- def main(args: Array[String]) {
- if (args.length != 2) {
- println("Usage: RidgeRegression <master> <input_dir>")
- System.exit(1)
- }
- val sc = new SparkContext(args(0), "RidgeRegression")
- val data = MLUtils.loadLabeledData(sc, args(1))
- val model = RidgeRegression.train(data, 0, 1000)
- sc.stop()
- }
-}
diff --git a/mllib/src/main/scala/spark/mllib/regression/RidgeRegressionGenerator.scala b/mllib/src/main/scala/spark/mllib/regression/RidgeRegressionGenerator.scala
deleted file mode 100644
index c2260ae286..0000000000
--- a/mllib/src/main/scala/spark/mllib/regression/RidgeRegressionGenerator.scala
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.mllib.regression
-
-import scala.util.Random
-
-import org.jblas.DoubleMatrix
-
-import spark.{RDD, SparkContext}
-import spark.mllib.util.MLUtils
-
-
-object RidgeRegressionGenerator {
-
- def main(args: Array[String]) {
- if (args.length != 5) {
- println("Usage: RidgeRegressionGenerator " +
- "<master> <output_dir> <num_examples> <num_features> <num_partitions>")
- System.exit(1)
- }
-
- val sparkMaster: String = args(0)
- val outputPath: String = args(1)
- val nexamples: Int = if (args.length > 2) args(2).toInt else 1000
- val nfeatures: Int = if (args.length > 3) args(3).toInt else 100
- val parts: Int = if (args.length > 4) args(4).toInt else 2
- val eps = 10
-
- org.jblas.util.Random.seed(42)
- val sc = new SparkContext(sparkMaster, "RidgeRegressionGenerator")
-
- // Random values distributed uniformly in [-0.5, 0.5]
- val w = DoubleMatrix.rand(nfeatures, 1).subi(0.5)
- w.put(0, 0, 10)
- w.put(1, 0, 10)
-
- val data: RDD[(Double, Array[Double])] = sc.parallelize(0 until parts, parts).flatMap { p =>
- org.jblas.util.Random.seed(42 + p)
- val examplesInPartition = nexamples / parts
-
- val X = DoubleMatrix.rand(examplesInPartition, nfeatures)
- val y = X.mmul(w)
-
- val rnd = new Random(42 + p)
-
- val normalValues = Array.fill[Double](examplesInPartition)(rnd.nextGaussian() * eps)
- val yObs = new DoubleMatrix(normalValues).addi(y)
-
- Iterator.tabulate(examplesInPartition) { i =>
- (yObs.get(i, 0), X.getRow(i).toArray)
- }
- }
-
- MLUtils.saveLabeledData(data, outputPath)
- sc.stop()
- }
-}
diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java
new file mode 100644
index 0000000000..e18e3bc6a8
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.classification;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import org.apache.spark.mllib.regression.LabeledPoint;
+
+public class JavaLogisticRegressionSuite implements Serializable {
+ private transient JavaSparkContext sc;
+
+ @Before
+ public void setUp() {
+ sc = new JavaSparkContext("local", "JavaLogisticRegressionSuite");
+ }
+
+ @After
+ public void tearDown() {
+ sc.stop();
+ sc = null;
+ System.clearProperty("spark.driver.port");
+ }
+
+ int validatePrediction(List<LabeledPoint> validationData, LogisticRegressionModel model) {
+ int numAccurate = 0;
+ for (LabeledPoint point: validationData) {
+ Double prediction = model.predict(point.features());
+ if (prediction == point.label()) {
+ numAccurate++;
+ }
+ }
+ return numAccurate;
+ }
+
+ @Test
+ public void runLRUsingConstructor() {
+ int nPoints = 10000;
+ double A = 2.0;
+ double B = -1.5;
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(
+ LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 42), 2).cache();
+ List<LabeledPoint> validationData =
+ LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 17);
+
+ LogisticRegressionWithSGD lrImpl = new LogisticRegressionWithSGD();
+ lrImpl.optimizer().setStepSize(1.0)
+ .setRegParam(1.0)
+ .setNumIterations(100);
+ LogisticRegressionModel model = lrImpl.run(testRDD.rdd());
+
+ int numAccurate = validatePrediction(validationData, model);
+ Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
+ }
+
+ @Test
+ public void runLRUsingStaticMethods() {
+ int nPoints = 10000;
+ double A = 2.0;
+ double B = -1.5;
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(
+ LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 42), 2).cache();
+ List<LabeledPoint> validationData =
+ LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 17);
+
+ LogisticRegressionModel model = LogisticRegressionWithSGD.train(
+ testRDD.rdd(), 100, 1.0, 1.0);
+
+ int numAccurate = validatePrediction(validationData, model);
+ Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
+ }
+
+}
diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java
new file mode 100644
index 0000000000..117e5eaa8b
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.classification;
+
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import org.apache.spark.mllib.regression.LabeledPoint;
+
+public class JavaSVMSuite implements Serializable {
+ private transient JavaSparkContext sc;
+
+ @Before
+ public void setUp() {
+ sc = new JavaSparkContext("local", "JavaSVMSuite");
+ }
+
+ @After
+ public void tearDown() {
+ sc.stop();
+ sc = null;
+ System.clearProperty("spark.driver.port");
+ }
+
+ int validatePrediction(List<LabeledPoint> validationData, SVMModel model) {
+ int numAccurate = 0;
+ for (LabeledPoint point: validationData) {
+ Double prediction = model.predict(point.features());
+ if (prediction == point.label()) {
+ numAccurate++;
+ }
+ }
+ return numAccurate;
+ }
+
+ @Test
+ public void runSVMUsingConstructor() {
+ int nPoints = 10000;
+ double A = 2.0;
+ double[] weights = {-1.5, 1.0};
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(SVMSuite.generateSVMInputAsList(A,
+ weights, nPoints, 42), 2).cache();
+ List<LabeledPoint> validationData =
+ SVMSuite.generateSVMInputAsList(A, weights, nPoints, 17);
+
+ SVMWithSGD svmSGDImpl = new SVMWithSGD();
+ svmSGDImpl.optimizer().setStepSize(1.0)
+ .setRegParam(1.0)
+ .setNumIterations(100);
+ SVMModel model = svmSGDImpl.run(testRDD.rdd());
+
+ int numAccurate = validatePrediction(validationData, model);
+ Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
+ }
+
+ @Test
+ public void runSVMUsingStaticMethods() {
+ int nPoints = 10000;
+ double A = 2.0;
+ double[] weights = {-1.5, 1.0};
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(SVMSuite.generateSVMInputAsList(A,
+ weights, nPoints, 42), 2).cache();
+ List<LabeledPoint> validationData =
+ SVMSuite.generateSVMInputAsList(A, weights, nPoints, 17);
+
+ SVMModel model = SVMWithSGD.train(testRDD.rdd(), 100, 1.0, 1.0, 1.0);
+
+ int numAccurate = validatePrediction(validationData, model);
+ Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
+ }
+
+}
diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java
new file mode 100644
index 0000000000..32d3934ac1
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.clustering;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+public class JavaKMeansSuite implements Serializable {
+ private transient JavaSparkContext sc;
+
+ @Before
+ public void setUp() {
+ sc = new JavaSparkContext("local", "JavaKMeans");
+ }
+
+ @After
+ public void tearDown() {
+ sc.stop();
+ sc = null;
+ System.clearProperty("spark.driver.port");
+ }
+
+ // L1 distance between two points
+ double distance1(double[] v1, double[] v2) {
+ double distance = 0.0;
+ for (int i = 0; i < v1.length; ++i) {
+ distance = Math.max(distance, Math.abs(v1[i] - v2[i]));
+ }
+ return distance;
+ }
+
+ // Assert that two sets of points are equal, within EPSILON tolerance
+ void assertSetsEqual(double[][] v1, double[][] v2) {
+ double EPSILON = 1e-4;
+ Assert.assertTrue(v1.length == v2.length);
+ for (int i = 0; i < v1.length; ++i) {
+ double minDistance = Double.MAX_VALUE;
+ for (int j = 0; j < v2.length; ++j) {
+ minDistance = Math.min(minDistance, distance1(v1[i], v2[j]));
+ }
+ Assert.assertTrue(minDistance <= EPSILON);
+ }
+
+ for (int i = 0; i < v2.length; ++i) {
+ double minDistance = Double.MAX_VALUE;
+ for (int j = 0; j < v1.length; ++j) {
+ minDistance = Math.min(minDistance, distance1(v2[i], v1[j]));
+ }
+ Assert.assertTrue(minDistance <= EPSILON);
+ }
+ }
+
+
+ @Test
+ public void runKMeansUsingStaticMethods() {
+ List<double[]> points = new ArrayList();
+ points.add(new double[]{1.0, 2.0, 6.0});
+ points.add(new double[]{1.0, 3.0, 0.0});
+ points.add(new double[]{1.0, 4.0, 6.0});
+
+ double[][] expectedCenter = { {1.0, 3.0, 4.0} };
+
+ JavaRDD<double[]> data = sc.parallelize(points, 2);
+ KMeansModel model = KMeans.train(data.rdd(), 1, 1);
+ assertSetsEqual(model.clusterCenters(), expectedCenter);
+
+ model = KMeans.train(data.rdd(), 1, 1, 1, KMeans.RANDOM());
+ assertSetsEqual(model.clusterCenters(), expectedCenter);
+ }
+
+ @Test
+ public void runKMeansUsingConstructor() {
+ List<double[]> points = new ArrayList();
+ points.add(new double[]{1.0, 2.0, 6.0});
+ points.add(new double[]{1.0, 3.0, 0.0});
+ points.add(new double[]{1.0, 4.0, 6.0});
+
+ double[][] expectedCenter = { {1.0, 3.0, 4.0} };
+
+ JavaRDD<double[]> data = sc.parallelize(points, 2);
+ KMeansModel model = new KMeans().setK(1).setMaxIterations(5).run(data.rdd());
+ assertSetsEqual(model.clusterCenters(), expectedCenter);
+
+ model = new KMeans().setK(1)
+ .setMaxIterations(1)
+ .setRuns(1)
+ .setInitializationMode(KMeans.RANDOM())
+ .run(data.rdd());
+ assertSetsEqual(model.clusterCenters(), expectedCenter);
+ }
+}
diff --git a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
new file mode 100644
index 0000000000..3323f6cee2
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.recommendation;
+
+import java.io.Serializable;
+import java.util.List;
+
+import scala.Tuple2;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import org.jblas.DoubleMatrix;
+
+public class JavaALSSuite implements Serializable {
+ private transient JavaSparkContext sc;
+
+ @Before
+ public void setUp() {
+ sc = new JavaSparkContext("local", "JavaALS");
+ }
+
+ @After
+ public void tearDown() {
+ sc.stop();
+ sc = null;
+ System.clearProperty("spark.driver.port");
+ }
+
+ void validatePrediction(MatrixFactorizationModel model, int users, int products, int features,
+ DoubleMatrix trueRatings, double matchThreshold) {
+ DoubleMatrix predictedU = new DoubleMatrix(users, features);
+ List<scala.Tuple2<Object, double[]>> userFeatures = model.userFeatures().toJavaRDD().collect();
+ for (int i = 0; i < features; ++i) {
+ for (scala.Tuple2<Object, double[]> userFeature : userFeatures) {
+ predictedU.put((Integer)userFeature._1(), i, userFeature._2()[i]);
+ }
+ }
+ DoubleMatrix predictedP = new DoubleMatrix(products, features);
+
+ List<scala.Tuple2<Object, double[]>> productFeatures =
+ model.productFeatures().toJavaRDD().collect();
+ for (int i = 0; i < features; ++i) {
+ for (scala.Tuple2<Object, double[]> productFeature : productFeatures) {
+ predictedP.put((Integer)productFeature._1(), i, productFeature._2()[i]);
+ }
+ }
+
+ DoubleMatrix predictedRatings = predictedU.mmul(predictedP.transpose());
+
+ for (int u = 0; u < users; ++u) {
+ for (int p = 0; p < products; ++p) {
+ double prediction = predictedRatings.get(u, p);
+ double correct = trueRatings.get(u, p);
+ Assert.assertTrue(Math.abs(prediction - correct) < matchThreshold);
+ }
+ }
+ }
+
+ @Test
+ public void runALSUsingStaticMethods() {
+ int features = 1;
+ int iterations = 15;
+ int users = 10;
+ int products = 10;
+ scala.Tuple2<List<Rating>, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
+ users, products, features, 0.7);
+
+ JavaRDD<Rating> data = sc.parallelize(testData._1());
+ MatrixFactorizationModel model = ALS.train(data.rdd(), features, iterations);
+ validatePrediction(model, users, products, features, testData._2(), 0.3);
+ }
+
+ @Test
+ public void runALSUsingConstructor() {
+ int features = 2;
+ int iterations = 15;
+ int users = 20;
+ int products = 30;
+ scala.Tuple2<List<Rating>, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
+ users, products, features, 0.7);
+
+ JavaRDD<Rating> data = sc.parallelize(testData._1());
+
+ MatrixFactorizationModel model = new ALS().setRank(features)
+ .setIterations(iterations)
+ .run(data.rdd());
+ validatePrediction(model, users, products, features, testData._2(), 0.3);
+ }
+}
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java
new file mode 100644
index 0000000000..f44b25cd44
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.util.LinearDataGenerator;
+
+public class JavaLassoSuite implements Serializable {
+ private transient JavaSparkContext sc;
+
+ @Before
+ public void setUp() {
+ sc = new JavaSparkContext("local", "JavaLassoSuite");
+ }
+
+ @After
+ public void tearDown() {
+ sc.stop();
+ sc = null;
+ System.clearProperty("spark.driver.port");
+ }
+
+ int validatePrediction(List<LabeledPoint> validationData, LassoModel model) {
+ int numAccurate = 0;
+ for (LabeledPoint point: validationData) {
+ Double prediction = model.predict(point.features());
+ // A prediction is off if the prediction is more than 0.5 away from expected value.
+ if (Math.abs(prediction - point.label()) <= 0.5) {
+ numAccurate++;
+ }
+ }
+ return numAccurate;
+ }
+
+ @Test
+ public void runLassoUsingConstructor() {
+ int nPoints = 10000;
+ double A = 2.0;
+ double[] weights = {-1.5, 1.0e-2};
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
+ weights, nPoints, 42, 0.1), 2).cache();
+ List<LabeledPoint> validationData =
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
+
+ LassoWithSGD lassoSGDImpl = new LassoWithSGD();
+ lassoSGDImpl.optimizer().setStepSize(1.0)
+ .setRegParam(0.01)
+ .setNumIterations(20);
+ LassoModel model = lassoSGDImpl.run(testRDD.rdd());
+
+ int numAccurate = validatePrediction(validationData, model);
+ Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
+ }
+
+ @Test
+ public void runLassoUsingStaticMethods() {
+ int nPoints = 10000;
+ double A = 2.0;
+ double[] weights = {-1.5, 1.0e-2};
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
+ weights, nPoints, 42, 0.1), 2).cache();
+ List<LabeledPoint> validationData =
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
+
+ LassoModel model = LassoWithSGD.train(testRDD.rdd(), 100, 1.0, 0.01, 1.0);
+
+ int numAccurate = validatePrediction(validationData, model);
+ Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
+ }
+
+}
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java
new file mode 100644
index 0000000000..5a4410a632
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.util.LinearDataGenerator;
+
+public class JavaLinearRegressionSuite implements Serializable {
+ private transient JavaSparkContext sc;
+
+ @Before
+ public void setUp() {
+ sc = new JavaSparkContext("local", "JavaLinearRegressionSuite");
+ }
+
+ @After
+ public void tearDown() {
+ sc.stop();
+ sc = null;
+ System.clearProperty("spark.driver.port");
+ }
+
+ int validatePrediction(List<LabeledPoint> validationData, LinearRegressionModel model) {
+ int numAccurate = 0;
+ for (LabeledPoint point: validationData) {
+ Double prediction = model.predict(point.features());
+ // A prediction is off if the prediction is more than 0.5 away from expected value.
+ if (Math.abs(prediction - point.label()) <= 0.5) {
+ numAccurate++;
+ }
+ }
+ return numAccurate;
+ }
+
+ @Test
+ public void runLinearRegressionUsingConstructor() {
+ int nPoints = 100;
+ double A = 3.0;
+ double[] weights = {10, 10};
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache();
+ List<LabeledPoint> validationData =
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
+
+ LinearRegressionWithSGD linSGDImpl = new LinearRegressionWithSGD();
+ LinearRegressionModel model = linSGDImpl.run(testRDD.rdd());
+
+ int numAccurate = validatePrediction(validationData, model);
+ Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
+ }
+
+ @Test
+ public void runLinearRegressionUsingStaticMethods() {
+ int nPoints = 100;
+ double A = 3.0;
+ double[] weights = {10, 10};
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache();
+ List<LabeledPoint> validationData =
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
+
+ LinearRegressionModel model = LinearRegressionWithSGD.train(testRDD.rdd(), 100);
+
+ int numAccurate = validatePrediction(validationData, model);
+ Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
+ }
+
+}
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
new file mode 100644
index 0000000000..2fdd5fc8fd
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.jblas.DoubleMatrix;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.util.LinearDataGenerator;
+
+public class JavaRidgeRegressionSuite implements Serializable {
+ private transient JavaSparkContext sc;
+
+ @Before
+ public void setUp() {
+ sc = new JavaSparkContext("local", "JavaRidgeRegressionSuite");
+ }
+
+ @After
+ public void tearDown() {
+ sc.stop();
+ sc = null;
+ System.clearProperty("spark.driver.port");
+ }
+
+ double predictionError(List<LabeledPoint> validationData, RidgeRegressionModel model) {
+ double errorSum = 0;
+ for (LabeledPoint point: validationData) {
+ Double prediction = model.predict(point.features());
+ errorSum += (prediction - point.label()) * (prediction - point.label());
+ }
+ return errorSum / validationData.size();
+ }
+
+ List<LabeledPoint> generateRidgeData(int numPoints, int nfeatures, double eps) {
+ org.jblas.util.Random.seed(42);
+ // Pick weights as random values distributed uniformly in [-0.5, 0.5]
+ DoubleMatrix w = DoubleMatrix.rand(nfeatures, 1).subi(0.5);
+ // Set first two weights to eps
+ w.put(0, 0, eps);
+ w.put(1, 0, eps);
+ return LinearDataGenerator.generateLinearInputAsList(0.0, w.data, numPoints, 42, eps);
+ }
+
+ @Test
+ public void runRidgeRegressionUsingConstructor() {
+ int nexamples = 200;
+ int nfeatures = 20;
+ double eps = 10.0;
+ List<LabeledPoint> data = generateRidgeData(2*nexamples, nfeatures, eps);
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(data.subList(0, nexamples));
+ List<LabeledPoint> validationData = data.subList(nexamples, 2*nexamples);
+
+ RidgeRegressionWithSGD ridgeSGDImpl = new RidgeRegressionWithSGD();
+ ridgeSGDImpl.optimizer().setStepSize(1.0)
+ .setRegParam(0.0)
+ .setNumIterations(200);
+ RidgeRegressionModel model = ridgeSGDImpl.run(testRDD.rdd());
+ double unRegularizedErr = predictionError(validationData, model);
+
+ ridgeSGDImpl.optimizer().setRegParam(0.1);
+ model = ridgeSGDImpl.run(testRDD.rdd());
+ double regularizedErr = predictionError(validationData, model);
+
+ Assert.assertTrue(regularizedErr < unRegularizedErr);
+ }
+
+ @Test
+ public void runRidgeRegressionUsingStaticMethods() {
+ int nexamples = 200;
+ int nfeatures = 20;
+ double eps = 10.0;
+ List<LabeledPoint> data = generateRidgeData(2*nexamples, nfeatures, eps);
+
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(data.subList(0, nexamples));
+ List<LabeledPoint> validationData = data.subList(nexamples, 2*nexamples);
+
+ RidgeRegressionModel model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.0);
+ double unRegularizedErr = predictionError(validationData, model);
+
+ model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.1);
+ double regularizedErr = predictionError(validationData, model);
+
+ Assert.assertTrue(regularizedErr < unRegularizedErr);
+ }
+}
diff --git a/mllib/src/test/resources/log4j.properties b/mllib/src/test/resources/log4j.properties
index a112e0b506..4265ba6e5d 100644
--- a/mllib/src/test/resources/log4j.properties
+++ b/mllib/src/test/resources/log4j.properties
@@ -19,7 +19,7 @@
log4j.rootCategory=INFO, file
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=false
-log4j.appender.file.file=ml/target/unit-tests.log
+log4j.appender.file.file=mllib/target/unit-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
new file mode 100644
index 0000000000..34c67294e9
--- /dev/null
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.classification
+
+import scala.util.Random
+import scala.collection.JavaConversions._
+
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.FunSuite
+import org.scalatest.matchers.ShouldMatchers
+
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.regression._
+
+object LogisticRegressionSuite {
+
+ def generateLogisticInputAsList(
+ offset: Double,
+ scale: Double,
+ nPoints: Int,
+ seed: Int): java.util.List[LabeledPoint] = {
+ seqAsJavaList(generateLogisticInput(offset, scale, nPoints, seed))
+ }
+
+ // Generate input of the form Y = logistic(offset + scale*X)
+ def generateLogisticInput(
+ offset: Double,
+ scale: Double,
+ nPoints: Int,
+ seed: Int): Seq[LabeledPoint] = {
+ val rnd = new Random(seed)
+ val x1 = Array.fill[Double](nPoints)(rnd.nextGaussian())
+
+ // NOTE: if U is uniform[0, 1] then ln(u) - ln(1-u) is Logistic(0,1)
+ val unifRand = new scala.util.Random(45)
+ val rLogis = (0 until nPoints).map { i =>
+ val u = unifRand.nextDouble()
+ math.log(u) - math.log(1.0-u)
+ }
+
+ // y <- A + B*x + rLogis()
+ // y <- as.numeric(y > 0)
+ val y: Seq[Int] = (0 until nPoints).map { i =>
+ val yVal = offset + scale * x1(i) + rLogis(i)
+ if (yVal > 0) 1 else 0
+ }
+
+ val testData = (0 until nPoints).map(i => LabeledPoint(y(i), Array(x1(i))))
+ testData
+ }
+
+}
+
+class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll with ShouldMatchers {
+ @transient private var sc: SparkContext = _
+
+ override def beforeAll() {
+ sc = new SparkContext("local", "test")
+ }
+
+
+ override def afterAll() {
+ sc.stop()
+ System.clearProperty("spark.driver.port")
+ }
+
+ def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
+ val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
+ (prediction != expected.label)
+ }.size
+ // At least 83% of the predictions should be on.
+ ((input.length - numOffPredictions).toDouble / input.length) should be > 0.83
+ }
+
+ // Test if we can correctly learn A, B where Y = logistic(A + B*X)
+ test("logistic regression") {
+ val nPoints = 10000
+ val A = 2.0
+ val B = -1.5
+
+ val testData = LogisticRegressionSuite.generateLogisticInput(A, B, nPoints, 42)
+
+ val testRDD = sc.parallelize(testData, 2)
+ testRDD.cache()
+ val lr = new LogisticRegressionWithSGD()
+ lr.optimizer.setStepSize(10.0).setNumIterations(20)
+
+ val model = lr.run(testRDD)
+
+ // Test the weights
+ val weight0 = model.weights(0)
+ assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]")
+ assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]")
+
+ val validationData = LogisticRegressionSuite.generateLogisticInput(A, B, nPoints, 17)
+ val validationRDD = sc.parallelize(validationData, 2)
+ // Test prediction on RDD.
+ validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
+
+ // Test prediction on Array.
+ validatePrediction(validationData.map(row => model.predict(row.features)), validationData)
+ }
+
+ test("logistic regression with initial weights") {
+ val nPoints = 10000
+ val A = 2.0
+ val B = -1.5
+
+ val testData = LogisticRegressionSuite.generateLogisticInput(A, B, nPoints, 42)
+
+ val initialB = -1.0
+ val initialWeights = Array(initialB)
+
+ val testRDD = sc.parallelize(testData, 2)
+ testRDD.cache()
+
+ // Use half as many iterations as the previous test.
+ val lr = new LogisticRegressionWithSGD()
+ lr.optimizer.setStepSize(10.0).setNumIterations(10)
+
+ val model = lr.run(testRDD, initialWeights)
+
+ val weight0 = model.weights(0)
+ assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]")
+ assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]")
+
+ val validationData = LogisticRegressionSuite.generateLogisticInput(A, B, nPoints, 17)
+ val validationRDD = sc.parallelize(validationData, 2)
+ // Test prediction on RDD.
+ validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
+
+ // Test prediction on Array.
+ validatePrediction(validationData.map(row => model.predict(row.features)), validationData)
+ }
+}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
new file mode 100644
index 0000000000..6a957e3ddc
--- /dev/null
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.classification
+
+import scala.util.Random
+import scala.math.signum
+import scala.collection.JavaConversions._
+
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.FunSuite
+
+import org.jblas.DoubleMatrix
+
+import org.apache.spark.{SparkException, SparkContext}
+import org.apache.spark.mllib.regression._
+
+object SVMSuite {
+
+ def generateSVMInputAsList(
+ intercept: Double,
+ weights: Array[Double],
+ nPoints: Int,
+ seed: Int): java.util.List[LabeledPoint] = {
+ seqAsJavaList(generateSVMInput(intercept, weights, nPoints, seed))
+ }
+
+ // Generate noisy input of the form Y = signum(x.dot(weights) + intercept + noise)
+ def generateSVMInput(
+ intercept: Double,
+ weights: Array[Double],
+ nPoints: Int,
+ seed: Int): Seq[LabeledPoint] = {
+ val rnd = new Random(seed)
+ val weightsMat = new DoubleMatrix(1, weights.length, weights:_*)
+ val x = Array.fill[Array[Double]](nPoints)(
+ Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
+ val y = x.map { xi =>
+ val yD = (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) +
+ intercept + 0.01 * rnd.nextGaussian()
+ if (yD < 0) 0.0 else 1.0
+ }
+ y.zip(x).map(p => LabeledPoint(p._1, p._2))
+ }
+
+}
+
+class SVMSuite extends FunSuite with BeforeAndAfterAll {
+ @transient private var sc: SparkContext = _
+
+ override def beforeAll() {
+ sc = new SparkContext("local", "test")
+ }
+
+ override def afterAll() {
+ sc.stop()
+ System.clearProperty("spark.driver.port")
+ }
+
+ def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
+ val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
+ (prediction != expected.label)
+ }.size
+ // At least 80% of the predictions should be on.
+ assert(numOffPredictions < input.length / 5)
+ }
+
+
+ test("SVM using local random SGD") {
+ val nPoints = 10000
+
+ // NOTE: Intercept should be small for generating equal 0s and 1s
+ val A = 0.01
+ val B = -1.5
+ val C = 1.0
+
+ val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42)
+
+ val testRDD = sc.parallelize(testData, 2)
+ testRDD.cache()
+
+ val svm = new SVMWithSGD()
+ svm.optimizer.setStepSize(1.0).setRegParam(1.0).setNumIterations(100)
+
+ val model = svm.run(testRDD)
+
+ val validationData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 17)
+ val validationRDD = sc.parallelize(validationData, 2)
+
+ // Test prediction on RDD.
+ validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
+
+ // Test prediction on Array.
+ validatePrediction(validationData.map(row => model.predict(row.features)), validationData)
+ }
+
+ test("SVM local random SGD with initial weights") {
+ val nPoints = 10000
+
+ // NOTE: Intercept should be small for generating equal 0s and 1s
+ val A = 0.01
+ val B = -1.5
+ val C = 1.0
+
+ val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42)
+
+ val initialB = -1.0
+ val initialC = -1.0
+ val initialWeights = Array(initialB,initialC)
+
+ val testRDD = sc.parallelize(testData, 2)
+ testRDD.cache()
+
+ val svm = new SVMWithSGD()
+ svm.optimizer.setStepSize(1.0).setRegParam(1.0).setNumIterations(100)
+
+ val model = svm.run(testRDD, initialWeights)
+
+ val validationData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 17)
+ val validationRDD = sc.parallelize(validationData,2)
+
+ // Test prediction on RDD.
+ validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
+
+ // Test prediction on Array.
+ validatePrediction(validationData.map(row => model.predict(row.features)), validationData)
+ }
+
+ test("SVM with invalid labels") {
+ val nPoints = 10000
+
+ // NOTE: Intercept should be small for generating equal 0s and 1s
+ val A = 0.01
+ val B = -1.5
+ val C = 1.0
+
+ val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42)
+ val testRDD = sc.parallelize(testData, 2)
+
+ val testRDDInvalid = testRDD.map { lp =>
+ if (lp.label == 0.0) {
+ LabeledPoint(-1.0, lp.features)
+ } else {
+ lp
+ }
+ }
+
+ intercept[SparkException] {
+ val model = SVMWithSGD.train(testRDDInvalid, 100)
+ }
+
+ // Turning off data validation should not throw an exception
+ val noValidationModel = new SVMWithSGD().setValidateData(false).run(testRDDInvalid)
+ }
+}
diff --git a/mllib/src/test/scala/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
index bebade9afb..94245f6027 100644
--- a/mllib/src/test/scala/spark/mllib/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
@@ -15,21 +15,24 @@
* limitations under the License.
*/
-package spark.mllib.clustering
+package org.apache.spark.mllib.clustering
import scala.util.Random
import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import org.jblas._
-
class KMeansSuite extends FunSuite with BeforeAndAfterAll {
- val sc = new SparkContext("local", "test")
+ @transient private var sc: SparkContext = _
+
+ override def beforeAll() {
+ sc = new SparkContext("local", "test")
+ }
override def afterAll() {
sc.stop()
diff --git a/mllib/src/test/scala/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index f98590b8d9..347ef238f4 100644
--- a/mllib/src/test/scala/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -15,21 +15,62 @@
* limitations under the License.
*/
-package spark.mllib.recommendation
+package org.apache.spark.mllib.recommendation
+import scala.collection.JavaConversions._
import scala.util.Random
import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import org.jblas._
+object ALSSuite {
+
+ def generateRatingsAsJavaList(
+ users: Int,
+ products: Int,
+ features: Int,
+ samplingRate: Double): (java.util.List[Rating], DoubleMatrix) = {
+ val (sampledRatings, trueRatings) = generateRatings(users, products, features, samplingRate)
+ (seqAsJavaList(sampledRatings), trueRatings)
+ }
+
+ def generateRatings(
+ users: Int,
+ products: Int,
+ features: Int,
+ samplingRate: Double): (Seq[Rating], DoubleMatrix) = {
+ val rand = new Random(42)
+
+ // Create a random matrix with uniform values from -1 to 1
+ def randomMatrix(m: Int, n: Int) =
+ new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): _*)
+
+ val userMatrix = randomMatrix(users, features)
+ val productMatrix = randomMatrix(features, products)
+ val trueRatings = userMatrix.mmul(productMatrix)
+
+ val sampledRatings = {
+ for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < samplingRate)
+ yield Rating(u, p, trueRatings.get(u, p))
+ }
+
+ (sampledRatings, trueRatings)
+ }
+
+}
+
class ALSSuite extends FunSuite with BeforeAndAfterAll {
- val sc = new SparkContext("local", "test")
+ @transient private var sc: SparkContext = _
+
+ override def beforeAll() {
+ sc = new SparkContext("local", "test")
+ }
override def afterAll() {
sc.stop()
@@ -57,21 +98,8 @@ class ALSSuite extends FunSuite with BeforeAndAfterAll {
def testALS(users: Int, products: Int, features: Int, iterations: Int,
samplingRate: Double, matchThreshold: Double)
{
- val rand = new Random(42)
-
- // Create a random matrix with uniform values from -1 to 1
- def randomMatrix(m: Int, n: Int) =
- new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): _*)
-
- val userMatrix = randomMatrix(users, features)
- val productMatrix = randomMatrix(features, products)
- val trueRatings = userMatrix.mmul(productMatrix)
-
- val sampledRatings = {
- for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < samplingRate)
- yield (u, p, trueRatings.get(u, p))
- }
-
+ val (sampledRatings, trueRatings) = ALSSuite.generateRatings(users, products,
+ features, samplingRate)
val model = ALS.train(sc.parallelize(sampledRatings), features, iterations)
val predictedU = new DoubleMatrix(users, features)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
new file mode 100644
index 0000000000..db980c7bae
--- /dev/null
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression
+
+import scala.collection.JavaConversions._
+import scala.util.Random
+
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.FunSuite
+
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.util.LinearDataGenerator
+
+
+class LassoSuite extends FunSuite with BeforeAndAfterAll {
+ @transient private var sc: SparkContext = _
+
+ override def beforeAll() {
+ sc = new SparkContext("local", "test")
+ }
+
+
+ override def afterAll() {
+ sc.stop()
+ System.clearProperty("spark.driver.port")
+ }
+
+ def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
+ val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
+ // A prediction is off if the prediction is more than 0.5 away from expected value.
+ math.abs(prediction - expected.label) > 0.5
+ }.size
+ // At least 80% of the predictions should be on.
+ assert(numOffPredictions < input.length / 5)
+ }
+
+ test("Lasso local random SGD") {
+ val nPoints = 10000
+
+ val A = 2.0
+ val B = -1.5
+ val C = 1.0e-2
+
+ val testData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 42)
+
+ val testRDD = sc.parallelize(testData, 2)
+ testRDD.cache()
+
+ val ls = new LassoWithSGD()
+ ls.optimizer.setStepSize(1.0).setRegParam(0.01).setNumIterations(20)
+
+ val model = ls.run(testRDD)
+
+ val weight0 = model.weights(0)
+ val weight1 = model.weights(1)
+ assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]")
+ assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]")
+ assert(weight1 >= -1.0e-3 && weight1 <= 1.0e-3, weight1 + " not in [-0.001, 0.001]")
+
+ val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17)
+ val validationRDD = sc.parallelize(validationData, 2)
+
+ // Test prediction on RDD.
+ validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
+
+ // Test prediction on Array.
+ validatePrediction(validationData.map(row => model.predict(row.features)), validationData)
+ }
+
+ test("Lasso local random SGD with initial weights") {
+ val nPoints = 10000
+
+ val A = 2.0
+ val B = -1.5
+ val C = 1.0e-2
+
+ val testData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 42)
+
+ val initialB = -1.0
+ val initialC = -1.0
+ val initialWeights = Array(initialB,initialC)
+
+ val testRDD = sc.parallelize(testData, 2)
+ testRDD.cache()
+
+ val ls = new LassoWithSGD()
+ ls.optimizer.setStepSize(1.0).setRegParam(0.01).setNumIterations(20)
+
+ val model = ls.run(testRDD, initialWeights)
+
+ val weight0 = model.weights(0)
+ val weight1 = model.weights(1)
+ assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]")
+ assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]")
+ assert(weight1 >= -1.0e-3 && weight1 <= 1.0e-3, weight1 + " not in [-0.001, 0.001]")
+
+ val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17)
+ val validationRDD = sc.parallelize(validationData,2)
+
+ // Test prediction on RDD.
+ validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
+
+ // Test prediction on Array.
+ validatePrediction(validationData.map(row => model.predict(row.features)), validationData)
+ }
+}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
new file mode 100644
index 0000000000..ef500c704c
--- /dev/null
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression
+
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.FunSuite
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
+import org.apache.spark.mllib.util.LinearDataGenerator
+
+class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
+ @transient private var sc: SparkContext = _
+
+ override def beforeAll() {
+ sc = new SparkContext("local", "test")
+ }
+
+ override def afterAll() {
+ sc.stop()
+ System.clearProperty("spark.driver.port")
+ }
+
+ def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
+ val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
+ // A prediction is off if the prediction is more than 0.5 away from expected value.
+ math.abs(prediction - expected.label) > 0.5
+ }.size
+ // At least 80% of the predictions should be on.
+ assert(numOffPredictions < input.length / 5)
+ }
+
+ // Test if we can correctly learn Y = 3 + 10*X1 + 10*X2
+ test("linear regression") {
+ val testRDD = sc.parallelize(LinearDataGenerator.generateLinearInput(
+ 3.0, Array(10.0, 10.0), 100, 42), 2).cache()
+ val linReg = new LinearRegressionWithSGD()
+ linReg.optimizer.setNumIterations(1000).setStepSize(1.0)
+
+ val model = linReg.run(testRDD)
+
+ assert(model.intercept >= 2.5 && model.intercept <= 3.5)
+ assert(model.weights.length === 2)
+ assert(model.weights(0) >= 9.0 && model.weights(0) <= 11.0)
+ assert(model.weights(1) >= 9.0 && model.weights(1) <= 11.0)
+
+ val validationData = LinearDataGenerator.generateLinearInput(
+ 3.0, Array(10.0, 10.0), 100, 17)
+ val validationRDD = sc.parallelize(validationData, 2).cache()
+
+ // Test prediction on RDD.
+ validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
+
+ // Test prediction on Array.
+ validatePrediction(validationData.map(row => model.predict(row.features)), validationData)
+ }
+}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
new file mode 100644
index 0000000000..c18092d804
--- /dev/null
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.regression
+
+import scala.collection.JavaConversions._
+import scala.util.Random
+
+import org.jblas.DoubleMatrix
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.FunSuite
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
+import org.apache.spark.mllib.util.LinearDataGenerator
+
+class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll {
+ @transient private var sc: SparkContext = _
+
+ override def beforeAll() {
+ sc = new SparkContext("local", "test")
+ }
+
+ override def afterAll() {
+ sc.stop()
+ System.clearProperty("spark.driver.port")
+ }
+
+ def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]) = {
+ predictions.zip(input).map { case (prediction, expected) =>
+ (prediction - expected.label) * (prediction - expected.label)
+ }.reduceLeft(_ + _) / predictions.size
+ }
+
+ test("regularization with skewed weights") {
+ val nexamples = 200
+ val nfeatures = 20
+ val eps = 10
+
+ org.jblas.util.Random.seed(42)
+ // Pick weights as random values distributed uniformly in [-0.5, 0.5]
+ val w = DoubleMatrix.rand(nfeatures, 1).subi(0.5)
+ // Set first two weights to eps
+ w.put(0, 0, eps)
+ w.put(1, 0, eps)
+
+ // Use half of data for training and other half for validation
+ val data = LinearDataGenerator.generateLinearInput(3.0, w.toArray, 2*nexamples, 42, eps)
+ val testData = data.take(nexamples)
+ val validationData = data.takeRight(nexamples)
+
+ val testRDD = sc.parallelize(testData, 2).cache()
+ val validationRDD = sc.parallelize(validationData, 2).cache()
+
+ // First run without regularization.
+ val linearReg = new LinearRegressionWithSGD()
+ linearReg.optimizer.setNumIterations(200)
+ .setStepSize(1.0)
+
+ val linearModel = linearReg.run(testRDD)
+ val linearErr = predictionError(
+ linearModel.predict(validationRDD.map(_.features)).collect(), validationData)
+
+ val ridgeReg = new RidgeRegressionWithSGD()
+ ridgeReg.optimizer.setNumIterations(200)
+ .setRegParam(0.1)
+ .setStepSize(1.0)
+ val ridgeModel = ridgeReg.run(testRDD)
+ val ridgeErr = predictionError(
+ ridgeModel.predict(validationRDD.map(_.features)).collect(), validationData)
+
+ // Ridge CV-error should be lower than linear regression
+ assert(ridgeErr < linearErr,
+ "ridgeError (" + ridgeErr + ") was not less than linearError(" + linearErr + ")")
+ }
+}
diff --git a/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala b/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala
deleted file mode 100644
index bc9bfd054f..0000000000
--- a/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.mllib.regression
-
-import scala.util.Random
-
-import org.scalatest.BeforeAndAfterAll
-import org.scalatest.FunSuite
-
-import spark.SparkContext
-import spark.SparkContext._
-
-
-class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll {
- val sc = new SparkContext("local", "test")
-
- override def afterAll() {
- sc.stop()
- System.clearProperty("spark.driver.port")
- }
-
- // Test if we can correctly learn A, B where Y = logistic(A + B*X)
- test("logistic regression") {
- val nPoints = 10000
- val rnd = new Random(42)
-
- val x1 = Array.fill[Double](nPoints)(rnd.nextGaussian())
-
- val A = 2.0
- val B = -1.5
-
- // NOTE: if U is uniform[0, 1] then ln(u) - ln(1-u) is Logistic(0,1)
- val unifRand = new scala.util.Random(45)
- val rLogis = (0 until nPoints).map { i =>
- val u = unifRand.nextDouble()
- math.log(u) - math.log(1.0-u)
- }
-
- // y <- A + B*x + rlogis(100)
- // y <- as.numeric(y > 0)
- val y = (0 until nPoints).map { i =>
- val yVal = A + B * x1(i) + rLogis(i)
- if (yVal > 0) 1.0 else 0.0
- }
-
- val testData = (0 until nPoints).map(i => (y(i).toDouble, Array(x1(i)))).toArray
-
- val testRDD = sc.parallelize(testData, 2)
- testRDD.cache()
- val lr = new LogisticRegression().setStepSize(10.0)
- .setNumIterations(20)
-
- val model = lr.train(testRDD)
-
- val weight0 = model.weights.get(0)
- assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]")
- assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]")
- }
-}
diff --git a/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala
deleted file mode 100644
index 3c588c6162..0000000000
--- a/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package spark.mllib.regression
-
-import scala.util.Random
-
-import org.scalatest.BeforeAndAfterAll
-import org.scalatest.FunSuite
-
-import spark.SparkContext
-import spark.SparkContext._
-
-
-class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll {
- val sc = new SparkContext("local", "test")
-
- override def afterAll() {
- sc.stop()
- System.clearProperty("spark.driver.port")
- }
-
- // Test if we can correctly learn Y = 3 + X1 + X2 when
- // X1 and X2 are collinear.
- test("multi-collinear variables") {
- val rnd = new Random(43)
- val x1 = Array.fill[Double](20)(rnd.nextGaussian())
-
- // Pick a mean close to mean of x1
- val rnd1 = new Random(42) //new NormalDistribution(0.1, 0.01)
- val x2 = Array.fill[Double](20)(0.1 + rnd1.nextGaussian() * 0.01)
-
- val xMat = (0 until 20).map(i => Array(x1(i), x2(i))).toArray
-
- val y = xMat.map(i => 3 + i(0) + i(1))
- val testData = (0 until 20).map(i => (y(i), xMat(i))).toArray
-
- val testRDD = sc.parallelize(testData, 2)
- testRDD.cache()
- val ridgeReg = new RidgeRegression().setLowLambda(0)
- .setHighLambda(10)
-
- val model = ridgeReg.train(testRDD)
-
- assert(model.intercept >= 2.9 && model.intercept <= 3.1)
- assert(model.weights.length === 2)
- assert(model.weights.get(0) >= 0.9 && model.weights.get(0) <= 1.1)
- assert(model.weights.get(1) >= 0.9 && model.weights.get(1) <= 1.1)
- }
-}
diff --git a/pagerank_data.txt b/pagerank_data.txt
new file mode 100644
index 0000000000..95755ab8f5
--- /dev/null
+++ b/pagerank_data.txt
@@ -0,0 +1,6 @@
+1 2
+1 3
+1 4
+2 1
+3 1
+4 1
diff --git a/pom.xml b/pom.xml
index 4b48072c6e..6a73e81da0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -18,22 +18,22 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<packaging>pom</packaging>
<name>Spark Project Parent POM</name>
- <url>http://spark-project.org/</url>
+ <url>http://spark.incubator.apache.org/</url>
<licenses>
<license>
- <name>BSD License</name>
- <url>https://github.com/mesos/spark/blob/master/LICENSE</url>
+ <name>Apache 2.0 License</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.html</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
- <connection>scm:git:git@github.com:mesos/spark.git</connection>
- <url>scm:git:git@github.com:mesos/spark.git</url>
+ <connection>scm:git:git@github.com:apache/incubator-spark.git</connection>
+ <url>scm:git:git@github.com:apache/incubator-spark.git</url>
</scm>
<developers>
<developer>
@@ -41,12 +41,12 @@
<name>Matei Zaharia</name>
<email>matei.zaharia@gmail.com</email>
<url>http://www.cs.berkeley.edu/~matei</url>
- <organization>U.C. Berkeley Computer Science</organization>
- <organizationUrl>http://www.cs.berkeley.edu/</organizationUrl>
+ <organization>Apache Software Foundation</organization>
+ <organizationUrl>http://spark.incubator.apache.org</organizationUrl>
</developer>
</developers>
<issueManagement>
- <system>github</system>
+ <system>JIRA</system>
<url>https://spark-project.atlassian.net/browse/SPARK</url>
</issueManagement>
@@ -58,9 +58,11 @@
<module>core</module>
<module>bagel</module>
<module>examples</module>
+ <module>mllib</module>
+ <module>tools</module>
<module>streaming</module>
<module>repl</module>
- <module>repl-bin</module>
+ <module>assembly</module>
</modules>
<properties>
@@ -69,11 +71,12 @@
<java.version>1.5</java.version>
<scala.version>2.9.3</scala.version>
- <mesos.version>0.9.0-incubating</mesos.version>
- <akka.version>2.0.3</akka.version>
+ <mesos.version>0.12.1</mesos.version>
+ <akka.version>2.0.5</akka.version>
<slf4j.version>1.7.2</slf4j.version>
- <cdh.version>4.1.2</cdh.version>
<log4j.version>1.2.17</log4j.version>
+ <hadoop.version>1.0.4</hadoop.version>
+ <hbase.version>0.94.6</hbase.version>
<PermGen>64m</PermGen>
<MaxPermGen>512m</MaxPermGen>
@@ -155,12 +158,17 @@
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
- <version>7.5.3.v20111011</version>
+ <version>7.6.8.v20121106</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
- <version>11.0.1</version>
+ <version>14.0.1</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ <version>1.3.9</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
@@ -183,6 +191,11 @@
<version>0.8.4</version>
</dependency>
<dependency>
+ <groupId>org.xerial.snappy</groupId>
+ <artifactId>snappy-java</artifactId>
+ <version>1.0.5</version>
+ </dependency>
+ <dependency>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
<version>4.0</version>
@@ -193,9 +206,14 @@
<version>2.4.1</version>
</dependency>
<dependency>
- <groupId>de.javakaffee</groupId>
- <artifactId>kryo-serializers</artifactId>
- <version>0.22</version>
+ <groupId>com.twitter</groupId>
+ <artifactId>chill_2.9.3</artifactId>
+ <version>0.3.1</version>
+ </dependency>
+ <dependency>
+ <groupId>com.twitter</groupId>
+ <artifactId>chill-java</artifactId>
+ <version>0.3.1</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
@@ -249,6 +267,21 @@
<version>2.5</version>
</dependency>
<dependency>
+ <groupId>com.codahale.metrics</groupId>
+ <artifactId>metrics-core</artifactId>
+ <version>3.0.0</version>
+ </dependency>
+ <dependency>
+ <groupId>com.codahale.metrics</groupId>
+ <artifactId>metrics-jvm</artifactId>
+ <version>3.0.0</version>
+ </dependency>
+ <dependency>
+ <groupId>com.codahale.metrics</groupId>
+ <artifactId>metrics-json</artifactId>
+ <version>3.0.0</version>
+ </dependency>
+ <dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>${scala.version}</version>
@@ -296,9 +329,57 @@
<dependency>
<groupId>com.novocode</groupId>
<artifactId>junit-interface</artifactId>
- <version>0.8</version>
+ <version>0.9</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-jaxrs</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-xc</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <!-- Specify Avro version because Kafka also has it as a dependency -->
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ <version>1.7.4</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro-ipc</artifactId>
+ <version>1.7.4</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
</dependencies>
</dependencyManagement>
@@ -367,6 +448,7 @@
<args>
<arg>-unchecked</arg>
<arg>-optimise</arg>
+ <arg>-deprecation</arg>
</args>
<jvmArgs>
<jvmArg>-Xms64m</jvmArg>
@@ -411,7 +493,7 @@
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<junitxml>.</junitxml>
<filereports>${project.build.directory}/SparkTestSuite.txt</filereports>
- <argLine>-Xms64m -Xmx1024m</argLine>
+ <argLine>-Xms64m -Xmx3g</argLine>
<stderr/>
</configuration>
<executions>
@@ -505,68 +587,18 @@
<profiles>
<profile>
- <id>hadoop1</id>
- <properties>
- <hadoop.major.version>1</hadoop.major.version>
- </properties>
- <dependencyManagement>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>1.0.4</version>
- </dependency>
- </dependencies>
- </dependencyManagement>
- </profile>
-
- <profile>
- <id>hadoop2</id>
- <properties>
- <hadoop.major.version>2</hadoop.major.version>
- </properties>
- <dependencyManagement>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>2.0.0-mr1-cdh${cdh.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <version>2.0.0-mr1-cdh${cdh.version}</version>
- </dependency>
- <!-- Specify Avro version because Kafka also has it as a dependency -->
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- <version>1.7.4</version>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro-ipc</artifactId>
- <version>1.7.4</version>
- <exclusions>
- <exclusion>
- <groupId>org.jboss.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- </dependencies>
- </dependencyManagement>
- </profile>
-
- <profile>
<id>hadoop2-yarn</id>
<properties>
<hadoop.major.version>2</hadoop.major.version>
<!-- 0.23.* is same as 2.0.* - except hardened to run production jobs -->
- <!-- <yarn.version>0.23.7</yarn.version> -->
- <yarn.version>2.0.2-alpha</yarn.version>
+ <!-- <hadoop.version>0.23.7</hadoop.version> -->
+ <hadoop.version>2.0.5-alpha</hadoop.version>
</properties>
+ <modules>
+ <module>yarn</module>
+ </modules>
+
<repositories>
<repository>
<id>maven-root</id>
@@ -587,36 +619,138 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
- <version>${yarn.version}</version>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-jaxrs</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-xc</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
- <version>${yarn.version}</version>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-jaxrs</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-xc</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
- <version>${yarn.version}</version>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-jaxrs</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-xc</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
- <version>${yarn.version}</version>
- </dependency>
- <!-- Specify Avro version because Kafka also has it as a dependency -->
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- <version>1.7.4</version>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro-ipc</artifactId>
- <version>1.7.4</version>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-jaxrs</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-xc</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
</dependencies>
</dependencyManagement>
</profile>
+ <profile>
+ <id>repl-bin</id>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ </activation>
+ <modules>
+ <module>repl-bin</module>
+ </modules>
+ </profile>
</profiles>
</project>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 7a4d4c4575..b1b99b37c4 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -24,42 +24,57 @@ import AssemblyKeys._
//import com.jsuereth.pgp.sbtplugin.PgpKeys._
object SparkBuild extends Build {
- // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or
- // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop.
- val HADOOP_VERSION = "1.0.4"
- val HADOOP_MAJOR_VERSION = "1"
- val HADOOP_YARN = false
+ // Hadoop version to build against. For example, "1.0.4" for Apache releases, or
+ // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
+ // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
+ val DEFAULT_HADOOP_VERSION = "1.0.4"
+ val DEFAULT_YARN = false
- // For Hadoop 2 versions such as "2.0.0-mr1-cdh4.1.1", set the HADOOP_MAJOR_VERSION to "2"
- //val HADOOP_VERSION = "2.0.0-mr1-cdh4.1.1"
- //val HADOOP_MAJOR_VERSION = "2"
- //val HADOOP_YARN = false
+ // HBase version; set as appropriate.
+ val HBASE_VERSION = "0.94.6"
- // For Hadoop 2 YARN support
- //val HADOOP_VERSION = "2.0.2-alpha"
- //val HADOOP_MAJOR_VERSION = "2"
- //val HADOOP_YARN = true
-
- lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib)
+ lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects: _*)
lazy val core = Project("core", file("core"), settings = coreSettings)
- lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn (core)
+ lazy val repl = Project("repl", file("repl"), settings = replSettings)
+ .dependsOn(core, bagel, mllib) dependsOn(maybeYarn: _*)
+
+ lazy val examples = Project("examples", file("examples"), settings = examplesSettings)
+ .dependsOn(core, mllib, bagel, streaming) dependsOn(maybeYarn: _*)
+
+ lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming)
- lazy val examples = Project("examples", file("examples"), settings = examplesSettings) dependsOn (core) dependsOn (streaming)
+ lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn(core)
- lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn (core)
+ lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn(core)
- lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core)
+ lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core)
- lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn (core)
+ lazy val yarn = Project("yarn", file("yarn"), settings = yarnSettings) dependsOn(core)
+
+ lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings)
+ .dependsOn(core, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*)
// A configuration to set an alternative publishLocalConfiguration
lazy val MavenCompile = config("m2r") extend(Compile)
lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy")
+ // Allows build configuration to be set through environment variables
+ lazy val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
+ lazy val isYarnEnabled = scala.util.Properties.envOrNone("SPARK_YARN") match {
+ case None => DEFAULT_YARN
+ case Some(v) => v.toBoolean
+ }
+
+ // Conditionally include the yarn sub-project
+ lazy val maybeYarn = if(isYarnEnabled) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]()
+ lazy val maybeYarnRef = if(isYarnEnabled) Seq[ProjectReference](yarn) else Seq[ProjectReference]()
+ lazy val allProjects = Seq[ProjectReference](
+ core, repl, examples, bagel, streaming, mllib, tools, assemblyProj) ++ maybeYarnRef
+
def sharedSettings = Defaults.defaultSettings ++ Seq(
- organization := "org.spark-project",
+ organization := "org.apache.spark",
version := "0.8.0-SNAPSHOT",
scalaVersion := "2.9.3",
scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"),
@@ -71,7 +86,7 @@ object SparkBuild extends Build {
// Fork new JVMs for tests and set Java options for those
fork := true,
- javaOptions += "-Xmx2500m",
+ javaOptions += "-Xmx3g",
// Only allow one test at a time, even across projects, since they run in the same JVM
concurrentRestrictions in Global += Tags.limit(Tags.Test, 1),
@@ -88,17 +103,17 @@ object SparkBuild extends Build {
//useGpg in Global := true,
pomExtra := (
- <url>http://spark-project.org/</url>
+ <url>http://spark.incubator.apache.org/</url>
<licenses>
<license>
- <name>BSD License</name>
- <url>https://github.com/mesos/spark/blob/master/LICENSE</url>
+ <name>Apache 2.0 License</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.html</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
- <connection>scm:git:git@github.com:mesos/spark.git</connection>
- <url>scm:git:git@github.com:mesos/spark.git</url>
+ <connection>scm:git:git@github.com:apache/incubator-spark.git</connection>
+ <url>scm:git:git@github.com:apache/incubator-spark.git</url>
</scm>
<developers>
<developer>
@@ -106,10 +121,14 @@ object SparkBuild extends Build {
<name>Matei Zaharia</name>
<email>matei.zaharia@gmail.com</email>
<url>http://www.cs.berkeley.edu/~matei</url>
- <organization>U.C. Berkeley Computer Science</organization>
- <organizationUrl>http://www.cs.berkeley.edu/</organizationUrl>
+ <organization>Apache Software Foundation</organization>
+ <organizationUrl>http://spark.incubator.apache.org</organizationUrl>
</developer>
</developers>
+ <issueManagement>
+ <system>JIRA</system>
+ <url>https://spark-project.atlassian.net/browse/SPARK</url>
+ </issueManagement>
),
/*
@@ -124,7 +143,6 @@ object SparkBuild extends Build {
*/
libraryDependencies ++= Seq(
- "io.netty" % "netty" % "3.5.3.Final",
"org.eclipse.jetty" % "jetty-server" % "7.6.8.v20121106",
"org.scalatest" %% "scalatest" % "1.9.1" % "test",
"org.scalacheck" %% "scalacheck" % "1.10.0" % "test",
@@ -149,62 +167,45 @@ object SparkBuild extends Build {
val excludeJackson = ExclusionRule(organization = "org.codehaus.jackson")
val excludeNetty = ExclusionRule(organization = "org.jboss.netty")
val excludeAsm = ExclusionRule(organization = "asm")
+ val excludeSnappy = ExclusionRule(organization = "org.xerial.snappy")
def coreSettings = sharedSettings ++ Seq(
name := "spark-core",
resolvers ++= Seq(
"JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/",
- "Spray Repository" at "http://repo.spray.cc/",
"Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/"
),
libraryDependencies ++= Seq(
"com.google.guava" % "guava" % "14.0.1",
"com.google.code.findbugs" % "jsr305" % "1.3.9",
- "log4j" % "log4j" % "1.2.16",
+ "log4j" % "log4j" % "1.2.17",
"org.slf4j" % "slf4j-api" % slf4jVersion,
"org.slf4j" % "slf4j-log4j12" % slf4jVersion,
- "commons-daemon" % "commons-daemon" % "1.0.10",
+ "commons-daemon" % "commons-daemon" % "1.0.10", // workaround for bug HADOOP-9407
"com.ning" % "compress-lzf" % "0.8.4",
+ "org.xerial.snappy" % "snappy-java" % "1.0.5",
"org.ow2.asm" % "asm" % "4.0",
"com.google.protobuf" % "protobuf-java" % "2.4.1",
- "de.javakaffee" % "kryo-serializers" % "0.22",
- "com.typesafe.akka" % "akka-actor" % "2.0.3" excludeAll(excludeNetty),
- "com.typesafe.akka" % "akka-remote" % "2.0.3" excludeAll(excludeNetty),
- "com.typesafe.akka" % "akka-slf4j" % "2.0.3" excludeAll(excludeNetty),
+ "com.typesafe.akka" % "akka-actor" % "2.0.5" excludeAll(excludeNetty),
+ "com.typesafe.akka" % "akka-remote" % "2.0.5" excludeAll(excludeNetty),
+ "com.typesafe.akka" % "akka-slf4j" % "2.0.5" excludeAll(excludeNetty),
"it.unimi.dsi" % "fastutil" % "6.4.4",
"colt" % "colt" % "1.2.0",
"net.liftweb" % "lift-json_2.9.2" % "2.5",
- "org.apache.mesos" % "mesos" % "0.9.0-incubating",
+ "org.apache.mesos" % "mesos" % "0.12.1",
"io.netty" % "netty-all" % "4.0.0.Beta2",
- "org.apache.derby" % "derby" % "10.4.2.0" % "test"
- ) ++ (
- if (HADOOP_MAJOR_VERSION == "2") {
- if (HADOOP_YARN) {
- Seq(
- // Exclude rule required for all ?
- "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty),
- "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty),
- "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty),
- "org.apache.hadoop" % "hadoop-yarn-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty)
- )
- } else {
- Seq(
- "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty),
- "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty)
- )
- }
- } else {
- Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) )
- }),
- unmanagedSourceDirectories in Compile <+= baseDirectory{ _ /
- ( if (HADOOP_YARN && HADOOP_MAJOR_VERSION == "2") {
- "src/hadoop2-yarn/scala"
- } else {
- "src/hadoop" + HADOOP_MAJOR_VERSION + "/scala"
- } )
- }
- ) ++ assemblySettings ++ extraAssemblySettings
+ "org.apache.derby" % "derby" % "10.4.2.0" % "test",
+ "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
+ "org.apache.avro" % "avro" % "1.7.4",
+ "org.apache.avro" % "avro-ipc" % "1.7.4" excludeAll(excludeNetty),
+ "com.codahale.metrics" % "metrics-core" % "3.0.0",
+ "com.codahale.metrics" % "metrics-jvm" % "3.0.0",
+ "com.codahale.metrics" % "metrics-json" % "3.0.0",
+ "com.twitter" % "chill_2.9.3" % "0.3.1",
+ "com.twitter" % "chill-java" % "0.3.1"
+ )
+ )
def rootSettings = sharedSettings ++ Seq(
publish := {}
@@ -213,14 +214,14 @@ object SparkBuild extends Build {
def replSettings = sharedSettings ++ Seq(
name := "spark-repl",
libraryDependencies <+= scalaVersion("org.scala-lang" % "scala-compiler" % _)
- ) ++ assemblySettings ++ extraAssemblySettings
+ )
def examplesSettings = sharedSettings ++ Seq(
name := "spark-examples",
libraryDependencies ++= Seq(
"com.twitter" % "algebird-core_2.9.2" % "0.1.11",
- "org.apache.hbase" % "hbase" % "0.94.6" excludeAll(excludeNetty, excludeAsm),
+ "org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeNetty, excludeAsm),
"org.apache.cassandra" % "cassandra-all" % "1.2.5"
exclude("com.google.guava", "guava")
@@ -230,10 +231,17 @@ object SparkBuild extends Build {
exclude("jline","jline")
exclude("log4j","log4j")
exclude("org.apache.cassandra.deps", "avro")
+ excludeAll(excludeSnappy)
)
+ ) ++ assemblySettings ++ extraAssemblySettings
+
+ def toolsSettings = sharedSettings ++ Seq(
+ name := "spark-tools"
)
- def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")
+ def bagelSettings = sharedSettings ++ Seq(
+ name := "spark-bagel"
+ )
def mllibSettings = sharedSettings ++ Seq(
name := "spark-mllib",
@@ -248,14 +256,38 @@ object SparkBuild extends Build {
"Akka Repository" at "http://repo.akka.io/releases/"
),
libraryDependencies ++= Seq(
- "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty),
+ "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty, excludeSnappy),
"com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty),
"org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty),
- "com.typesafe.akka" % "akka-zeromq" % "2.0.3" excludeAll(excludeNetty)
+ "com.typesafe.akka" % "akka-zeromq" % "2.0.5" excludeAll(excludeNetty)
)
+ )
+
+ def yarnSettings = sharedSettings ++ Seq(
+ name := "spark-yarn"
+ ) ++ extraYarnSettings
+
+ // Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain
+ // if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN).
+ def extraYarnSettings = if(isYarnEnabled) yarnEnabledSettings else Seq()
+
+ def yarnEnabledSettings = Seq(
+ libraryDependencies ++= Seq(
+ // Exclude rule required for all ?
+ "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
+ "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
+ "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
+ "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm)
+ )
+ )
+
+ def assemblyProjSettings = sharedSettings ++ Seq(
+ name := "spark-assembly",
+ jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" }
) ++ assemblySettings ++ extraAssemblySettings
- def extraAssemblySettings() = Seq(test in assembly := {}) ++ Seq(
+ def extraAssemblySettings() = Seq(
+ test in assembly := {},
mergeStrategy in assembly := {
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
diff --git a/project/build.properties b/project/build.properties
index 08e17131f6..9647277162 100644
--- a/project/build.properties
+++ b/project/build.properties
@@ -15,4 +15,4 @@
# limitations under the License.
#
-sbt.version=0.12.3
+sbt.version=0.12.4
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 1b0f879b94..cfcd85082a 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -4,11 +4,11 @@ resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/release
resolvers += "Spray Repository" at "http://repo.spray.cc/"
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.5")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.1")
-addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.1.1")
+addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0")
-addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.2.0")
+addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.5.1")
// For Sonatype publishing
//resolvers += Resolver.url("sbt-plugin-releases", new URL("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases/"))(Resolver.ivyStylePatterns)
diff --git a/pyspark b/pyspark
index 37a355462e..4941a36d0d 100755
--- a/pyspark
+++ b/pyspark
@@ -23,11 +23,17 @@ FWDIR="$(cd `dirname $0`; pwd)"
# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"
+SCALA_VERSION=2.9.3
+
# Exit if the user hasn't compiled Spark
-if [ ! -e "$SPARK_HOME/repl/target" ]; then
- echo "Failed to find Spark classes in $SPARK_HOME/repl/target" >&2
- echo "You need to compile Spark before running this program" >&2
- exit 1
+if [ ! -f "$FWDIR/RELEASE" ]; then
+ # Exit if the user hasn't compiled Spark
+ ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
+ if [[ $? != 0 ]]; then
+ echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
+ echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
+ exit 1
+ fi
fi
# Load environment variables from conf/spark-env.sh, if it exists
@@ -48,14 +54,13 @@ export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
-# Launch with `scala` by default:
-if [[ "$SPARK_LAUNCH_WITH_SCALA" != "0" ]] ; then
- export SPARK_LAUNCH_WITH_SCALA=1
+if [ -n "$IPYTHON_OPTS" ]; then
+ IPYTHON=1
fi
if [[ "$IPYTHON" = "1" ]] ; then
- export PYSPARK_PYTHON="ipython"
- exec "$PYSPARK_PYTHON" -i -c "%run $PYTHONSTARTUP"
+ IPYTHON_OPTS=${IPYTHON_OPTS:--i}
+ exec ipython "$IPYTHON_OPTS" -c "%run $PYTHONSTARTUP"
else
- exec "$PYSPARK_PYTHON" "$@"
+ exec "$PYSPARK_PYTHON" "$@"
fi
diff --git a/python/examples/als.py b/python/examples/als.py
index f2b2eee64c..a77dfb2577 100755
--- a/python/examples/als.py
+++ b/python/examples/als.py
@@ -48,8 +48,7 @@ def update(i, vec, mat, ratings):
if __name__ == "__main__":
if len(sys.argv) < 2:
- print >> sys.stderr, \
- "Usage: PythonALS <master> <M> <U> <F> <iters> <slices>"
+ print >> sys.stderr, "Usage: als <master> <M> <U> <F> <iters> <slices>"
exit(-1)
sc = SparkContext(sys.argv[1], "PythonALS", pyFiles=[realpath(__file__)])
M = int(sys.argv[2]) if len(sys.argv) > 2 else 100
@@ -84,5 +83,5 @@ if __name__ == "__main__":
usb = sc.broadcast(us)
error = rmse(R, ms, us)
- print "Iteration %d:" % i
+ print "Iteration %d:" % i
print "\nRMSE: %5.4f\n" % error
diff --git a/python/examples/kmeans.py b/python/examples/kmeans.py
index c670556f2b..ba31af92fc 100644..100755
--- a/python/examples/kmeans.py
+++ b/python/examples/kmeans.py
@@ -41,8 +41,7 @@ def closestPoint(p, centers):
if __name__ == "__main__":
if len(sys.argv) < 5:
- print >> sys.stderr, \
- "Usage: PythonKMeans <master> <file> <k> <convergeDist>"
+ print >> sys.stderr, "Usage: kmeans <master> <file> <k> <convergeDist>"
exit(-1)
sc = SparkContext(sys.argv[1], "PythonKMeans")
lines = sc.textFile(sys.argv[2])
diff --git a/python/examples/logistic_regression.py b/python/examples/logistic_regression.py
index 54d227d0d3..1117dea538 100755
--- a/python/examples/logistic_regression.py
+++ b/python/examples/logistic_regression.py
@@ -16,7 +16,8 @@
#
"""
-This example requires numpy (http://www.numpy.org/)
+A logistic regression implementation that uses NumPy (http://www.numpy.org) to act on batches
+of input data using efficient matrix operations.
"""
from collections import namedtuple
from math import exp
@@ -27,48 +28,45 @@ import numpy as np
from pyspark import SparkContext
-N = 100000 # Number of data points
D = 10 # Number of dimensions
-R = 0.7 # Scaling factor
-ITERATIONS = 5
-np.random.seed(42)
-DataPoint = namedtuple("DataPoint", ['x', 'y'])
-from lr import DataPoint # So that DataPoint is properly serialized
-
-
-def generateData():
- def generatePoint(i):
- y = -1 if i % 2 == 0 else 1
- x = np.random.normal(size=D) + (y * R)
- return DataPoint(x, y)
- return [generatePoint(i) for i in range(N)]
-
+# Read a batch of points from the input file into a NumPy matrix object. We operate on batches to
+# make further computations faster.
+# The data file contains lines of the form <label> <x1> <x2> ... <xD>. We load each block of these
+# into a NumPy array of size numLines * (D + 1) and pull out column 0 vs the others in gradient().
+def readPointBatch(iterator):
+ strs = list(iterator)
+ matrix = np.zeros((len(strs), D + 1))
+ for i in xrange(len(strs)):
+ matrix[i] = np.fromstring(strs[i].replace(',', ' '), dtype=np.float32, sep=' ')
+ return [matrix]
if __name__ == "__main__":
- if len(sys.argv) == 1:
- print >> sys.stderr, \
- "Usage: PythonLR <master> [<slices>]"
+ if len(sys.argv) != 4:
+ print >> sys.stderr, "Usage: logistic_regression <master> <file> <iters>"
exit(-1)
sc = SparkContext(sys.argv[1], "PythonLR", pyFiles=[realpath(__file__)])
- slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
- points = sc.parallelize(generateData(), slices).cache()
+ points = sc.textFile(sys.argv[2]).mapPartitions(readPointBatch).cache()
+ iterations = int(sys.argv[3])
# Initialize w to a random value
w = 2 * np.random.ranf(size=D) - 1
print "Initial w: " + str(w)
+ # Compute logistic regression gradient for a matrix of data points
+ def gradient(matrix, w):
+ Y = matrix[:,0] # point labels (first column of input file)
+ X = matrix[:,1:] # point coordinates
+ # For each point (x, y), compute gradient function, then sum these up
+ return ((1.0 / (1.0 + np.exp(-Y * X.dot(w))) - 1.0) * Y * X.T).sum(1)
+
def add(x, y):
x += y
return x
- for i in range(1, ITERATIONS + 1):
- print "On iteration %i" % i
-
- gradient = points.map(lambda p:
- (1.0 / (1.0 + exp(-p.y * np.dot(w, p.x)))) * p.y * p.x
- ).reduce(add)
- w -= gradient
+ for i in range(iterations):
+ print "On iteration %i" % (i + 1)
+ w -= points.map(lambda m: gradient(m, w)).reduce(add)
print "Final w: " + str(w)
diff --git a/python/examples/pagerank.py b/python/examples/pagerank.py
new file mode 100755
index 0000000000..cd774cf3a3
--- /dev/null
+++ b/python/examples/pagerank.py
@@ -0,0 +1,70 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#!/usr/bin/env python
+
+import re, sys
+from operator import add
+
+from pyspark import SparkContext
+
+
+def computeContribs(urls, rank):
+ """Calculates URL contributions to the rank of other URLs."""
+ num_urls = len(urls)
+ for url in urls: yield (url, rank / num_urls)
+
+
+def parseNeighbors(urls):
+ """Parses a urls pair string into urls pair."""
+ parts = re.split(r'\s+', urls)
+ return parts[0], parts[1]
+
+
+if __name__ == "__main__":
+ if len(sys.argv) < 3:
+ print >> sys.stderr, "Usage: pagerank <master> <file> <number_of_iterations>"
+ exit(-1)
+
+ # Initialize the spark context.
+ sc = SparkContext(sys.argv[1], "PythonPageRank")
+
+ # Loads in input file. It should be in format of:
+ # URL neighbor URL
+ # URL neighbor URL
+ # URL neighbor URL
+ # ...
+ lines = sc.textFile(sys.argv[2], 1)
+
+ # Loads all URLs from input file and initialize their neighbors.
+ links = lines.map(lambda urls: parseNeighbors(urls)).distinct().groupByKey().cache()
+
+ # Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
+ ranks = links.map(lambda (url, neighbors): (url, 1.0))
+
+ # Calculates and updates URL ranks continuously using PageRank algorithm.
+ for iteration in xrange(int(sys.argv[3])):
+ # Calculates URL contributions to the rank of other URLs.
+ contribs = links.join(ranks).flatMap(lambda (url, (urls, rank)):
+ computeContribs(urls, rank))
+
+ # Re-calculates URL ranks based on neighbor contributions.
+ ranks = contribs.reduceByKey(add).mapValues(lambda rank: rank * 0.85 + 0.15)
+
+ # Collects all URL ranks and dump them to console.
+ for (link, rank) in ranks.collect():
+ print "%s has rank: %s." % (link, rank)
diff --git a/python/examples/pi.py b/python/examples/pi.py
index 33c026e824..ab0645fc2f 100644..100755
--- a/python/examples/pi.py
+++ b/python/examples/pi.py
@@ -24,8 +24,7 @@ from pyspark import SparkContext
if __name__ == "__main__":
if len(sys.argv) == 1:
- print >> sys.stderr, \
- "Usage: PythonPi <master> [<slices>]"
+ print >> sys.stderr, "Usage: pi <master> [<slices>]"
exit(-1)
sc = SparkContext(sys.argv[1], "PythonPi")
slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
diff --git a/python/examples/transitive_closure.py b/python/examples/transitive_closure.py
index 40be3b5000..744cce6651 100644..100755
--- a/python/examples/transitive_closure.py
+++ b/python/examples/transitive_closure.py
@@ -37,10 +37,9 @@ def generateGraph():
if __name__ == "__main__":
if len(sys.argv) == 1:
- print >> sys.stderr, \
- "Usage: PythonTC <master> [<slices>]"
+ print >> sys.stderr, "Usage: transitive_closure <master> [<slices>]"
exit(-1)
- sc = SparkContext(sys.argv[1], "PythonTC")
+ sc = SparkContext(sys.argv[1], "PythonTransitiveClosure")
slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
tc = sc.parallelize(generateGraph(), slices).cache()
diff --git a/python/examples/wordcount.py b/python/examples/wordcount.py
index 41c846ba79..b9139b9d76 100644..100755
--- a/python/examples/wordcount.py
+++ b/python/examples/wordcount.py
@@ -23,8 +23,7 @@ from pyspark import SparkContext
if __name__ == "__main__":
if len(sys.argv) < 3:
- print >> sys.stderr, \
- "Usage: PythonWordCount <master> <file>"
+ print >> sys.stderr, "Usage: wordcount <master> <file>"
exit(-1)
sc = SparkContext(sys.argv[1], "PythonWordCount")
lines = sc.textFile(sys.argv[2], 1)
@@ -33,4 +32,4 @@ if __name__ == "__main__":
.reduceByKey(add)
output = counts.collect()
for (word, count) in output:
- print "%s : %i" % (word, count)
+ print "%s: %i" % (word, count)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 2f741cb345..8fbf296509 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -46,6 +46,7 @@ class SparkContext(object):
_next_accum_id = 0
_active_spark_context = None
_lock = Lock()
+ _python_includes = None # zip and egg files that need to be added to PYTHONPATH
def __init__(self, master, jobName, sparkHome=None, pyFiles=None,
environment=None, batchSize=1024):
@@ -103,16 +104,19 @@ class SparkContext(object):
# send.
self._pickled_broadcast_vars = set()
+ SparkFiles._sc = self
+ root_dir = SparkFiles.getRootDirectory()
+ sys.path.append(root_dir)
+
# Deploy any code dependencies specified in the constructor
+ self._python_includes = list()
for path in (pyFiles or []):
self.addPyFile(path)
- SparkFiles._sc = self
- sys.path.append(SparkFiles.getRootDirectory())
# Create a temporary directory inside spark.local.dir:
- local_dir = self._jvm.spark.Utils.getLocalDir()
+ local_dir = self._jvm.org.apache.spark.util.Utils.getLocalDir()
self._temp_dir = \
- self._jvm.spark.Utils.createTempDir(local_dir).getAbsolutePath()
+ self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir).getAbsolutePath()
@property
def defaultParallelism(self):
@@ -141,14 +145,21 @@ class SparkContext(object):
def parallelize(self, c, numSlices=None):
"""
Distribute a local Python collection to form an RDD.
+
+ >>> sc.parallelize(range(5), 5).glom().collect()
+ [[0], [1], [2], [3], [4]]
"""
numSlices = numSlices or self.defaultParallelism
# Calling the Java parallelize() method with an ArrayList is too slow,
# because it sends O(n) Py4J commands. As an alternative, serialized
# objects are written to a file and loaded through textFile().
tempFile = NamedTemporaryFile(delete=False, dir=self._temp_dir)
- if self.batchSize != 1:
- c = batched(c, self.batchSize)
+ # Make sure we distribute data evenly if it's smaller than self.batchSize
+ if "__len__" not in dir(c):
+ c = list(c) # Make it a list so we can compute its length
+ batchSize = min(len(c) // numSlices, self.batchSize)
+ if batchSize > 1:
+ c = batched(c, batchSize)
for x in c:
write_with_length(dump_pickle(x), tempFile)
tempFile.close()
@@ -250,7 +261,11 @@ class SparkContext(object):
HTTP, HTTPS or FTP URI.
"""
self.addFile(path)
- filename = path.split("/")[-1]
+ (dirname, filename) = os.path.split(path) # dirname may be directory or HDFS/S3 prefix
+
+ if filename.endswith('.zip') or filename.endswith('.ZIP') or filename.endswith('.egg'):
+ self._python_includes.append(filename)
+ sys.path.append(os.path.join(SparkFiles.getRootDirectory(), filename)) # for tests in local mode
def setCheckpointDir(self, dirName, useExisting=False):
"""
diff --git a/python/pyspark/files.py b/python/pyspark/files.py
index 89bcbcfe06..57ee14eeb7 100644
--- a/python/pyspark/files.py
+++ b/python/pyspark/files.py
@@ -52,4 +52,4 @@ class SparkFiles(object):
return cls._root_directory
else:
# This will have to change if we support multiple SparkContexts:
- return cls._sc._jvm.spark.SparkFiles.getRootDirectory()
+ return cls._sc._jvm.org.apache.spark.SparkFiles.getRootDirectory()
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index e503fb7621..26fbe0f080 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -17,6 +17,7 @@
import os
import sys
+import signal
from subprocess import Popen, PIPE
from threading import Thread
from py4j.java_gateway import java_import, JavaGateway, GatewayClient
@@ -28,9 +29,12 @@ SPARK_HOME = os.environ["SPARK_HOME"]
def launch_gateway():
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and SPARK_MEM settings from spark-env.sh
- command = [os.path.join(SPARK_HOME, "run"), "py4j.GatewayServer",
+ command = [os.path.join(SPARK_HOME, "spark-class"), "py4j.GatewayServer",
"--die-on-broken-pipe", "0"]
- proc = Popen(command, stdout=PIPE, stdin=PIPE)
+ # Don't send ctrl-c / SIGINT to the Java gateway:
+ def preexec_function():
+ signal.signal(signal.SIGINT, signal.SIG_IGN)
+ proc = Popen(command, stdout=PIPE, stdin=PIPE, preexec_fn=preexec_function)
# Determine which ephemeral port the server started on:
port = int(proc.stdout.readline())
# Create a thread to echo output from the GatewayServer, which is required
@@ -49,7 +53,7 @@ def launch_gateway():
# Connect to the gateway
gateway = JavaGateway(GatewayClient(port=port), auto_convert=False)
# Import the classes used by PySpark
- java_import(gateway.jvm, "spark.api.java.*")
- java_import(gateway.jvm, "spark.api.python.*")
+ java_import(gateway.jvm, "org.apache.spark.api.java.*")
+ java_import(gateway.jvm, "org.apache.spark.api.python.*")
java_import(gateway.jvm, "scala.Tuple2")
return gateway
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index c6a6b24c5a..914118ccdd 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -21,6 +21,7 @@ from collections import defaultdict
from itertools import chain, ifilter, imap, product
import operator
import os
+import sys
import shlex
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
@@ -31,6 +32,8 @@ from pyspark.serializers import batched, Batch, dump_pickle, load_pickle, \
read_from_pickle_file
from pyspark.join import python_join, python_left_outer_join, \
python_right_outer_join, python_cogroup
+from pyspark.statcounter import StatCounter
+from pyspark.rddsampler import RDDSampler
from py4j.java_collections import ListConverter, MapConverter
@@ -160,18 +163,64 @@ class RDD(object):
>>> sorted(sc.parallelize([1, 1, 2, 3]).distinct().collect())
[1, 2, 3]
"""
- return self.map(lambda x: (x, "")) \
+ return self.map(lambda x: (x, None)) \
.reduceByKey(lambda x, _: x) \
.map(lambda (x, _): x)
- # TODO: sampling needs to be re-implemented due to Batch
- #def sample(self, withReplacement, fraction, seed):
- # jrdd = self._jrdd.sample(withReplacement, fraction, seed)
- # return RDD(jrdd, self.ctx)
+ def sample(self, withReplacement, fraction, seed):
+ """
+ Return a sampled subset of this RDD (relies on numpy and falls back
+ on default random generator if numpy is unavailable).
+
+ >>> sc.parallelize(range(0, 100)).sample(False, 0.1, 2).collect() #doctest: +SKIP
+ [2, 3, 20, 21, 24, 41, 42, 66, 67, 89, 90, 98]
+ """
+ return self.mapPartitionsWithSplit(RDDSampler(withReplacement, fraction, seed).func, True)
+
+ # this is ported from scala/spark/RDD.scala
+ def takeSample(self, withReplacement, num, seed):
+ """
+ Return a fixed-size sampled subset of this RDD (currently requires numpy).
+
+ >>> sc.parallelize(range(0, 10)).takeSample(True, 10, 1) #doctest: +SKIP
+ [4, 2, 1, 8, 2, 7, 0, 4, 1, 4]
+ """
+
+ fraction = 0.0
+ total = 0
+ multiplier = 3.0
+ initialCount = self.count()
+ maxSelected = 0
+
+ if (num < 0):
+ raise ValueError
- #def takeSample(self, withReplacement, num, seed):
- # vals = self._jrdd.takeSample(withReplacement, num, seed)
- # return [load_pickle(bytes(x)) for x in vals]
+ if initialCount > sys.maxint - 1:
+ maxSelected = sys.maxint - 1
+ else:
+ maxSelected = initialCount
+
+ if num > initialCount and not withReplacement:
+ total = maxSelected
+ fraction = multiplier * (maxSelected + 1) / initialCount
+ else:
+ fraction = multiplier * (num + 1) / initialCount
+ total = num
+
+ samples = self.sample(withReplacement, fraction, seed).collect()
+
+ # If the first sample didn't turn out large enough, keep trying to take samples;
+ # this shouldn't happen often because we use a big multiplier for their initial size.
+ # See: scala/spark/RDD.scala
+ while len(samples) < total:
+ if seed > sys.maxint - 2:
+ seed = -1
+ seed += 1
+ samples = self.sample(withReplacement, fraction, seed).collect()
+
+ sampler = RDDSampler(withReplacement, fraction, seed+1)
+ sampler.shuffle(samples)
+ return samples[0:total]
def union(self, other):
"""
@@ -267,7 +316,11 @@ class RDD(object):
>>> def f(x): print x
>>> sc.parallelize([1, 2, 3, 4, 5]).foreach(f)
"""
- self.map(f).collect() # Force evaluation
+ def processPartition(iterator):
+ for x in iterator:
+ f(x)
+ yield None
+ self.mapPartitions(processPartition).collect() # Force evaluation
def collect(self):
"""
@@ -353,6 +406,63 @@ class RDD(object):
3
"""
return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
+
+ def stats(self):
+ """
+ Return a L{StatCounter} object that captures the mean, variance
+ and count of the RDD's elements in one operation.
+ """
+ def redFunc(left_counter, right_counter):
+ return left_counter.mergeStats(right_counter)
+
+ return self.mapPartitions(lambda i: [StatCounter(i)]).reduce(redFunc)
+
+ def mean(self):
+ """
+ Compute the mean of this RDD's elements.
+
+ >>> sc.parallelize([1, 2, 3]).mean()
+ 2.0
+ """
+ return self.stats().mean()
+
+ def variance(self):
+ """
+ Compute the variance of this RDD's elements.
+
+ >>> sc.parallelize([1, 2, 3]).variance()
+ 0.666...
+ """
+ return self.stats().variance()
+
+ def stdev(self):
+ """
+ Compute the standard deviation of this RDD's elements.
+
+ >>> sc.parallelize([1, 2, 3]).stdev()
+ 0.816...
+ """
+ return self.stats().stdev()
+
+ def sampleStdev(self):
+ """
+ Compute the sample standard deviation of this RDD's elements (which corrects for bias in
+ estimating the standard deviation by dividing by N-1 instead of N).
+
+ >>> sc.parallelize([1, 2, 3]).sampleStdev()
+ 1.0
+ """
+ return self.stats().sampleStdev()
+
+ def sampleVariance(self):
+ """
+ Compute the sample variance of this RDD's elements (which corrects for bias in
+ estimating the variance by dividing by N-1 instead of N).
+
+ >>> sc.parallelize([1, 2, 3]).sampleVariance()
+ 1.0
+ """
+ return self.stats().sampleVariance()
def countByValue(self):
"""
@@ -386,13 +496,16 @@ class RDD(object):
>>> sc.parallelize([2, 3, 4, 5, 6]).take(10)
[2, 3, 4, 5, 6]
"""
+ def takeUpToNum(iterator):
+ taken = 0
+ while taken < num:
+ yield next(iterator)
+ taken += 1
+ # Take only up to num elements from each partition we try
+ mapped = self.mapPartitions(takeUpToNum)
items = []
- for partition in range(self._jrdd.splits().size()):
- iterator = self.ctx._takePartition(self._jrdd.rdd(), partition)
- # Each item in the iterator is a string, Python object, batch of
- # Python objects. Regardless, it is sufficient to take `num`
- # of these objects in order to collect `num` Python objects:
- iterator = iterator.take(num)
+ for partition in range(mapped._jrdd.splits().size()):
+ iterator = self.ctx._takePartition(mapped._jrdd.rdd(), partition)
items.extend(self._collect_iterator_through_file(iterator))
if len(items) >= num:
break
@@ -689,6 +802,43 @@ class RDD(object):
"""
return python_cogroup(self, other, numPartitions)
+ def subtractByKey(self, other, numPartitions=None):
+ """
+ Return each (key, value) pair in C{self} that has no pair with matching key
+ in C{other}.
+
+ >>> x = sc.parallelize([("a", 1), ("b", 4), ("b", 5), ("a", 2)])
+ >>> y = sc.parallelize([("a", 3), ("c", None)])
+ >>> sorted(x.subtractByKey(y).collect())
+ [('b', 4), ('b', 5)]
+ """
+ filter_func = lambda tpl: len(tpl[1][0]) > 0 and len(tpl[1][1]) == 0
+ map_func = lambda tpl: [(tpl[0], val) for val in tpl[1][0]]
+ return self.cogroup(other, numPartitions).filter(filter_func).flatMap(map_func)
+
+ def subtract(self, other, numPartitions=None):
+ """
+ Return each value in C{self} that is not contained in C{other}.
+
+ >>> x = sc.parallelize([("a", 1), ("b", 4), ("b", 5), ("a", 3)])
+ >>> y = sc.parallelize([("a", 3), ("c", None)])
+ >>> sorted(x.subtract(y).collect())
+ [('a', 1), ('b', 4), ('b', 5)]
+ """
+ rdd = other.map(lambda x: (x, True)) # note: here 'True' is just a placeholder
+ return self.map(lambda x: (x, True)).subtractByKey(rdd).map(lambda tpl: tpl[0]) # note: here 'True' is just a placeholder
+
+ def keyBy(self, f):
+ """
+ Creates tuples of the elements in this RDD by applying C{f}.
+
+ >>> x = sc.parallelize(range(0,3)).keyBy(lambda x: x*x)
+ >>> y = sc.parallelize(zip(range(0,5), range(0,5)))
+ >>> sorted(x.cogroup(y).collect())
+ [(0, ([0], [0])), (1, ([1], [1])), (2, ([], [2])), (3, ([], [3])), (4, ([2], [4]))]
+ """
+ return self.map(lambda x: (f(x), x))
+
# TODO: `lookup` is disabled because we can't make direct comparisons based
# on the key; we need to compare the hash of the key to the hash of the
# keys in the pairs. This could be an expensive operation, since those
@@ -749,11 +899,12 @@ class PipelinedRDD(RDD):
self.ctx._gateway._gateway_client)
self.ctx._pickled_broadcast_vars.clear()
class_manifest = self._prev_jrdd.classManifest()
- env = copy.copy(self.ctx.environment)
- env['PYTHONPATH'] = os.environ.get("PYTHONPATH", "")
- env = MapConverter().convert(env, self.ctx._gateway._gateway_client)
+ env = MapConverter().convert(self.ctx.environment,
+ self.ctx._gateway._gateway_client)
+ includes = ListConverter().convert(self.ctx._python_includes,
+ self.ctx._gateway._gateway_client)
python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(),
- pipe_command, env, self.preservesPartitioning, self.ctx.pythonExec,
+ pipe_command, env, includes, self.preservesPartitioning, self.ctx.pythonExec,
broadcast_vars, self.ctx._javaAccumulator, class_manifest)
self._jrdd_val = python_rdd.asJavaRDD()
return self._jrdd_val
@@ -769,7 +920,7 @@ def _test():
# The small batch size here ensures that we see multiple batches,
# even in these small test examples:
globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
- (failure_count, test_count) = doctest.testmod(globs=globs)
+ (failure_count, test_count) = doctest.testmod(globs=globs,optionflags=doctest.ELLIPSIS)
globs['sc'].stop()
if failure_count:
exit(-1)
diff --git a/python/pyspark/rddsampler.py b/python/pyspark/rddsampler.py
new file mode 100644
index 0000000000..aca2ef3b51
--- /dev/null
+++ b/python/pyspark/rddsampler.py
@@ -0,0 +1,112 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import random
+
+class RDDSampler(object):
+ def __init__(self, withReplacement, fraction, seed):
+ try:
+ import numpy
+ self._use_numpy = True
+ except ImportError:
+ print >> sys.stderr, "NumPy does not appear to be installed. Falling back to default random generator for sampling."
+ self._use_numpy = False
+
+ self._seed = seed
+ self._withReplacement = withReplacement
+ self._fraction = fraction
+ self._random = None
+ self._split = None
+ self._rand_initialized = False
+
+ def initRandomGenerator(self, split):
+ if self._use_numpy:
+ import numpy
+ self._random = numpy.random.RandomState(self._seed)
+ for _ in range(0, split):
+ # discard the next few values in the sequence to have a
+ # different seed for the different splits
+ self._random.randint(sys.maxint)
+ else:
+ import random
+ random.seed(self._seed)
+ for _ in range(0, split):
+ # discard the next few values in the sequence to have a
+ # different seed for the different splits
+ random.randint(0, sys.maxint)
+ self._split = split
+ self._rand_initialized = True
+
+ def getUniformSample(self, split):
+ if not self._rand_initialized or split != self._split:
+ self.initRandomGenerator(split)
+
+ if self._use_numpy:
+ return self._random.random_sample()
+ else:
+ return random.uniform(0.0, 1.0)
+
+ def getPoissonSample(self, split, mean):
+ if not self._rand_initialized or split != self._split:
+ self.initRandomGenerator(split)
+
+ if self._use_numpy:
+ return self._random.poisson(mean)
+ else:
+ # here we simulate drawing numbers n_i ~ Poisson(lambda = 1/mean) by
+ # drawing a sequence of numbers delta_j ~ Exp(mean)
+ num_arrivals = 1
+ cur_time = 0.0
+
+ cur_time += random.expovariate(mean)
+
+ if cur_time > 1.0:
+ return 0
+
+ while(cur_time <= 1.0):
+ cur_time += random.expovariate(mean)
+ num_arrivals += 1
+
+ return (num_arrivals - 1)
+
+ def shuffle(self, vals):
+ if self._random == None or split != self._split:
+ self.initRandomGenerator(0) # this should only ever called on the master so
+ # the split does not matter
+
+ if self._use_numpy:
+ self._random.shuffle(vals)
+ else:
+ random.shuffle(vals, self._random)
+
+ def func(self, split, iterator):
+ if self._withReplacement:
+ for obj in iterator:
+ # For large datasets, the expected number of occurrences of each element in a sample with
+ # replacement is Poisson(frac). We use that to get a count for each element.
+ count = self.getPoissonSample(split, mean = self._fraction)
+ for _ in range(0, count):
+ yield obj
+ else:
+ for obj in iterator:
+ if self.getUniformSample(split) <= self._fraction:
+ yield obj
+
+
+
+
diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index cc8cd9e3c4..54823f8037 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -21,13 +21,31 @@ An interactive shell.
This file is designed to be launched as a PYTHONSTARTUP script.
"""
import os
+import platform
import pyspark
from pyspark.context import SparkContext
+# this is the equivalent of ADD_JARS
+add_files = os.environ.get("ADD_FILES").split(',') if os.environ.get("ADD_FILES") != None else None
-sc = SparkContext(os.environ.get("MASTER", "local"), "PySparkShell")
+sc = SparkContext(os.environ.get("MASTER", "local"), "PySparkShell", pyFiles=add_files)
+
+print """Welcome to
+ ____ __
+ / __/__ ___ _____/ /__
+ _\ \/ _ \/ _ `/ __/ '_/
+ /__ / .__/\_,_/_/ /_/\_\ version 0.8.0
+ /_/
+"""
+print "Using Python version %s (%s, %s)" % (
+ platform.python_version(),
+ platform.python_build()[0],
+ platform.python_build()[1])
print "Spark context avaiable as sc."
+if add_files != None:
+ print "Adding files: [%s]" % ", ".join(add_files)
+
# The ./pyspark script stores the old PYTHONSTARTUP value in OLD_PYTHONSTARTUP,
# which allows us to execute the user's PYTHONSTARTUP file:
_pythonstartup = os.environ.get('OLD_PYTHONSTARTUP')
diff --git a/python/pyspark/statcounter.py b/python/pyspark/statcounter.py
new file mode 100644
index 0000000000..8e1cbd4ad9
--- /dev/null
+++ b/python/pyspark/statcounter.py
@@ -0,0 +1,109 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is ported from spark/util/StatCounter.scala
+
+import copy
+import math
+
+class StatCounter(object):
+
+ def __init__(self, values=[]):
+ self.n = 0L # Running count of our values
+ self.mu = 0.0 # Running mean of our values
+ self.m2 = 0.0 # Running variance numerator (sum of (x - mean)^2)
+
+ for v in values:
+ self.merge(v)
+
+ # Add a value into this StatCounter, updating the internal statistics.
+ def merge(self, value):
+ delta = value - self.mu
+ self.n += 1
+ self.mu += delta / self.n
+ self.m2 += delta * (value - self.mu)
+ return self
+
+ # Merge another StatCounter into this one, adding up the internal statistics.
+ def mergeStats(self, other):
+ if not isinstance(other, StatCounter):
+ raise Exception("Can only merge Statcounters!")
+
+ if other is self: # reference equality holds
+ self.merge(copy.deepcopy(other)) # Avoid overwriting fields in a weird order
+ else:
+ if self.n == 0:
+ self.mu = other.mu
+ self.m2 = other.m2
+ self.n = other.n
+ elif other.n != 0:
+ delta = other.mu - self.mu
+ if other.n * 10 < self.n:
+ self.mu = self.mu + (delta * other.n) / (self.n + other.n)
+ elif self.n * 10 < other.n:
+ self.mu = other.mu - (delta * self.n) / (self.n + other.n)
+ else:
+ self.mu = (self.mu * self.n + other.mu * other.n) / (self.n + other.n)
+
+ self.m2 += other.m2 + (delta * delta * self.n * other.n) / (self.n + other.n)
+ self.n += other.n
+ return self
+
+ # Clone this StatCounter
+ def copy(self):
+ return copy.deepcopy(self)
+
+ def count(self):
+ return self.n
+
+ def mean(self):
+ return self.mu
+
+ def sum(self):
+ return self.n * self.mu
+
+ # Return the variance of the values.
+ def variance(self):
+ if self.n == 0:
+ return float('nan')
+ else:
+ return self.m2 / self.n
+
+ #
+ # Return the sample variance, which corrects for bias in estimating the variance by dividing
+ # by N-1 instead of N.
+ #
+ def sampleVariance(self):
+ if self.n <= 1:
+ return float('nan')
+ else:
+ return self.m2 / (self.n - 1)
+
+ # Return the standard deviation of the values.
+ def stdev(self):
+ return math.sqrt(self.variance())
+
+ #
+ # Return the sample standard deviation of the values, which corrects for bias in estimating the
+ # variance by dividing by N-1 instead of N.
+ #
+ def sampleStdev(self):
+ return math.sqrt(self.sampleVariance())
+
+ def __repr__(self):
+ return "(count: %s, mean: %s, stdev: %s)" % (self.count(), self.mean(), self.stdev())
+
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index dfd841b10a..29d6a128f6 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -64,7 +64,7 @@ class TestCheckpoint(PySparkTestCase):
flatMappedRDD = parCollection.flatMap(lambda x: range(1, x + 1))
self.assertFalse(flatMappedRDD.isCheckpointed())
- self.assertIsNone(flatMappedRDD.getCheckpointFile())
+ self.assertTrue(flatMappedRDD.getCheckpointFile() is None)
flatMappedRDD.checkpoint()
result = flatMappedRDD.collect()
@@ -79,13 +79,13 @@ class TestCheckpoint(PySparkTestCase):
flatMappedRDD = parCollection.flatMap(lambda x: [x])
self.assertFalse(flatMappedRDD.isCheckpointed())
- self.assertIsNone(flatMappedRDD.getCheckpointFile())
+ self.assertTrue(flatMappedRDD.getCheckpointFile() is None)
flatMappedRDD.checkpoint()
flatMappedRDD.count() # forces a checkpoint to be computed
time.sleep(1) # 1 second
- self.assertIsNotNone(flatMappedRDD.getCheckpointFile())
+ self.assertTrue(flatMappedRDD.getCheckpointFile() is not None)
recovered = self.sc._checkpointFile(flatMappedRDD.getCheckpointFile())
self.assertEquals([1, 2, 3, 4], recovered.collect())
@@ -125,6 +125,17 @@ class TestAddFile(PySparkTestCase):
from userlibrary import UserClass
self.assertEqual("Hello World!", UserClass().hello())
+ def test_add_egg_file_locally(self):
+ # To ensure that we're actually testing addPyFile's effects, check that
+ # this fails due to `userlibrary` not being on the Python path:
+ def func():
+ from userlib import UserClass
+ self.assertRaises(ImportError, func)
+ path = os.path.join(SPARK_HOME, "python/test_support/userlib-0.1-py2.7.egg")
+ self.sc.addPyFile(path)
+ from userlib import UserClass
+ self.assertEqual("Hello World from inside a package!", UserClass().hello())
+
class TestIO(PySparkTestCase):
@@ -164,9 +175,12 @@ class TestDaemon(unittest.TestCase):
time.sleep(1)
# daemon should no longer accept connections
- with self.assertRaises(EnvironmentError) as trap:
+ try:
self.connect(port)
- self.assertEqual(trap.exception.errno, ECONNREFUSED)
+ except EnvironmentError as exception:
+ self.assertEqual(exception.errno, ECONNREFUSED)
+ else:
+ self.fail("Expected EnvironmentError to be raised")
def test_termination_stdin(self):
"""Ensure that daemon and workers terminate when stdin is closed."""
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 75d692beeb..695f6dfb84 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -49,15 +49,26 @@ def main(infile, outfile):
split_index = read_int(infile)
if split_index == -1: # for unit tests
return
+
+ # fetch name of workdir
spark_files_dir = load_pickle(read_with_length(infile))
SparkFiles._root_directory = spark_files_dir
SparkFiles._is_running_on_worker = True
- sys.path.append(spark_files_dir)
+
+ # fetch names and values of broadcast variables
num_broadcast_variables = read_int(infile)
for _ in range(num_broadcast_variables):
bid = read_long(infile)
value = read_with_length(infile)
_broadcastRegistry[bid] = Broadcast(bid, load_pickle(value))
+
+ # fetch names of includes (*.zip and *.egg files) and construct PYTHONPATH
+ sys.path.append(spark_files_dir) # *.py files that were added will be copied here
+ num_python_includes = read_int(infile)
+ for _ in range(num_python_includes):
+ sys.path.append(os.path.join(spark_files_dir, load_pickle(read_with_length(infile))))
+
+ # now load function
func = load_obj(infile)
bypassSerializer = load_obj(infile)
if bypassSerializer:
diff --git a/python/run-tests b/python/run-tests
index 1ee947d414..cbc554ea9d 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -21,22 +21,23 @@
# Figure out where the Spark framework is installed
FWDIR="$(cd `dirname $0`; cd ../; pwd)"
-FAILED=0
-
-$FWDIR/pyspark pyspark/rdd.py
-FAILED=$(($?||$FAILED))
+# CD into the python directory to find things on the right path
+cd "$FWDIR/python"
-$FWDIR/pyspark pyspark/context.py
-FAILED=$(($?||$FAILED))
+FAILED=0
-$FWDIR/pyspark -m doctest pyspark/broadcast.py
-FAILED=$(($?||$FAILED))
+rm -f unit-tests.log
-$FWDIR/pyspark -m doctest pyspark/accumulators.py
-FAILED=$(($?||$FAILED))
+function run_test() {
+ $FWDIR/pyspark $1 2>&1 | tee -a unit-tests.log
+ FAILED=$((PIPESTATUS[0]||$FAILED))
+}
-$FWDIR/pyspark -m unittest pyspark.tests
-FAILED=$(($?||$FAILED))
+run_test "pyspark/rdd.py"
+run_test "pyspark/context.py"
+run_test "-m doctest pyspark/broadcast.py"
+run_test "-m doctest pyspark/accumulators.py"
+run_test "pyspark/tests.py"
if [[ $FAILED != 0 ]]; then
echo -en "\033[31m" # Red
diff --git a/python/test_support/userlib-0.1-py2.7.egg b/python/test_support/userlib-0.1-py2.7.egg
new file mode 100644
index 0000000000..1674c9cb22
--- /dev/null
+++ b/python/test_support/userlib-0.1-py2.7.egg
Binary files differ
diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml
index 7c4e722cc1..d61b36a61a 100644
--- a/repl-bin/pom.xml
+++ b/repl-bin/pom.xml
@@ -19,24 +19,44 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-repl-bin</artifactId>
<packaging>pom</packaging>
<name>Spark Project REPL binary packaging</name>
- <url>http://spark-project.org/</url>
+ <url>http://spark.incubator.apache.org/</url>
<properties>
- <deb.pkg.name>spark-${classifier}</deb.pkg.name>
- <deb.install.path>/usr/share/spark-${classifier}</deb.install.path>
+ <deb.pkg.name>spark</deb.pkg.name>
+ <deb.install.path>/usr/share/spark</deb.install.path>
<deb.user>root</deb.user>
</properties>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-bagel</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-repl</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ </dependencies>
+
<build>
<plugins>
<plugin>
@@ -44,7 +64,7 @@
<artifactId>maven-shade-plugin</artifactId>
<configuration>
<shadedArtifactAttached>false</shadedArtifactAttached>
- <outputFile>${project.build.directory}/${project.artifactId}-${project.version}-shaded-${classifier}.jar</outputFile>
+ <outputFile>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</outputFile>
<artifactSet>
<includes>
<include>*:*</include>
@@ -86,142 +106,12 @@
<profiles>
<profile>
- <id>hadoop1</id>
- <properties>
- <classifier>hadoop1</classifier>
- </properties>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-bagel</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-examples</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-repl</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>runtime</scope>
- </dependency>
- </dependencies>
- </profile>
- <profile>
- <id>hadoop2</id>
- <properties>
- <classifier>hadoop2</classifier>
- </properties>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-bagel</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-examples</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-repl</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>runtime</scope>
- </dependency>
- </dependencies>
- </profile>
- <profile>
<id>hadoop2-yarn</id>
- <properties>
- <classifier>hadoop2-yarn</classifier>
- </properties>
<dependencies>
<dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-bagel</artifactId>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-yarn</artifactId>
<version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-examples</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-repl</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-client</artifactId>
- <scope>runtime</scope>
</dependency>
</dependencies>
</profile>
@@ -261,7 +151,7 @@
<compression>gzip</compression>
<dataSet>
<data>
- <src>${project.build.directory}/${project.artifactId}-${project.version}-shaded-${classifier}.jar</src>
+ <src>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</src>
<type>file</type>
<mapper>
<type>perm</type>
diff --git a/repl-bin/src/deb/bin/run b/repl-bin/src/deb/bin/run
index 0e5100e00b..8b5d8300f2 100755
--- a/repl-bin/src/deb/bin/run
+++ b/repl-bin/src/deb/bin/run
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/repl-bin/src/deb/bin/spark-executor b/repl-bin/src/deb/bin/spark-executor
index d3c1c23413..bcfae22677 100755
--- a/repl-bin/src/deb/bin/spark-executor
+++ b/repl-bin/src/deb/bin/spark-executor
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/repl-bin/src/deb/bin/spark-shell b/repl-bin/src/deb/bin/spark-shell
index 8b258543db..ec7e33e1e3 100755
--- a/repl-bin/src/deb/bin/spark-shell
+++ b/repl-bin/src/deb/bin/spark-shell
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/repl/pom.xml b/repl/pom.xml
index 7d8da03254..a1c87d7618 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -19,17 +19,17 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-repl</artifactId>
<packaging>jar</packaging>
<name>Spark Project REPL</name>
- <url>http://spark-project.org/</url>
+ <url>http://spark.incubator.apache.org/</url>
<properties>
<deb.install.path>/usr/share/spark</deb.install.path>
@@ -38,6 +38,23 @@
<dependencies>
<dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-bagel</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-mllib</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
</dependency>
@@ -57,7 +74,6 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
-
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.version}</artifactId>
@@ -74,192 +90,57 @@
<testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
<plugins>
<plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>test</phase>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ <configuration>
+ <exportAntProperties>true</exportAntProperties>
+ <tasks>
+ <property name="spark.classpath" refid="maven.test.classpath"/>
+ <property environment="env"/>
+ <fail message="Please set the SCALA_HOME (or SCALA_LIBRARY_PATH if scala is on the path) environment variables and retry.">
+ <condition>
+ <not>
+ <or>
+ <isset property="env.SCALA_HOME"/>
+ <isset property="env.SCALA_LIBRARY_PATH"/>
+ </or>
+ </not>
+ </condition>
+ </fail>
+ </tasks>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<environmentVariables>
<SPARK_HOME>${basedir}/..</SPARK_HOME>
<SPARK_TESTING>1</SPARK_TESTING>
+ <SPARK_CLASSPATH>${spark.classpath}</SPARK_CLASSPATH>
</environmentVariables>
</configuration>
</plugin>
</plugins>
</build>
-
<profiles>
<profile>
- <id>hadoop1</id>
- <properties>
- <classifier>hadoop1</classifier>
- </properties>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-bagel</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-examples</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop1</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>hadoop2</id>
- <properties>
- <classifier>hadoop2</classifier>
- </properties>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-bagel</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-examples</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro-ipc</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
<id>hadoop2-yarn</id>
- <properties>
- <classifier>hadoop2-yarn</classifier>
- </properties>
<dependencies>
<dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-bagel</artifactId>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-yarn</artifactId>
<version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-examples</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-streaming</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro-ipc</artifactId>
- <scope>provided</scope>
</dependency>
</dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2-yarn</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
</profile>
</profiles>
</project>
diff --git a/repl/src/main/scala/spark/repl/ExecutorClassLoader.scala b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
index 274bc585db..3e171849e3 100644
--- a/repl/src/main/scala/spark/repl/ExecutorClassLoader.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.repl
+package org.apache.spark.repl
import java.io.{ByteArrayOutputStream, InputStream}
import java.net.{URI, URL, URLClassLoader, URLEncoder}
diff --git a/repl/src/main/scala/spark/repl/Main.scala b/repl/src/main/scala/org/apache/spark/repl/Main.scala
index d824d62fd1..17e149f8ab 100644
--- a/repl/src/main/scala/spark/repl/Main.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/Main.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.repl
+package org.apache.spark.repl
import scala.collection.mutable.Set
diff --git a/repl/src/main/scala/spark/repl/SparkHelper.scala b/repl/src/main/scala/org/apache/spark/repl/SparkHelper.scala
index d8fb7191b4..d8fb7191b4 100644
--- a/repl/src/main/scala/spark/repl/SparkHelper.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkHelper.scala
diff --git a/repl/src/main/scala/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 59f9d05683..193ccb48ee 100644
--- a/repl/src/main/scala/spark/repl/SparkILoop.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -3,7 +3,7 @@
* @author Alexander Spoon
*/
-package spark.repl
+package org.apache.spark.repl
import scala.tools.nsc._
import scala.tools.nsc.interpreter._
@@ -22,8 +22,8 @@ import util.{ ClassPath, Exceptional, stringFromWriter, stringFromStream }
import interpreter._
import io.{ File, Sources }
-import spark.Logging
-import spark.SparkContext
+import org.apache.spark.Logging
+import org.apache.spark.SparkContext
/** The Scala interactive shell. It provides a read-eval-print loop
* around the Interpreter class.
@@ -816,13 +816,13 @@ class SparkILoop(in0: Option[BufferedReader], val out: PrintWriter, val master:
def initializeSpark() {
intp.beQuietDuring {
command("""
- spark.repl.Main.interp.out.println("Creating SparkContext...");
- spark.repl.Main.interp.out.flush();
- @transient val sc = spark.repl.Main.interp.createSparkContext();
- spark.repl.Main.interp.out.println("Spark context available as sc.");
- spark.repl.Main.interp.out.flush();
+ org.apache.spark.repl.Main.interp.out.println("Creating SparkContext...");
+ org.apache.spark.repl.Main.interp.out.flush();
+ @transient val sc = org.apache.spark.repl.Main.interp.createSparkContext();
+ org.apache.spark.repl.Main.interp.out.println("Spark context available as sc.");
+ org.apache.spark.repl.Main.interp.out.flush();
""")
- command("import spark.SparkContext._")
+ command("import org.apache.spark.SparkContext._")
}
echo("Type in expressions to have them evaluated.")
echo("Type :help for more information.")
@@ -831,6 +831,10 @@ class SparkILoop(in0: Option[BufferedReader], val out: PrintWriter, val master:
var sparkContext: SparkContext = null
def createSparkContext(): SparkContext = {
+ val uri = System.getenv("SPARK_EXECUTOR_URI")
+ if (uri != null) {
+ System.setProperty("spark.executor.uri", uri)
+ }
val master = this.master match {
case Some(m) => m
case None => {
diff --git a/repl/src/main/scala/spark/repl/SparkIMain.scala b/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala
index 43b6a6c950..e6e35c9b5d 100644
--- a/repl/src/main/scala/spark/repl/SparkIMain.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala
@@ -3,7 +3,7 @@
* @author Martin Odersky
*/
-package spark.repl
+package org.apache.spark.repl
import scala.tools.nsc._
import scala.tools.nsc.interpreter._
@@ -27,9 +27,9 @@ import scala.util.control.Exception.{ ultimately }
import scala.reflect.NameTransformer
import SparkIMain._
-import spark.HttpServer
-import spark.Utils
-import spark.SparkEnv
+import org.apache.spark.HttpServer
+import org.apache.spark.util.Utils
+import org.apache.spark.SparkEnv
/** An interpreter for Scala code.
*
@@ -883,7 +883,7 @@ class SparkIMain(val settings: Settings, protected val out: PrintWriter) extends
val execution = lineManager.set(originalLine) {
// MATEI: set the right SparkEnv for our SparkContext, because
// this execution will happen in a separate thread
- val sc = spark.repl.Main.interp.sparkContext
+ val sc = org.apache.spark.repl.Main.interp.sparkContext
if (sc != null && sc.env != null)
SparkEnv.set(sc.env)
// Execute the line
diff --git a/repl/src/main/scala/spark/repl/SparkISettings.scala b/repl/src/main/scala/org/apache/spark/repl/SparkISettings.scala
index 8ebb01d146..605b7b259b 100644
--- a/repl/src/main/scala/spark/repl/SparkISettings.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkISettings.scala
@@ -3,7 +3,7 @@
* @author Alexander Spoon
*/
-package spark.repl
+package org.apache.spark.repl
import scala.tools.nsc._
import scala.tools.nsc.interpreter._
diff --git a/repl/src/main/scala/spark/repl/SparkImports.scala b/repl/src/main/scala/org/apache/spark/repl/SparkImports.scala
index 5caf5ca51a..41a1731d60 100644
--- a/repl/src/main/scala/spark/repl/SparkImports.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkImports.scala
@@ -3,7 +3,7 @@
* @author Paul Phillips
*/
-package spark.repl
+package org.apache.spark.repl
import scala.tools.nsc._
import scala.tools.nsc.interpreter._
diff --git a/repl/src/main/scala/spark/repl/SparkJLineCompletion.scala b/repl/src/main/scala/org/apache/spark/repl/SparkJLineCompletion.scala
index 0069d8b2f4..fdc172d753 100644
--- a/repl/src/main/scala/spark/repl/SparkJLineCompletion.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkJLineCompletion.scala
@@ -3,7 +3,7 @@
* @author Paul Phillips
*/
-package spark.repl
+package org.apache.spark.repl
import scala.tools.nsc._
import scala.tools.nsc.interpreter._
diff --git a/repl/src/main/scala/spark/repl/SparkJLineReader.scala b/repl/src/main/scala/org/apache/spark/repl/SparkJLineReader.scala
index ef6b6e092e..d9e1de105c 100644
--- a/repl/src/main/scala/spark/repl/SparkJLineReader.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkJLineReader.scala
@@ -3,7 +3,7 @@
* @author Stepan Koltsov
*/
-package spark.repl
+package org.apache.spark.repl
import scala.tools.nsc._
import scala.tools.nsc.interpreter._
diff --git a/repl/src/main/scala/spark/repl/SparkMemberHandlers.scala b/repl/src/main/scala/org/apache/spark/repl/SparkMemberHandlers.scala
index 2980dfcd76..a3409bf665 100644
--- a/repl/src/main/scala/spark/repl/SparkMemberHandlers.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkMemberHandlers.scala
@@ -3,7 +3,7 @@
* @author Martin Odersky
*/
-package spark.repl
+package org.apache.spark.repl
import scala.tools.nsc._
import scala.tools.nsc.interpreter._
diff --git a/repl/src/test/scala/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index 80ae605558..8f9b632c0e 100644
--- a/repl/src/test/scala/spark/repl/ReplSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.repl
+package org.apache.spark.repl
import java.io._
import java.net.URLClassLoader
@@ -41,10 +41,10 @@ class ReplSuite extends FunSuite {
}
}
val interp = new SparkILoop(in, new PrintWriter(out), master)
- spark.repl.Main.interp = interp
+ org.apache.spark.repl.Main.interp = interp
val separator = System.getProperty("path.separator")
interp.process(Array("-classpath", paths.mkString(separator)))
- spark.repl.Main.interp = null
+ org.apache.spark.repl.Main.interp = null
if (interp.sparkContext != null) {
interp.sparkContext.stop()
}
diff --git a/run-example b/run-example
new file mode 100755
index 0000000000..24d83ba5cf
--- /dev/null
+++ b/run-example
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+SCALA_VERSION=2.9.3
+
+# Figure out where the Scala framework is installed
+FWDIR="$(cd `dirname $0`; pwd)"
+
+# Export this as SPARK_HOME
+export SPARK_HOME="$FWDIR"
+
+# Load environment variables from conf/spark-env.sh, if it exists
+if [ -e $FWDIR/conf/spark-env.sh ] ; then
+ . $FWDIR/conf/spark-env.sh
+fi
+
+if [ -z "$1" ]; then
+ echo "Usage: run-example <example-class> [<args>]" >&2
+ exit 1
+fi
+
+# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
+# to avoid the -sources and -doc packages that are built by publish-local.
+EXAMPLES_DIR="$FWDIR"/examples
+SPARK_EXAMPLES_JAR=""
+if [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9T].jar ]; then
+ # Use the JAR from the SBT build
+ export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9T].jar`
+fi
+if [ -e "$EXAMPLES_DIR"/target/spark-examples*[0-9T].jar ]; then
+ # Use the JAR from the Maven build
+ # TODO: this also needs to become an assembly!
+ export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/spark-examples*[0-9T].jar`
+fi
+if [[ -z $SPARK_EXAMPLES_JAR ]]; then
+ echo "Failed to find Spark examples assembly in $FWDIR/examples/target" >&2
+ echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
+ exit 1
+fi
+
+# Since the examples JAR ideally shouldn't include spark-core (that dependency should be
+# "provided"), also add our standard Spark classpath, built using compute-classpath.sh.
+CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
+CLASSPATH="$SPARK_EXAMPLES_JAR:$CLASSPATH"
+
+# Find java binary
+if [ -n "${JAVA_HOME}" ]; then
+ RUNNER="${JAVA_HOME}/bin/java"
+else
+ if [ `command -v java` ]; then
+ RUNNER="java"
+ else
+ echo "JAVA_HOME is not set" >&2
+ exit 1
+ fi
+fi
+
+if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
+ echo -n "Spark Command: "
+ echo "$RUNNER" -cp "$CLASSPATH" "$@"
+ echo "========================================"
+ echo
+fi
+
+exec "$RUNNER" -cp "$CLASSPATH" "$@"
diff --git a/sbt/sbt b/sbt/sbt
index 8c9a42824f..c31a0280ff 100755
--- a/sbt/sbt
+++ b/sbt/sbt
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
@@ -22,7 +22,7 @@ if [ "$MESOS_HOME" != "" ]; then
EXTRA_ARGS="-Djava.library.path=$MESOS_HOME/lib/java"
fi
-export SPARK_HOME=$(cd "$(dirname $0)/.."; pwd)
+export SPARK_HOME=$(cd "$(dirname $0)/.." 2>&1 >/dev/null ; pwd)
export SPARK_TESTING=1 # To put test classes on classpath
-java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=128m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@"
+java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m $EXTRA_ARGS $SBT_OPTS -jar "$SPARK_HOME"/sbt/sbt-launch-*.jar "$@"
diff --git a/run b/spark-class
index 72ee76d722..71dfb9c4e6 100755
--- a/run
+++ b/spark-class
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,6 +17,8 @@
# limitations under the License.
#
+SCALA_VERSION=2.9.3
+
# Figure out where the Scala framework is installed
FWDIR="$(cd `dirname $0`; pwd)"
@@ -64,48 +66,20 @@ case "$1" in
;;
esac
-# Figure out whether to run our class with java or with the scala launcher.
-# In most cases, we'd prefer to execute our process with java because scala
-# creates a shell script as the parent of its Java process, which makes it
-# hard to kill the child with stuff like Process.destroy(). However, for
-# the Spark shell, the wrapper is necessary to properly reset the terminal
-# when we exit, so we allow it to set a variable to launch with scala.
-if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then
- if [ "$SCALA_HOME" ]; then
- RUNNER="${SCALA_HOME}/bin/scala"
- else
- if [ `command -v scala` ]; then
- RUNNER="scala"
- else
- echo "SCALA_HOME is not set and scala is not in PATH" >&2
- exit 1
- fi
- fi
+# Find the java binary
+if [ -n "${JAVA_HOME}" ]; then
+ RUNNER="${JAVA_HOME}/bin/java"
else
- if [ -n "${JAVA_HOME}" ]; then
- RUNNER="${JAVA_HOME}/bin/java"
+ if [ `command -v java` ]; then
+ RUNNER="java"
else
- if [ `command -v java` ]; then
- RUNNER="java"
- else
- echo "JAVA_HOME is not set" >&2
- exit 1
- fi
- fi
- if [[ ! -f "$FWDIR/RELEASE" && -z "$SCALA_LIBRARY_PATH" ]]; then
- if [ -z "$SCALA_HOME" ]; then
- echo "SCALA_HOME is not set" >&2
- exit 1
- fi
- SCALA_LIBRARY_PATH="$SCALA_HOME/lib"
+ echo "JAVA_HOME is not set" >&2
+ exit 1
fi
fi
-# Figure out how much memory to use per executor and set it as an environment
-# variable so that our process sees it and can report it to Mesos
-if [ -z "$SPARK_MEM" ] ; then
- SPARK_MEM="512m"
-fi
+# Set SPARK_MEM if it isn't already set since we also use it for this process
+SPARK_MEM=${SPARK_MEM:-512m}
export SPARK_MEM
# Set JAVA_OPTS to be able to load native libraries and to set heap size
@@ -120,20 +94,11 @@ export JAVA_OPTS
# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!
if [ ! -f "$FWDIR/RELEASE" ]; then
- CORE_DIR="$FWDIR/core"
- EXAMPLES_DIR="$FWDIR/examples"
- REPL_DIR="$FWDIR/repl"
-
# Exit if the user hasn't compiled Spark
- if [ ! -e "$CORE_DIR/target" ]; then
- echo "Failed to find Spark classes in $CORE_DIR/target" >&2
- echo "You need to compile Spark before running this program" >&2
- exit 1
- fi
-
- if [[ "$@" = *repl* && ! -e "$REPL_DIR/target" ]]; then
- echo "Failed to find Spark classes in $REPL_DIR/target" >&2
- echo "You need to compile Spark repl module before running this program" >&2
+ ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
+ if [[ $? != 0 ]]; then
+ echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
+ echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
exit 1
fi
fi
@@ -142,10 +107,11 @@ fi
CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
export CLASSPATH
-if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then
- EXTRA_ARGS="" # Java options will be passed to scala as JAVA_OPTS
-else
- # The JVM doesn't read JAVA_OPTS by default so we need to pass it in
- EXTRA_ARGS="$JAVA_OPTS"
+if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
+ echo -n "Spark Command: "
+ echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
+ echo "========================================"
+ echo
fi
-exec "$RUNNER" -cp "$CLASSPATH" $EXTRA_ARGS "$@"
+
+exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
diff --git a/spark-executor b/spark-executor
index feccbf5cc2..2c07c54843 100755
--- a/spark-executor
+++ b/spark-executor
@@ -19,4 +19,4 @@
FWDIR="`dirname $0`"
echo "Running spark-executor with framework dir = $FWDIR"
-exec $FWDIR/run spark.executor.MesosExecutorBackend
+exec $FWDIR/spark-class org.apache.spark.executor.MesosExecutorBackend
diff --git a/spark-shell b/spark-shell
index 5371fc540a..9608bd3f30 100755
--- a/spark-shell
+++ b/spark-shell
@@ -1,4 +1,4 @@
-#!/bin/bash --posix
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,6 +25,9 @@
# -c <cores> Set the number of cores for REPL to use
#
+# Enter posix mode for bash
+set -o posix
+
FWDIR="`dirname $0`"
for o in "$@"; do
@@ -76,7 +79,7 @@ if [[ ! $? ]]; then
saved_stty=""
fi
-$FWDIR/run $OPTIONS spark.repl.Main "$@"
+$FWDIR/spark-class $OPTIONS org.apache.spark.repl.Main "$@"
# record the exit status lest it be overwritten:
# then reenable echo and propagate the code.
diff --git a/spark-shell.cmd b/spark-shell.cmd
index ec65eabb74..b9b4d4bfb2 100644
--- a/spark-shell.cmd
+++ b/spark-shell.cmd
@@ -19,4 +19,4 @@ rem
set FWDIR=%~dp0
set SPARK_LAUNCH_WITH_SCALA=1
-cmd /V /E /C %FWDIR%run2.cmd spark.repl.Main %*
+cmd /V /E /C %FWDIR%run2.cmd org.apache.spark.repl.Main %*
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 7e6b06d772..7bea069b61 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -19,17 +19,17 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-streaming</artifactId>
<packaging>jar</packaging>
<name>Spark Project Streaming</name>
- <url>http://spark-project.org/</url>
+ <url>http://spark.incubator.apache.org/</url>
<repositories>
<!-- A repository in the local filesystem for the Kafka JAR, which we modified for Scala 2.9 -->
@@ -41,6 +41,11 @@
<dependencies>
<dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
</dependency>
@@ -115,103 +120,4 @@
</plugin>
</plugins>
</build>
-
- <profiles>
- <profile>
- <id>hadoop1</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop1</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>hadoop2</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>hadoop2-yarn</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2-yarn</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- </profiles>
</project>
diff --git a/streaming/src/main/scala/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
index 1e4c1e3742..2d8f072624 100644
--- a/streaming/src/main/scala/spark/streaming/Checkpoint.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
@@ -15,18 +15,19 @@
* limitations under the License.
*/
-package spark.streaming
-
-import spark.{Logging, Utils}
-
-import org.apache.hadoop.fs.{FileUtil, Path}
-import org.apache.hadoop.conf.Configuration
+package org.apache.spark.streaming
import java.io._
-import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream}
import java.util.concurrent.Executors
import java.util.concurrent.RejectedExecutionException
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.conf.Configuration
+
+import org.apache.spark.Logging
+import org.apache.spark.io.CompressionCodec
+
+
private[streaming]
class Checkpoint(@transient ssc: StreamingContext, val checkpointTime: Time)
extends Logging with Serializable {
@@ -49,6 +50,7 @@ class Checkpoint(@transient ssc: StreamingContext, val checkpointTime: Time)
}
}
+
/**
* Convenience class to speed up the writing of graph checkpoint to file
*/
@@ -66,6 +68,8 @@ class CheckpointWriter(checkpointDir: String) extends Logging {
val maxAttempts = 3
val executor = Executors.newFixedThreadPool(1)
+ private val compressionCodec = CompressionCodec.createCodec()
+
// Removed code which validates whether there is only one CheckpointWriter per path 'file' since
// I did not notice any errors - reintroduce it ?
@@ -103,7 +107,7 @@ class CheckpointWriter(checkpointDir: String) extends Logging {
def write(checkpoint: Checkpoint) {
val bos = new ByteArrayOutputStream()
- val zos = new LZFOutputStream(bos)
+ val zos = compressionCodec.compressedOutputStream(bos)
val oos = new ObjectOutputStream(zos)
oos.writeObject(checkpoint)
oos.close()
@@ -137,6 +141,8 @@ object CheckpointReader extends Logging {
val fs = new Path(path).getFileSystem(new Configuration())
val attempts = Seq(new Path(path, "graph"), new Path(path, "graph.bk"), new Path(path), new Path(path + ".bk"))
+ val compressionCodec = CompressionCodec.createCodec()
+
attempts.foreach(file => {
if (fs.exists(file)) {
logInfo("Attempting to load checkpoint from file '" + file + "'")
@@ -147,7 +153,7 @@ object CheckpointReader extends Logging {
// of ObjectInputStream is used to explicitly use the current thread's default class
// loader to find and load classes. This is a well know Java issue and has popped up
// in other places (e.g., http://jira.codehaus.org/browse/GROOVY-1627)
- val zis = new LZFInputStream(fis)
+ val zis = compressionCodec.compressedInputStream(fis)
val ois = new ObjectInputStreamWithLoader(zis, Thread.currentThread().getContextClassLoader)
val cp = ois.readObject.asInstanceOf[Checkpoint]
ois.close()
@@ -170,7 +176,9 @@ object CheckpointReader extends Logging {
}
private[streaming]
-class ObjectInputStreamWithLoader(inputStream_ : InputStream, loader: ClassLoader) extends ObjectInputStream(inputStream_) {
+class ObjectInputStreamWithLoader(inputStream_ : InputStream, loader: ClassLoader)
+ extends ObjectInputStream(inputStream_) {
+
override def resolveClass(desc: ObjectStreamClass): Class[_] = {
try {
return loader.loadClass(desc.getName())
diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStream.scala
index 684d3abb56..80da6bd30b 100644
--- a/streaming/src/main/scala/spark/streaming/DStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/DStream.scala
@@ -15,14 +15,17 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
-import spark.streaming.dstream._
+import org.apache.spark.streaming.dstream._
import StreamingContext._
+import org.apache.spark.util.MetadataCleaner
+
//import Time._
-import spark.{RDD, Logging}
-import spark.storage.StorageLevel
+import org.apache.spark.Logging
+import org.apache.spark.rdd.RDD
+import org.apache.spark.storage.StorageLevel
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
@@ -34,7 +37,7 @@ import org.apache.hadoop.conf.Configuration
/**
* A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous
- * sequence of RDDs (of the same type) representing a continuous stream of data (see [[spark.RDD]]
+ * sequence of RDDs (of the same type) representing a continuous stream of data (see [[org.apache.spark.RDD]]
* for more details on RDDs). DStreams can either be created from live data (such as, data from
* HDFS, Kafka or Flume) or it can be generated by transformation existing DStreams using operations
* such as `map`, `window` and `reduceByKeyAndWindow`. While a Spark Streaming program is running, each
@@ -42,7 +45,7 @@ import org.apache.hadoop.conf.Configuration
* by a parent DStream.
*
* This class contains the basic operations available on all DStreams, such as `map`, `filter` and
- * `window`. In addition, [[spark.streaming.PairDStreamFunctions]] contains operations available
+ * `window`. In addition, [[org.apache.spark.streaming.PairDStreamFunctions]] contains operations available
* only on DStreams of key-value pairs, such as `groupByKeyAndWindow` and `join`. These operations
* are automatically available on any DStream of the right type (e.g., DStream[(Int, Int)] through
* implicit conversions when `spark.streaming.StreamingContext._` is imported.
@@ -209,7 +212,7 @@ abstract class DStream[T: ClassManifest] (
checkpointDuration + "). Please set it to higher than " + checkpointDuration + "."
)
- val metadataCleanerDelay = spark.util.MetadataCleaner.getDelaySeconds
+ val metadataCleanerDelay = MetadataCleaner.getDelaySeconds
logInfo("metadataCleanupDelay = " + metadataCleanerDelay)
assert(
metadataCleanerDelay < 0 || rememberDuration.milliseconds < metadataCleanerDelay * 1000,
diff --git a/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamCheckpointData.scala
index 399ca1c63d..58a0da2870 100644
--- a/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamCheckpointData.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.conf.Configuration
import collection.mutable.HashMap
-import spark.Logging
+import org.apache.spark.Logging
diff --git a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
index c09a332d44..b9a58fded6 100644
--- a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
import dstream.InputDStream
import java.io.{ObjectInputStream, IOException, ObjectOutputStream}
import collection.mutable.ArrayBuffer
-import spark.Logging
+import org.apache.spark.Logging
final private[streaming] class DStreamGraph extends Serializable with Logging {
initLogging()
diff --git a/streaming/src/main/scala/spark/streaming/Duration.scala b/streaming/src/main/scala/org/apache/spark/streaming/Duration.scala
index 12a14e233d..6bf275f5af 100644
--- a/streaming/src/main/scala/spark/streaming/Duration.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Duration.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
-import spark.Utils
+import org.apache.spark.util.Utils
case class Duration (private val millis: Long) {
@@ -57,7 +57,7 @@ case class Duration (private val millis: Long) {
}
/**
- * Helper object that creates instance of [[spark.streaming.Duration]] representing
+ * Helper object that creates instance of [[org.apache.spark.streaming.Duration]] representing
* a given number of milliseconds.
*/
object Milliseconds {
@@ -65,7 +65,7 @@ object Milliseconds {
}
/**
- * Helper object that creates instance of [[spark.streaming.Duration]] representing
+ * Helper object that creates instance of [[org.apache.spark.streaming.Duration]] representing
* a given number of seconds.
*/
object Seconds {
@@ -73,7 +73,7 @@ object Seconds {
}
/**
- * Helper object that creates instance of [[spark.streaming.Duration]] representing
+ * Helper object that creates instance of [[org.apache.spark.streaming.Duration]] representing
* a given number of minutes.
*/
object Minutes {
diff --git a/streaming/src/main/scala/spark/streaming/Interval.scala b/streaming/src/main/scala/org/apache/spark/streaming/Interval.scala
index b30cd969e9..04c994c136 100644
--- a/streaming/src/main/scala/spark/streaming/Interval.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Interval.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
private[streaming]
class Interval(val beginTime: Time, val endTime: Time) {
diff --git a/streaming/src/main/scala/spark/streaming/Job.scala b/streaming/src/main/scala/org/apache/spark/streaming/Job.scala
index ceb3f92b65..2128b7c7a6 100644
--- a/streaming/src/main/scala/spark/streaming/Job.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Job.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
import java.util.concurrent.atomic.AtomicLong
diff --git a/streaming/src/main/scala/spark/streaming/JobManager.scala b/streaming/src/main/scala/org/apache/spark/streaming/JobManager.scala
index a31230689f..5233129506 100644
--- a/streaming/src/main/scala/spark/streaming/JobManager.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/JobManager.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
-import spark.Logging
-import spark.SparkEnv
+import org.apache.spark.Logging
+import org.apache.spark.SparkEnv
import java.util.concurrent.Executors
import collection.mutable.HashMap
import collection.mutable.ArrayBuffer
diff --git a/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/NetworkInputTracker.scala
index d4cf2e568c..aae79a4e6f 100644
--- a/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/NetworkInputTracker.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
-import spark.streaming.dstream.{NetworkInputDStream, NetworkReceiver}
-import spark.streaming.dstream.{StopReceiver, ReportBlock, ReportError}
-import spark.Logging
-import spark.SparkEnv
-import spark.SparkContext._
+import org.apache.spark.streaming.dstream.{NetworkInputDStream, NetworkReceiver}
+import org.apache.spark.streaming.dstream.{StopReceiver, ReportBlock, ReportError}
+import org.apache.spark.Logging
+import org.apache.spark.SparkEnv
+import org.apache.spark.SparkContext._
import scala.collection.mutable.HashMap
import scala.collection.mutable.Queue
diff --git a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/PairDStreamFunctions.scala
index 47bf07bee1..757bc98981 100644
--- a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/PairDStreamFunctions.scala
@@ -15,16 +15,17 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
-import spark.streaming.StreamingContext._
-import spark.streaming.dstream.{ReducedWindowedDStream, StateDStream}
-import spark.streaming.dstream.{CoGroupedDStream, ShuffledDStream}
-import spark.streaming.dstream.{MapValuedDStream, FlatMapValuedDStream}
+import org.apache.spark.streaming.StreamingContext._
+import org.apache.spark.streaming.dstream.{ReducedWindowedDStream, StateDStream}
+import org.apache.spark.streaming.dstream.{CoGroupedDStream, ShuffledDStream}
+import org.apache.spark.streaming.dstream.{MapValuedDStream, FlatMapValuedDStream}
-import spark.{Manifests, RDD, Partitioner, HashPartitioner}
-import spark.SparkContext._
-import spark.storage.StorageLevel
+import org.apache.spark.{Partitioner, HashPartitioner}
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.{Manifests, RDD, PairRDDFunctions}
+import org.apache.spark.storage.StorageLevel
import scala.collection.mutable.ArrayBuffer
@@ -60,7 +61,7 @@ extends Serializable {
}
/**
- * Return a new DStream by applying `groupByKey` on each RDD. The supplied [[spark.Partitioner]]
+ * Return a new DStream by applying `groupByKey` on each RDD. The supplied [[org.apache.spark.Partitioner]]
* is used to control the partitioning of each RDD.
*/
def groupByKey(partitioner: Partitioner): DStream[(K, Seq[V])] = {
@@ -91,7 +92,7 @@ extends Serializable {
/**
* Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are
- * merged using the supplied reduce function. [[spark.Partitioner]] is used to control the
+ * merged using the supplied reduce function. [[org.apache.spark.Partitioner]] is used to control the
* partitioning of each RDD.
*/
def reduceByKey(reduceFunc: (V, V) => V, partitioner: Partitioner): DStream[(K, V)] = {
@@ -101,8 +102,8 @@ extends Serializable {
/**
* Combine elements of each key in DStream's RDDs using custom functions. This is similar to the
- * combineByKey for RDDs. Please refer to combineByKey in [[spark.PairRDDFunctions]] for more
- * information.
+ * combineByKey for RDDs. Please refer to combineByKey in
+ * [[org.apache.spark.rdd.PairRDDFunctions]] for more information.
*/
def combineByKey[C: ClassManifest](
createCombiner: V => C,
@@ -360,7 +361,7 @@ extends Serializable {
/**
* Create a new "state" DStream where the state for each key is updated by applying
* the given function on the previous state of the key and the new values of the key.
- * [[spark.Partitioner]] is used to control the partitioning of each RDD.
+ * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD.
* @param updateFunc State update function. If `this` function returns None, then
* corresponding state key-value pair will be eliminated.
* @param partitioner Partitioner for controlling the partitioning of each RDD in the new DStream.
@@ -379,7 +380,7 @@ extends Serializable {
/**
* Return a new "state" DStream where the state for each key is updated by applying
* the given function on the previous state of the key and the new values of each key.
- * [[spark.Paxrtitioner]] is used to control the partitioning of each RDD.
+ * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD.
* @param updateFunc State update function. If `this` function returns None, then
* corresponding state key-value pair will be eliminated. Note, that
* this function may generate a different a tuple with a different key
diff --git a/streaming/src/main/scala/spark/streaming/Scheduler.scala b/streaming/src/main/scala/org/apache/spark/streaming/Scheduler.scala
index 252cc2a303..ed892e33e6 100644
--- a/streaming/src/main/scala/spark/streaming/Scheduler.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Scheduler.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
import util.{ManualClock, RecurringTimer, Clock}
-import spark.SparkEnv
-import spark.Logging
+import org.apache.spark.SparkEnv
+import org.apache.spark.Logging
private[streaming]
class Scheduler(ssc: StreamingContext) extends Logging {
@@ -34,7 +34,8 @@ class Scheduler(ssc: StreamingContext) extends Logging {
null
}
- val clockClass = System.getProperty("spark.streaming.clock", "spark.streaming.util.SystemClock")
+ val clockClass = System.getProperty(
+ "spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock")
val clock = Class.forName(clockClass).newInstance().asInstanceOf[Clock]
val timer = new RecurringTimer(clock, ssc.graph.batchDuration.milliseconds,
longTime => generateJobs(new Time(longTime)))
diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index ffd656227d..878725c705 100644
--- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -15,21 +15,22 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
import akka.actor.Props
import akka.actor.SupervisorStrategy
import akka.zeromq.Subscribe
-import spark.streaming.dstream._
+import org.apache.spark.streaming.dstream._
-import spark._
-import spark.streaming.receivers.ActorReceiver
-import spark.streaming.receivers.ReceiverSupervisorStrategy
-import spark.streaming.receivers.ZeroMQReceiver
-import spark.storage.StorageLevel
-import spark.util.MetadataCleaner
-import spark.streaming.receivers.ActorReceiver
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.receivers.ActorReceiver
+import org.apache.spark.streaming.receivers.ReceiverSupervisorStrategy
+import org.apache.spark.streaming.receivers.ZeroMQReceiver
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.MetadataCleaner
+import org.apache.spark.streaming.receivers.ActorReceiver
import scala.collection.mutable.Queue
import scala.collection.Map
@@ -183,6 +184,7 @@ class StreamingContext private (
/**
* Create an input stream with any arbitrary user implemented network receiver.
+ * Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html
* @param receiver Custom implementation of NetworkReceiver
*/
def networkStream[T: ClassManifest](
@@ -195,6 +197,7 @@ class StreamingContext private (
/**
* Create an input stream with any arbitrary user implemented actor receiver.
+ * Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html
* @param props Props object defining creation of the actor
* @param name Name of the actor
* @param storageLevel RDD storage level. Defaults to memory-only.
diff --git a/streaming/src/main/scala/spark/streaming/Time.scala b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala
index ad5eab9dd2..2678334f53 100644
--- a/streaming/src/main/scala/spark/streaming/Time.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
/**
* This is a simple class that represents an absolute instant of time.
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala
index 7dcb1d713d..d1932b6b05 100644
--- a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala
@@ -15,17 +15,17 @@
* limitations under the License.
*/
-package spark.streaming.api.java
+package org.apache.spark.streaming.api.java
-import spark.streaming.{Duration, Time, DStream}
-import spark.api.java.function.{Function => JFunction}
-import spark.api.java.JavaRDD
-import spark.storage.StorageLevel
-import spark.RDD
+import org.apache.spark.streaming.{Duration, Time, DStream}
+import org.apache.spark.api.java.function.{Function => JFunction}
+import org.apache.spark.api.java.JavaRDD
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.rdd.RDD
/**
* A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous
- * sequence of RDDs (of the same type) representing a continuous stream of data (see [[spark.RDD]]
+ * sequence of RDDs (of the same type) representing a continuous stream of data (see [[org.apache.spark.RDD]]
* for more details on RDDs). DStreams can either be created from live data (such as, data from
* HDFS, Kafka or Flume) or it can be generated by transformation existing DStreams using operations
* such as `map`, `window` and `reduceByKeyAndWindow`. While a Spark Streaming program is running, each
@@ -33,7 +33,7 @@ import spark.RDD
* by a parent DStream.
*
* This class contains the basic operations available on all DStreams, such as `map`, `filter` and
- * `window`. In addition, [[spark.streaming.api.java.JavaPairDStream]] contains operations available
+ * `window`. In addition, [[org.apache.spark.streaming.api.java.JavaPairDStream]] contains operations available
* only on DStreams of key-value pairs, such as `groupByKeyAndWindow` and `join`.
*
* DStreams internally is characterized by a few basic properties:
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala
index 3ab5c1fdde..459695b7ca 100644
--- a/streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala
@@ -15,18 +15,18 @@
* limitations under the License.
*/
-package spark.streaming.api.java
+package org.apache.spark.streaming.api.java
import java.util.{List => JList}
import java.lang.{Long => JLong}
import scala.collection.JavaConversions._
-import spark.streaming._
-import spark.api.java.{JavaPairRDD, JavaRDDLike, JavaRDD}
-import spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _}
+import org.apache.spark.streaming._
+import org.apache.spark.api.java.{JavaPairRDD, JavaRDDLike, JavaRDD}
+import org.apache.spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _}
import java.util
-import spark.RDD
+import org.apache.spark.rdd.RDD
import JavaDStream._
trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T, R]]
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
index ccd15563b0..978fca33ad 100644
--- a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
@@ -15,24 +15,25 @@
* limitations under the License.
*/
-package spark.streaming.api.java
+package org.apache.spark.streaming.api.java
import java.util.{List => JList}
import java.lang.{Long => JLong}
import scala.collection.JavaConversions._
-import spark.streaming._
-import spark.streaming.StreamingContext._
-import spark.api.java.function.{Function => JFunction, Function2 => JFunction2}
-import spark.{RDD, Partitioner}
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.StreamingContext._
+import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2}
+import org.apache.spark.Partitioner
import org.apache.hadoop.mapred.{JobConf, OutputFormat}
import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat}
import org.apache.hadoop.conf.Configuration
-import spark.api.java.{JavaRDD, JavaPairRDD}
-import spark.storage.StorageLevel
+import org.apache.spark.api.java.{JavaUtils, JavaRDD, JavaPairRDD}
+import org.apache.spark.storage.StorageLevel
import com.google.common.base.Optional
-import spark.RDD
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.PairRDDFunctions
class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
implicit val kManifiest: ClassManifest[K],
@@ -114,7 +115,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
/**
* Return a new DStream by applying `groupByKey` on each RDD of `this` DStream.
* Therefore, the values for each key in `this` DStream's RDDs are grouped into a
- * single sequence to generate the RDDs of the new DStream. [[spark.Partitioner]]
+ * single sequence to generate the RDDs of the new DStream. [[org.apache.spark.Partitioner]]
* is used to control the partitioning of each RDD.
*/
def groupByKey(partitioner: Partitioner): JavaPairDStream[K, JList[V]] =
@@ -138,7 +139,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
/**
* Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are
- * merged using the supplied reduce function. [[spark.Partitioner]] is used to control the
+ * merged using the supplied reduce function. [[org.apache.spark.Partitioner]] is used to control the
* partitioning of each RDD.
*/
def reduceByKey(func: JFunction2[V, V, V], partitioner: Partitioner): JavaPairDStream[K, V] = {
@@ -147,7 +148,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
/**
* Combine elements of each key in DStream's RDDs using custom function. This is similar to the
- * combineByKey for RDDs. Please refer to combineByKey in [[spark.PairRDDFunctions]] for more
+ * combineByKey for RDDs. Please refer to combineByKey in [[PairRDDFunctions]] for more
* information.
*/
def combineByKey[C](createCombiner: JFunction[V, C],
@@ -401,10 +402,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
(Seq[V], Option[S]) => Option[S] = {
val scalaFunc: (Seq[V], Option[S]) => Option[S] = (values, state) => {
val list: JList[V] = values
- val scalaState: Optional[S] = state match {
- case Some(s) => Optional.of(s)
- case _ => Optional.absent()
- }
+ val scalaState: Optional[S] = JavaUtils.optionToOptional(state)
val result: Optional[S] = in.apply(list, scalaState)
result.isPresent match {
case true => Some(result.get())
@@ -448,7 +446,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
/**
* Create a new "state" DStream where the state for each key is updated by applying
* the given function on the previous state of the key and the new values of the key.
- * [[spark.Partitioner]] is used to control the partitioning of each RDD.
+ * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD.
* @param updateFunc State update function. If `this` function returns None, then
* corresponding state key-value pair will be eliminated.
* @param partitioner Partitioner for controlling the partitioning of each RDD in the new DStream.
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
index b7720ad0ea..54ba3e6025 100644
--- a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
@@ -15,25 +15,29 @@
* limitations under the License.
*/
-package spark.streaming.api.java
-
-import spark.streaming._
-import receivers.{ActorReceiver, ReceiverSupervisorStrategy}
-import spark.streaming.dstream._
-import spark.storage.StorageLevel
-import spark.api.java.function.{Function => JFunction, Function2 => JFunction2}
-import spark.api.java.{JavaSparkContext, JavaRDD}
+package org.apache.spark.streaming.api.java
+
+import java.lang.{Long => JLong, Integer => JInt}
+import java.io.InputStream
+import java.util.{Map => JMap}
+
+import scala.collection.JavaConversions._
+
import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
import twitter4j.Status
import akka.actor.Props
import akka.actor.SupervisorStrategy
import akka.zeromq.Subscribe
-import scala.collection.JavaConversions._
-import java.lang.{Long => JLong, Integer => JInt}
-import java.io.InputStream
-import java.util.{Map => JMap}
import twitter4j.auth.Authorization
+import org.apache.spark.rdd.RDD
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2}
+import org.apache.spark.api.java.{JavaSparkContext, JavaRDD}
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.dstream._
+import org.apache.spark.streaming.receivers.{ActorReceiver, ReceiverSupervisorStrategy}
+
/**
* A StreamingContext is the main entry point for Spark Streaming functionality. Besides the basic
* information (such as, cluster URL and job name) to internally create a SparkContext, it provides
@@ -537,7 +541,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
def queueStream[T](queue: java.util.Queue[JavaRDD[T]]): JavaDStream[T] = {
implicit val cm: ClassManifest[T] =
implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]]
- val sQueue = new scala.collection.mutable.Queue[spark.RDD[T]]
+ val sQueue = new scala.collection.mutable.Queue[RDD[T]]
sQueue.enqueue(queue.map(_.rdd).toSeq: _*)
ssc.queueStream(sQueue)
}
@@ -554,7 +558,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
def queueStream[T](queue: java.util.Queue[JavaRDD[T]], oneAtATime: Boolean): JavaDStream[T] = {
implicit val cm: ClassManifest[T] =
implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]]
- val sQueue = new scala.collection.mutable.Queue[spark.RDD[T]]
+ val sQueue = new scala.collection.mutable.Queue[RDD[T]]
sQueue.enqueue(queue.map(_.rdd).toSeq: _*)
ssc.queueStream(sQueue, oneAtATime)
}
@@ -575,7 +579,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
defaultRDD: JavaRDD[T]): JavaDStream[T] = {
implicit val cm: ClassManifest[T] =
implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]]
- val sQueue = new scala.collection.mutable.Queue[spark.RDD[T]]
+ val sQueue = new scala.collection.mutable.Queue[RDD[T]]
sQueue.enqueue(queue.map(_.rdd).toSeq: _*)
ssc.queueStream(sQueue, oneAtATime, defaultRDD.rdd)
}
diff --git a/streaming/src/main/scala/spark/streaming/dstream/CoGroupedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/CoGroupedDStream.scala
index 99553d295d..4eddc755b9 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/CoGroupedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/CoGroupedDStream.scala
@@ -15,11 +15,12 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.{RDD, Partitioner}
-import spark.rdd.CoGroupedRDD
-import spark.streaming.{Time, DStream, Duration}
+import org.apache.spark.Partitioner
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.CoGroupedRDD
+import org.apache.spark.streaming.{Time, DStream, Duration}
private[streaming]
class CoGroupedDStream[K : ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/ConstantInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala
index 095137092a..a9a05c9981 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/ConstantInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.RDD
-import spark.streaming.{Time, StreamingContext}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.{Time, StreamingContext}
/**
* An input stream that always returns the same RDD on each timestep. Useful for testing.
diff --git a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala
index de0536125d..fea0573b77 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.RDD
-import spark.rdd.UnionRDD
-import spark.streaming.{DStreamCheckpointData, StreamingContext, Time}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.UnionRDD
+import org.apache.spark.streaming.{DStreamCheckpointData, StreamingContext, Time}
import org.apache.hadoop.fs.{FileSystem, Path, PathFilter}
import org.apache.hadoop.conf.Configuration
diff --git a/streaming/src/main/scala/spark/streaming/dstream/FilteredDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala
index 9d8c5c3175..91ee2c1a36 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/FilteredDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
private[streaming]
class FilteredDStream[T: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/FlatMapValuedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala
index 78d7117f0f..ca7d7ca49e 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/FlatMapValuedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
-import spark.SparkContext._
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext._
private[streaming]
class FlatMapValuedDStream[K: ClassManifest, V: ClassManifest, U: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/FlatMappedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala
index d13bebb10f..b37966f9a7 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/FlatMappedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
private[streaming]
class FlatMappedDStream[T: ClassManifest, U: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/FlumeInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlumeInputDStream.scala
index 4906f503c2..18de772946 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/FlumeInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlumeInputDStream.scala
@@ -15,12 +15,13 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.StreamingContext
+import java.net.InetSocketAddress
+import java.io.{ObjectInput, ObjectOutput, Externalizable}
+import java.nio.ByteBuffer
-import spark.Utils
-import spark.storage.StorageLevel
+import scala.collection.JavaConversions._
import org.apache.flume.source.avro.AvroSourceProtocol
import org.apache.flume.source.avro.AvroFlumeEvent
@@ -28,11 +29,9 @@ import org.apache.flume.source.avro.Status
import org.apache.avro.ipc.specific.SpecificResponder
import org.apache.avro.ipc.NettyServer
-import scala.collection.JavaConversions._
-
-import java.net.InetSocketAddress
-import java.io.{ObjectInput, ObjectOutput, Externalizable}
-import java.nio.ByteBuffer
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.util.Utils
+import org.apache.spark.storage.StorageLevel
private[streaming]
class FlumeInputDStream[T: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/ForEachDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala
index 7df537eb56..e21bac4602 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/ForEachDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.RDD
-import spark.streaming.{Duration, DStream, Job, Time}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.{Duration, DStream, Job, Time}
private[streaming]
class ForEachDStream[T: ClassManifest] (
diff --git a/streaming/src/main/scala/spark/streaming/dstream/GlommedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala
index 06fda6fe8e..4294b07d91 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/GlommedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
private[streaming]
class GlommedDStream[T: ClassManifest](parent: DStream[T])
diff --git a/streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
index 4dbdec459d..674b27118c 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Time, Duration, StreamingContext, DStream}
+import org.apache.spark.streaming.{Time, Duration, StreamingContext, DStream}
/**
* This is the abstract base class for all input streams. This class provides to methods
diff --git a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/KafkaInputDStream.scala
index 6ee588af15..51e913675d 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/KafkaInputDStream.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.Logging
-import spark.storage.StorageLevel
-import spark.streaming.{Time, DStreamCheckpointData, StreamingContext}
+import org.apache.spark.Logging
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{Time, DStreamCheckpointData, StreamingContext}
import java.util.Properties
import java.util.concurrent.Executors
diff --git a/streaming/src/main/scala/spark/streaming/dstream/MapPartitionedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala
index af41a1b9ac..5329601a6f 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/MapPartitionedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
private[streaming]
class MapPartitionedDStream[T: ClassManifest, U: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/MapValuedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala
index 8d8a6161c6..8290df90a2 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/MapValuedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
-import spark.SparkContext._
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext._
private[streaming]
class MapValuedDStream[K: ClassManifest, V: ClassManifest, U: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/MappedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala
index 3fda84a38a..b1682afea3 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/MappedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
private[streaming]
class MappedDStream[T: ClassManifest, U: ClassManifest] (
diff --git a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala
index 344b41c4d0..31f9891560 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala
@@ -15,30 +15,29 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Time, StreamingContext, AddBlocks, RegisterReceiver, DeregisterReceiver}
-
-import spark.{Logging, SparkEnv, RDD}
-import spark.rdd.BlockRDD
-import spark.storage.StorageLevel
+import java.util.concurrent.ArrayBlockingQueue
+import java.nio.ByteBuffer
import scala.collection.mutable.ArrayBuffer
-import java.nio.ByteBuffer
-
import akka.actor.{Props, Actor}
import akka.pattern.ask
import akka.dispatch.Await
import akka.util.duration._
-import spark.streaming.util.{RecurringTimer, SystemClock}
-import java.util.concurrent.ArrayBlockingQueue
+
+import org.apache.spark.streaming.util.{RecurringTimer, SystemClock}
+import org.apache.spark.streaming._
+import org.apache.spark.{Logging, SparkEnv}
+import org.apache.spark.rdd.{RDD, BlockRDD}
+import org.apache.spark.storage.StorageLevel
/**
* Abstract class for defining any InputDStream that has to start a receiver on worker
* nodes to receive external data. Specific implementations of NetworkInputDStream must
* define the getReceiver() function that gets the receiver object of type
- * [[spark.streaming.dstream.NetworkReceiver]] that will be sent to the workers to receive
+ * [[org.apache.spark.streaming.dstream.NetworkReceiver]] that will be sent to the workers to receive
* data.
* @param ssc_ Streaming context that will execute this input stream
* @tparam T Class type of the object of this stream
@@ -83,7 +82,7 @@ private[streaming] case class ReportError(msg: String) extends NetworkReceiverMe
/**
* Abstract class of a receiver that can be run on worker nodes to receive external data. See
- * [[spark.streaming.dstream.NetworkInputDStream]] for an explanation.
+ * [[org.apache.spark.streaming.dstream.NetworkInputDStream]] for an explanation.
*/
abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Logging {
@@ -145,8 +144,8 @@ abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Log
}
/**
- * Stops the receiver and reports to exception to the tracker.
- * This should be called whenever an exception has happened on any thread
+ * Stops the receiver and reports exception to the tracker.
+ * This should be called whenever an exception is to be handled on any thread
* of the receiver.
*/
protected def stopOnError(e: Exception) {
@@ -202,7 +201,7 @@ abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Log
}
/**
- * Batches objects created by a [[spark.streaming.NetworkReceiver]] and puts them into
+ * Batches objects created by a [[org.apache.spark.streaming.dstream.NetworkReceiver]] and puts them into
* appropriately named blocks at regular intervals. This class starts two threads,
* one to periodically start a new batch and prepare the previous batch of as a block,
* the other to push the blocks into the block manager.
diff --git a/streaming/src/main/scala/spark/streaming/dstream/PluggableInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala
index 33f7cd063f..15782f5c11 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/PluggableInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.StreamingContext
+import org.apache.spark.streaming.StreamingContext
private[streaming]
class PluggableInputDStream[T: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/QueueInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala
index b269061b73..7d9f3521b1 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/QueueInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.RDD
-import spark.rdd.UnionRDD
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.UnionRDD
import scala.collection.mutable.Queue
import scala.collection.mutable.ArrayBuffer
-import spark.streaming.{Time, StreamingContext}
+import org.apache.spark.streaming.{Time, StreamingContext}
private[streaming]
class QueueInputDStream[T: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala
index 236f74f575..c91f12ecd7 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.Logging
-import spark.storage.StorageLevel
-import spark.streaming.StreamingContext
+import org.apache.spark.Logging
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.StreamingContext
import java.net.InetSocketAddress
import java.nio.ByteBuffer
diff --git a/streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala
index 96260501ab..b88a4db959 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala
@@ -15,18 +15,18 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.StreamingContext._
-import spark.RDD
-import spark.rdd.{CoGroupedRDD, MapPartitionsRDD}
-import spark.Partitioner
-import spark.SparkContext._
-import spark.storage.StorageLevel
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.{CoGroupedRDD, MapPartitionsRDD}
+import org.apache.spark.Partitioner
+import org.apache.spark.SparkContext._
+import org.apache.spark.storage.StorageLevel
import scala.collection.mutable.ArrayBuffer
-import spark.streaming.{Duration, Interval, Time, DStream}
+import org.apache.spark.streaming.{Duration, Interval, Time, DStream}
private[streaming]
class ReducedWindowedDStream[K: ClassManifest, V: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/ShuffledDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala
index 83b57b27f7..a95e66d761 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/ShuffledDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala
@@ -15,11 +15,12 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.{RDD, Partitioner}
-import spark.SparkContext._
-import spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.Partitioner
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext._
+import org.apache.spark.streaming.{Duration, DStream, Time}
private[streaming]
class ShuffledDStream[K: ClassManifest, V: ClassManifest, C: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
index 5877b10e0e..e2539c7396 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/SocketInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
@@ -15,11 +15,11 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.StreamingContext
-import spark.storage.StorageLevel
-import spark.util.NextIterator
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.NextIterator
import java.io._
import java.net.Socket
diff --git a/streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala
index 4b46613d5e..362a6bf4cc 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.RDD
-import spark.Partitioner
-import spark.SparkContext._
-import spark.storage.StorageLevel
-import spark.streaming.{Duration, Time, DStream}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.Partitioner
+import org.apache.spark.SparkContext._
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{Duration, Time, DStream}
private[streaming]
class StateDStream[K: ClassManifest, V: ClassManifest, S: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/TransformedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
index e7fbc5bbcf..60485adef9 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/TransformedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.RDD
-import spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.{Duration, DStream, Time}
private[streaming]
class TransformedDStream[T: ClassManifest, U: ClassManifest] (
diff --git a/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TwitterInputDStream.scala
index f09a8b9f90..387e15b0e6 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TwitterInputDStream.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark._
-import spark.streaming._
+import org.apache.spark._
+import org.apache.spark.streaming._
import storage.StorageLevel
import twitter4j._
import twitter4j.auth.Authorization
diff --git a/streaming/src/main/scala/spark/streaming/dstream/UnionDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala
index 3eaa9a7e7f..c696bb70a8 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/UnionDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
import collection.mutable.ArrayBuffer
-import spark.rdd.UnionRDD
+import org.apache.spark.rdd.UnionRDD
private[streaming]
class UnionDStream[T: ClassManifest](parents: Array[DStream[T]])
diff --git a/streaming/src/main/scala/spark/streaming/dstream/WindowedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala
index fd24d61730..3c57294269 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/WindowedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
-import spark.RDD
-import spark.rdd.UnionRDD
-import spark.storage.StorageLevel
-import spark.streaming.{Duration, Interval, Time, DStream}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.UnionRDD
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{Duration, Interval, Time, DStream}
private[streaming]
class WindowedDStream[T: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala
index 2d9937eab8..4b5d8c467e 100644
--- a/streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package spark.streaming.receivers
+package org.apache.spark.streaming.receivers
import akka.actor.{ Actor, PoisonPill, Props, SupervisorStrategy }
import akka.actor.{ actorRef2Scala, ActorRef }
import akka.actor.{ PossiblyHarmful, OneForOneStrategy }
-import spark.storage.StorageLevel
-import spark.streaming.dstream.NetworkReceiver
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.dstream.NetworkReceiver
import java.util.concurrent.atomic.AtomicInteger
@@ -45,6 +45,8 @@ object ReceiverSupervisorStrategy {
* A receiver trait to be mixed in with your Actor to gain access to
* pushBlock API.
*
+ * Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html
+ *
* @example {{{
* class MyActor extends Actor with Receiver{
* def receive {
diff --git a/streaming/src/main/scala/spark/streaming/receivers/ZeroMQReceiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala
index 22d554e7e4..043bb8c8bf 100644
--- a/streaming/src/main/scala/spark/streaming/receivers/ZeroMQReceiver.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.streaming.receivers
+package org.apache.spark.streaming.receivers
import akka.actor.Actor
import akka.zeromq._
-import spark.Logging
+import org.apache.spark.Logging
/**
* A receiver to subscribe to ZeroMQ stream.
diff --git a/streaming/src/main/scala/spark/streaming/util/Clock.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/Clock.scala
index d9ac722df5..f67bb2f6ac 100644
--- a/streaming/src/main/scala/spark/streaming/util/Clock.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/Clock.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.streaming.util
+package org.apache.spark.streaming.util
private[streaming]
trait Clock {
diff --git a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/MasterFailureTest.scala
index 8ce5d8daf5..6977957126 100644
--- a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/MasterFailureTest.scala
@@ -15,11 +15,12 @@
* limitations under the License.
*/
-package spark.streaming.util
+package org.apache.spark.streaming.util
-import spark.{Logging, RDD}
-import spark.streaming._
-import spark.streaming.dstream.ForEachDStream
+import org.apache.spark.Logging
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.dstream.ForEachDStream
import StreamingContext._
import scala.util.Random
diff --git a/streaming/src/main/scala/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
index bf04120293..4e6ce6eabd 100644
--- a/streaming/src/main/scala/spark/streaming/util/RawTextHelper.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming.util
+package org.apache.spark.streaming.util
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap}
import scala.collection.JavaConversions.mapAsScalaMap
diff --git a/streaming/src/main/scala/spark/streaming/util/RawTextSender.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala
index 5cc6ad9dee..fc8655a083 100644
--- a/streaming/src/main/scala/spark/streaming/util/RawTextSender.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala
@@ -15,15 +15,16 @@
* limitations under the License.
*/
-package spark.streaming.util
+package org.apache.spark.streaming.util
import java.nio.ByteBuffer
-import spark.util.{RateLimitedOutputStream, IntParam}
+import org.apache.spark.util.{RateLimitedOutputStream, IntParam}
import java.net.ServerSocket
-import spark.{Logging, KryoSerializer}
+import org.apache.spark.{Logging}
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream
import scala.io.Source
import java.io.IOException
+import org.apache.spark.serializer.KryoSerializer
/**
* A helper program that sends blocks of Kryo-serialized text strings out on a socket at a
diff --git a/streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
index 7ecc44236d..d644240405 100644
--- a/streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.streaming.util
+package org.apache.spark.streaming.util
private[streaming]
class RecurringTimer(val clock: Clock, val period: Long, val callback: (Long) => Unit) {
diff --git a/streaming/src/test/java/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
index 3b93790baa..c0d729ff87 100644
--- a/streaming/src/test/java/spark/streaming/JavaAPISuite.java
+++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.streaming;
+package org.apache.spark.streaming;
import com.google.common.base.Optional;
import com.google.common.collect.Lists;
@@ -28,20 +28,20 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import scala.Tuple2;
-import spark.HashPartitioner;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaRDDLike;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.*;
-import spark.storage.StorageLevel;
-import spark.streaming.api.java.JavaDStream;
-import spark.streaming.api.java.JavaPairDStream;
-import spark.streaming.api.java.JavaStreamingContext;
-import spark.streaming.JavaTestUtils;
-import spark.streaming.JavaCheckpointTestUtils;
-import spark.streaming.InputStreamsSuite;
+import org.apache.spark.HashPartitioner;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaRDDLike;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.*;
+import org.apache.spark.storage.StorageLevel;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.streaming.JavaTestUtils;
+import org.apache.spark.streaming.JavaCheckpointTestUtils;
+import org.apache.spark.streaming.InputStreamsSuite;
import java.io.*;
import java.util.*;
@@ -59,7 +59,7 @@ public class JavaAPISuite implements Serializable {
@Before
public void setUp() {
- System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock");
+ System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock");
ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000));
ssc.checkpoint("checkpoint");
}
diff --git a/streaming/src/test/java/spark/streaming/JavaTestUtils.scala b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala
index f9d25db8da..8a6604904d 100644
--- a/streaming/src/test/java/spark/streaming/JavaTestUtils.scala
+++ b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala
@@ -15,20 +15,21 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
import collection.mutable.{SynchronizedBuffer, ArrayBuffer}
import java.util.{List => JList}
-import spark.streaming.api.java.{JavaPairDStream, JavaDStreamLike, JavaDStream, JavaStreamingContext}
-import spark.streaming._
+import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStreamLike, JavaDStream, JavaStreamingContext}
+import org.apache.spark.streaming._
import java.util.ArrayList
import collection.JavaConversions._
+import org.apache.spark.api.java.JavaRDDLike
/** Exposes streaming test functionality in a Java-friendly way. */
trait JavaTestBase extends TestSuiteBase {
/**
- * Create a [[spark.streaming.TestInputStream]] and attach it to the supplied context.
+ * Create a [[org.apache.spark.streaming.TestInputStream]] and attach it to the supplied context.
* The stream will be derived from the supplied lists of Java objects.
**/
def attachTestInputStream[T](
@@ -46,11 +47,11 @@ trait JavaTestBase extends TestSuiteBase {
/**
* Attach a provided stream to it's associated StreamingContext as a
- * [[spark.streaming.TestOutputStream]].
+ * [[org.apache.spark.streaming.TestOutputStream]].
**/
- def attachTestOutputStream[T, This <: spark.streaming.api.java.JavaDStreamLike[T, This, R],
- R <: spark.api.java.JavaRDDLike[T, R]](
- dstream: JavaDStreamLike[T, This, R]) = {
+ def attachTestOutputStream[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T, R]](
+ dstream: JavaDStreamLike[T, This, R]) =
+ {
implicit val cm: ClassManifest[T] =
implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]]
val ostream = new TestOutputStream(dstream.dstream,
diff --git a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala
index 67e3e0cd30..11586f72b6 100644
--- a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.StreamingContext._
import scala.runtime.RichInt
import util.ManualClock
@@ -26,7 +26,7 @@ class BasicOperationsSuite extends TestSuiteBase {
override def framework() = "BasicOperationsSuite"
before {
- System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock")
+ System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
}
after {
diff --git a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
index 8c639648f0..a327de80b3 100644
--- a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
import dstream.FileInputDStream
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.StreamingContext._
import java.io.File
import runtime.RichInt
import org.scalatest.BeforeAndAfter
@@ -36,7 +36,7 @@ import com.google.common.io.Files
*/
class CheckpointSuite extends TestSuiteBase with BeforeAndAfter {
- System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock")
+ System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
before {
FileUtils.deleteDirectory(new File(checkpointDir))
@@ -63,7 +63,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter {
assert(batchDuration === Milliseconds(500), "batchDuration for this test must be 1 second")
- System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock")
+ System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
val stateStreamCheckpointInterval = Seconds(1)
diff --git a/streaming/src/test/scala/spark/streaming/FailureSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala
index 7fc649fe27..6337c5359c 100644
--- a/streaming/src/test/scala/spark/streaming/FailureSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
-import spark.Logging
-import spark.streaming.util.MasterFailureTest
+import org.apache.spark.Logging
+import org.apache.spark.streaming.util.MasterFailureTest
import StreamingContext._
import org.scalatest.{FunSuite, BeforeAndAfter}
diff --git a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
index 1c5419b16d..42e3e51e3f 100644
--- a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
import akka.actor.Actor
import akka.actor.IO
@@ -29,9 +29,9 @@ import java.io.{File, BufferedWriter, OutputStreamWriter}
import java.util.concurrent.{TimeUnit, ArrayBlockingQueue}
import collection.mutable.{SynchronizedBuffer, ArrayBuffer}
import util.ManualClock
-import spark.storage.StorageLevel
-import spark.streaming.receivers.Receiver
-import spark.Logging
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.receivers.Receiver
+import org.apache.spark.Logging
import scala.util.Random
import org.apache.commons.io.FileUtils
import org.scalatest.BeforeAndAfter
@@ -52,7 +52,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
override def checkpointDir = "checkpoint"
before {
- System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock")
+ System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
}
after {
@@ -207,7 +207,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
FileUtils.deleteDirectory(testDir)
// Enable manual clock back again for other tests
- System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock")
+ System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
}
diff --git a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
index cb34b5a7cc..37dd9c4cc6 100644
--- a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
@@ -15,12 +15,10 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
-import spark.streaming.dstream.{InputDStream, ForEachDStream}
-import spark.streaming.util.ManualClock
-
-import spark.{RDD, Logging}
+import org.apache.spark.streaming.dstream.{InputDStream, ForEachDStream}
+import org.apache.spark.streaming.util.ManualClock
import collection.mutable.ArrayBuffer
import collection.mutable.SynchronizedBuffer
@@ -29,6 +27,9 @@ import java.io.{ObjectInputStream, IOException}
import org.scalatest.{BeforeAndAfter, FunSuite}
+import org.apache.spark.Logging
+import org.apache.spark.rdd.RDD
+
/**
* This is a input stream just for the testsuites. This is equivalent to a checkpointable,
* replayable, reliable message queue like Kafka. It requires a sequence as input, and
diff --git a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala
index 894b765fc6..f50e05c0d8 100644
--- a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package spark.streaming
+package org.apache.spark.streaming
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.StreamingContext._
import collection.mutable.ArrayBuffer
class WindowOperationsSuite extends TestSuiteBase {
- System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock")
+ System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
override def framework = "WindowOperationsSuite"
diff --git a/tools/pom.xml b/tools/pom.xml
new file mode 100644
index 0000000000..77646a6816
--- /dev/null
+++ b/tools/pom.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-parent</artifactId>
+ <version>0.8.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-tools</artifactId>
+ <packaging>jar</packaging>
+ <name>Spark Project Tools</name>
+ <url>http://spark.incubator.apache.org/</url>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-streaming</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_${scala.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <outputDirectory>target/scala-${scala.version}/classes</outputDirectory>
+ <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala b/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala
new file mode 100644
index 0000000000..f824c472ae
--- /dev/null
+++ b/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala
@@ -0,0 +1,360 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tools
+
+import java.lang.reflect.Method
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark._
+import org.apache.spark.api.java._
+import org.apache.spark.rdd.{RDD, DoubleRDDFunctions, PairRDDFunctions, OrderedRDDFunctions}
+import org.apache.spark.streaming.{PairDStreamFunctions, DStream, StreamingContext}
+import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStream, JavaStreamingContext}
+
+
+private[spark] abstract class SparkType(val name: String)
+
+private[spark] case class BaseType(override val name: String) extends SparkType(name) {
+ override def toString: String = {
+ name
+ }
+}
+
+private[spark]
+case class ParameterizedType(override val name: String,
+ parameters: Seq[SparkType],
+ typebounds: String = "") extends SparkType(name) {
+ override def toString: String = {
+ if (typebounds != "") {
+ typebounds + " " + name + "<" + parameters.mkString(", ") + ">"
+ } else {
+ name + "<" + parameters.mkString(", ") + ">"
+ }
+ }
+}
+
+private[spark]
+case class SparkMethod(name: String, returnType: SparkType, parameters: Seq[SparkType]) {
+ override def toString: String = {
+ returnType + " " + name + "(" + parameters.mkString(", ") + ")"
+ }
+}
+
+/**
+ * A tool for identifying methods that need to be ported from Scala to the Java API.
+ *
+ * It uses reflection to find methods in the Scala API and rewrites those methods' signatures
+ * into appropriate Java equivalents. If those equivalent methods have not been implemented in
+ * the Java API, they are printed.
+ */
+object JavaAPICompletenessChecker {
+
+ private def parseType(typeStr: String): SparkType = {
+ if (!typeStr.contains("<")) {
+ // Base types might begin with "class" or "interface", so we have to strip that off:
+ BaseType(typeStr.trim.split(" ").last)
+ } else if (typeStr.endsWith("[]")) {
+ ParameterizedType("Array", Seq(parseType(typeStr.stripSuffix("[]"))))
+ } else {
+ val parts = typeStr.split("<", 2)
+ val name = parts(0).trim
+ assert (parts(1).last == '>')
+ val parameters = parts(1).dropRight(1)
+ ParameterizedType(name, parseTypeList(parameters))
+ }
+ }
+
+ private def parseTypeList(typeStr: String): Seq[SparkType] = {
+ val types: ArrayBuffer[SparkType] = new ArrayBuffer[SparkType]
+ var stack = 0
+ var token: StringBuffer = new StringBuffer()
+ for (c <- typeStr.trim) {
+ if (c == ',' && stack == 0) {
+ types += parseType(token.toString)
+ token = new StringBuffer()
+ } else if (c == ' ' && stack != 0) {
+ // continue
+ } else {
+ if (c == '<') {
+ stack += 1
+ } else if (c == '>') {
+ stack -= 1
+ }
+ token.append(c)
+ }
+ }
+ assert (stack == 0)
+ if (token.toString != "") {
+ types += parseType(token.toString)
+ }
+ types.toSeq
+ }
+
+ private def parseReturnType(typeStr: String): SparkType = {
+ if (typeStr(0) == '<') {
+ val parts = typeStr.drop(0).split(">", 2)
+ val parsed = parseType(parts(1)).asInstanceOf[ParameterizedType]
+ ParameterizedType(parsed.name, parsed.parameters, parts(0))
+ } else {
+ parseType(typeStr)
+ }
+ }
+
+ private def toSparkMethod(method: Method): SparkMethod = {
+ val returnType = parseReturnType(method.getGenericReturnType.toString)
+ val name = method.getName
+ val parameters = method.getGenericParameterTypes.map(t => parseType(t.toString))
+ SparkMethod(name, returnType, parameters)
+ }
+
+ private def toJavaType(scalaType: SparkType, isReturnType: Boolean): SparkType = {
+ val renameSubstitutions = Map(
+ "scala.collection.Map" -> "java.util.Map",
+ // TODO: the JavaStreamingContext API accepts Array arguments
+ // instead of Lists, so this isn't a trivial translation / sub:
+ "scala.collection.Seq" -> "java.util.List",
+ "scala.Function2" -> "org.apache.spark.api.java.function.Function2",
+ "scala.collection.Iterator" -> "java.util.Iterator",
+ "scala.collection.mutable.Queue" -> "java.util.Queue",
+ "double" -> "java.lang.Double"
+ )
+ // Keep applying the substitutions until we've reached a fixedpoint.
+ def applySubs(scalaType: SparkType): SparkType = {
+ scalaType match {
+ case ParameterizedType(name, parameters, typebounds) =>
+ name match {
+ case "org.apache.spark.rdd.RDD" =>
+ if (parameters(0).name == classOf[Tuple2[_, _]].getName) {
+ val tupleParams =
+ parameters(0).asInstanceOf[ParameterizedType].parameters.map(applySubs)
+ ParameterizedType(classOf[JavaPairRDD[_, _]].getName, tupleParams)
+ } else {
+ ParameterizedType(classOf[JavaRDD[_]].getName, parameters.map(applySubs))
+ }
+ case "org.apache.spark.streaming.DStream" =>
+ if (parameters(0).name == classOf[Tuple2[_, _]].getName) {
+ val tupleParams =
+ parameters(0).asInstanceOf[ParameterizedType].parameters.map(applySubs)
+ ParameterizedType("org.apache.spark.streaming.api.java.JavaPairDStream", tupleParams)
+ } else {
+ ParameterizedType("org.apache.spark.streaming.api.java.JavaDStream",
+ parameters.map(applySubs))
+ }
+ case "scala.Option" => {
+ if (isReturnType) {
+ ParameterizedType("com.google.common.base.Optional", parameters.map(applySubs))
+ } else {
+ applySubs(parameters(0))
+ }
+ }
+ case "scala.Function1" =>
+ val firstParamName = parameters.last.name
+ if (firstParamName.startsWith("scala.collection.Traversable") ||
+ firstParamName.startsWith("scala.collection.Iterator")) {
+ ParameterizedType("org.apache.spark.api.java.function.FlatMapFunction",
+ Seq(parameters(0),
+ parameters.last.asInstanceOf[ParameterizedType].parameters(0)).map(applySubs))
+ } else if (firstParamName == "scala.runtime.BoxedUnit") {
+ ParameterizedType("org.apache.spark.api.java.function.VoidFunction",
+ parameters.dropRight(1).map(applySubs))
+ } else {
+ ParameterizedType("org.apache.spark.api.java.function.Function", parameters.map(applySubs))
+ }
+ case _ =>
+ ParameterizedType(renameSubstitutions.getOrElse(name, name),
+ parameters.map(applySubs))
+ }
+ case BaseType(name) =>
+ if (renameSubstitutions.contains(name)) {
+ BaseType(renameSubstitutions(name))
+ } else {
+ scalaType
+ }
+ }
+ }
+ var oldType = scalaType
+ var newType = applySubs(scalaType)
+ while (oldType != newType) {
+ oldType = newType
+ newType = applySubs(scalaType)
+ }
+ newType
+ }
+
+ private def toJavaMethod(method: SparkMethod): SparkMethod = {
+ val params = method.parameters
+ .filterNot(_.name == "scala.reflect.ClassManifest")
+ .map(toJavaType(_, isReturnType = false))
+ SparkMethod(method.name, toJavaType(method.returnType, isReturnType = true), params)
+ }
+
+ private def isExcludedByName(method: Method): Boolean = {
+ val name = method.getDeclaringClass.getName + "." + method.getName
+ // Scala methods that are declared as private[mypackage] become public in the resulting
+ // Java bytecode. As a result, we need to manually exclude those methods here.
+ // This list also includes a few methods that are only used by the web UI or other
+ // internal Spark components.
+ val excludedNames = Seq(
+ "org.apache.spark.rdd.RDD.origin",
+ "org.apache.spark.rdd.RDD.elementClassManifest",
+ "org.apache.spark.rdd.RDD.checkpointData",
+ "org.apache.spark.rdd.RDD.partitioner",
+ "org.apache.spark.rdd.RDD.partitions",
+ "org.apache.spark.rdd.RDD.firstParent",
+ "org.apache.spark.rdd.RDD.doCheckpoint",
+ "org.apache.spark.rdd.RDD.markCheckpointed",
+ "org.apache.spark.rdd.RDD.clearDependencies",
+ "org.apache.spark.rdd.RDD.getDependencies",
+ "org.apache.spark.rdd.RDD.getPartitions",
+ "org.apache.spark.rdd.RDD.dependencies",
+ "org.apache.spark.rdd.RDD.getPreferredLocations",
+ "org.apache.spark.rdd.RDD.collectPartitions",
+ "org.apache.spark.rdd.RDD.computeOrReadCheckpoint",
+ "org.apache.spark.rdd.PairRDDFunctions.getKeyClass",
+ "org.apache.spark.rdd.PairRDDFunctions.getValueClass",
+ "org.apache.spark.SparkContext.stringToText",
+ "org.apache.spark.SparkContext.makeRDD",
+ "org.apache.spark.SparkContext.runJob",
+ "org.apache.spark.SparkContext.runApproximateJob",
+ "org.apache.spark.SparkContext.clean",
+ "org.apache.spark.SparkContext.metadataCleaner",
+ "org.apache.spark.SparkContext.ui",
+ "org.apache.spark.SparkContext.newShuffleId",
+ "org.apache.spark.SparkContext.newRddId",
+ "org.apache.spark.SparkContext.cleanup",
+ "org.apache.spark.SparkContext.receiverJobThread",
+ "org.apache.spark.SparkContext.getRDDStorageInfo",
+ "org.apache.spark.SparkContext.addedFiles",
+ "org.apache.spark.SparkContext.addedJars",
+ "org.apache.spark.SparkContext.persistentRdds",
+ "org.apache.spark.SparkContext.executorEnvs",
+ "org.apache.spark.SparkContext.checkpointDir",
+ "org.apache.spark.SparkContext.getSparkHome",
+ "org.apache.spark.SparkContext.executorMemoryRequested",
+ "org.apache.spark.SparkContext.getExecutorStorageStatus",
+ "org.apache.spark.streaming.DStream.generatedRDDs",
+ "org.apache.spark.streaming.DStream.zeroTime",
+ "org.apache.spark.streaming.DStream.rememberDuration",
+ "org.apache.spark.streaming.DStream.storageLevel",
+ "org.apache.spark.streaming.DStream.mustCheckpoint",
+ "org.apache.spark.streaming.DStream.checkpointDuration",
+ "org.apache.spark.streaming.DStream.checkpointData",
+ "org.apache.spark.streaming.DStream.graph",
+ "org.apache.spark.streaming.DStream.isInitialized",
+ "org.apache.spark.streaming.DStream.parentRememberDuration",
+ "org.apache.spark.streaming.DStream.initialize",
+ "org.apache.spark.streaming.DStream.validate",
+ "org.apache.spark.streaming.DStream.setContext",
+ "org.apache.spark.streaming.DStream.setGraph",
+ "org.apache.spark.streaming.DStream.remember",
+ "org.apache.spark.streaming.DStream.getOrCompute",
+ "org.apache.spark.streaming.DStream.generateJob",
+ "org.apache.spark.streaming.DStream.clearOldMetadata",
+ "org.apache.spark.streaming.DStream.addMetadata",
+ "org.apache.spark.streaming.DStream.updateCheckpointData",
+ "org.apache.spark.streaming.DStream.restoreCheckpointData",
+ "org.apache.spark.streaming.DStream.isTimeValid",
+ "org.apache.spark.streaming.StreamingContext.nextNetworkInputStreamId",
+ "org.apache.spark.streaming.StreamingContext.networkInputTracker",
+ "org.apache.spark.streaming.StreamingContext.checkpointDir",
+ "org.apache.spark.streaming.StreamingContext.checkpointDuration",
+ "org.apache.spark.streaming.StreamingContext.receiverJobThread",
+ "org.apache.spark.streaming.StreamingContext.scheduler",
+ "org.apache.spark.streaming.StreamingContext.initialCheckpoint",
+ "org.apache.spark.streaming.StreamingContext.getNewNetworkStreamId",
+ "org.apache.spark.streaming.StreamingContext.validate",
+ "org.apache.spark.streaming.StreamingContext.createNewSparkContext",
+ "org.apache.spark.streaming.StreamingContext.rddToFileName",
+ "org.apache.spark.streaming.StreamingContext.getSparkCheckpointDir",
+ "org.apache.spark.streaming.StreamingContext.env",
+ "org.apache.spark.streaming.StreamingContext.graph",
+ "org.apache.spark.streaming.StreamingContext.isCheckpointPresent"
+ )
+ val excludedPatterns = Seq(
+ """^org\.apache\.spark\.SparkContext\..*To.*Functions""",
+ """^org\.apache\.spark\.SparkContext\..*WritableConverter""",
+ """^org\.apache\.spark\.SparkContext\..*To.*Writable"""
+ ).map(_.r)
+ lazy val excludedByPattern =
+ !excludedPatterns.map(_.findFirstIn(name)).filter(_.isDefined).isEmpty
+ name.contains("$") || excludedNames.contains(name) || excludedByPattern
+ }
+
+ private def isExcludedByInterface(method: Method): Boolean = {
+ val excludedInterfaces =
+ Set("org.apache.spark.Logging", "org.apache.hadoop.mapreduce.HadoopMapReduceUtil")
+ def toComparisionKey(method: Method) =
+ (method.getReturnType, method.getName, method.getGenericReturnType)
+ val interfaces = method.getDeclaringClass.getInterfaces.filter { i =>
+ excludedInterfaces.contains(i.getName)
+ }
+ val excludedMethods = interfaces.flatMap(_.getMethods.map(toComparisionKey))
+ excludedMethods.contains(toComparisionKey(method))
+ }
+
+ private def printMissingMethods(scalaClass: Class[_], javaClass: Class[_]) {
+ val methods = scalaClass.getMethods
+ .filterNot(_.isAccessible)
+ .filterNot(isExcludedByName)
+ .filterNot(isExcludedByInterface)
+ val javaEquivalents = methods.map(m => toJavaMethod(toSparkMethod(m))).toSet
+
+ val javaMethods = javaClass.getMethods.map(toSparkMethod).toSet
+
+ val missingMethods = javaEquivalents -- javaMethods
+
+ for (method <- missingMethods) {
+ println(method)
+ }
+ }
+
+ def main(args: Array[String]) {
+ println("Missing RDD methods")
+ printMissingMethods(classOf[RDD[_]], classOf[JavaRDD[_]])
+ println()
+
+ println("Missing PairRDD methods")
+ printMissingMethods(classOf[PairRDDFunctions[_, _]], classOf[JavaPairRDD[_, _]])
+ println()
+
+ println("Missing DoubleRDD methods")
+ printMissingMethods(classOf[DoubleRDDFunctions], classOf[JavaDoubleRDD])
+ println()
+
+ println("Missing OrderedRDD methods")
+ printMissingMethods(classOf[OrderedRDDFunctions[_, _, _]], classOf[JavaPairRDD[_, _]])
+ println()
+
+ println("Missing SparkContext methods")
+ printMissingMethods(classOf[SparkContext], classOf[JavaSparkContext])
+ println()
+
+ println("Missing StreamingContext methods")
+ printMissingMethods(classOf[StreamingContext], classOf[JavaStreamingContext])
+ println()
+
+ println("Missing DStream methods")
+ printMissingMethods(classOf[DStream[_]], classOf[JavaDStream[_]])
+ println()
+
+ println("Missing PairDStream methods")
+ printMissingMethods(classOf[PairDStreamFunctions[_, _]], classOf[JavaPairDStream[_, _]])
+ println()
+ }
+}
diff --git a/yarn/pom.xml b/yarn/pom.xml
new file mode 100644
index 0000000000..fcacdac9eb
--- /dev/null
+++ b/yarn/pom.xml
@@ -0,0 +1,111 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-parent</artifactId>
+ <version>0.8.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-yarn</artifactId>
+ <packaging>jar</packaging>
+ <name>Spark Project YARN Support</name>
+ <url>http://spark.incubator.apache.org/</url>
+
+ <build>
+ <outputDirectory>target/scala-${scala.version}/classes</outputDirectory>
+ <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <configuration>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <outputFile>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</outputFile>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>reference.conf</resource>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <profiles>
+ <profile>
+ <id>hadoop2-yarn</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro-ipc</artifactId>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+</project>
diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 1b06169739..139a977a03 100644
--- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy.yarn
+package org.apache.spark.deploy.yarn
import java.net.Socket
import java.util.concurrent.CopyOnWriteArrayList
@@ -29,7 +29,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.ipc.YarnRPC
import org.apache.hadoop.yarn.util.{ConverterUtils, Records}
import scala.collection.JavaConversions._
-import spark.{SparkContext, Logging, Utils}
+import org.apache.spark.{SparkContext, Logging, Utils}
import org.apache.hadoop.security.UserGroupInformation
import java.security.PrivilegedExceptionAction
@@ -47,6 +47,9 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e
private var isFinished:Boolean = false
def run() {
+ // setup the directories so things go to yarn approved directories rather
+ // then user specified and /tmp
+ System.setProperty("spark.local.dir", getLocalDirs())
appAttemptId = getApplicationAttemptId()
resourceManager = registerWithResourceManager()
@@ -89,6 +92,21 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e
System.exit(0)
}
+
+ /** Get the Yarn approved local directories. */
+ private def getLocalDirs(): String = {
+ // Hadoop 0.23 and 2.x have different Environment variable names for the
+ // local dirs, so lets check both. We assume one of the 2 is set.
+ // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X
+ val localDirs = Option(System.getenv("YARN_LOCAL_DIRS"))
+ .getOrElse(Option(System.getenv("LOCAL_DIRS"))
+ .getOrElse(""))
+
+ if (localDirs.isEmpty()) {
+ throw new Exception("Yarn Local dirs can't be empty")
+ }
+ return localDirs
+ }
private def getApplicationAttemptId(): ApplicationAttemptId = {
val envs = System.getenv()
@@ -124,18 +142,20 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e
private def waitForSparkMaster() {
logInfo("Waiting for spark driver to be reachable.")
var driverUp = false
- while(!driverUp) {
+ var tries = 0
+ while(!driverUp && tries < 10) {
val driverHost = System.getProperty("spark.driver.host")
val driverPort = System.getProperty("spark.driver.port")
try {
val socket = new Socket(driverHost, driverPort.toInt)
socket.close()
- logInfo("Master now available: " + driverHost + ":" + driverPort)
+ logInfo("Driver now available: " + driverHost + ":" + driverPort)
driverUp = true
} catch {
case e: Exception =>
- logError("Failed to connect to driver at " + driverHost + ":" + driverPort)
+ logWarning("Failed to connect to driver at " + driverHost + ":" + driverPort + ", retrying")
Thread.sleep(100)
+ tries = tries + 1
}
}
}
@@ -176,7 +196,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e
var sparkContext: SparkContext = null
ApplicationMaster.sparkContextRef.synchronized {
var count = 0
- while (ApplicationMaster.sparkContextRef.get() == null) {
+ while (ApplicationMaster.sparkContextRef.get() == null && count < 10) {
logInfo("Waiting for spark context initialization ... " + count)
count = count + 1
ApplicationMaster.sparkContextRef.wait(10000L)
diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMasterArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
index 8de44b1f66..f47e23b63f 100644
--- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMasterArguments.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package spark.deploy.yarn
+package org.apache.spark.deploy.yarn
-import spark.util.IntParam
+import org.apache.spark.util.IntParam
import collection.mutable.ArrayBuffer
class ApplicationMasterArguments(val args: Array[String]) {
diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 8bcbfc2735..48e737ed79 100644
--- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy.yarn
+package org.apache.spark.deploy.yarn
import java.net.{InetSocketAddress, URI}
import java.nio.ByteBuffer
@@ -33,10 +33,10 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.ipc.YarnRPC
import scala.collection.mutable.HashMap
import scala.collection.JavaConversions._
-import spark.{Logging, Utils}
+import org.apache.spark.{Logging, Utils}
import org.apache.hadoop.yarn.util.{Apps, Records, ConverterUtils}
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
-import spark.deploy.SparkHadoopUtil
+import org.apache.spark.deploy.SparkHadoopUtil
class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl with Logging {
@@ -165,7 +165,7 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl
Apps.addToEnvironment(env, Environment.CLASSPATH.name, "./*")
Apps.addToEnvironment(env, Environment.CLASSPATH.name, "$CLASSPATH")
Client.populateHadoopClasspath(yarnConf, env)
- SparkHadoopUtil.setYarnMode(env)
+ env("SPARK_YARN_MODE") = "true"
env("SPARK_YARN_JAR_PATH") =
localResources("spark.jar").getResource().getScheme.toString() + "://" +
localResources("spark.jar").getResource().getFile().toString()
@@ -185,6 +185,8 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl
env("SPARK_YARN_LOG4J_SIZE") = log4jConfLocalRes.getSize().toString()
}
+ // allow users to specify some environment variables
+ Apps.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV"))
// Add each SPARK-* key to the environment
System.getenv().filterKeys(_.startsWith("SPARK")).foreach { case (k,v) => env(k) = v }
@@ -221,6 +223,10 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl
// Add Xmx for am memory
JAVA_OPTS += "-Xmx" + amMemory + "m "
+ JAVA_OPTS += " -Djava.io.tmpdir=" + new Path(Environment.PWD.$(),
+ YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR)
+
+
// Commenting it out for now - so that people can refer to the properties if required. Remove it once cpuset version is pushed out.
// The context is, default gc for server class machines end up using all cores to do gc - hence if there are multiple containers in same
// node, spark gc effects all other containers performance (which can also be other spark containers)
@@ -241,7 +247,7 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl
// Command for the ApplicationMaster
var javaCommand = "java";
val javaHome = System.getenv("JAVA_HOME")
- if (javaHome != null && !javaHome.isEmpty()) {
+ if ((javaHome != null && !javaHome.isEmpty()) || env.isDefinedAt("JAVA_HOME")) {
javaCommand = Environment.JAVA_HOME.$() + "/bin/java"
}
@@ -313,8 +319,11 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl
object Client {
def main(argStrings: Array[String]) {
+ // Set an env variable indicating we are running in YARN mode.
+ // Note that anything with SPARK prefix gets propagated to all (remote) processes
+ System.setProperty("SPARK_YARN_MODE", "true")
+
val args = new ClientArguments(argStrings)
- SparkHadoopUtil.setYarnMode()
new Client(args).run
}
diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ClientArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
index 67aff03781..6cbfadc23b 100644
--- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ClientArguments.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package spark.deploy.yarn
+package org.apache.spark.deploy.yarn
-import spark.util.MemoryParam
-import spark.util.IntParam
+import org.apache.spark.util.MemoryParam
+import org.apache.spark.util.IntParam
import collection.mutable.{ArrayBuffer, HashMap}
-import spark.scheduler.{InputFormatInfo, SplitInfo}
+import org.apache.spark.scheduler.{InputFormatInfo, SplitInfo}
// TODO: Add code and support for ensuring that yarn resource 'asks' are location aware !
class ClientArguments(val args: Array[String]) {
diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/WorkerRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala
index f458f2f6a1..72dcf7178e 100644
--- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/WorkerRunnable.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package spark.deploy.yarn
+package org.apache.spark.deploy.yarn
import java.net.URI
import java.nio.ByteBuffer
@@ -37,7 +37,7 @@ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
import scala.collection.JavaConversions._
import scala.collection.mutable.HashMap
-import spark.{Logging, Utils}
+import org.apache.spark.{Logging, Utils}
class WorkerRunnable(container: Container, conf: Configuration, masterAddress: String,
slaveId: String, hostname: String, workerMemory: Int, workerCores: Int)
@@ -75,6 +75,10 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S
if (env.isDefinedAt("SPARK_JAVA_OPTS")) {
JAVA_OPTS += env("SPARK_JAVA_OPTS") + " "
}
+
+ JAVA_OPTS += " -Djava.io.tmpdir=" + new Path(Environment.PWD.$(),
+ YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR)
+
// Commenting it out for now - so that people can refer to the properties if required. Remove it once cpuset version is pushed out.
// The context is, default gc for server class machines end up using all cores to do gc - hence if there are multiple containers in same
// node, spark gc effects all other containers performance (which can also be other spark containers)
@@ -104,7 +108,7 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S
var javaCommand = "java";
val javaHome = System.getenv("JAVA_HOME")
- if (javaHome != null && !javaHome.isEmpty()) {
+ if ((javaHome != null && !javaHome.isEmpty()) || env.isDefinedAt("JAVA_HOME")) {
javaCommand = Environment.JAVA_HOME.$() + "/bin/java"
}
@@ -115,7 +119,7 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S
// TODO: If the OOM is not recoverable by rescheduling it on different node, then do 'something' to fail job ... akin to blacklisting trackers in mapred ?
" -XX:OnOutOfMemoryError='kill %p' " +
JAVA_OPTS +
- " spark.executor.StandaloneExecutorBackend " +
+ " org.apache.spark.executor.StandaloneExecutorBackend " +
masterAddress + " " +
slaveId + " " +
hostname + " " +
@@ -187,6 +191,9 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S
Apps.addToEnvironment(env, Environment.CLASSPATH.name, "$CLASSPATH")
Client.populateHadoopClasspath(yarnConf, env)
+ // allow users to specify some environment variables
+ Apps.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV"))
+
System.getenv().filterKeys(_.startsWith("SPARK")).foreach { case (k,v) => env(k) = v }
return env
}
diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index b0af8baf08..26ff214e12 100644
--- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/YarnAllocationHandler.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@ -15,13 +15,13 @@
* limitations under the License.
*/
-package spark.deploy.yarn
+package org.apache.spark.deploy.yarn
-import spark.{Logging, Utils}
-import spark.scheduler.SplitInfo
+import org.apache.spark.{Logging, Utils}
+import org.apache.spark.scheduler.SplitInfo
import scala.collection
import org.apache.hadoop.yarn.api.records.{AMResponse, ApplicationAttemptId, ContainerId, Priority, Resource, ResourceRequest, ContainerStatus, Container}
-import spark.scheduler.cluster.{ClusterScheduler, StandaloneSchedulerBackend}
+import org.apache.spark.scheduler.cluster.{ClusterScheduler, StandaloneSchedulerBackend}
import org.apache.hadoop.yarn.api.protocolrecords.{AllocateRequest, AllocateResponse}
import org.apache.hadoop.yarn.util.{RackResolver, Records}
import java.util.concurrent.{CopyOnWriteArrayList, ConcurrentHashMap}
diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
index 6122fdced0..ca2f1e2565 100644
--- a/core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
@@ -15,8 +15,9 @@
* limitations under the License.
*/
-package spark.deploy
+package org.apache.spark.deploy.yarn
+import org.apache.spark.deploy.SparkHadoopUtil
import collection.mutable.HashMap
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.security.UserGroupInformation
@@ -28,48 +29,17 @@ import java.security.PrivilegedExceptionAction
/**
* Contains util methods to interact with Hadoop from spark.
*/
-object SparkHadoopUtil {
-
- val yarnConf = newConfiguration()
-
- def getUserNameFromEnvironment(): String = {
- // defaulting to env if -D is not present ...
- val retval = System.getProperty(Environment.USER.name, System.getenv(Environment.USER.name))
-
- // If nothing found, default to user we are running as
- if (retval == null) System.getProperty("user.name") else retval
- }
-
- def runAsUser(func: (Product) => Unit, args: Product) {
- runAsUser(func, args, getUserNameFromEnvironment())
- }
-
- def runAsUser(func: (Product) => Unit, args: Product, user: String) {
- func(args)
- }
+class YarnSparkHadoopUtil extends SparkHadoopUtil {
// Note that all params which start with SPARK are propagated all the way through, so if in yarn mode, this MUST be set to true.
- def isYarnMode(): Boolean = {
- val yarnMode = System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE"))
- java.lang.Boolean.valueOf(yarnMode)
- }
-
- // Set an env variable indicating we are running in YARN mode.
- // Note that anything with SPARK prefix gets propagated to all (remote) processes
- def setYarnMode() {
- System.setProperty("SPARK_YARN_MODE", "true")
- }
-
- def setYarnMode(env: HashMap[String, String]) {
- env("SPARK_YARN_MODE") = "true"
- }
+ override def isYarnMode(): Boolean = { true }
// Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems
// Always create a new config, dont reuse yarnConf.
- def newConfiguration(): Configuration = new YarnConfiguration(new Configuration())
+ override def newConfiguration(): Configuration = new YarnConfiguration(new Configuration())
// add any user credentials to the job conf which are necessary for running on a secure Hadoop cluster
- def addCredentials(conf: JobConf) {
+ override def addCredentials(conf: JobConf) {
val jobCreds = conf.getCredentials();
jobCreds.mergeAll(UserGroupInformation.getCurrentUser().getCredentials())
}
diff --git a/core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
index 307d96111c..3828ddfc4f 100644
--- a/core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala
+++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
@@ -15,10 +15,10 @@
* limitations under the License.
*/
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
-import spark._
-import spark.deploy.yarn.{ApplicationMaster, YarnAllocationHandler}
+import org.apache.spark._
+import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnAllocationHandler}
import org.apache.hadoop.conf.Configuration
/**
@@ -41,13 +41,6 @@ private[spark] class YarnClusterScheduler(sc: SparkContext, conf: Configuration)
if (retval != null) Some(retval) else None
}
- // By default, if rack is unknown, return nothing
- override def getCachedHostsForRack(rack: String): Option[Set[String]] = {
- if (rack == None || rack == null) return None
-
- YarnAllocationHandler.fetchCachedHostsForRack(rack)
- }
-
override def postStartHook() {
val sparkContextInitialized = ApplicationMaster.sparkContextInitialized(sc)
if (sparkContextInitialized){