Diffstat
-rw-r--r-- | .gitignore | 3
-rw-r--r-- | README.md | 77
-rw-r--r-- | assembly/README | 13
-rw-r--r-- | assembly/lib/PY4J_LICENSE.txt | 27
-rw-r--r-- | assembly/lib/PY4J_VERSION.txt | 1
-rw-r--r-- | assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar (renamed from python/lib/py4j0.7.jar) | bin | 103286 -> 103286 bytes
-rw-r--r-- | assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom | 9
-rw-r--r-- | assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml | 12
-rw-r--r-- | assembly/pom.xml | 159
-rw-r--r-- | assembly/src/main/assembly/assembly.xml | 85
-rw-r--r-- | bagel/pom.xml | 111
-rw-r--r-- | bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala (renamed from bagel/src/main/scala/spark/bagel/Bagel.scala) | 37
-rw-r--r-- | bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala (renamed from bagel/src/test/scala/bagel/BagelSuite.scala) | 10
-rw-r--r-- | bin/compute-classpath.cmd | 4
-rwxr-xr-x | bin/compute-classpath.sh | 90
-rwxr-xr-x | bin/slaves.sh | 4
-rwxr-xr-x | bin/spark-daemon.sh | 24
-rwxr-xr-x | bin/start-master.sh | 2
-rwxr-xr-x | bin/start-slave.sh | 2
-rwxr-xr-x | bin/start-slaves.sh | 2
-rwxr-xr-x | bin/stop-all.sh | 1
-rwxr-xr-x | bin/stop-master.sh | 2
-rwxr-xr-x | bin/stop-slaves.sh | 4
-rw-r--r-- | conf/metrics.properties.template | 90
-rw-r--r-- | conf/slaves | 2
-rwxr-xr-x | conf/spark-env.sh.template | 20
-rw-r--r-- | core/pom.xml | 228
-rw-r--r-- | core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala | 30
-rw-r--r-- | core/src/main/java/org/apache/spark/network/netty/FileClient.java (renamed from core/src/main/java/spark/network/netty/FileClient.java) | 2
-rw-r--r-- | core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java (renamed from core/src/main/java/spark/network/netty/FileClientChannelInitializer.java) | 2
-rw-r--r-- | core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java (renamed from core/src/main/java/spark/network/netty/FileClientHandler.java) | 2
-rw-r--r-- | core/src/main/java/org/apache/spark/network/netty/FileServer.java (renamed from core/src/main/java/spark/network/netty/FileServer.java) | 2
-rw-r--r-- | core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java (renamed from core/src/main/java/spark/network/netty/FileServerChannelInitializer.java) | 2
-rw-r--r-- | core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java (renamed from core/src/main/java/spark/network/netty/FileServerHandler.java) | 2
-rwxr-xr-x | core/src/main/java/org/apache/spark/network/netty/PathResolver.java (renamed from core/src/main/java/spark/network/netty/PathResolver.java) | 2
-rwxr-xr-x | core/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css | 874
-rw-r--r-- | core/src/main/resources/org/apache/spark/ui/static/sorttable.js (renamed from core/src/main/resources/spark/ui/static/sorttable.js) | 0
-rw-r--r-- | core/src/main/resources/org/apache/spark/ui/static/spark-logo-77x50px-hd.png (renamed from core/src/main/resources/spark/ui/static/spark-logo-77x50px-hd.png) | bin | 3536 -> 3536 bytes
-rw-r--r-- | core/src/main/resources/org/apache/spark/ui/static/spark_logo.png (renamed from core/src/main/resources/spark/ui/static/spark_logo.png) | bin | 14233 -> 14233 bytes
-rw-r--r-- | core/src/main/resources/org/apache/spark/ui/static/webui.css | 63
-rw-r--r-- | core/src/main/resources/spark/ui/static/bootstrap-responsive.min.css | 9
-rw-r--r-- | core/src/main/resources/spark/ui/static/bootstrap.min.css | 9
-rw-r--r-- | core/src/main/resources/spark/ui/static/webui.css | 49
-rw-r--r-- | core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala | 45
-rw-r--r-- | core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala | 69
-rw-r--r-- | core/src/main/scala/org/apache/spark/Accumulators.scala (renamed from core/src/main/scala/spark/Accumulators.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/Aggregator.scala (renamed from core/src/main/scala/spark/Aggregator.scala) | 20
-rw-r--r-- | core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala (renamed from core/src/main/scala/spark/BlockStoreShuffleFetcher.scala) | 21
-rw-r--r-- | core/src/main/scala/org/apache/spark/CacheManager.scala (renamed from core/src/main/scala/spark/CacheManager.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/Dependency.scala (renamed from core/src/main/scala/spark/Dependency.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/FetchFailedException.scala (renamed from core/src/main/scala/spark/FetchFailedException.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/HttpFileServer.scala (renamed from core/src/main/scala/spark/HttpFileServer.scala) | 3
-rw-r--r-- | core/src/main/scala/org/apache/spark/HttpServer.scala (renamed from core/src/main/scala/spark/HttpServer.scala) | 3
-rw-r--r-- | core/src/main/scala/org/apache/spark/Logging.scala (renamed from core/src/main/scala/spark/Logging.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/MapOutputTracker.scala (renamed from core/src/main/scala/spark/MapOutputTracker.scala) | 70
-rw-r--r-- | core/src/main/scala/org/apache/spark/Partition.scala (renamed from core/src/main/scala/spark/Partition.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/Partitioner.scala (renamed from core/src/main/scala/spark/Partitioner.scala) | 25
-rw-r--r-- | core/src/main/scala/org/apache/spark/SerializableWritable.scala (renamed from core/src/main/scala/spark/SerializableWritable.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/ShuffleFetcher.scala (renamed from core/src/main/scala/spark/ShuffleFetcher.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkContext.scala (renamed from core/src/main/scala/spark/SparkContext.scala) | 134
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkEnv.scala (renamed from core/src/main/scala/spark/SparkEnv.scala) | 78
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkException.scala (renamed from core/src/main/scala/spark/SparkException.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkFiles.java (renamed from core/src/main/scala/spark/SparkFiles.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala (renamed from core/src/main/scala/spark/HadoopWriter.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/TaskContext.scala (renamed from core/src/main/scala/spark/TaskContext.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/TaskEndReason.scala (renamed from core/src/main/scala/spark/TaskEndReason.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/TaskState.scala (renamed from core/src/main/scala/spark/TaskState.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala (renamed from core/src/main/scala/spark/api/java/JavaDoubleRDD.scala) | 51
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala (renamed from core/src/main/scala/spark/api/java/JavaPairRDD.scala) | 70
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala (renamed from core/src/main/scala/spark/api/java/JavaRDD.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala (renamed from core/src/main/scala/spark/api/java/JavaRDDLike.scala) | 29
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala (renamed from core/src/main/scala/spark/api/java/JavaSparkContext.scala) | 30
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java (renamed from core/src/main/scala/spark/api/java/JavaSparkContextVarargsWorkaround.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala | 28
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/StorageLevels.java (renamed from core/src/main/scala/spark/api/java/StorageLevels.java) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.java (renamed from core/src/main/scala/spark/api/java/function/DoubleFlatMapFunction.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.java (renamed from core/src/main/scala/spark/api/java/function/DoubleFunction.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala (renamed from core/src/main/scala/spark/api/java/function/FlatMapFunction.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala (renamed from core/src/main/scala/spark/api/java/function/FlatMapFunction2.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/Function.java (renamed from core/src/main/scala/spark/api/java/function/Function.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/Function2.java (renamed from core/src/main/scala/spark/api/java/function/Function2.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java (renamed from core/src/main/scala/spark/api/java/function/PairFlatMapFunction.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java (renamed from core/src/main/scala/spark/api/java/function/PairFunction.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala (renamed from core/src/main/scala/spark/api/java/function/VoidFunction.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala (renamed from core/src/main/scala/spark/api/java/function/WrappedFunction1.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala (renamed from core/src/main/scala/spark/api/java/function/WrappedFunction2.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/python/PythonPartitioner.scala (renamed from core/src/main/scala/spark/api/python/PythonPartitioner.scala) | 31
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala (renamed from core/src/main/scala/spark/api/python/PythonRDD.scala) | 87
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala (renamed from core/src/main/scala/spark/api/python/PythonWorkerFactory.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala (renamed from core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala (renamed from core/src/main/scala/spark/broadcast/Broadcast.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala (renamed from core/src/main/scala/spark/broadcast/BroadcastFactory.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala (renamed from core/src/main/scala/spark/broadcast/HttpBroadcast.scala) | 40
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala (renamed from core/src/main/scala/spark/broadcast/MultiTracker.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala (renamed from core/src/main/scala/spark/broadcast/SourceInfo.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala (renamed from core/src/main/scala/spark/broadcast/TreeBroadcast.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala (renamed from core/src/main/scala/spark/deploy/ApplicationDescription.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/Command.scala (renamed from core/src/main/scala/spark/deploy/Command.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala | 130
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala (renamed from core/src/main/scala/spark/deploy/ExecutorState.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala (renamed from core/src/main/scala/spark/deploy/JsonProtocol.scala) | 16
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala (renamed from core/src/main/scala/spark/deploy/LocalSparkCluster.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala (renamed from core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala) | 18
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/WebUI.scala (renamed from core/src/main/scala/spark/deploy/WebUI.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/client/Client.scala (renamed from core/src/main/scala/spark/deploy/client/Client.scala) | 23
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala (renamed from core/src/main/scala/spark/deploy/client/ClientListener.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala (renamed from core/src/main/scala/spark/deploy/client/TestClient.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/client/TestExecutor.scala (renamed from core/src/main/scala/spark/deploy/client/TestExecutor.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala (renamed from core/src/main/scala/spark/deploy/master/ApplicationInfo.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala | 24
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala (renamed from core/src/main/scala/spark/deploy/master/ApplicationState.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala (renamed from core/src/main/scala/spark/deploy/master/ExecutorInfo.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/Master.scala (renamed from core/src/main/scala/spark/deploy/master/Master.scala) | 97
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala (renamed from core/src/main/scala/spark/deploy/master/MasterArguments.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala | 25
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala (renamed from core/src/main/scala/spark/deploy/master/WorkerInfo.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala (renamed from core/src/main/scala/spark/deploy/master/WorkerState.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala (renamed from core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala) | 41
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala (renamed from core/src/main/scala/spark/deploy/master/ui/IndexPage.scala) | 67
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala (renamed from core/src/main/scala/spark/deploy/master/ui/MasterWebUI.scala) | 27
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala (renamed from core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala) | 50
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala (renamed from core/src/main/scala/spark/deploy/worker/Worker.scala) | 47
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala (renamed from core/src/main/scala/spark/deploy/worker/WorkerArguments.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala | 34
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala (renamed from core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala) | 46
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala (renamed from core/src/main/scala/spark/deploy/worker/ui/WorkerWebUI.scala) | 41
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/Executor.scala (renamed from core/src/main/scala/spark/executor/Executor.scala) | 73
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/ExecutorBackend.scala (renamed from core/src/main/scala/spark/executor/ExecutorBackend.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala (renamed from core/src/main/scala/spark/executor/ExecutorExitCode.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala | 55
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/ExecutorURLClassLoader.scala (renamed from core/src/main/scala/spark/executor/ExecutorURLClassLoader.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala (renamed from core/src/main/scala/spark/executor/MesosExecutorBackend.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/StandaloneExecutorBackend.scala (renamed from core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala) | 33
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala (renamed from core/src/main/scala/spark/executor/TaskMetrics.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/io/CompressionCodec.scala | 82
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala | 100
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala | 163
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala | 59
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala | 68
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala (renamed from core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala) | 20
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala | 55
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulingMode.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala (renamed from core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala) | 17
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/source/Source.scala | 25
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/BufferMessage.scala (renamed from core/src/main/scala/spark/network/BufferMessage.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/Connection.scala (renamed from core/src/main/scala/spark/network/Connection.scala) | 50
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/ConnectionManager.scala (renamed from core/src/main/scala/spark/network/ConnectionManager.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/ConnectionManagerId.scala (renamed from core/src/main/scala/spark/network/ConnectionManagerId.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala (renamed from core/src/main/scala/spark/network/ConnectionManagerTest.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/Message.scala (renamed from core/src/main/scala/spark/network/Message.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/MessageChunk.scala (renamed from core/src/main/scala/spark/network/MessageChunk.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/MessageChunkHeader.scala (renamed from core/src/main/scala/spark/network/MessageChunkHeader.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/ReceiverTest.scala (renamed from core/src/main/scala/spark/network/ReceiverTest.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/SenderTest.scala (renamed from core/src/main/scala/spark/network/SenderTest.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala (renamed from core/src/main/scala/spark/network/netty/FileHeader.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala (renamed from core/src/main/scala/spark/network/netty/ShuffleCopier.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala (renamed from core/src/main/scala/spark/network/netty/ShuffleSender.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/package.scala | 35
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala (renamed from core/src/main/scala/spark/partial/ApproximateActionListener.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/ApproximateEvaluator.scala (renamed from core/src/main/scala/spark/partial/ApproximateEvaluator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala (renamed from core/src/main/scala/spark/partial/BoundedDouble.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala (renamed from core/src/main/scala/spark/partial/CountEvaluator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala (renamed from core/src/main/scala/spark/partial/GroupedCountEvaluator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/GroupedMeanEvaluator.scala (renamed from core/src/main/scala/spark/partial/GroupedMeanEvaluator.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/GroupedSumEvaluator.scala (renamed from core/src/main/scala/spark/partial/GroupedSumEvaluator.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala (renamed from core/src/main/scala/spark/partial/MeanEvaluator.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/PartialResult.scala (renamed from core/src/main/scala/spark/partial/PartialResult.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala (renamed from core/src/main/scala/spark/partial/StudentTCacher.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala (renamed from core/src/main/scala/spark/partial/SumEvaluator.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala (renamed from core/src/main/scala/spark/rdd/BlockRDD.scala) | 13
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala (renamed from core/src/main/scala/spark/rdd/CartesianRDD.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala (renamed from core/src/main/scala/spark/rdd/CheckpointRDD.scala) | 20
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala (renamed from core/src/main/scala/spark/rdd/CoGroupedRDD.scala) | 60
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala | 342
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala (renamed from core/src/main/scala/spark/DoubleRDDFunctions.scala) | 25
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala (renamed from core/src/main/scala/spark/rdd/EmptyRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala (renamed from core/src/main/scala/spark/rdd/FilteredRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala (renamed from core/src/main/scala/spark/rdd/FlatMappedRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala (renamed from core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala) | 22
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala (renamed from core/src/main/scala/spark/rdd/GlommedRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala (renamed from core/src/main/scala/spark/rdd/HadoopRDD.scala) | 21
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala (renamed from core/src/main/scala/spark/rdd/JdbcRDD.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala (renamed from core/src/main/scala/spark/rdd/MapPartitionsRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithIndexRDD.scala (renamed from core/src/main/scala/spark/rdd/MapPartitionsWithIndexRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala (renamed from core/src/main/scala/spark/rdd/MappedRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala (renamed from core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala) | 19
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala (renamed from core/src/main/scala/spark/rdd/NewHadoopRDD.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala | 52
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala (renamed from core/src/main/scala/spark/PairRDDFunctions.scala) | 204
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala (renamed from core/src/main/scala/spark/rdd/ParallelCollectionRDD.scala) | 67
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala (renamed from core/src/main/scala/spark/rdd/PartitionPruningRDD.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala (renamed from core/src/main/scala/spark/rdd/PipedRDD.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/RDD.scala (renamed from core/src/main/scala/spark/RDD.scala) | 93
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala (renamed from core/src/main/scala/spark/RDDCheckpointData.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala (renamed from core/src/main/scala/spark/rdd/SampledRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala (renamed from core/src/main/scala/spark/SequenceFileRDDFunctions.scala) | 26
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala (renamed from core/src/main/scala/spark/rdd/ShuffledRDD.scala) | 35
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala (renamed from core/src/main/scala/spark/rdd/SubtractedRDD.scala) | 47
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala (renamed from core/src/main/scala/spark/rdd/UnionRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala (renamed from core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala) | 34
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala (renamed from core/src/main/scala/spark/rdd/ZippedRDD.scala) | 27
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala (renamed from core/src/main/scala/spark/scheduler/ActiveJob.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala (renamed from core/src/main/scala/spark/scheduler/DAGScheduler.scala) | 294
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala (renamed from core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala) | 15
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala | 30
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala (renamed from core/src/main/scala/spark/scheduler/InputFormatInfo.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/JobListener.scala (renamed from core/src/main/scala/spark/scheduler/JobListener.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala (renamed from core/src/main/scala/spark/scheduler/JobLogger.scala) | 108
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/JobResult.scala (renamed from core/src/main/scala/spark/scheduler/JobResult.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala (renamed from core/src/main/scala/spark/scheduler/JobWaiter.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala (renamed from core/src/main/scala/spark/scheduler/MapStatus.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala (renamed from core/src/main/scala/spark/scheduler/ResultTask.scala) | 38
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala (renamed from core/src/main/scala/spark/scheduler/ShuffleMapTask.scala) | 44
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala (renamed from core/src/main/scala/spark/scheduler/SparkListener.scala) | 36
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala | 74
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala (renamed from core/src/main/scala/spark/scheduler/SplitInfo.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/Stage.scala (renamed from core/src/main/scala/spark/scheduler/Stage.scala) | 16
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala (renamed from core/src/main/scala/spark/scheduler/StageInfo.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/Task.scala (renamed from core/src/main/scala/spark/scheduler/Task.scala) | 12
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala | 34
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala (renamed from core/src/main/scala/spark/scheduler/TaskResult.scala) | 28
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala (renamed from core/src/main/scala/spark/scheduler/TaskScheduler.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerListener.scala (renamed from core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala) | 13
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala (renamed from core/src/main/scala/spark/scheduler/TaskSet.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala | 440
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala | 712
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala (renamed from core/src/main/scala/spark/scheduler/cluster/ExecutorLossReason.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/Pool.scala (renamed from core/src/main/scala/spark/scheduler/cluster/Pool.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/Schedulable.scala (renamed from core/src/main/scala/spark/scheduler/cluster/Schedulable.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulableBuilder.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulableBuilder.scala) | 103
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingAlgorithm.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulingAlgorithm.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingMode.scala (renamed from core/src/main/scala/spark/SoftReferenceCache.scala) | 20
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala) | 17
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala | 62
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala) | 34
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/TaskDescription.scala (renamed from core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/TaskInfo.scala (renamed from core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala) | 19
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala | 32
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala (renamed from core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala) | 35
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala (renamed from core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala (renamed from core/src/main/scala/spark/scheduler/local/LocalScheduler.scala) | 67
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala (renamed from core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala) | 112
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala) | 37
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala) | 26
-rw-r--r-- | core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala (renamed from core/src/main/scala/spark/JavaSerializer.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala | 159
-rw-r--r-- | core/src/main/scala/org/apache/spark/serializer/Serializer.scala (renamed from core/src/main/scala/spark/serializer/Serializer.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala (renamed from core/src/main/scala/spark/serializer/SerializerManager.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockException.scala (renamed from core/src/main/scala/spark/storage/BlockException.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockFetchTracker.scala (renamed from core/src/main/scala/spark/storage/BlockFetchTracker.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala (renamed from core/src/main/scala/spark/storage/BlockFetcherIterator.scala) | 23
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManager.scala (renamed from core/src/main/scala/spark/storage/BlockManager.scala) | 88
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala (renamed from core/src/main/scala/spark/storage/BlockManagerId.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala (renamed from core/src/main/scala/spark/storage/BlockManagerMaster.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala (renamed from core/src/main/scala/spark/storage/BlockManagerMasterActor.scala) | 21
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala | 110
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveActor.scala (renamed from core/src/main/scala/spark/storage/BlockManagerSlaveActor.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala | 48
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala (renamed from core/src/main/scala/spark/storage/BlockManagerWorker.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockMessage.scala (renamed from core/src/main/scala/spark/storage/BlockMessage.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala (renamed from core/src/main/scala/spark/storage/BlockMessageArray.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala (renamed from core/src/main/scala/spark/storage/BlockObjectWriter.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockStore.scala (renamed from core/src/main/scala/spark/storage/BlockStore.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/DiskStore.scala (renamed from core/src/main/scala/spark/storage/DiskStore.scala) | 19
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/MemoryStore.scala (renamed from core/src/main/scala/spark/storage/MemoryStore.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/PutResult.scala (renamed from core/src/main/scala/spark/storage/PutResult.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala (renamed from core/src/main/scala/spark/storage/ShuffleBlockManager.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/StorageLevel.scala (renamed from core/src/main/scala/spark/storage/StorageLevel.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/StorageUtils.scala (renamed from core/src/main/scala/spark/storage/StorageUtils.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala (renamed from core/src/main/scala/spark/storage/ThreadingTest.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/JettyUtils.scala (renamed from core/src/main/scala/spark/ui/JettyUtils.scala) | 15
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/Page.scala (renamed from core/src/main/scala/spark/ui/Page.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/SparkUI.scala (renamed from core/src/main/scala/spark/ui/SparkUI.scala) | 35
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/UIUtils.scala (renamed from core/src/main/scala/spark/ui/UIUtils.scala) | 98
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala (renamed from core/src/main/scala/spark/ui/UIWorkloadGenerator.scala) | 49
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala (renamed from core/src/main/scala/spark/ui/env/EnvironmentUI.scala) | 57
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala | 137
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala | 90
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala | 156
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala | 61
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala | 32
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala | 55
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala | 183
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala | 107
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala (renamed from core/src/main/scala/spark/ui/storage/BlockManagerUI.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/storage/IndexPage.scala (renamed from core/src/main/scala/spark/ui/storage/IndexPage.scala) | 38
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala (renamed from core/src/main/scala/spark/ui/storage/RDDPage.scala) | 44
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/AkkaUtils.scala (renamed from core/src/main/scala/spark/util/AkkaUtils.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala (renamed from core/src/main/scala/spark/util/BoundedPriorityQueue.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala (renamed from core/src/main/scala/spark/util/ByteBufferInputStream.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Clock.scala | 29
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala (renamed from core/src/main/scala/spark/ClosureCleaner.scala) | 3
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/CompletionIterator.scala (renamed from core/src/main/scala/spark/util/CompletionIterator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Distribution.scala (renamed from core/src/main/scala/spark/util/Distribution.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/IdGenerator.scala (renamed from core/src/main/scala/spark/util/IdGenerator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/IntParam.scala (renamed from core/src/main/scala/spark/util/IntParam.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/MemoryParam.scala (renamed from core/src/main/scala/spark/util/MemoryParam.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala (renamed from core/src/main/scala/spark/util/MetadataCleaner.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/MutablePair.scala (renamed from core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala) | 24
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/NextIterator.scala (renamed from core/src/main/scala/spark/util/NextIterator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/RateLimitedOutputStream.scala (renamed from core/src/main/scala/spark/util/RateLimitedOutputStream.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/SerializableBuffer.scala (renamed from core/src/main/scala/spark/util/SerializableBuffer.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/SizeEstimator.scala (renamed from core/src/main/scala/spark/SizeEstimator.scala) | 3
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/StatCounter.scala (renamed from core/src/main/scala/spark/util/StatCounter.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala (renamed from core/src/main/scala/spark/util/TimeStampedHashMap.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/TimeStampedHashSet.scala (renamed from core/src/main/scala/spark/util/TimeStampedHashSet.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Utils.scala (renamed from core/src/main/scala/spark/Utils.scala) | 110
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Vector.scala (renamed from core/src/main/scala/spark/util/Vector.scala) | 9
-rw-r--r-- | core/src/main/scala/spark/Cache.scala | 80
-rw-r--r-- | core/src/main/scala/spark/KryoSerializer.scala | 241
-rw-r--r-- | core/src/main/scala/spark/deploy/DeployMessage.scala | 125
-rw-r--r-- | core/src/main/scala/spark/package.scala | 32
-rw-r--r-- | core/src/main/scala/spark/rdd/CoalescedRDD.scala | 81
-rw-r--r-- | core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala | 631
-rw-r--r-- | core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala | 765
-rw-r--r-- | core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala | 62
-rw-r--r-- | core/src/main/scala/spark/storage/BlockManagerMessages.scala | 123
-rw-r--r-- | core/src/main/scala/spark/ui/jobs/IndexPage.scala | 129
-rw-r--r-- | core/src/main/scala/spark/ui/jobs/JobProgressUI.scala | 144
-rw-r--r-- | core/src/main/scala/spark/ui/jobs/StagePage.scala | 131
-rw-r--r-- | core/src/test/resources/test_metrics_config.properties | 6
-rw-r--r-- | core/src/test/resources/test_metrics_system.properties | 7
-rw-r--r-- | core/src/test/scala/org/apache/spark/AccumulatorSuite.scala (renamed from core/src/test/scala/spark/AccumulatorSuite.scala) | 4
-rw-r--r-- | core/src/test/scala/org/apache/spark/BroadcastSuite.scala (renamed from core/src/test/scala/spark/BroadcastSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/CheckpointSuite.scala (renamed from core/src/test/scala/spark/CheckpointSuite.scala) | 9
-rw-r--r-- | core/src/test/scala/org/apache/spark/DistributedSuite.scala (renamed from core/src/test/scala/spark/DistributedSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/DriverSuite.scala (renamed from core/src/test/scala/spark/DriverSuite.scala) | 5
-rw-r--r-- | core/src/test/scala/org/apache/spark/FailureSuite.scala (renamed from core/src/test/scala/spark/FailureSuite.scala) | 42
-rw-r--r-- | core/src/test/scala/org/apache/spark/FileServerSuite.scala (renamed from core/src/test/scala/spark/FileServerSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/FileSuite.scala (renamed from core/src/test/scala/spark/FileSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/JavaAPISuite.java (renamed from core/src/test/scala/spark/JavaAPISuite.java) | 54
-rw-r--r-- | core/src/test/scala/org/apache/spark/LocalSparkContext.scala (renamed from core/src/test/scala/spark/LocalSparkContext.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala (renamed from core/src/test/scala/spark/MapOutputTrackerSuite.scala) | 20
-rw-r--r-- | core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala | 28
-rw-r--r-- | core/src/test/scala/org/apache/spark/PartitioningSuite.scala (renamed from core/src/test/scala/spark/PartitioningSuite.scala) | 13
-rw-r--r-- | core/src/test/scala/org/apache/spark/PipedRDDSuite.scala (renamed from core/src/test/scala/spark/PipedRDDSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/SharedSparkContext.scala (renamed from core/src/test/scala/spark/SharedSparkContext.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala (renamed from core/src/test/scala/spark/ShuffleNettySuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/ShuffleSuite.scala (renamed from core/src/test/scala/spark/ShuffleSuite.scala) | 105
-rw-r--r-- | core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala | 60
-rw-r--r-- | core/src/test/scala/org/apache/spark/ThreadingSuite.scala (renamed from core/src/test/scala/spark/ThreadingSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/UnpersistSuite.scala (renamed from core/src/test/scala/spark/UnpersistSuite.scala) | 4
-rw-r--r-- | core/src/test/scala/org/apache/spark/ZippedPartitionsSuite.scala (renamed from core/src/test/scala/spark/ZippedPartitionsSuite.scala) | 4
-rw-r--r-- | core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala | 62
-rw-r--r-- | core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala | 89
-rw-r--r-- | core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala | 54
-rw-r--r-- | core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala (renamed from core/src/test/scala/spark/rdd/JdbcRDDSuite.scala) | 6
-rw-r--r-- | core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala (renamed from core/src/test/scala/spark/PairRDDFunctionsSuite.scala) | 10
-rw-r--r-- | core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala (renamed from core/src/test/scala/spark/rdd/ParallelCollectionSplitSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala (renamed from core/src/test/scala/spark/RDDSuite.scala) | 71
-rw-r--r-- | core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala (renamed from core/src/test/scala/spark/SortingSuite.scala) | 6
-rw-r--r-- | core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala (renamed from core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala) | 57
-rw-r--r-- | core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala (renamed from core/src/test/scala/spark/scheduler/JobLoggerSuite.scala) | 14
-rw-r--r-- | core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala (renamed from core/src/test/scala/spark/scheduler/SparkListenerSuite.scala) | 6
-rw-r--r-- | core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala (renamed from core/src/test/scala/spark/scheduler/TaskContextSuite.scala) | 12
-rw-r--r-- | core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala (renamed from core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala) | 41
-rw-r--r-- | core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala | 273
-rw-r--r-- | core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala | 26
-rw-r--r-- | core/src/test/scala/org/apache/spark/scheduler/local/LocalSchedulerSuite.scala (renamed from core/src/test/scala/spark/scheduler/LocalSchedulerSuite.scala) | 56
-rw-r--r-- | core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala (renamed from core/src/test/scala/spark/KryoSerializerSuite.scala) | 71
-rw-r--r-- | core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala (renamed from core/src/test/scala/spark/storage/BlockManagerSuite.scala) | 29
-rw-r--r-- | core/src/test/scala/org/apache/spark/ui/UISuite.scala (renamed from core/src/test/scala/spark/ui/UISuite.scala) | 9
-rw-r--r-- | core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala (renamed from core/src/test/scala/spark/ClosureCleanerSuite.scala) | 7
-rw-r--r-- | core/src/test/scala/org/apache/spark/util/DistributionSuite.scala (renamed from core/src/test/scala/spark/util/DistributionSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/util/FakeClock.scala | 26
-rw-r--r-- | core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala (renamed from core/src/test/scala/spark/util/NextIteratorSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/util/RateLimitedOutputStreamSuite.scala (renamed from core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala (renamed from core/src/test/scala/spark/SizeEstimatorSuite.scala) | 2
-rw-r--r-- | core/src/test/scala/org/apache/spark/util/UtilsSuite.scala (renamed from core/src/test/scala/spark/UtilsSuite.scala) | 18
-rw-r--r-- | docs/README.md | 2
-rwxr-xr-x | docs/_layouts/global.html | 34
-rw-r--r-- | docs/_plugins/copy_api_dirs.rb | 4
-rw-r--r-- | docs/bagel-programming-guide.md | 37
-rw-r--r-- | docs/building-with-maven.md | 58
-rw-r--r-- | docs/configuration.md | 152
-rw-r--r-- | docs/css/bootstrap.min.css | 2
-rw-r--r-- | docs/ec2-scripts.md | 32
-rw-r--r-- | docs/hardware-provisioning.md | 69
-rw-r--r-- | docs/img/incubator-logo.png | bin | 0 -> 11651 bytes
-rw-r--r-- | docs/index.md | 77
-rw-r--r-- | docs/java-programming-guide.md | 33
-rw-r--r-- | docs/mllib-guide.md | 6
-rw-r--r-- | docs/python-programming-guide.md | 47
-rw-r--r-- | docs/quick-start.md | 79
-rw-r--r-- | docs/running-on-mesos.md | 3
-rw-r--r-- | docs/running-on-yarn.md | 55
-rw-r--r-- | docs/scala-programming-guide.md | 36
-rw-r--r-- | docs/spark-debugger.md | 4
-rw-r--r-- | docs/spark-simple-tutorial.md | 41
-rw-r--r-- | docs/spark-standalone.md | 58
-rw-r--r-- | docs/streaming-custom-receivers.md | 61
-rw-r--r-- | docs/streaming-programming-guide.md | 45
-rw-r--r-- | docs/tuning.md | 13
-rw-r--r-- | ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh | 9
-rw-r--r-- | ec2/deploy.generic/root/spark-ec2/ec2-variables.sh | 18
-rwxr-xr-x | ec2/spark_ec2.py | 277
-rw-r--r-- | examples/pom.xml | 293
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java (renamed from examples/src/main/java/spark/examples/JavaHdfsLR.java) | 10
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/JavaKMeans.java (renamed from examples/src/main/java/spark/examples/JavaKMeans.java) | 14
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java (renamed from examples/src/main/java/spark/examples/JavaLogQuery.java) | 12
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/JavaPageRank.java | 115
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java (renamed from examples/src/main/java/spark/examples/JavaSparkPi.java) | 10
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/JavaTC.java (renamed from examples/src/main/java/spark/examples/JavaTC.java) | 8
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/JavaWordCount.java (renamed from examples/src/main/java/spark/examples/JavaWordCount.java) | 14
-rw-r--r-- | examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java | 87
-rw-r--r-- | examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java | 81
-rw-r--r-- | examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java | 85
-rw-r--r-- | examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java (renamed from examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java) | 10
-rw-r--r-- | examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java (renamed from examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java) | 16
-rw-r--r-- | examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java (renamed from examples/src/main/java/spark/streaming/examples/JavaQueueStream.java) | 16
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala (renamed from examples/src/main/scala/spark/examples/BroadcastTest.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala (renamed from examples/src/main/scala/spark/examples/CassandraTest.scala) | 6
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala (renamed from examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala (renamed from examples/src/main/scala/spark/examples/GroupByTest.scala) | 6
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/HBaseTest.scala (renamed from examples/src/main/scala/spark/examples/HBaseTest.scala) | 6
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala (renamed from examples/src/main/scala/spark/examples/HdfsTest.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/LocalALS.scala (renamed from examples/src/main/scala/spark/examples/LocalALS.scala) | 2
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala (renamed from examples/src/main/scala/spark/examples/LocalFileLR.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala (renamed from examples/src/main/scala/spark/examples/LocalKMeans.scala) | 6
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/LocalLR.scala (renamed from examples/src/main/scala/spark/examples/LocalLR.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/LocalPi.scala (renamed from examples/src/main/scala/spark/examples/LocalPi.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/LogQuery.scala (renamed from examples/src/main/scala/spark/examples/LogQuery.scala) | 6
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala (renamed from examples/src/main/scala/spark/examples/MultiBroadcastTest.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala (renamed from examples/src/main/scala/spark/examples/SimpleSkewedGroupByTest.scala) | 6
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala (renamed from examples/src/main/scala/spark/examples/SkewedGroupByTest.scala) | 6
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/SparkALS.scala (renamed from examples/src/main/scala/spark/examples/SparkALS.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala (renamed from examples/src/main/scala/spark/examples/SparkHdfsLR.scala) | 11
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala (renamed from examples/src/main/scala/spark/examples/SparkKMeans.scala) | 8
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/SparkLR.scala (renamed from examples/src/main/scala/spark/examples/SparkLR.scala) | 6
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala | 46
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/SparkPi.scala (renamed from examples/src/main/scala/spark/examples/SparkPi.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/SparkTC.scala (renamed from examples/src/main/scala/spark/examples/SparkTC.scala) | 4
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala (renamed from bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala) | 11
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala (renamed from bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala) | 12
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala (renamed from bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala) | 21
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/ActorWordCount.scala) | 18
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/FlumeEventCount.scala) | 8
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/HdfsWordCount.scala) | 8
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala) | 14
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala) | 8
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/QueueStream.scala (renamed from examples/src/main/scala/spark/streaming/examples/QueueStream.scala) | 8 | ||||
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala (renamed from examples/src/main/scala/spark/streaming/examples/RawNetworkGrep.scala) | 12 | ||||
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/StatefulNetworkWordCount.scala) | 8 | ||||
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala (renamed from examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdCMS.scala) | 10 | ||||
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala (renamed from examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdHLL.scala) | 8 | ||||
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala (renamed from examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala) | 6 | ||||
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/ZeroMQWordCount.scala) | 10 | ||||
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewGenerator.scala (renamed from examples/src/main/scala/spark/streaming/examples/clickstream/PageViewGenerator.scala) | 6 | ||||
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewStream.scala (renamed from examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala) | 14 | ||||
-rwxr-xr-x | make-distribution.sh | 70 | ||||
-rw-r--r-- | mllib/pom.xml | 75 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala | 21 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala | 189 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala | 188 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala (renamed from mllib/src/main/scala/spark/mllib/clustering/KMeans.scala) | 30 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala (renamed from mllib/src/main/scala/spark/mllib/clustering/KMeansModel.scala) | 8 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala (renamed from mllib/src/main/scala/spark/mllib/clustering/LocalKMeans.scala) | 2 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala | 98 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala | 168 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala | 29 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala | 99 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala (renamed from mllib/src/main/scala/spark/mllib/recommendation/ALS.scala) | 72 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala (renamed from mllib/src/main/scala/spark/mllib/recommendation/MatrixFactorizationModel.scala) | 15 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala | 160 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala | 26 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala | 211 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala | 168 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala (renamed from mllib/src/main/scala/spark/mllib/regression/Regression.scala) | 6 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala | 214 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala (renamed from core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala) | 41 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala | 85 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala | 133 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala (renamed from mllib/src/main/scala/spark/mllib/regression/LogisticRegressionGenerator.scala) | 56 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala | 114 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala (renamed from mllib/src/main/scala/spark/mllib/util/MLUtils.scala) | 51 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala | 51 | ||||
-rw-r--r-- | mllib/src/main/scala/spark/mllib/optimization/Gradient.scala | 50 | ||||
-rw-r--r-- | mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala | 79 | ||||
-rw-r--r-- | mllib/src/main/scala/spark/mllib/optimization/Updater.scala | 44 | ||||
-rw-r--r-- | mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala | 175 | ||||
-rw-r--r-- | mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala | 211 | ||||
-rw-r--r-- | mllib/src/main/scala/spark/mllib/regression/RidgeRegressionGenerator.scala | 72 | ||||
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java | 98 | ||||
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java | 98 | ||||
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java | 115 | ||||
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java | 110 | ||||
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java | 97 | ||||
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java | 94 | ||||
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java | 110 | ||||
-rw-r--r-- | mllib/src/test/resources/log4j.properties | 2 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala | 150 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala | 169 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala (renamed from mllib/src/test/scala/spark/mllib/clustering/KMeansSuite.scala) | 13 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala (renamed from mllib/src/test/scala/spark/mllib/recommendation/ALSSuite.scala) | 66 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala | 121 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala | 72 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala | 90 | ||||
-rw-r--r-- | mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala | 74 | ||||
-rw-r--r-- | mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala | 64 | ||||
-rw-r--r-- | pagerank_data.txt | 6 | ||||
-rw-r--r-- | pom.xml | 316 | ||||
-rw-r--r-- | project/SparkBuild.scala | 176 | ||||
-rw-r--r-- | project/build.properties | 2 | ||||
-rw-r--r-- | project/plugins.sbt | 6 | ||||
-rwxr-xr-x | pyspark | 25 | ||||
-rwxr-xr-x | python/examples/als.py | 5 | ||||
-rwxr-xr-x[-rw-r--r--] | python/examples/kmeans.py | 3 | ||||
-rwxr-xr-x | python/examples/logistic_regression.py | 54 | ||||
-rwxr-xr-x | python/examples/pagerank.py | 70 | ||||
-rwxr-xr-x[-rw-r--r--] | python/examples/pi.py | 3 | ||||
-rwxr-xr-x[-rw-r--r--] | python/examples/transitive_closure.py | 5 | ||||
-rwxr-xr-x[-rw-r--r--] | python/examples/wordcount.py | 5 | ||||
-rw-r--r-- | python/pyspark/context.py | 29 | ||||
-rw-r--r-- | python/pyspark/files.py | 2 | ||||
-rw-r--r-- | python/pyspark/java_gateway.py | 12 | ||||
-rw-r--r-- | python/pyspark/rdd.py | 191 | ||||
-rw-r--r-- | python/pyspark/rddsampler.py | 112 | ||||
-rw-r--r-- | python/pyspark/shell.py | 20 | ||||
-rw-r--r-- | python/pyspark/statcounter.py | 109 | ||||
-rw-r--r-- | python/pyspark/tests.py | 24 | ||||
-rw-r--r-- | python/pyspark/worker.py | 13 | ||||
-rwxr-xr-x | python/run-tests | 25 | ||||
-rw-r--r-- | python/test_support/userlib-0.1-py2.7.egg | bin | 0 -> 1945 bytes | |||
-rw-r--r-- | repl-bin/pom.xml | 168 | ||||
-rwxr-xr-x | repl-bin/src/deb/bin/run | 2 | ||||
-rwxr-xr-x | repl-bin/src/deb/bin/spark-executor | 2 | ||||
-rwxr-xr-x | repl-bin/src/deb/bin/spark-shell | 2 | ||||
-rw-r--r-- | repl/pom.xml | 223 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala (renamed from repl/src/main/scala/spark/repl/ExecutorClassLoader.scala) | 2 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/Main.scala (renamed from repl/src/main/scala/spark/repl/Main.scala) | 2 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/SparkHelper.scala (renamed from repl/src/main/scala/spark/repl/SparkHelper.scala) | 0 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala (renamed from repl/src/main/scala/spark/repl/SparkILoop.scala) | 22 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala (renamed from repl/src/main/scala/spark/repl/SparkIMain.scala) | 10 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/SparkISettings.scala (renamed from repl/src/main/scala/spark/repl/SparkISettings.scala) | 2 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/SparkImports.scala (renamed from repl/src/main/scala/spark/repl/SparkImports.scala) | 2 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/SparkJLineCompletion.scala (renamed from repl/src/main/scala/spark/repl/SparkJLineCompletion.scala) | 2 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/SparkJLineReader.scala (renamed from repl/src/main/scala/spark/repl/SparkJLineReader.scala) | 2 | ||||
-rw-r--r-- | repl/src/main/scala/org/apache/spark/repl/SparkMemberHandlers.scala (renamed from repl/src/main/scala/spark/repl/SparkMemberHandlers.scala) | 2 | ||||
-rw-r--r-- | repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala (renamed from repl/src/test/scala/spark/repl/ReplSuite.scala) | 6 | ||||
-rwxr-xr-x | run-example | 81 | ||||
-rwxr-xr-x | sbt/sbt | 6 | ||||
-rwxr-xr-x | spark-class (renamed from run) | 80 | ||||
-rwxr-xr-x | spark-executor | 2 | ||||
-rwxr-xr-x | spark-shell | 7 | ||||
-rw-r--r-- | spark-shell.cmd | 2 | ||||
-rw-r--r-- | streaming/pom.xml | 110 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala (renamed from streaming/src/main/scala/spark/streaming/Checkpoint.scala) | 28 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/DStream.scala (renamed from streaming/src/main/scala/spark/streaming/DStream.scala) | 17 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/DStreamCheckpointData.scala (renamed from streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala) | 4 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala (renamed from streaming/src/main/scala/spark/streaming/DStreamGraph.scala) | 4 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/Duration.scala (renamed from streaming/src/main/scala/spark/streaming/Duration.scala) | 10 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/Interval.scala (renamed from streaming/src/main/scala/spark/streaming/Interval.scala) | 2 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/Job.scala (renamed from streaming/src/main/scala/spark/streaming/Job.scala) | 2 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/JobManager.scala (renamed from streaming/src/main/scala/spark/streaming/JobManager.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/NetworkInputTracker.scala (renamed from streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala) | 12 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/PairDStreamFunctions.scala (renamed from streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala) | 29 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/Scheduler.scala (renamed from streaming/src/main/scala/spark/streaming/Scheduler.scala) | 9 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala (renamed from streaming/src/main/scala/spark/streaming/StreamingContext.scala) | 21 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/Time.scala (renamed from streaming/src/main/scala/spark/streaming/Time.scala) | 2 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala (renamed from streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala) | 16 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala (renamed from streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala) | 10 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala (renamed from streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala) | 30 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala (renamed from streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala) | 34 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/CoGroupedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/CoGroupedDStream.scala) | 9 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/ConstantInputDStream.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala) | 8 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FilteredDStream.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FlatMapValuedDStream.scala) | 8 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FlatMappedDStream.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/FlumeInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/FlumeInputDStream.scala) | 17 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/ForEachDStream.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/GlommedDStream.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala) | 4 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/KafkaInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala) | 8 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/MapPartitionedDStream.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/MapValuedDStream.scala) | 8 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/MappedDStream.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala) | 29 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/PluggableInputDStream.scala) | 4 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/QueueInputDStream.scala) | 8 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala) | 8 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala) | 16 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/ShuffledDStream.scala) | 9 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/SocketInputDStream.scala) | 8 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala) | 12 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/TransformedDStream.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/TwitterInputDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/UnionDStream.scala) | 8 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala (renamed from streaming/src/main/scala/spark/streaming/dstream/WindowedDStream.scala) | 10 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala (renamed from streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala) | 8 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala (renamed from streaming/src/main/scala/spark/streaming/receivers/ZeroMQReceiver.scala) | 4 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/util/Clock.scala (renamed from streaming/src/main/scala/spark/streaming/util/Clock.scala) | 2 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/util/MasterFailureTest.scala (renamed from streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala) | 9 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala (renamed from streaming/src/main/scala/spark/streaming/util/RawTextHelper.scala) | 6 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala (renamed from streaming/src/main/scala/spark/streaming/util/RawTextSender.scala) | 7 | ||||
-rw-r--r-- | streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala (renamed from streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala) | 2 | ||||
-rw-r--r-- | streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java (renamed from streaming/src/test/java/spark/streaming/JavaAPISuite.java) | 32 | ||||
-rw-r--r-- | streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala (renamed from streaming/src/test/java/spark/streaming/JavaTestUtils.scala) | 17 | ||||
-rw-r--r-- | streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala (renamed from streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala) | 6 | ||||
-rw-r--r-- | streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala (renamed from streaming/src/test/scala/spark/streaming/CheckpointSuite.scala) | 8 | ||||
-rw-r--r-- | streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala (renamed from streaming/src/test/scala/spark/streaming/FailureSuite.scala) | 6 | ||||
-rw-r--r-- | streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala (renamed from streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala) | 12 | ||||
-rw-r--r-- | streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala (renamed from streaming/src/test/scala/spark/streaming/TestSuiteBase.scala) | 11 | ||||
-rw-r--r-- | streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala (renamed from streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala) | 6 | ||||
-rw-r--r-- | tools/pom.xml | 69 | ||||
-rw-r--r-- | tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala | 360 | ||||
-rw-r--r-- | yarn/pom.xml | 111 | ||||
-rw-r--r-- | yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMaster.scala) | 32 | ||||
-rw-r--r-- | yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMasterArguments.scala) | 4 | ||||
-rw-r--r-- | yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala) | 21 | ||||
-rw-r--r-- | yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/ClientArguments.scala) | 8 | ||||
-rw-r--r-- | yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/WorkerRunnable.scala) | 15 | ||||
-rw-r--r-- | yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/yarn/YarnAllocationHandler.scala) | 8 | ||||
-rw-r--r-- | yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala (renamed from core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala) | 42 | ||||
-rw-r--r-- | yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala (renamed from core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala) | 13 |
616 files changed, 16182 insertions, 8290 deletions
diff --git a/.gitignore b/.gitignore
index ae39c52b11..e1f64a1133 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,3 +38,6 @@ dependency-reduced-pom.xml
 .ensime_lucene
 checkpoint
 derby.log
+dist/
+spark-*-bin.tar.gz
+unit-tests.log
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
-# Spark
+# Apache Spark
 
-Lightning-Fast Cluster Computing - <http://www.spark-project.org/>
+Lightning-Fast Cluster Computing - <http://spark.incubator.apache.org/>
 
 ## Online Documentation
 
 You can find the latest Spark documentation, including a programming
-guide, on the project webpage at <http://spark-project.org/documentation.html>.
+guide, on the project webpage at <http://spark.incubator.apache.org/documentation.html>.
 This README file only contains basic setup instructions.
 
@@ -16,26 +16,24 @@ Spark requires Scala 2.9.3 (Scala 2.10 is not yet supported). The project is
 built using Simple Build Tool (SBT), which is packaged with it. To build
 Spark and its example programs, run:
 
-    sbt/sbt package
+    sbt/sbt assembly
 
-Spark also supports building using Maven. If you would like to build using Maven,
-see the [instructions for building Spark with Maven](http://spark-project.org/docs/latest/building-with-maven.html)
-in the spark documentation..
+Once you've built Spark, the easiest way to start using it is the shell:
 
-To run Spark, you will need to have Scala's bin directory in your `PATH`, or
-you will need to set the `SCALA_HOME` environment variable to point to where
-you've installed Scala. Scala must be accessible through one of these
-methods on your cluster's worker nodes as well as its master.
+    ./spark-shell
 
-To run one of the examples, use `./run <class> <params>`. For example:
+Or, for the Python API, the Python shell (`./pyspark`).
 
-    ./run spark.examples.SparkLR local[2]
+Spark also comes with several sample programs in the `examples` directory.
+To run one of them, use `./run-example <class> <params>`. For example:
+
+    ./run-example org.apache.spark.examples.SparkLR local[2]
 
 will run the Logistic Regression example locally on 2 CPUs.
 
 Each of the example programs prints usage help if no params are given.
 
-All of the Spark samples take a `<host>` parameter that is the cluster URL
+All of the Spark samples take a `<master>` parameter that is the cluster URL
 to connect to. This can be a mesos:// or spark:// URL, or "local" to run
 locally with one thread, or "local[N]" to run locally with N threads.
 
@@ -43,23 +41,52 @@ locally with one thread, or "local[N]" to run locally with N threads.
 
 ## A Note About Hadoop Versions
 
 Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
-storage systems. Because the HDFS API has changed in different versions of
+storage systems. Because the protocols have changed in different versions of
 Hadoop, you must build Spark against the same version that your cluster runs.
-You can change the version by setting the `HADOOP_VERSION` variable at the top
-of `project/SparkBuild.scala`, then rebuilding Spark.
+You can change the version by setting the `SPARK_HADOOP_VERSION` environment
+when building Spark.
+
+For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop
+versions without YARN, use:
-## Configuration
+    # Apache Hadoop 1.2.1
+    $ SPARK_HADOOP_VERSION=1.2.1 sbt/sbt assembly
+
+    # Cloudera CDH 4.2.0 with MapReduce v1
+    $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly
+
+For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions
+with YARN, also set `SPARK_YARN=true`:
+
+    # Apache Hadoop 2.0.5-alpha
+    $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly
+
+    # Cloudera CDH 4.2.0 with MapReduce v2
+    $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly
+
+For convenience, these variables may also be set through the `conf/spark-env.sh` file
+described below.
-Please refer to the "Configuration" guide in the online documentation for a
-full overview on how to configure Spark. At the minimum, you will need to
-create a `conf/spark-env.sh` script (copy `conf/spark-env.sh.template`) and
-set the following two variables:
+When developing a Spark application, specify the Hadoop version by adding the
+"hadoop-client" artifact to your project's dependencies. For example, if you're
+using Hadoop 1.0.1 and build your application using SBT, add this entry to
+`libraryDependencies`:
-- `SCALA_HOME`: Location where Scala is installed.
+
+    "org.apache.hadoop" % "hadoop-client" % "1.2.1"
+
+If your project is built with Maven, add this to your POM file's `<dependencies>` section:
+
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <version>1.2.1</version>
+    </dependency>
+
+
+## Configuration
-- `MESOS_NATIVE_LIBRARY`: Your Mesos library (only needed if you want to run
-  on Mesos). For example, this might be `/usr/local/lib/libmesos.so` on Linux.
+Please refer to the [Configuration guide](http://spark.incubator.apache.org/docs/latest/configuration.html)
+in the online documentation for an overview on how to configure Spark.
 
 ## Contributing to Spark
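As a quick illustration of the renamed example classes in the README above, the same launcher also works against a running cluster; the master URL below is a placeholder for illustration, not something defined in this commit:

    # hypothetical standalone master; substitute your own host and port
    ./run-example org.apache.spark.examples.SparkPi spark://master-host:7077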
diff --git a/assembly/README b/assembly/README
new file mode 100644
index 0000000000..6ee2a536d7
--- /dev/null
+++ b/assembly/README
@@ -0,0 +1,13 @@
+This is an assembly module for Spark project.
+
+It creates a single tar.gz file that includes all needed dependency of the project
+except for org.apache.hadoop.* jars that are supposed to be available from the
+deployed Hadoop cluster.
+
+This module is off by default to avoid spending extra time on top of repl-bin
+module. To activate it specify the profile in the command line
+  -Passembly
+
+In case you want to avoid building time-expensive repl-bin module, that shaders
+all the dependency into a big flat jar supplement maven command with
+  -DnoExpensive
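Going by the assembly/README above, the module would be activated from Maven roughly as follows (a sketch; the exact goals depend on the local build setup):

    # build with the assembly module enabled
    mvn -Passembly package

    # additionally skip shading the time-expensive repl-bin module
    mvn -Passembly -DnoExpensive package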
diff --git a/assembly/lib/PY4J_LICENSE.txt b/assembly/lib/PY4J_LICENSE.txt
new file mode 100644
index 0000000000..a70279ca14
--- /dev/null
+++ b/assembly/lib/PY4J_LICENSE.txt
@@ -0,0 +1,27 @@
+
+Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+- The name of the author may not be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/assembly/lib/PY4J_VERSION.txt b/assembly/lib/PY4J_VERSION.txt
new file mode 100644
index 0000000000..04a0cd52a8
--- /dev/null
+++ b/assembly/lib/PY4J_VERSION.txt
@@ -0,0 +1 @@
+b7924aabe9c5e63f0a4d8bbd17019534c7ec014e
diff --git a/python/lib/py4j0.7.jar b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar
index 73b7ddb7d1..73b7ddb7d1 100644
--- a/python/lib/py4j0.7.jar
+++ b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar
Binary files differ
diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom
new file mode 100644
index 0000000000..1c730e19b4
--- /dev/null
+++ b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>net.sf.py4j</groupId>
+  <artifactId>py4j</artifactId>
+  <version>0.7</version>
+  <description>POM was created from install:install-file</description>
+</project>
diff --git a/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml
new file mode 100644
index 0000000000..6942ff45e7
--- /dev/null
+++ b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<metadata>
+  <groupId>net.sf.py4j</groupId>
+  <artifactId>py4j</artifactId>
+  <versioning>
+    <release>0.7</release>
+    <versions>
+      <version>0.7</version>
+    </versions>
+    <lastUpdated>20130828020333</lastUpdated>
+  </versioning>
+</metadata>
diff --git a/assembly/pom.xml b/assembly/pom.xml
new file mode 100644
index 0000000000..d19f44d292
--- /dev/null
+++ b/assembly/pom.xml
@@ -0,0 +1,159 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements. See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License. You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent</artifactId>
+    <version>0.8.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.spark</groupId>
+  <artifactId>spark-assembly</artifactId>
+  <name>Spark Project Assembly</name>
+  <url>http://spark.incubator.apache.org/</url>
+
+  <repositories>
+    <!-- A repository in the local filesystem for the Py4J JAR, which is not in Maven central -->
+    <repository>
+      <id>lib</id>
+      <url>file://${project.basedir}/lib</url>
+    </repository>
+  </repositories>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-bagel</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-mllib</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-repl</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sf.py4j</groupId>
+      <artifactId>py4j</artifactId>
+      <version>0.7</version>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <!-- Use the shade plugin to create a big JAR with all the dependencies -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <configuration>
+          <shadedArtifactAttached>false</shadedArtifactAttached>
+          <outputFile>${project.build.directory}/scala-${scala.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</outputFile>
+          <artifactSet>
+            <includes>
+              <include>*:*</include>
+            </includes>
+          </artifactSet>
+          <filters>
+            <filter>
+              <artifact>*:*</artifact>
+              <excludes>
+                <exclude>META-INF/*.SF</exclude>
+                <exclude>META-INF/*.DSA</exclude>
+                <exclude>META-INF/*.RSA</exclude>
+              </excludes>
+            </filter>
+          </filters>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+                  <resource>reference.conf</resource>
+                </transformer>
+              </transformers>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>hadoop2-yarn</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-yarn</artifactId>
+          <version>${project.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
+      <id>bigtop-dist</id>
+      <!-- This profile uses the assembly plugin to create a special "dist" package for BigTop
+           that contains Spark but not the Hadoop JARs it depends on.
+      -->
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-assembly-plugin</artifactId>
+            <version>2.4</version>
+            <executions>
+              <execution>
+                <id>dist</id>
+                <phase>package</phase>
+                <goals>
+                  <goal>single</goal>
+                </goals>
+                <configuration>
+                  <descriptors>
+                    <descriptor>src/main/assembly/assembly.xml</descriptor>
+                  </descriptors>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
+</project>
diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml
new file mode 100644
index 0000000000..47d3fa93d0
--- /dev/null
+++ b/assembly/src/main/assembly/assembly.xml
@@ -0,0 +1,85 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements. See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License. You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<assembly>
+  <id>dist</id>
+  <formats>
+    <format>tar.gz</format>
+    <format>dir</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+
+  <fileSets>
+    <fileSet>
+      <includes>
+        <include>README</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>
+        ${project.parent.basedir}/core/src/main/resources/org/apache/spark/ui/static/
+      </directory>
+      <outputDirectory>/ui-resources/org/apache/spark/ui/static</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>
+        ${project.parent.basedir}/bin/
+      </directory>
+      <outputDirectory>/bin</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>
+        ${project.parent.basedir}
+      </directory>
+      <outputDirectory>/bin</outputDirectory>
+      <includes>
+        <include>run-example*</include>
+        <include>spark-class*</include>
+        <include>spark-shell*</include>
+        <include>spark-executor*</include>
+      </includes>
+    </fileSet>
+  </fileSets>
+
+  <dependencySets>
+    <dependencySet>
+      <includes>
+        <include>org.apache.spark:*:jar</include>
+      </includes>
+      <excludes>
+        <exclude>org.apache.spark:spark-assembly:jar</exclude>
+      </excludes>
+    </dependencySet>
+    <dependencySet>
+      <outputDirectory>lib</outputDirectory>
+      <useTransitiveDependencies>true</useTransitiveDependencies>
+      <unpack>false</unpack>
+      <scope>runtime</scope>
+      <useProjectArtifact>false</useProjectArtifact>
+      <excludes>
+        <exclude>org.apache.hadoop:*:jar</exclude>
+        <exclude>org.apache.spark:*:jar</exclude>
+      </excludes>
+    </dependencySet>
+  </dependencySets>
+
+</assembly>
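Given the `bigtop-dist` profile and the descriptor above, the Hadoop-free dist tarball would presumably be produced with something like this sketch (the `-pl` module selection is an assumption about the local layout, not part of this commit):

    # package the "dist" tar.gz from the assembly module only
    mvn -Pbigtop-dist -pl assembly package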
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 60bbc49e6c..51173c32b2 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -19,24 +19,28 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <parent>
-    <groupId>org.spark-project</groupId>
+    <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
     <version>0.8.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <groupId>org.spark-project</groupId>
+  <groupId>org.apache.spark</groupId>
   <artifactId>spark-bagel</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Bagel</name>
-  <url>http://spark-project.org/</url>
+  <url>http://spark.incubator.apache.org/</url>
 
   <dependencies>
     <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-server</artifactId>
     </dependency>
-
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.version}</artifactId>
@@ -58,103 +62,4 @@
     </plugin>
   </plugins>
 </build>
-
-  <profiles>
-    <profile>
-      <id>hadoop1</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.spark-project</groupId>
-          <artifactId>spark-core</artifactId>
-          <version>${project.version}</version>
-          <classifier>hadoop1</classifier>
-        </dependency>
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-core</artifactId>
-          <scope>provided</scope>
-        </dependency>
-      </dependencies>
-      <build>
-        <plugins>
-          <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-jar-plugin</artifactId>
-            <configuration>
-              <classifier>hadoop1</classifier>
-            </configuration>
-          </plugin>
-        </plugins>
-      </build>
-    </profile>
-    <profile>
-      <id>hadoop2</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.spark-project</groupId>
-          <artifactId>spark-core</artifactId>
-          <version>${project.version}</version>
-          <classifier>hadoop2</classifier>
-        </dependency>
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-core</artifactId>
-          <scope>provided</scope>
-        </dependency>
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-client</artifactId>
-          <scope>provided</scope>
-        </dependency>
-      </dependencies>
-      <build>
-        <plugins>
-          <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-jar-plugin</artifactId>
-            <configuration>
-              <classifier>hadoop2</classifier>
-            </configuration>
-          </plugin>
-        </plugins>
-      </build>
-    </profile>
-    <profile>
-      <id>hadoop2-yarn</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.spark-project</groupId>
-          <artifactId>spark-core</artifactId>
-          <version>${project.version}</version>
-          <classifier>hadoop2-yarn</classifier>
-        </dependency>
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-client</artifactId>
-          <scope>provided</scope>
-        </dependency>
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-yarn-api</artifactId>
-          <scope>provided</scope>
-        </dependency>
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-yarn-common</artifactId>
-          <scope>provided</scope>
-        </dependency>
-      </dependencies>
-      <build>
-        <plugins>
-          <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-jar-plugin</artifactId>
-            <configuration>
-              <classifier>hadoop2-yarn</classifier>
-            </configuration>
-          </plugin>
-        </plugins>
-      </build>
-    </profile>
-  </profiles>
 </project>
diff --git a/bagel/src/main/scala/spark/bagel/Bagel.scala b/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala
index 80c8d53d2b..44e26bbb9e 100644
--- a/bagel/src/main/scala/spark/bagel/Bagel.scala
+++ b/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala
@@ -15,32 +15,31 @@
  * limitations under the License.
  */
 
-package spark.bagel
+package org.apache.spark.bagel
 
-import spark._
-import spark.SparkContext._
-
-import scala.collection.mutable.ArrayBuffer
-import storage.StorageLevel
+import org.apache.spark._
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.storage.StorageLevel
 
 object Bagel extends Logging {
   val DEFAULT_STORAGE_LEVEL = StorageLevel.MEMORY_AND_DISK
 
   /**
    * Runs a Bagel program.
-   * @param sc [[spark.SparkContext]] to use for the program.
+   * @param sc [[org.apache.spark.SparkContext]] to use for the program.
    * @param vertices vertices of the graph represented as an RDD of (Key, Vertex) pairs. Often the Key will be
    * the vertex id.
    * @param messages initial set of messages represented as an RDD of (Key, Message) pairs. Often this will be an
    * empty array, i.e. sc.parallelize(Array[K, Message]()).
-   * @param combiner [[spark.bagel.Combiner]] combines multiple individual messages to a given vertex into one
+   * @param combiner [[org.apache.spark.bagel.Combiner]] combines multiple individual messages to a given vertex into one
    * message before sending (which often involves network I/O).
-   * @param aggregator [[spark.bagel.Aggregator]] performs a reduce across all vertices after each superstep,
+   * @param aggregator [[org.apache.spark.bagel.Aggregator]] performs a reduce across all vertices after each superstep,
    * and provides the result to each vertex in the next superstep.
-   * @param partitioner [[spark.Partitioner]] partitions values by key
+   * @param partitioner [[org.apache.spark.Partitioner]] partitions values by key
    * @param numPartitions number of partitions across which to split the graph.
    * Default is the default parallelism of the SparkContext
-   * @param storageLevel [[spark.storage.StorageLevel]] to use for caching of intermediate RDDs in each superstep.
+   * @param storageLevel [[org.apache.spark.storage.StorageLevel]] to use for caching of intermediate RDDs in each superstep.
    * Defaults to caching in memory.
    * @param compute function that takes a Vertex, optional set of (possibly combined) messages to the Vertex,
    * optional Aggregator and the current superstep,
@@ -98,7 +97,7 @@ object Bagel extends Logging {
     verts
   }
 
-  /** Runs a Bagel program with no [[spark.bagel.Aggregator]] and the default storage level */
+  /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] and the default storage level */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
     sc: SparkContext,
     vertices: RDD[(K, V)],
@@ -110,7 +109,7 @@
     compute: (V, Option[C], Int) => (V, Array[M])
   ): RDD[(K, V)] = run(sc, vertices, messages, combiner, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)
 
-  /** Runs a Bagel program with no [[spark.bagel.Aggregator]] */
+  /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
     sc: SparkContext,
     vertices: RDD[(K, V)],
@@ -128,7 +127,7 @@
   }
 
   /**
-   * Runs a Bagel program with no [[spark.bagel.Aggregator]], default [[spark.HashPartitioner]]
+   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], default [[org.apache.spark.HashPartitioner]]
    * and default storage level
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
@@ -141,7 +140,7 @@
     compute: (V, Option[C], Int) => (V, Array[M])
   ): RDD[(K, V)] = run(sc, vertices, messages, combiner, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)
 
-  /** Runs a Bagel program with no [[spark.bagel.Aggregator]] and the default [[spark.HashPartitioner]]*/
+  /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] and the default [[org.apache.spark.HashPartitioner]]*/
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
     sc: SparkContext,
     vertices: RDD[(K, V)],
@@ -159,8 +158,8 @@
   }
 
   /**
-   * Runs a Bagel program with no [[spark.bagel.Aggregator]], default [[spark.HashPartitioner]],
-   * [[spark.bagel.DefaultCombiner]] and the default storage level
+   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], default [[org.apache.spark.HashPartitioner]],
+   * [[org.apache.spark.bagel.DefaultCombiner]] and the default storage level
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest](
     sc: SparkContext,
@@ -172,8 +171,8 @@
   ): RDD[(K, V)] = run(sc, vertices, messages, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)
 
   /**
-   * Runs a Bagel program with no [[spark.bagel.Aggregator]], the default [[spark.HashPartitioner]]
-   * and [[spark.bagel.DefaultCombiner]]
+   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], the default [[org.apache.spark.HashPartitioner]]
+   * and [[org.apache.spark.bagel.DefaultCombiner]]
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest](
     sc: SparkContext,
diff --git a/bagel/src/test/scala/bagel/BagelSuite.scala b/bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala
index ef2d57fbd0..7b954a4775 100644
--- a/bagel/src/test/scala/bagel/BagelSuite.scala
+++ b/bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala
@@ -15,16 +15,14 @@
  * limitations under the License.
  */
 
-package spark.bagel
+package org.apache.spark.bagel
 
-import org.scalatest.{FunSuite, Assertions, BeforeAndAfter}
+import org.scalatest.{BeforeAndAfter, FunSuite, Assertions}
 import org.scalatest.concurrent.Timeouts
 import org.scalatest.time.SpanSugar._
 
-import scala.collection.mutable.ArrayBuffer
-
-import spark._
-import storage.StorageLevel
+import org.apache.spark._
+import org.apache.spark.storage.StorageLevel
 
 class TestVertex(val active: Boolean, val age: Int) extends Vertex with Serializable
 class TestMessage(val targetId: String) extends Message[String] with Serializable
diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 835d1af794..9178b852e6 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -33,6 +33,8 @@ set REPL_DIR=%FWDIR%repl
 set EXAMPLES_DIR=%FWDIR%examples
 set BAGEL_DIR=%FWDIR%bagel
 set MLLIB_DIR=%FWDIR%mllib
+set TOOLS_DIR=%FWDIR%tools
+set YARN_DIR=%FWDIR%yarn
 set STREAMING_DIR=%FWDIR%streaming
 set PYSPARK_DIR=%FWDIR%python
 
@@ -48,6 +50,8 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
 set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
 set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
 set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes
+set CLASSPATH=%CLASSPATH%;%TOOLS_DIR%\target\scala-%SCALA_VERSION%\classes
+set CLASSPATH=%CLASSPATH%;%YARN_DIR%\target\scala-%SCALA_VERSION%\classes
 
 rem Add hadoop conf dir - else FileSystem.*, etc fail
 rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 4c539649a2..c7819d4932 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
@@ -30,85 +30,25 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then
   . $FWDIR/conf/spark-env.sh
 fi
 
-CORE_DIR="$FWDIR/core"
-REPL_DIR="$FWDIR/repl"
-REPL_BIN_DIR="$FWDIR/repl-bin"
-EXAMPLES_DIR="$FWDIR/examples"
-BAGEL_DIR="$FWDIR/bagel"
-MLLIB_DIR="$FWDIR/mllib"
-STREAMING_DIR="$FWDIR/streaming"
-PYSPARK_DIR="$FWDIR/python"
-
 # Build up classpath
-CLASSPATH="$SPARK_CLASSPATH"
-
-function dev_classpath {
-  CLASSPATH="$CLASSPATH:$FWDIR/conf"
-  CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes"
-  if [ -n "$SPARK_TESTING" ] ; then
-    CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
-    CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes"
-  fi
-  CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources"
-  CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes"
-  CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
-  CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes"
-  CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar
-  if [ -e "$FWDIR/lib_managed" ]; then
-    CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*"
-    CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*"
-  fi
-  CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*"
-  # Add the shaded JAR for Maven builds
-  if [ -e $REPL_BIN_DIR/target ]; then
-    for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do
-      CLASSPATH="$CLASSPATH:$jar"
-    done
-    # The shaded JAR doesn't contain examples, so include those separately
-    EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
-    CLASSPATH+=":$EXAMPLES_JAR"
-  fi
-  CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
-  CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
-  for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
-    CLASSPATH="$CLASSPATH:$jar"
-  done
-
-  # Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
-  # to avoid the -sources and -doc packages that are built by publish-local.
- if [ -e "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar ]; then - # Use the JAR from the SBT build - export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar` - fi - if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then - # Use the JAR from the Maven build - export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar` - fi - - # Add Scala standard library - if [ -z "$SCALA_LIBRARY_PATH" ]; then - if [ -z "$SCALA_HOME" ]; then - echo "SCALA_HOME is not set" >&2 - exit 1 - fi - SCALA_LIBRARY_PATH="$SCALA_HOME/lib" - fi - CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar" - CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar" - CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar" -} - -function release_classpath { - CLASSPATH="$CLASSPATH:$FWDIR/jars/*" -} - +CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf" if [ -f "$FWDIR/RELEASE" ]; then - release_classpath + ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark-assembly*.jar` else - dev_classpath + ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar` +fi +CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR" + +# Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1 +if [[ $SPARK_TESTING == 1 ]]; then + CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/test-classes" fi -# Add hadoop conf dir - else FileSystem.*, etc fail ! +# Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail ! # Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts # the configurtion files. if [ "x" != "x$HADOOP_CONF_DIR" ]; then diff --git a/bin/slaves.sh b/bin/slaves.sh index c8fb5ca473..752565b759 100755 --- a/bin/slaves.sh +++ b/bin/slaves.sh @@ -42,7 +42,7 @@ bin=`cd "$bin"; pwd` . "$bin/spark-config.sh" # If the slaves file is specified in the command line, -# then it takes precedence over the definition in +# then it takes precedence over the definition in # spark-env.sh. Save it here. HOSTLIST=$SPARK_SLAVES @@ -58,8 +58,6 @@ if [ "$HOSTLIST" = "" ]; then fi fi -echo $"${@// /\\ }" - # By default disable strict host key checking if [ "$SPARK_SSH_OPTS" = "" ]; then SPARK_SSH_OPTS="-o StrictHostKeyChecking=no" diff --git a/bin/spark-daemon.sh b/bin/spark-daemon.sh index a5b88ca785..5bfe967fbf 100755 --- a/bin/spark-daemon.sh +++ b/bin/spark-daemon.sh @@ -75,6 +75,9 @@ if [ "$SPARK_IDENT_STRING" = "" ]; then export SPARK_IDENT_STRING="$USER" fi + +export SPARK_PRINT_LAUNCH_COMMAND="1" + # get log directory if [ "$SPARK_LOG_DIR" = "" ]; then export SPARK_LOG_DIR="$SPARK_HOME/logs" @@ -85,7 +88,7 @@ TEST_LOG_DIR=$? 
if [ "${TEST_LOG_DIR}" = "0" ]; then rm -f $SPARK_LOG_DIR/.spark_test else - chown $SPARK_IDENT_STRING $SPARK_LOG_DIR + chown $SPARK_IDENT_STRING $SPARK_LOG_DIR fi if [ "$SPARK_PID_DIR" = "" ]; then @@ -107,7 +110,7 @@ fi case $startStop in (start) - + mkdir -p "$SPARK_PID_DIR" if [ -f $pid ]; then @@ -122,14 +125,21 @@ case $startStop in rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $SPARK_MASTER/ "$SPARK_HOME" fi - spark_rotate_log $log + spark_rotate_log "$log" echo starting $command, logging to $log cd "$SPARK_PREFIX" - nohup nice -n $SPARK_NICENESS "$SPARK_PREFIX"/run $command "$@" > "$log" 2>&1 < /dev/null & - echo $! > $pid - sleep 1; head "$log" + nohup nice -n $SPARK_NICENESS "$SPARK_PREFIX"/spark-class $command "$@" >> "$log" 2>&1 < /dev/null & + newpid=$! + echo $newpid > $pid + sleep 2 + # Check if the process has died; in that case we'll tail the log so the user can see + if ! kill -0 $newpid >/dev/null 2>&1; then + echo "failed to launch $command:" + tail -2 "$log" | sed 's/^/ /' + echo "full log in $log" + fi ;; - + (stop) if [ -f $pid ]; then diff --git a/bin/start-master.sh b/bin/start-master.sh index 2288fb19d7..648c7ae75f 100755 --- a/bin/start-master.sh +++ b/bin/start-master.sh @@ -49,4 +49,4 @@ if [ "$SPARK_PUBLIC_DNS" = "" ]; then fi fi -"$bin"/spark-daemon.sh start spark.deploy.master.Master 1 --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT +"$bin"/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT diff --git a/bin/start-slave.sh b/bin/start-slave.sh index d6db16882d..4eefa20944 100755 --- a/bin/start-slave.sh +++ b/bin/start-slave.sh @@ -32,4 +32,4 @@ if [ "$SPARK_PUBLIC_DNS" = "" ]; then fi fi -"$bin"/spark-daemon.sh start spark.deploy.worker.Worker "$@" +"$bin"/spark-daemon.sh start org.apache.spark.deploy.worker.Worker "$@" diff --git a/bin/start-slaves.sh b/bin/start-slaves.sh index dad7c3df76..00dc4888b2 100755 --- a/bin/start-slaves.sh +++ b/bin/start-slaves.sh @@ -35,8 +35,6 @@ if [ "$SPARK_MASTER_IP" = "" ]; then SPARK_MASTER_IP=`hostname` fi -echo "Master IP: $SPARK_MASTER_IP" - # Launch the slaves if [ "$SPARK_WORKER_INSTANCES" = "" ]; then exec "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/start-slave.sh" 1 spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT diff --git a/bin/stop-all.sh b/bin/stop-all.sh index a043ac0095..b6c83a7ba4 100755 --- a/bin/stop-all.sh +++ b/bin/stop-all.sh @@ -20,6 +20,7 @@ # Start all spark daemons. # Run this on the master nde + bin=`dirname "$0"` bin=`cd "$bin"; pwd` diff --git a/bin/stop-master.sh b/bin/stop-master.sh index 31a610bf9d..310e33bedc 100755 --- a/bin/stop-master.sh +++ b/bin/stop-master.sh @@ -24,4 +24,4 @@ bin=`cd "$bin"; pwd` . 
"$bin/spark-config.sh" -"$bin"/spark-daemon.sh stop spark.deploy.master.Master 1 +"$bin"/spark-daemon.sh stop org.apache.spark.deploy.master.Master 1 diff --git a/bin/stop-slaves.sh b/bin/stop-slaves.sh index 8e056f23d4..03e416a132 100755 --- a/bin/stop-slaves.sh +++ b/bin/stop-slaves.sh @@ -29,9 +29,9 @@ if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then fi if [ "$SPARK_WORKER_INSTANCES" = "" ]; then - "$bin"/spark-daemons.sh stop spark.deploy.worker.Worker 1 + "$bin"/spark-daemons.sh stop org.apache.spark.deploy.worker.Worker 1 else for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do - "$bin"/spark-daemons.sh stop spark.deploy.worker.Worker $(( $i + 1 )) + "$bin"/spark-daemons.sh stop org.apache.spark.deploy.worker.Worker $(( $i + 1 )) done fi diff --git a/conf/metrics.properties.template b/conf/metrics.properties.template new file mode 100644 index 0000000000..6c36f3cca4 --- /dev/null +++ b/conf/metrics.properties.template @@ -0,0 +1,90 @@ +# syntax: [instance].sink|source.[name].[options]=[value] + +# This file configures Spark's internal metrics system. The metrics system is +# divided into instances which correspond to internal components. +# Each instance can be configured to report its metrics to one or more sinks. +# Accepted values for [instance] are "master", "worker", "executor", "driver", +# and "applications". A wild card "*" can be used as an instance name, in +# which case all instances will inherit the supplied property. +# +# Within an instance, a "source" specifies a particular set of grouped metrics. +# there are two kinds of sources: +# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will +# collect a Spark component's internal state. Each instance is paired with a +# Spark source that is added automatically. +# 2. Common sources, like JvmSource, which will collect low level state. +# These can be added through configuration options and are then loaded +# using reflection. +# +# A "sink" specifies where metrics are delivered to. Each instance can be +# assigned one or more sinks. +# +# The sink|source field specifies whether the property relates to a sink or +# source. +# +# The [name] field specifies the name of source or sink. +# +# The [options] field is the specific property of this source or sink. The +# source or sink is responsible for parsing this property. +# +# Notes: +# 1. To add a new sink, set the "class" option to a fully qualified class +# name (see examples below). +# 2. Some sinks involve a polling period. The minimum allowed polling period +# is 1 second. +# 3. Wild card properties can be overridden by more specific properties. +# For example, master.sink.console.period takes precedence over +# *.sink.console.period. +# 4. A metrics specific configuration +# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be +# added to Java properties using -Dspark.metrics.conf=xxx if you want to +# customize metrics system. You can also put the file in ${SPARK_HOME}/conf +# and it will be loaded automatically. +# 5. MetricsServlet is added by default as a sink in master, worker and client +# driver, you can send http request "/metrics/json" to get a snapshot of all the +# registered metrics in json format. For master, requests "/metrics/master/json" and +# "/metrics/applications/json" can be sent seperately to get metrics snapshot of +# instance master and applications. MetricsServlet may not be configured by self. 
+# +# Enable JmxSink for all instances by class name +#*.sink.jmx.class=spark.metrics.sink.JmxSink + +# Enable ConsoleSink for all instances by class name +#*.sink.console.class=spark.metrics.sink.ConsoleSink + +# Polling period for ConsoleSink +#*.sink.console.period=10 + +#*.sink.console.unit=seconds + +# Master instance overrides the polling period +#master.sink.console.period=15 + +#master.sink.console.unit=seconds + +# Enable CsvSink for all instances +#*.sink.csv.class=spark.metrics.sink.CsvSink + +# Polling period for CsvSink +#*.sink.csv.period=1 + +#*.sink.csv.unit=minutes + +# Polling directory for CsvSink +#*.sink.csv.directory=/tmp/ + +# Worker instance overrides the polling period +#worker.sink.csv.period=10 + +#worker.sink.csv.unit=minutes + +# Enable jvm source for instance master, worker, driver and executor +#master.source.jvm.class=spark.metrics.source.JvmSource + +#worker.source.jvm.class=spark.metrics.source.JvmSource + +#driver.source.jvm.class=spark.metrics.source.JvmSource + +#executor.source.jvm.class=spark.metrics.source.JvmSource + diff --git a/conf/slaves b/conf/slaves index 6e315a8540..da0a01343d 100644 --- a/conf/slaves +++ b/conf/slaves @@ -1,2 +1,2 @@ -# A Spark Worker will be started on each of the machines listes below. +# A Spark Worker will be started on each of the machines listed below. localhost
\ No newline at end of file diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index b8936314ec..0a35ee7c79 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -1,19 +1,21 @@ #!/usr/bin/env bash # This file contains environment variables required to run Spark. Copy it as -# spark-env.sh and edit that to configure Spark for your site. At a minimum, -# the following two variables should be set: -# - SCALA_HOME, to point to your Scala installation, or SCALA_LIBRARY_PATH to -# point to the directory for Scala library JARs (if you install Scala as a -# Debian or RPM package, these are in a separate path, often /usr/share/java) +# spark-env.sh and edit that to configure Spark for your site. +# +# The following variables can be set in this file: +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos +# - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that +# we recommend setting app-wide options in the application's driver program. +# Examples of node-specific options: -Dspark.local.dir, GC options +# Examples of app-wide options: -Dspark.serializer # -# If using the standalone deploy mode, you can also set variables for it: -# - SPARK_MASTER_IP, to bind the master to a different IP address +# If using the standalone deploy mode, you can also set variables for it here: +# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports # - SPARK_WORKER_CORES, to set the number of cores to use on this machine # - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g) # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT -# - SPARK_WORKER_INSTANCES, to set the number of worker instances/processes -# to be spawned on every slave machine +# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node diff --git a/core/pom.xml b/core/pom.xml index 6329b2fbd8..5738b7406f 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -19,20 +19,32 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-parent</artifactId> <version>0.8.0-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-core</artifactId> <packaging>jar</packaging> <name>Spark Project Core</name> - <url>http://spark-project.org/</url> + <url>http://spark.incubator.apache.org/</url> <dependencies> <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro-ipc</artifactId> + </dependency> + <dependency> <groupId>org.eclipse.jetty</groupId> <artifactId>jetty-server</artifactId> </dependency> @@ -41,6 +53,10 @@ <artifactId>guava</artifactId> </dependency> <dependency> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + </dependency> + <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> </dependency> @@ -49,6 +65,10 @@ 
<artifactId>compress-lzf</artifactId> </dependency> <dependency> + <groupId>org.xerial.snappy</groupId> + <artifactId>snappy-java</artifactId> + </dependency> + <dependency> <groupId>org.ow2.asm</groupId> <artifactId>asm</artifactId> </dependency> @@ -57,8 +77,14 @@ <artifactId>protobuf-java</artifactId> </dependency> <dependency> - <groupId>de.javakaffee</groupId> - <artifactId>kryo-serializers</artifactId> + <groupId>com.twitter</groupId> + <artifactId>chill_2.9.3</artifactId> + <version>0.3.1</version> + </dependency> + <dependency> + <groupId>com.twitter</groupId> + <artifactId>chill-java</artifactId> + <version>0.3.1</version> </dependency> <dependency> <groupId>com.typesafe.akka</groupId> @@ -108,7 +134,18 @@ <groupId>log4j</groupId> <artifactId>log4j</artifactId> </dependency> - + <dependency> + <groupId>com.codahale.metrics</groupId> + <artifactId>metrics-core</artifactId> + </dependency> + <dependency> + <groupId>com.codahale.metrics</groupId> + <artifactId>metrics-jvm</artifactId> + </dependency> + <dependency> + <groupId>com.codahale.metrics</groupId> + <artifactId>metrics-json</artifactId> + </dependency> <dependency> <groupId>org.apache.derby</groupId> <artifactId>derby</artifactId> @@ -186,183 +223,4 @@ </plugin> </plugins> </build> - - <profiles> - <profile> - <id>hadoop1</id> - <dependencies> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>provided</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>build-helper-maven-plugin</artifactId> - <executions> - <execution> - <id>add-source</id> - <phase>generate-sources</phase> - <goals> - <goal>add-source</goal> - </goals> - <configuration> - <sources> - <source>src/main/scala</source> - <source>src/hadoop1/scala</source> - </sources> - </configuration> - </execution> - <execution> - <id>add-scala-test-sources</id> - <phase>generate-test-sources</phase> - <goals> - <goal>add-test-source</goal> - </goals> - <configuration> - <sources> - <source>src/test/scala</source> - </sources> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop1</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - <profile> - <id>hadoop2</id> - <dependencies> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>provided</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>build-helper-maven-plugin</artifactId> - <executions> - <execution> - <id>add-source</id> - <phase>generate-sources</phase> - <goals> - <goal>add-source</goal> - </goals> - <configuration> - <sources> - <source>src/main/scala</source> - <source>src/hadoop2/scala</source> - </sources> - </configuration> - </execution> - <execution> - <id>add-scala-test-sources</id> - <phase>generate-test-sources</phase> - <goals> - <goal>add-test-source</goal> - </goals> - <configuration> - <sources> - <source>src/test/scala</source> - </sources> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - 
<classifier>hadoop2</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-client</artifactId> - <scope>provided</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>build-helper-maven-plugin</artifactId> - <executions> - <execution> - <id>add-source</id> - <phase>generate-sources</phase> - <goals> - <goal>add-source</goal> - </goals> - <configuration> - <sources> - <source>src/main/scala</source> - <source>src/hadoop2-yarn/scala</source> - </sources> - </configuration> - </execution> - <execution> - <id>add-scala-test-sources</id> - <phase>generate-test-sources</phase> - <goals> - <goal>add-test-source</goal> - </goals> - <configuration> - <sources> - <source>src/test/scala</source> - </sources> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop2-yarn</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - </profiles> </project> diff --git a/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala deleted file mode 100644 index aa3b1ed3a5..0000000000 --- a/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.mapreduce - -import org.apache.hadoop.conf.Configuration -import task.{TaskAttemptContextImpl, JobContextImpl} - -trait HadoopMapReduceUtil { - def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) - - def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) - - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, - jobId, isMap, taskId, attemptId) -} diff --git a/core/src/main/java/spark/network/netty/FileClient.java b/core/src/main/java/org/apache/spark/network/netty/FileClient.java index 0625a6d502..20a7a3aa8c 100644 --- a/core/src/main/java/spark/network/netty/FileClient.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileClient.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network.netty; +package org.apache.spark.network.netty; import io.netty.bootstrap.Bootstrap; import io.netty.channel.Channel; diff --git a/core/src/main/java/spark/network/netty/FileClientChannelInitializer.java b/core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java index 05ad4b61d7..65ee15d63b 100644 --- a/core/src/main/java/spark/network/netty/FileClientChannelInitializer.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network.netty; +package org.apache.spark.network.netty; import io.netty.buffer.BufType; import io.netty.channel.ChannelInitializer; diff --git a/core/src/main/java/spark/network/netty/FileClientHandler.java b/core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java index e8cd9801f6..c4aa2669e0 100644 --- a/core/src/main/java/spark/network/netty/FileClientHandler.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network.netty; +package org.apache.spark.network.netty; import io.netty.buffer.ByteBuf; import io.netty.channel.ChannelHandlerContext; diff --git a/core/src/main/java/spark/network/netty/FileServer.java b/core/src/main/java/org/apache/spark/network/netty/FileServer.java index 9f009a61d5..666432474d 100644 --- a/core/src/main/java/spark/network/netty/FileServer.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileServer.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network.netty; +package org.apache.spark.network.netty; import java.net.InetSocketAddress; diff --git a/core/src/main/java/spark/network/netty/FileServerChannelInitializer.java b/core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java index 50c57a81a3..833af1632d 100644 --- a/core/src/main/java/spark/network/netty/FileServerChannelInitializer.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.network.netty; +package org.apache.spark.network.netty; import io.netty.channel.ChannelInitializer; import io.netty.channel.socket.SocketChannel; diff --git a/core/src/main/java/spark/network/netty/FileServerHandler.java b/core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java index 176ba8da49..d3d57a0255 100644 --- a/core/src/main/java/spark/network/netty/FileServerHandler.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network.netty; +package org.apache.spark.network.netty; import java.io.File; import java.io.FileInputStream; diff --git a/core/src/main/java/spark/network/netty/PathResolver.java b/core/src/main/java/org/apache/spark/network/netty/PathResolver.java index f446c55b19..94c034cad0 100755 --- a/core/src/main/java/spark/network/netty/PathResolver.java +++ b/core/src/main/java/org/apache/spark/network/netty/PathResolver.java @@ -15,7 +15,7 @@ * limitations under the License.
*/
-package spark.network.netty;
+package org.apache.spark.network.netty;
public interface PathResolver {
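The hunks above are all part of the same mechanical move of Spark's classes from the spark.* packages to org.apache.spark.* (visible both in the renamed source paths and in the Master/Worker class names passed to spark-daemon.sh). As a rough, hypothetical sketch of what this rename means for application code -- the object name, master string, and RDD operations below are illustrative; only the org.apache.spark package names are taken from this diff:

    // Imports before this change (removed):
    //   import spark.SparkContext
    //   import spark.storage.StorageLevel
    // Imports after the move to the org.apache.spark namespace:
    import org.apache.spark.SparkContext
    import org.apache.spark.storage.StorageLevel

    object RenameExample {
      def main(args: Array[String]) {
        // Only JVM package names change: spark:// master URLs and spark.*
        // configuration property keys keep their existing spelling.
        val sc = new SparkContext("local", "RenameExample")
        val nums = sc.parallelize(1 to 100).persist(StorageLevel.MEMORY_ONLY)
        println(nums.reduce(_ + _))
        sc.stop()
      }
    }
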
diff --git a/core/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css b/core/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css new file mode 100755 index 0000000000..13cef3d6f1 --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/bootstrap.min.css @@ -0,0 +1,874 @@ +/*! + * Bootstrap v2.3.2 + * + * Copyright 2013 Twitter, Inc + * Licensed under the Apache License v2.0 + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Designed and built with all the love in the world @twitter by @mdo and @fat. + */ +.clearfix{*zoom:1;}.clearfix:before,.clearfix:after{display:table;content:"";line-height:0;} +.clearfix:after{clear:both;} +.hide-text{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0;} +.input-block-level{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;} +article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block;} +audio,canvas,video{display:inline-block;*display:inline;*zoom:1;} +audio:not([controls]){display:none;} +html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;} +a:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px;} +a:hover,a:active{outline:0;} +sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline;} +sup{top:-0.5em;} +sub{bottom:-0.25em;} +img{max-width:100%;width:auto\9;height:auto;vertical-align:middle;border:0;-ms-interpolation-mode:bicubic;} +#map_canvas img,.google-maps img{max-width:none;} +button,input,select,textarea{margin:0;font-size:100%;vertical-align:middle;} +button,input{*overflow:visible;line-height:normal;} +button::-moz-focus-inner,input::-moz-focus-inner{padding:0;border:0;} +button,html input[type="button"],input[type="reset"],input[type="submit"]{-webkit-appearance:button;cursor:pointer;} +label,select,button,input[type="button"],input[type="reset"],input[type="submit"],input[type="radio"],input[type="checkbox"]{cursor:pointer;} +input[type="search"]{-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;-webkit-appearance:textfield;} +input[type="search"]::-webkit-search-decoration,input[type="search"]::-webkit-search-cancel-button{-webkit-appearance:none;} +textarea{overflow:auto;vertical-align:top;} +@media print{*{text-shadow:none !important;color:#000 !important;background:transparent !important;box-shadow:none !important;} a,a:visited{text-decoration:underline;} a[href]:after{content:" (" attr(href) ")";} abbr[title]:after{content:" (" attr(title) ")";} .ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:"";} pre,blockquote{border:1px solid #999;page-break-inside:avoid;} thead{display:table-header-group;} tr,img{page-break-inside:avoid;} img{max-width:100% !important;} @page {margin:0.5cm;}p,h2,h3{orphans:3;widows:3;} h2,h3{page-break-after:avoid;}}body{margin:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;line-height:20px;color:#333333;background-color:#ffffff;} +a{color:#0088cc;text-decoration:none;} +a:hover,a:focus{color:#005580;text-decoration:underline;} +.img-rounded{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;} +.img-polaroid{padding:4px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0, 0, 0, 0.2);-webkit-box-shadow:0 1px 3px rgba(0, 0, 0, 0.1);-moz-box-shadow:0 1px 3px rgba(0, 0, 0, 0.1);box-shadow:0 1px 3px rgba(0, 0, 0, 0.1);} 
+.img-circle{-webkit-border-radius:500px;-moz-border-radius:500px;border-radius:500px;} +.row{margin-left:-20px;*zoom:1;}.row:before,.row:after{display:table;content:"";line-height:0;} +.row:after{clear:both;} +[class*="span"]{float:left;min-height:1px;margin-left:20px;} +.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px;} +.span12{width:940px;} +.span11{width:860px;} +.span10{width:780px;} +.span9{width:700px;} +.span8{width:620px;} +.span7{width:540px;} +.span6{width:460px;} +.span5{width:380px;} +.span4{width:300px;} +.span3{width:220px;} +.span2{width:140px;} +.span1{width:60px;} +.offset12{margin-left:980px;} +.offset11{margin-left:900px;} +.offset10{margin-left:820px;} +.offset9{margin-left:740px;} +.offset8{margin-left:660px;} +.offset7{margin-left:580px;} +.offset6{margin-left:500px;} +.offset5{margin-left:420px;} +.offset4{margin-left:340px;} +.offset3{margin-left:260px;} +.offset2{margin-left:180px;} +.offset1{margin-left:100px;} +.row-fluid{width:100%;*zoom:1;}.row-fluid:before,.row-fluid:after{display:table;content:"";line-height:0;} +.row-fluid:after{clear:both;} +.row-fluid [class*="span"]{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;float:left;margin-left:2.127659574468085%;*margin-left:2.074468085106383%;} +.row-fluid [class*="span"]:first-child{margin-left:0;} +.row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.127659574468085%;} +.row-fluid .span12{width:100%;*width:99.94680851063829%;} +.row-fluid .span11{width:91.48936170212765%;*width:91.43617021276594%;} +.row-fluid .span10{width:82.97872340425532%;*width:82.92553191489361%;} +.row-fluid .span9{width:74.46808510638297%;*width:74.41489361702126%;} +.row-fluid .span8{width:65.95744680851064%;*width:65.90425531914893%;} +.row-fluid .span7{width:57.44680851063829%;*width:57.39361702127659%;} +.row-fluid .span6{width:48.93617021276595%;*width:48.88297872340425%;} +.row-fluid .span5{width:40.42553191489362%;*width:40.37234042553192%;} +.row-fluid .span4{width:31.914893617021278%;*width:31.861702127659576%;} +.row-fluid .span3{width:23.404255319148934%;*width:23.351063829787233%;} +.row-fluid .span2{width:14.893617021276595%;*width:14.840425531914894%;} +.row-fluid .span1{width:6.382978723404255%;*width:6.329787234042553%;} +.row-fluid .offset12{margin-left:104.25531914893617%;*margin-left:104.14893617021275%;} +.row-fluid .offset12:first-child{margin-left:102.12765957446808%;*margin-left:102.02127659574467%;} +.row-fluid .offset11{margin-left:95.74468085106382%;*margin-left:95.6382978723404%;} +.row-fluid .offset11:first-child{margin-left:93.61702127659574%;*margin-left:93.51063829787232%;} +.row-fluid .offset10{margin-left:87.23404255319149%;*margin-left:87.12765957446807%;} +.row-fluid .offset10:first-child{margin-left:85.1063829787234%;*margin-left:84.99999999999999%;} +.row-fluid .offset9{margin-left:78.72340425531914%;*margin-left:78.61702127659572%;} +.row-fluid .offset9:first-child{margin-left:76.59574468085106%;*margin-left:76.48936170212764%;} +.row-fluid .offset8{margin-left:70.2127659574468%;*margin-left:70.10638297872339%;} +.row-fluid .offset8:first-child{margin-left:68.08510638297872%;*margin-left:67.9787234042553%;} +.row-fluid .offset7{margin-left:61.70212765957446%;*margin-left:61.59574468085106%;} +.row-fluid .offset7:first-child{margin-left:59.574468085106375%;*margin-left:59.46808510638297%;} +.row-fluid 
.offset6{margin-left:53.191489361702125%;*margin-left:53.085106382978715%;} +.row-fluid .offset6:first-child{margin-left:51.063829787234035%;*margin-left:50.95744680851063%;} +.row-fluid .offset5{margin-left:44.68085106382979%;*margin-left:44.57446808510638%;} +.row-fluid .offset5:first-child{margin-left:42.5531914893617%;*margin-left:42.4468085106383%;} +.row-fluid .offset4{margin-left:36.170212765957444%;*margin-left:36.06382978723405%;} +.row-fluid .offset4:first-child{margin-left:34.04255319148936%;*margin-left:33.93617021276596%;} +.row-fluid .offset3{margin-left:27.659574468085104%;*margin-left:27.5531914893617%;} +.row-fluid .offset3:first-child{margin-left:25.53191489361702%;*margin-left:25.425531914893618%;} +.row-fluid .offset2{margin-left:19.148936170212764%;*margin-left:19.04255319148936%;} +.row-fluid .offset2:first-child{margin-left:17.02127659574468%;*margin-left:16.914893617021278%;} +.row-fluid .offset1{margin-left:10.638297872340425%;*margin-left:10.53191489361702%;} +.row-fluid .offset1:first-child{margin-left:8.51063829787234%;*margin-left:8.404255319148938%;} +[class*="span"].hide,.row-fluid [class*="span"].hide{display:none;} +[class*="span"].pull-right,.row-fluid [class*="span"].pull-right{float:right;} +.container{margin-right:auto;margin-left:auto;*zoom:1;}.container:before,.container:after{display:table;content:"";line-height:0;} +.container:after{clear:both;} +.container-fluid{padding-right:20px;padding-left:20px;*zoom:1;}.container-fluid:before,.container-fluid:after{display:table;content:"";line-height:0;} +.container-fluid:after{clear:both;} +p{margin:0 0 10px;} +.lead{margin-bottom:20px;font-size:21px;font-weight:200;line-height:30px;} +small{font-size:85%;} +strong{font-weight:bold;} +em{font-style:italic;} +cite{font-style:normal;} +.muted{color:#999999;} +a.muted:hover,a.muted:focus{color:#808080;} +.text-warning{color:#c09853;} +a.text-warning:hover,a.text-warning:focus{color:#a47e3c;} +.text-error{color:#b94a48;} +a.text-error:hover,a.text-error:focus{color:#953b39;} +.text-info{color:#3a87ad;} +a.text-info:hover,a.text-info:focus{color:#2d6987;} +.text-success{color:#468847;} +a.text-success:hover,a.text-success:focus{color:#356635;} +.text-left{text-align:left;} +.text-right{text-align:right;} +.text-center{text-align:center;} +h1,h2,h3,h4,h5,h6{margin:10px 0;font-family:inherit;font-weight:bold;line-height:20px;color:inherit;text-rendering:optimizelegibility;}h1 small,h2 small,h3 small,h4 small,h5 small,h6 small{font-weight:normal;line-height:1;color:#999999;} +h1,h2,h3{line-height:40px;} +h1{font-size:38.5px;} +h2{font-size:31.5px;} +h3{font-size:24.5px;} +h4{font-size:17.5px;} +h5{font-size:14px;} +h6{font-size:11.9px;} +h1 small{font-size:24.5px;} +h2 small{font-size:17.5px;} +h3 small{font-size:14px;} +h4 small{font-size:14px;} +.page-header{padding-bottom:9px;margin:20px 0 30px;border-bottom:1px solid #eeeeee;} +ul,ol{padding:0;margin:0 0 10px 25px;} +ul ul,ul ol,ol ol,ol ul{margin-bottom:0;} +li{line-height:20px;} +ul.unstyled,ol.unstyled{margin-left:0;list-style:none;} +ul.inline,ol.inline{margin-left:0;list-style:none;}ul.inline>li,ol.inline>li{display:inline-block;*display:inline;*zoom:1;padding-left:5px;padding-right:5px;} +dl{margin-bottom:20px;} +dt,dd{line-height:20px;} +dt{font-weight:bold;} +dd{margin-left:10px;} +.dl-horizontal{*zoom:1;}.dl-horizontal:before,.dl-horizontal:after{display:table;content:"";line-height:0;} +.dl-horizontal:after{clear:both;} +.dl-horizontal 
dt{float:left;width:160px;clear:left;text-align:right;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;} +.dl-horizontal dd{margin-left:180px;} +hr{margin:20px 0;border:0;border-top:1px solid #eeeeee;border-bottom:1px solid #ffffff;} +abbr[title],abbr[data-original-title]{cursor:help;border-bottom:1px dotted #999999;} +abbr.initialism{font-size:90%;text-transform:uppercase;} +blockquote{padding:0 0 0 15px;margin:0 0 20px;border-left:5px solid #eeeeee;}blockquote p{margin-bottom:0;font-size:17.5px;font-weight:300;line-height:1.25;} +blockquote small{display:block;line-height:20px;color:#999999;}blockquote small:before{content:'\2014 \00A0';} +blockquote.pull-right{float:right;padding-right:15px;padding-left:0;border-right:5px solid #eeeeee;border-left:0;}blockquote.pull-right p,blockquote.pull-right small{text-align:right;} +blockquote.pull-right small:before{content:'';} +blockquote.pull-right small:after{content:'\00A0 \2014';} +q:before,q:after,blockquote:before,blockquote:after{content:"";} +address{display:block;margin-bottom:20px;font-style:normal;line-height:20px;} +code,pre{padding:0 3px 2px;font-family:Monaco,Menlo,Consolas,"Courier New",monospace;font-size:12px;color:#333333;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;} +code{padding:2px 4px;color:#d14;background-color:#f7f7f9;border:1px solid #e1e1e8;white-space:nowrap;} +pre{display:block;padding:9.5px;margin:0 0 10px;font-size:13px;line-height:20px;word-break:break-all;word-wrap:break-word;white-space:pre;white-space:pre-wrap;background-color:#f5f5f5;border:1px solid #ccc;border:1px solid rgba(0, 0, 0, 0.15);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}pre.prettyprint{margin-bottom:20px;} +pre code{padding:0;color:inherit;white-space:pre;white-space:pre-wrap;background-color:transparent;border:0;} +.pre-scrollable{max-height:340px;overflow-y:scroll;} +.label,.badge{display:inline-block;padding:2px 4px;font-size:11.844px;font-weight:bold;line-height:14px;color:#ffffff;vertical-align:baseline;white-space:nowrap;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#999999;} +.label{-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;} +.badge{padding-left:9px;padding-right:9px;-webkit-border-radius:9px;-moz-border-radius:9px;border-radius:9px;} +.label:empty,.badge:empty{display:none;} +a.label:hover,a.label:focus,a.badge:hover,a.badge:focus{color:#ffffff;text-decoration:none;cursor:pointer;} +.label-important,.badge-important{background-color:#b94a48;} +.label-important[href],.badge-important[href]{background-color:#953b39;} +.label-warning,.badge-warning{background-color:#f89406;} +.label-warning[href],.badge-warning[href]{background-color:#c67605;} +.label-success,.badge-success{background-color:#468847;} +.label-success[href],.badge-success[href]{background-color:#356635;} +.label-info,.badge-info{background-color:#3a87ad;} +.label-info[href],.badge-info[href]{background-color:#2d6987;} +.label-inverse,.badge-inverse{background-color:#333333;} +.label-inverse[href],.badge-inverse[href]{background-color:#1a1a1a;} +.btn .label,.btn .badge{position:relative;top:-1px;} +.btn-mini .label,.btn-mini .badge{top:0;} +table{max-width:100%;background-color:transparent;border-collapse:collapse;border-spacing:0;} +.table{width:100%;margin-bottom:20px;}.table th,.table td{padding:8px;line-height:20px;text-align:left;vertical-align:top;border-top:1px solid #dddddd;} +.table th{font-weight:bold;} +.table thead th{vertical-align:bottom;} +.table caption+thead 
tr:first-child th,.table caption+thead tr:first-child td,.table colgroup+thead tr:first-child th,.table colgroup+thead tr:first-child td,.table thead:first-child tr:first-child th,.table thead:first-child tr:first-child td{border-top:0;} +.table tbody+tbody{border-top:2px solid #dddddd;} +.table .table{background-color:#ffffff;} +.table-condensed th,.table-condensed td{padding:4px 5px;} +.table-bordered{border:1px solid #dddddd;border-collapse:separate;*border-collapse:collapse;border-left:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}.table-bordered th,.table-bordered td{border-left:1px solid #dddddd;} +.table-bordered caption+thead tr:first-child th,.table-bordered caption+tbody tr:first-child th,.table-bordered caption+tbody tr:first-child td,.table-bordered colgroup+thead tr:first-child th,.table-bordered colgroup+tbody tr:first-child th,.table-bordered colgroup+tbody tr:first-child td,.table-bordered thead:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child td{border-top:0;} +.table-bordered thead:first-child tr:first-child>th:first-child,.table-bordered tbody:first-child tr:first-child>td:first-child,.table-bordered tbody:first-child tr:first-child>th:first-child{-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;} +.table-bordered thead:first-child tr:first-child>th:last-child,.table-bordered tbody:first-child tr:first-child>td:last-child,.table-bordered tbody:first-child tr:first-child>th:last-child{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;} +.table-bordered thead:last-child tr:last-child>th:first-child,.table-bordered tbody:last-child tr:last-child>td:first-child,.table-bordered tbody:last-child tr:last-child>th:first-child,.table-bordered tfoot:last-child tr:last-child>td:first-child,.table-bordered tfoot:last-child tr:last-child>th:first-child{-webkit-border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px;border-bottom-left-radius:4px;} +.table-bordered thead:last-child tr:last-child>th:last-child,.table-bordered tbody:last-child tr:last-child>td:last-child,.table-bordered tbody:last-child tr:last-child>th:last-child,.table-bordered tfoot:last-child tr:last-child>td:last-child,.table-bordered tfoot:last-child tr:last-child>th:last-child{-webkit-border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px;border-bottom-right-radius:4px;} +.table-bordered tfoot+tbody:last-child tr:last-child td:first-child{-webkit-border-bottom-left-radius:0;-moz-border-radius-bottomleft:0;border-bottom-left-radius:0;} +.table-bordered tfoot+tbody:last-child tr:last-child td:last-child{-webkit-border-bottom-right-radius:0;-moz-border-radius-bottomright:0;border-bottom-right-radius:0;} +.table-bordered caption+thead tr:first-child th:first-child,.table-bordered caption+tbody tr:first-child td:first-child,.table-bordered colgroup+thead tr:first-child th:first-child,.table-bordered colgroup+tbody tr:first-child td:first-child{-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;} +.table-bordered caption+thead tr:first-child th:last-child,.table-bordered caption+tbody tr:first-child td:last-child,.table-bordered colgroup+thead tr:first-child th:last-child,.table-bordered colgroup+tbody tr:first-child td:last-child{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;} +.table-striped 
tbody>tr:nth-child(odd)>td,.table-striped tbody>tr:nth-child(odd)>th{background-color:#f9f9f9;} +.table-hover tbody tr:hover>td,.table-hover tbody tr:hover>th{background-color:#f5f5f5;} +table td[class*="span"],table th[class*="span"],.row-fluid table td[class*="span"],.row-fluid table th[class*="span"]{display:table-cell;float:none;margin-left:0;} +.table td.span1,.table th.span1{float:none;width:44px;margin-left:0;} +.table td.span2,.table th.span2{float:none;width:124px;margin-left:0;} +.table td.span3,.table th.span3{float:none;width:204px;margin-left:0;} +.table td.span4,.table th.span4{float:none;width:284px;margin-left:0;} +.table td.span5,.table th.span5{float:none;width:364px;margin-left:0;} +.table td.span6,.table th.span6{float:none;width:444px;margin-left:0;} +.table td.span7,.table th.span7{float:none;width:524px;margin-left:0;} +.table td.span8,.table th.span8{float:none;width:604px;margin-left:0;} +.table td.span9,.table th.span9{float:none;width:684px;margin-left:0;} +.table td.span10,.table th.span10{float:none;width:764px;margin-left:0;} +.table td.span11,.table th.span11{float:none;width:844px;margin-left:0;} +.table td.span12,.table th.span12{float:none;width:924px;margin-left:0;} +.table tbody tr.success>td{background-color:#dff0d8;} +.table tbody tr.error>td{background-color:#f2dede;} +.table tbody tr.warning>td{background-color:#fcf8e3;} +.table tbody tr.info>td{background-color:#d9edf7;} +.table-hover tbody tr.success:hover>td{background-color:#d0e9c6;} +.table-hover tbody tr.error:hover>td{background-color:#ebcccc;} +.table-hover tbody tr.warning:hover>td{background-color:#faf2cc;} +.table-hover tbody tr.info:hover>td{background-color:#c4e3f3;} +form{margin:0 0 20px;} +fieldset{padding:0;margin:0;border:0;} +legend{display:block;width:100%;padding:0;margin-bottom:20px;font-size:21px;line-height:40px;color:#333333;border:0;border-bottom:1px solid #e5e5e5;}legend small{font-size:15px;color:#999999;} +label,input,button,select,textarea{font-size:14px;font-weight:normal;line-height:20px;} +input,button,select,textarea{font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;} +label{display:block;margin-bottom:5px;} +select,textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{display:inline-block;height:20px;padding:4px 6px;margin-bottom:10px;font-size:14px;line-height:20px;color:#555555;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;vertical-align:middle;} +input,textarea,.uneditable-input{width:206px;} +textarea{height:auto;} +textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{background-color:#ffffff;border:1px solid #cccccc;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-webkit-transition:border linear .2s, box-shadow linear .2s;-moz-transition:border linear .2s, box-shadow linear .2s;-o-transition:border linear .2s, box-shadow linear .2s;transition:border linear .2s, box-shadow linear 
.2s;}textarea:focus,input[type="text"]:focus,input[type="password"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus,.uneditable-input:focus{border-color:rgba(82, 168, 236, 0.8);outline:0;outline:thin dotted \9;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(82,168,236,.6);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(82,168,236,.6);box-shadow:inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(82,168,236,.6);} +input[type="radio"],input[type="checkbox"]{margin:4px 0 0;*margin-top:0;margin-top:1px \9;line-height:normal;} +input[type="file"],input[type="image"],input[type="submit"],input[type="reset"],input[type="button"],input[type="radio"],input[type="checkbox"]{width:auto;} +select,input[type="file"]{height:30px;*margin-top:4px;line-height:30px;} +select{width:220px;border:1px solid #cccccc;background-color:#ffffff;} +select[multiple],select[size]{height:auto;} +select:focus,input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px;} +.uneditable-input,.uneditable-textarea{color:#999999;background-color:#fcfcfc;border-color:#cccccc;-webkit-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.025);-moz-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.025);box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.025);cursor:not-allowed;} +.uneditable-input{overflow:hidden;white-space:nowrap;} +.uneditable-textarea{width:auto;height:auto;} +input:-moz-placeholder,textarea:-moz-placeholder{color:#999999;} +input:-ms-input-placeholder,textarea:-ms-input-placeholder{color:#999999;} +input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#999999;} +.radio,.checkbox{min-height:20px;padding-left:20px;} +.radio input[type="radio"],.checkbox input[type="checkbox"]{float:left;margin-left:-20px;} +.controls>.radio:first-child,.controls>.checkbox:first-child{padding-top:5px;} +.radio.inline,.checkbox.inline{display:inline-block;padding-top:5px;margin-bottom:0;vertical-align:middle;} +.radio.inline+.radio.inline,.checkbox.inline+.checkbox.inline{margin-left:10px;} +.input-mini{width:60px;} +.input-small{width:90px;} +.input-medium{width:150px;} +.input-large{width:210px;} +.input-xlarge{width:270px;} +.input-xxlarge{width:530px;} +input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"]{float:none;margin-left:0;} +.input-append input[class*="span"],.input-append .uneditable-input[class*="span"],.input-prepend input[class*="span"],.input-prepend .uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"],.row-fluid .input-prepend [class*="span"],.row-fluid .input-append [class*="span"]{display:inline-block;} +input,textarea,.uneditable-input{margin-left:0;} +.controls-row [class*="span"]+[class*="span"]{margin-left:20px;} +input.span12,textarea.span12,.uneditable-input.span12{width:926px;} +input.span11,textarea.span11,.uneditable-input.span11{width:846px;} 
+input.span10,textarea.span10,.uneditable-input.span10{width:766px;} +input.span9,textarea.span9,.uneditable-input.span9{width:686px;} +input.span8,textarea.span8,.uneditable-input.span8{width:606px;} +input.span7,textarea.span7,.uneditable-input.span7{width:526px;} +input.span6,textarea.span6,.uneditable-input.span6{width:446px;} +input.span5,textarea.span5,.uneditable-input.span5{width:366px;} +input.span4,textarea.span4,.uneditable-input.span4{width:286px;} +input.span3,textarea.span3,.uneditable-input.span3{width:206px;} +input.span2,textarea.span2,.uneditable-input.span2{width:126px;} +input.span1,textarea.span1,.uneditable-input.span1{width:46px;} +.controls-row{*zoom:1;}.controls-row:before,.controls-row:after{display:table;content:"";line-height:0;} +.controls-row:after{clear:both;} +.controls-row [class*="span"],.row-fluid .controls-row [class*="span"]{float:left;} +.controls-row .checkbox[class*="span"],.controls-row .radio[class*="span"]{padding-top:5px;} +input[disabled],select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#eeeeee;} +input[type="radio"][disabled],input[type="checkbox"][disabled],input[type="radio"][readonly],input[type="checkbox"][readonly]{background-color:transparent;} +.control-group.warning .control-label,.control-group.warning .help-block,.control-group.warning .help-inline{color:#c09853;} +.control-group.warning .checkbox,.control-group.warning .radio,.control-group.warning input,.control-group.warning select,.control-group.warning textarea{color:#c09853;} +.control-group.warning input,.control-group.warning select,.control-group.warning textarea{border-color:#c09853;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);}.control-group.warning input:focus,.control-group.warning select:focus,.control-group.warning textarea:focus{border-color:#a47e3c;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #dbc59e;-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #dbc59e;box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #dbc59e;} +.control-group.warning .input-prepend .add-on,.control-group.warning .input-append .add-on{color:#c09853;background-color:#fcf8e3;border-color:#c09853;} +.control-group.error .control-label,.control-group.error .help-block,.control-group.error .help-inline{color:#b94a48;} +.control-group.error .checkbox,.control-group.error .radio,.control-group.error input,.control-group.error select,.control-group.error textarea{color:#b94a48;} +.control-group.error input,.control-group.error select,.control-group.error textarea{border-color:#b94a48;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);}.control-group.error input:focus,.control-group.error select:focus,.control-group.error textarea:focus{border-color:#953b39;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #d59392;-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #d59392;box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #d59392;} +.control-group.error .input-prepend .add-on,.control-group.error .input-append .add-on{color:#b94a48;background-color:#f2dede;border-color:#b94a48;} +.control-group.success .control-label,.control-group.success .help-block,.control-group.success .help-inline{color:#468847;} +.control-group.success 
.checkbox,.control-group.success .radio,.control-group.success input,.control-group.success select,.control-group.success textarea{color:#468847;}
+.control-group.success input,.control-group.success select,.control-group.success textarea{border-color:#468847;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);}.control-group.success input:focus,.control-group.success select:focus,.control-group.success textarea:focus{border-color:#356635;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7aba7b;-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7aba7b;box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7aba7b;}
+.control-group.success .input-prepend .add-on,.control-group.success .input-append .add-on{color:#468847;background-color:#dff0d8;border-color:#468847;}
+.control-group.info .control-label,.control-group.info .help-block,.control-group.info .help-inline{color:#3a87ad;}
+.control-group.info .checkbox,.control-group.info .radio,.control-group.info input,.control-group.info select,.control-group.info textarea{color:#3a87ad;}
+.control-group.info input,.control-group.info select,.control-group.info textarea{border-color:#3a87ad;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075);}.control-group.info input:focus,.control-group.info select:focus,.control-group.info textarea:focus{border-color:#2d6987;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7ab5d3;-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7ab5d3;box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.075),0 0 6px #7ab5d3;}
+.control-group.info .input-prepend .add-on,.control-group.info .input-append .add-on{color:#3a87ad;background-color:#d9edf7;border-color:#3a87ad;}
+input:focus:invalid,textarea:focus:invalid,select:focus:invalid{color:#b94a48;border-color:#ee5f5b;}input:focus:invalid:focus,textarea:focus:invalid:focus,select:focus:invalid:focus{border-color:#e9322d;-webkit-box-shadow:0 0 6px #f8b9b7;-moz-box-shadow:0 0 6px #f8b9b7;box-shadow:0 0 6px #f8b9b7;}
+.form-actions{padding:19px 20px 20px;margin-top:20px;margin-bottom:20px;background-color:#f5f5f5;border-top:1px solid #e5e5e5;*zoom:1;}.form-actions:before,.form-actions:after{display:table;content:"";line-height:0;}
+.form-actions:after{clear:both;}
+.help-block,.help-inline{color:#595959;}
+.help-block{display:block;margin-bottom:10px;}
+.help-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle;padding-left:5px;}
+.input-append,.input-prepend{display:inline-block;margin-bottom:10px;vertical-align:middle;font-size:0;white-space:nowrap;}.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input,.input-append .dropdown-menu,.input-prepend .dropdown-menu,.input-append .popover,.input-prepend .popover{font-size:14px;}
+.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input{position:relative;margin-bottom:0;*margin-left:0;vertical-align:top;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}.input-append input:focus,.input-prepend input:focus,.input-append select:focus,.input-prepend select:focus,.input-append .uneditable-input:focus,.input-prepend .uneditable-input:focus{z-index:2;}
+.input-append .add-on,.input-prepend .add-on{display:inline-block;width:auto;height:20px;min-width:16px;padding:4px 5px;font-size:14px;font-weight:normal;line-height:20px;text-align:center;text-shadow:0 1px 0 #ffffff;background-color:#eeeeee;border:1px solid #ccc;}
+.input-append .add-on,.input-prepend .add-on,.input-append .btn,.input-prepend .btn,.input-append .btn-group>.dropdown-toggle,.input-prepend .btn-group>.dropdown-toggle{vertical-align:top;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.input-append .active,.input-prepend .active{background-color:#a9dba9;border-color:#46a546;}
+.input-prepend .add-on,.input-prepend .btn{margin-right:-1px;}
+.input-prepend .add-on:first-child,.input-prepend .btn:first-child{-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px;}
+.input-append input,.input-append select,.input-append .uneditable-input{-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px;}.input-append input+.btn-group .btn:last-child,.input-append select+.btn-group .btn:last-child,.input-append .uneditable-input+.btn-group .btn:last-child{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.input-append .add-on,.input-append .btn,.input-append .btn-group{margin-left:-1px;}
+.input-append .add-on:last-child,.input-append .btn:last-child,.input-append .btn-group:last-child>.dropdown-toggle{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.input-prepend.input-append input,.input-prepend.input-append select,.input-prepend.input-append .uneditable-input{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}.input-prepend.input-append input+.btn-group .btn,.input-prepend.input-append select+.btn-group .btn,.input-prepend.input-append .uneditable-input+.btn-group .btn{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.input-prepend.input-append .add-on:first-child,.input-prepend.input-append .btn:first-child{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px;}
+.input-prepend.input-append .add-on:last-child,.input-prepend.input-append .btn:last-child{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.input-prepend.input-append .btn-group:first-child{margin-left:0;}
+input.search-query{padding-right:14px;padding-right:4px \9;padding-left:14px;padding-left:4px \9;margin-bottom:0;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px;}
+.form-search .input-append .search-query,.form-search .input-prepend .search-query{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.form-search .input-append .search-query{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px;}
+.form-search .input-append .btn{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0;}
+.form-search .input-prepend .search-query{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0;}
+.form-search .input-prepend .btn{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px;}
+.form-search input,.form-inline input,.form-horizontal input,.form-search textarea,.form-inline textarea,.form-horizontal textarea,.form-search select,.form-inline select,.form-horizontal select,.form-search .help-inline,.form-inline .help-inline,.form-horizontal .help-inline,.form-search .uneditable-input,.form-inline .uneditable-input,.form-horizontal .uneditable-input,.form-search .input-prepend,.form-inline .input-prepend,.form-horizontal .input-prepend,.form-search .input-append,.form-inline .input-append,.form-horizontal .input-append{display:inline-block;*display:inline;*zoom:1;margin-bottom:0;vertical-align:middle;}
+.form-search .hide,.form-inline .hide,.form-horizontal .hide{display:none;}
+.form-search label,.form-inline label,.form-search .btn-group,.form-inline .btn-group{display:inline-block;}
+.form-search .input-append,.form-inline .input-append,.form-search .input-prepend,.form-inline .input-prepend{margin-bottom:0;}
+.form-search .radio,.form-search .checkbox,.form-inline .radio,.form-inline .checkbox{padding-left:0;margin-bottom:0;vertical-align:middle;}
+.form-search .radio input[type="radio"],.form-search .checkbox input[type="checkbox"],.form-inline .radio input[type="radio"],.form-inline .checkbox input[type="checkbox"]{float:left;margin-right:3px;margin-left:0;}
+.control-group{margin-bottom:10px;}
+legend+.control-group{margin-top:20px;-webkit-margin-top-collapse:separate;}
+.form-horizontal .control-group{margin-bottom:20px;*zoom:1;}.form-horizontal .control-group:before,.form-horizontal .control-group:after{display:table;content:"";line-height:0;}
+.form-horizontal .control-group:after{clear:both;}
+.form-horizontal .control-label{float:left;width:160px;padding-top:5px;text-align:right;}
+.form-horizontal .controls{*display:inline-block;*padding-left:20px;margin-left:180px;*margin-left:0;}.form-horizontal .controls:first-child{*padding-left:180px;}
+.form-horizontal .help-block{margin-bottom:0;}
+.form-horizontal input+.help-block,.form-horizontal select+.help-block,.form-horizontal textarea+.help-block,.form-horizontal .uneditable-input+.help-block,.form-horizontal .input-prepend+.help-block,.form-horizontal .input-append+.help-block{margin-top:10px;}
+.form-horizontal .form-actions{padding-left:180px;}
+.btn{display:inline-block;*display:inline;*zoom:1;padding:4px 12px;margin-bottom:0;font-size:14px;line-height:20px;text-align:center;vertical-align:middle;cursor:pointer;color:#333333;text-shadow:0 1px 1px rgba(255, 255, 255, 0.75);background-color:#f5f5f5;background-image:-moz-linear-gradient(top, #ffffff, #e6e6e6);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ffffff), to(#e6e6e6));background-image:-webkit-linear-gradient(top, #ffffff, #e6e6e6);background-image:-o-linear-gradient(top, #ffffff, #e6e6e6);background-image:linear-gradient(to bottom, #ffffff, #e6e6e6);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff', endColorstr='#ffe6e6e6', GradientType=0);border-color:#e6e6e6 #e6e6e6 #bfbfbf;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#e6e6e6;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);border:1px solid #cccccc;*border:0;border-bottom-color:#b3b3b3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;*margin-left:.3em;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);}.btn:hover,.btn:focus,.btn:active,.btn.active,.btn.disabled,.btn[disabled]{color:#333333;background-color:#e6e6e6;*background-color:#d9d9d9;}
+.btn:active,.btn.active{background-color:#cccccc \9;}
+.btn:first-child{*margin-left:0;}
+.btn:hover,.btn:focus{color:#333333;text-decoration:none;background-position:0 -15px;-webkit-transition:background-position 0.1s linear;-moz-transition:background-position 0.1s linear;-o-transition:background-position 0.1s linear;transition:background-position 0.1s linear;}
+.btn:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px;}
+.btn.active,.btn:active{background-image:none;outline:0;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);}
+.btn.disabled,.btn[disabled]{cursor:default;background-image:none;opacity:0.65;filter:alpha(opacity=65);-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;}
+.btn-large{padding:11px 19px;font-size:17.5px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;}
+.btn-large [class^="icon-"],.btn-large [class*=" icon-"]{margin-top:4px;}
+.btn-small{padding:2px 10px;font-size:11.9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;}
+.btn-small [class^="icon-"],.btn-small [class*=" icon-"]{margin-top:0;}
+.btn-mini [class^="icon-"],.btn-mini [class*=" icon-"]{margin-top:-1px;}
+.btn-mini{padding:0 6px;font-size:10.5px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;}
+.btn-block{display:block;width:100%;padding-left:0;padding-right:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;}
+.btn-block+.btn-block{margin-top:5px;}
+input[type="submit"].btn-block,input[type="reset"].btn-block,input[type="button"].btn-block{width:100%;}
+.btn-primary.active,.btn-warning.active,.btn-danger.active,.btn-success.active,.btn-info.active,.btn-inverse.active{color:rgba(255, 255, 255, 0.75);}
+.btn-primary{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#006dcc;background-image:-moz-linear-gradient(top, #0088cc, #0044cc);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#0088cc), to(#0044cc));background-image:-webkit-linear-gradient(top, #0088cc, #0044cc);background-image:-o-linear-gradient(top, #0088cc, #0044cc);background-image:linear-gradient(to bottom, #0088cc, #0044cc);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc', endColorstr='#ff0044cc', GradientType=0);border-color:#0044cc #0044cc #002a80;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#0044cc;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-primary:hover,.btn-primary:focus,.btn-primary:active,.btn-primary.active,.btn-primary.disabled,.btn-primary[disabled]{color:#ffffff;background-color:#0044cc;*background-color:#003bb3;}
+.btn-primary:active,.btn-primary.active{background-color:#003399 \9;}
+.btn-warning{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#faa732;background-image:-moz-linear-gradient(top, #fbb450, #f89406);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#fbb450), to(#f89406));background-image:-webkit-linear-gradient(top, #fbb450, #f89406);background-image:-o-linear-gradient(top, #fbb450, #f89406);background-image:linear-gradient(to bottom, #fbb450, #f89406);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffbb450', endColorstr='#fff89406', GradientType=0);border-color:#f89406 #f89406 #ad6704;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#f89406;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-warning:hover,.btn-warning:focus,.btn-warning:active,.btn-warning.active,.btn-warning.disabled,.btn-warning[disabled]{color:#ffffff;background-color:#f89406;*background-color:#df8505;}
+.btn-warning:active,.btn-warning.active{background-color:#c67605 \9;}
+.btn-danger{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#da4f49;background-image:-moz-linear-gradient(top, #ee5f5b, #bd362f);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ee5f5b), to(#bd362f));background-image:-webkit-linear-gradient(top, #ee5f5b, #bd362f);background-image:-o-linear-gradient(top, #ee5f5b, #bd362f);background-image:linear-gradient(to bottom, #ee5f5b, #bd362f);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffee5f5b', endColorstr='#ffbd362f', GradientType=0);border-color:#bd362f #bd362f #802420;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#bd362f;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-danger:hover,.btn-danger:focus,.btn-danger:active,.btn-danger.active,.btn-danger.disabled,.btn-danger[disabled]{color:#ffffff;background-color:#bd362f;*background-color:#a9302a;}
+.btn-danger:active,.btn-danger.active{background-color:#942a25 \9;}
+.btn-success{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#5bb75b;background-image:-moz-linear-gradient(top, #62c462, #51a351);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#62c462), to(#51a351));background-image:-webkit-linear-gradient(top, #62c462, #51a351);background-image:-o-linear-gradient(top, #62c462, #51a351);background-image:linear-gradient(to bottom, #62c462, #51a351);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff62c462', endColorstr='#ff51a351', GradientType=0);border-color:#51a351 #51a351 #387038;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#51a351;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-success:hover,.btn-success:focus,.btn-success:active,.btn-success.active,.btn-success.disabled,.btn-success[disabled]{color:#ffffff;background-color:#51a351;*background-color:#499249;}
+.btn-success:active,.btn-success.active{background-color:#408140 \9;}
+.btn-info{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#49afcd;background-image:-moz-linear-gradient(top, #5bc0de, #2f96b4);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#5bc0de), to(#2f96b4));background-image:-webkit-linear-gradient(top, #5bc0de, #2f96b4);background-image:-o-linear-gradient(top, #5bc0de, #2f96b4);background-image:linear-gradient(to bottom, #5bc0de, #2f96b4);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de', endColorstr='#ff2f96b4', GradientType=0);border-color:#2f96b4 #2f96b4 #1f6377;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#2f96b4;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-info:hover,.btn-info:focus,.btn-info:active,.btn-info.active,.btn-info.disabled,.btn-info[disabled]{color:#ffffff;background-color:#2f96b4;*background-color:#2a85a0;}
+.btn-info:active,.btn-info.active{background-color:#24748c \9;}
+.btn-inverse{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#363636;background-image:-moz-linear-gradient(top, #444444, #222222);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#444444), to(#222222));background-image:-webkit-linear-gradient(top, #444444, #222222);background-image:-o-linear-gradient(top, #444444, #222222);background-image:linear-gradient(to bottom, #444444, #222222);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff444444', endColorstr='#ff222222', GradientType=0);border-color:#222222 #222222 #000000;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#222222;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.btn-inverse:hover,.btn-inverse:focus,.btn-inverse:active,.btn-inverse.active,.btn-inverse.disabled,.btn-inverse[disabled]{color:#ffffff;background-color:#222222;*background-color:#151515;}
+.btn-inverse:active,.btn-inverse.active{background-color:#080808 \9;}
+button.btn,input[type="submit"].btn{*padding-top:3px;*padding-bottom:3px;}button.btn::-moz-focus-inner,input[type="submit"].btn::-moz-focus-inner{padding:0;border:0;}
+button.btn.btn-large,input[type="submit"].btn.btn-large{*padding-top:7px;*padding-bottom:7px;}
+button.btn.btn-small,input[type="submit"].btn.btn-small{*padding-top:3px;*padding-bottom:3px;}
+button.btn.btn-mini,input[type="submit"].btn.btn-mini{*padding-top:1px;*padding-bottom:1px;}
+.btn-link,.btn-link:active,.btn-link[disabled]{background-color:transparent;background-image:none;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;}
+.btn-link{border-color:transparent;cursor:pointer;color:#0088cc;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.btn-link:hover,.btn-link:focus{color:#005580;text-decoration:underline;background-color:transparent;}
+.btn-link[disabled]:hover,.btn-link[disabled]:focus{color:#333333;text-decoration:none;}
+[class^="icon-"],[class*=" icon-"]{display:inline-block;width:14px;height:14px;*margin-right:.3em;line-height:14px;vertical-align:text-top;background-image:url("../img/glyphicons-halflings.png");background-position:14px 14px;background-repeat:no-repeat;margin-top:1px;}
+.icon-white,.nav-pills>.active>a>[class^="icon-"],.nav-pills>.active>a>[class*=" icon-"],.nav-list>.active>a>[class^="icon-"],.nav-list>.active>a>[class*=" icon-"],.navbar-inverse .nav>.active>a>[class^="icon-"],.navbar-inverse .nav>.active>a>[class*=" icon-"],.dropdown-menu>li>a:hover>[class^="icon-"],.dropdown-menu>li>a:focus>[class^="icon-"],.dropdown-menu>li>a:hover>[class*=" icon-"],.dropdown-menu>li>a:focus>[class*=" icon-"],.dropdown-menu>.active>a>[class^="icon-"],.dropdown-menu>.active>a>[class*=" icon-"],.dropdown-submenu:hover>a>[class^="icon-"],.dropdown-submenu:focus>a>[class^="icon-"],.dropdown-submenu:hover>a>[class*=" icon-"],.dropdown-submenu:focus>a>[class*=" icon-"]{background-image:url("../img/glyphicons-halflings-white.png");}
+.icon-glass{background-position:0 0;}
+.icon-music{background-position:-24px 0;}
+.icon-search{background-position:-48px 0;}
+.icon-envelope{background-position:-72px 0;}
+.icon-heart{background-position:-96px 0;}
+.icon-star{background-position:-120px 0;}
+.icon-star-empty{background-position:-144px 0;}
+.icon-user{background-position:-168px 0;}
+.icon-film{background-position:-192px 0;}
+.icon-th-large{background-position:-216px 0;}
+.icon-th{background-position:-240px 0;}
+.icon-th-list{background-position:-264px 0;}
+.icon-ok{background-position:-288px 0;}
+.icon-remove{background-position:-312px 0;}
+.icon-zoom-in{background-position:-336px 0;}
+.icon-zoom-out{background-position:-360px 0;}
+.icon-off{background-position:-384px 0;}
+.icon-signal{background-position:-408px 0;}
+.icon-cog{background-position:-432px 0;}
+.icon-trash{background-position:-456px 0;}
+.icon-home{background-position:0 -24px;}
+.icon-file{background-position:-24px -24px;}
+.icon-time{background-position:-48px -24px;}
+.icon-road{background-position:-72px -24px;}
+.icon-download-alt{background-position:-96px -24px;}
+.icon-download{background-position:-120px -24px;}
+.icon-upload{background-position:-144px -24px;}
+.icon-inbox{background-position:-168px -24px;}
+.icon-play-circle{background-position:-192px -24px;}
+.icon-repeat{background-position:-216px -24px;}
+.icon-refresh{background-position:-240px -24px;}
+.icon-list-alt{background-position:-264px -24px;}
+.icon-lock{background-position:-287px -24px;}
+.icon-flag{background-position:-312px -24px;}
+.icon-headphones{background-position:-336px -24px;}
+.icon-volume-off{background-position:-360px -24px;}
+.icon-volume-down{background-position:-384px -24px;}
+.icon-volume-up{background-position:-408px -24px;}
+.icon-qrcode{background-position:-432px -24px;}
+.icon-barcode{background-position:-456px -24px;}
+.icon-tag{background-position:0 -48px;}
+.icon-tags{background-position:-25px -48px;}
+.icon-book{background-position:-48px -48px;}
+.icon-bookmark{background-position:-72px -48px;}
+.icon-print{background-position:-96px -48px;}
+.icon-camera{background-position:-120px -48px;}
+.icon-font{background-position:-144px -48px;}
+.icon-bold{background-position:-167px -48px;}
+.icon-italic{background-position:-192px -48px;}
+.icon-text-height{background-position:-216px -48px;}
+.icon-text-width{background-position:-240px -48px;}
+.icon-align-left{background-position:-264px -48px;}
+.icon-align-center{background-position:-288px -48px;}
+.icon-align-right{background-position:-312px -48px;}
+.icon-align-justify{background-position:-336px -48px;}
+.icon-list{background-position:-360px -48px;}
+.icon-indent-left{background-position:-384px -48px;}
+.icon-indent-right{background-position:-408px -48px;}
+.icon-facetime-video{background-position:-432px -48px;}
+.icon-picture{background-position:-456px -48px;}
+.icon-pencil{background-position:0 -72px;}
+.icon-map-marker{background-position:-24px -72px;}
+.icon-adjust{background-position:-48px -72px;}
+.icon-tint{background-position:-72px -72px;}
+.icon-edit{background-position:-96px -72px;}
+.icon-share{background-position:-120px -72px;}
+.icon-check{background-position:-144px -72px;}
+.icon-move{background-position:-168px -72px;}
+.icon-step-backward{background-position:-192px -72px;}
+.icon-fast-backward{background-position:-216px -72px;}
+.icon-backward{background-position:-240px -72px;}
+.icon-play{background-position:-264px -72px;}
+.icon-pause{background-position:-288px -72px;}
+.icon-stop{background-position:-312px -72px;}
+.icon-forward{background-position:-336px -72px;}
+.icon-fast-forward{background-position:-360px -72px;}
+.icon-step-forward{background-position:-384px -72px;}
+.icon-eject{background-position:-408px -72px;}
+.icon-chevron-left{background-position:-432px -72px;}
+.icon-chevron-right{background-position:-456px -72px;}
+.icon-plus-sign{background-position:0 -96px;}
+.icon-minus-sign{background-position:-24px -96px;}
+.icon-remove-sign{background-position:-48px -96px;}
+.icon-ok-sign{background-position:-72px -96px;}
+.icon-question-sign{background-position:-96px -96px;}
+.icon-info-sign{background-position:-120px -96px;}
+.icon-screenshot{background-position:-144px -96px;}
+.icon-remove-circle{background-position:-168px -96px;}
+.icon-ok-circle{background-position:-192px -96px;}
+.icon-ban-circle{background-position:-216px -96px;}
+.icon-arrow-left{background-position:-240px -96px;}
+.icon-arrow-right{background-position:-264px -96px;}
+.icon-arrow-up{background-position:-289px -96px;}
+.icon-arrow-down{background-position:-312px -96px;}
+.icon-share-alt{background-position:-336px -96px;}
+.icon-resize-full{background-position:-360px -96px;}
+.icon-resize-small{background-position:-384px -96px;}
+.icon-plus{background-position:-408px -96px;}
+.icon-minus{background-position:-433px -96px;}
+.icon-asterisk{background-position:-456px -96px;}
+.icon-exclamation-sign{background-position:0 -120px;}
+.icon-gift{background-position:-24px -120px;}
+.icon-leaf{background-position:-48px -120px;}
+.icon-fire{background-position:-72px -120px;}
+.icon-eye-open{background-position:-96px -120px;}
+.icon-eye-close{background-position:-120px -120px;}
+.icon-warning-sign{background-position:-144px -120px;}
+.icon-plane{background-position:-168px -120px;}
+.icon-calendar{background-position:-192px -120px;}
+.icon-random{background-position:-216px -120px;width:16px;}
+.icon-comment{background-position:-240px -120px;}
+.icon-magnet{background-position:-264px -120px;}
+.icon-chevron-up{background-position:-288px -120px;}
+.icon-chevron-down{background-position:-313px -119px;}
+.icon-retweet{background-position:-336px -120px;}
+.icon-shopping-cart{background-position:-360px -120px;}
+.icon-folder-close{background-position:-384px -120px;width:16px;}
+.icon-folder-open{background-position:-408px -120px;width:16px;}
+.icon-resize-vertical{background-position:-432px -119px;}
+.icon-resize-horizontal{background-position:-456px -118px;}
+.icon-hdd{background-position:0 -144px;}
+.icon-bullhorn{background-position:-24px -144px;}
+.icon-bell{background-position:-48px -144px;}
+.icon-certificate{background-position:-72px -144px;}
+.icon-thumbs-up{background-position:-96px -144px;}
+.icon-thumbs-down{background-position:-120px -144px;}
+.icon-hand-right{background-position:-144px -144px;}
+.icon-hand-left{background-position:-168px -144px;}
+.icon-hand-up{background-position:-192px -144px;}
+.icon-hand-down{background-position:-216px -144px;}
+.icon-circle-arrow-right{background-position:-240px -144px;}
+.icon-circle-arrow-left{background-position:-264px -144px;}
+.icon-circle-arrow-up{background-position:-288px -144px;}
+.icon-circle-arrow-down{background-position:-312px -144px;}
+.icon-globe{background-position:-336px -144px;}
+.icon-wrench{background-position:-360px -144px;}
+.icon-tasks{background-position:-384px -144px;}
+.icon-filter{background-position:-408px -144px;}
+.icon-briefcase{background-position:-432px -144px;}
+.icon-fullscreen{background-position:-456px -144px;}
+.btn-group{position:relative;display:inline-block;*display:inline;*zoom:1;font-size:0;vertical-align:middle;white-space:nowrap;*margin-left:.3em;}.btn-group:first-child{*margin-left:0;}
+.btn-group+.btn-group{margin-left:5px;}
+.btn-toolbar{font-size:0;margin-top:10px;margin-bottom:10px;}.btn-toolbar>.btn+.btn,.btn-toolbar>.btn-group+.btn,.btn-toolbar>.btn+.btn-group{margin-left:5px;}
+.btn-group>.btn{position:relative;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.btn-group>.btn+.btn{margin-left:-1px;}
+.btn-group>.btn,.btn-group>.dropdown-menu,.btn-group>.popover{font-size:14px;}
+.btn-group>.btn-mini{font-size:10.5px;}
+.btn-group>.btn-small{font-size:11.9px;}
+.btn-group>.btn-large{font-size:17.5px;}
+.btn-group>.btn:first-child{margin-left:0;-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;-webkit-border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px;border-bottom-left-radius:4px;}
+.btn-group>.btn:last-child,.btn-group>.dropdown-toggle{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px;border-bottom-right-radius:4px;}
+.btn-group>.btn.large:first-child{margin-left:0;-webkit-border-top-left-radius:6px;-moz-border-radius-topleft:6px;border-top-left-radius:6px;-webkit-border-bottom-left-radius:6px;-moz-border-radius-bottomleft:6px;border-bottom-left-radius:6px;}
+.btn-group>.btn.large:last-child,.btn-group>.large.dropdown-toggle{-webkit-border-top-right-radius:6px;-moz-border-radius-topright:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;-moz-border-radius-bottomright:6px;border-bottom-right-radius:6px;}
+.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active{z-index:2;}
+.btn-group .dropdown-toggle:active,.btn-group.open .dropdown-toggle{outline:0;}
+.btn-group>.btn+.dropdown-toggle{padding-left:8px;padding-right:8px;-webkit-box-shadow:inset 1px 0 0 rgba(255,255,255,.125), inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 1px 0 0 rgba(255,255,255,.125), inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 1px 0 0 rgba(255,255,255,.125), inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);*padding-top:5px;*padding-bottom:5px;}
+.btn-group>.btn-mini+.dropdown-toggle{padding-left:5px;padding-right:5px;*padding-top:2px;*padding-bottom:2px;}
+.btn-group>.btn-small+.dropdown-toggle{*padding-top:5px;*padding-bottom:4px;}
+.btn-group>.btn-large+.dropdown-toggle{padding-left:12px;padding-right:12px;*padding-top:7px;*padding-bottom:7px;}
+.btn-group.open .dropdown-toggle{background-image:none;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);box-shadow:inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);}
+.btn-group.open .btn.dropdown-toggle{background-color:#e6e6e6;}
+.btn-group.open .btn-primary.dropdown-toggle{background-color:#0044cc;}
+.btn-group.open .btn-warning.dropdown-toggle{background-color:#f89406;}
+.btn-group.open .btn-danger.dropdown-toggle{background-color:#bd362f;}
+.btn-group.open .btn-success.dropdown-toggle{background-color:#51a351;}
+.btn-group.open .btn-info.dropdown-toggle{background-color:#2f96b4;}
+.btn-group.open .btn-inverse.dropdown-toggle{background-color:#222222;}
+.btn .caret{margin-top:8px;margin-left:0;}
+.btn-large .caret{margin-top:6px;}
+.btn-large .caret{border-left-width:5px;border-right-width:5px;border-top-width:5px;}
+.btn-mini .caret,.btn-small .caret{margin-top:8px;}
+.dropup .btn-large .caret{border-bottom-width:5px;}
+.btn-primary .caret,.btn-warning .caret,.btn-danger .caret,.btn-info .caret,.btn-success .caret,.btn-inverse .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;}
+.btn-group-vertical{display:inline-block;*display:inline;*zoom:1;}
+.btn-group-vertical>.btn{display:block;float:none;max-width:100%;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.btn-group-vertical>.btn+.btn{margin-left:0;margin-top:-1px;}
+.btn-group-vertical>.btn:first-child{-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0;}
+.btn-group-vertical>.btn:last-child{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px;}
+.btn-group-vertical>.btn-large:first-child{-webkit-border-radius:6px 6px 0 0;-moz-border-radius:6px 6px 0 0;border-radius:6px 6px 0 0;}
+.btn-group-vertical>.btn-large:last-child{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;}
+.nav{margin-left:0;margin-bottom:20px;list-style:none;}
+.nav>li>a{display:block;}
+.nav>li>a:hover,.nav>li>a:focus{text-decoration:none;background-color:#eeeeee;}
+.nav>li>a>img{max-width:none;}
+.nav>.pull-right{float:right;}
+.nav-header{display:block;padding:3px 15px;font-size:11px;font-weight:bold;line-height:20px;color:#999999;text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);text-transform:uppercase;}
+.nav li+.nav-header{margin-top:9px;}
+.nav-list{padding-left:15px;padding-right:15px;margin-bottom:0;}
+.nav-list>li>a,.nav-list .nav-header{margin-left:-15px;margin-right:-15px;text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);}
+.nav-list>li>a{padding:3px 15px;}
+.nav-list>.active>a,.nav-list>.active>a:hover,.nav-list>.active>a:focus{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.2);background-color:#0088cc;}
+.nav-list [class^="icon-"],.nav-list [class*=" icon-"]{margin-right:2px;}
+.nav-list .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #ffffff;}
+.nav-tabs,.nav-pills{*zoom:1;}.nav-tabs:before,.nav-pills:before,.nav-tabs:after,.nav-pills:after{display:table;content:"";line-height:0;}
+.nav-tabs:after,.nav-pills:after{clear:both;}
+.nav-tabs>li,.nav-pills>li{float:left;}
+.nav-tabs>li>a,.nav-pills>li>a{padding-right:12px;padding-left:12px;margin-right:2px;line-height:14px;}
+.nav-tabs{border-bottom:1px solid #ddd;}
+.nav-tabs>li{margin-bottom:-1px;}
+.nav-tabs>li>a{padding-top:8px;padding-bottom:8px;line-height:20px;border:1px solid transparent;-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0;}.nav-tabs>li>a:hover,.nav-tabs>li>a:focus{border-color:#eeeeee #eeeeee #dddddd;}
+.nav-tabs>.active>a,.nav-tabs>.active>a:hover,.nav-tabs>.active>a:focus{color:#555555;background-color:#ffffff;border:1px solid #ddd;border-bottom-color:transparent;cursor:default;}
+.nav-pills>li>a{padding-top:8px;padding-bottom:8px;margin-top:2px;margin-bottom:2px;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px;}
+.nav-pills>.active>a,.nav-pills>.active>a:hover,.nav-pills>.active>a:focus{color:#ffffff;background-color:#0088cc;}
+.nav-stacked>li{float:none;}
+.nav-stacked>li>a{margin-right:0;}
+.nav-tabs.nav-stacked{border-bottom:0;}
+.nav-tabs.nav-stacked>li>a{border:1px solid #ddd;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.nav-tabs.nav-stacked>li:first-child>a{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;}
+.nav-tabs.nav-stacked>li:last-child>a{-webkit-border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px;border-bottom-right-radius:4px;-webkit-border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px;border-bottom-left-radius:4px;}
+.nav-tabs.nav-stacked>li>a:hover,.nav-tabs.nav-stacked>li>a:focus{border-color:#ddd;z-index:2;}
+.nav-pills.nav-stacked>li>a{margin-bottom:3px;}
+.nav-pills.nav-stacked>li:last-child>a{margin-bottom:1px;}
+.nav-tabs .dropdown-menu{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;}
+.nav-pills .dropdown-menu{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;}
+.nav .dropdown-toggle .caret{border-top-color:#0088cc;border-bottom-color:#0088cc;margin-top:6px;}
+.nav .dropdown-toggle:hover .caret,.nav .dropdown-toggle:focus .caret{border-top-color:#005580;border-bottom-color:#005580;}
+.nav-tabs .dropdown-toggle .caret{margin-top:8px;}
+.nav .active .dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff;}
+.nav-tabs .active .dropdown-toggle .caret{border-top-color:#555555;border-bottom-color:#555555;}
+.nav>.dropdown.active>a:hover,.nav>.dropdown.active>a:focus{cursor:pointer;}
+.nav-tabs .open .dropdown-toggle,.nav-pills .open .dropdown-toggle,.nav>li.dropdown.open.active>a:hover,.nav>li.dropdown.open.active>a:focus{color:#ffffff;background-color:#999999;border-color:#999999;}
+.nav li.dropdown.open .caret,.nav li.dropdown.open.active .caret,.nav li.dropdown.open a:hover .caret,.nav li.dropdown.open a:focus .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;opacity:1;filter:alpha(opacity=100);}
+.tabs-stacked .open>a:hover,.tabs-stacked .open>a:focus{border-color:#999999;}
+.tabbable{*zoom:1;}.tabbable:before,.tabbable:after{display:table;content:"";line-height:0;}
+.tabbable:after{clear:both;}
+.tab-content{overflow:auto;}
+.tabs-below>.nav-tabs,.tabs-right>.nav-tabs,.tabs-left>.nav-tabs{border-bottom:0;}
+.tab-content>.tab-pane,.pill-content>.pill-pane{display:none;}
+.tab-content>.active,.pill-content>.active{display:block;}
+.tabs-below>.nav-tabs{border-top:1px solid #ddd;}
+.tabs-below>.nav-tabs>li{margin-top:-1px;margin-bottom:0;}
+.tabs-below>.nav-tabs>li>a{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px;}.tabs-below>.nav-tabs>li>a:hover,.tabs-below>.nav-tabs>li>a:focus{border-bottom-color:transparent;border-top-color:#ddd;}
+.tabs-below>.nav-tabs>.active>a,.tabs-below>.nav-tabs>.active>a:hover,.tabs-below>.nav-tabs>.active>a:focus{border-color:transparent #ddd #ddd #ddd;}
+.tabs-left>.nav-tabs>li,.tabs-right>.nav-tabs>li{float:none;}
+.tabs-left>.nav-tabs>li>a,.tabs-right>.nav-tabs>li>a{min-width:74px;margin-right:0;margin-bottom:3px;}
+.tabs-left>.nav-tabs{float:left;margin-right:19px;border-right:1px solid #ddd;}
+.tabs-left>.nav-tabs>li>a{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px;}
+.tabs-left>.nav-tabs>li>a:hover,.tabs-left>.nav-tabs>li>a:focus{border-color:#eeeeee #dddddd #eeeeee #eeeeee;}
+.tabs-left>.nav-tabs .active>a,.tabs-left>.nav-tabs .active>a:hover,.tabs-left>.nav-tabs .active>a:focus{border-color:#ddd transparent #ddd #ddd;*border-right-color:#ffffff;}
+.tabs-right>.nav-tabs{float:right;margin-left:19px;border-left:1px solid #ddd;}
+.tabs-right>.nav-tabs>li>a{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0;}
+.tabs-right>.nav-tabs>li>a:hover,.tabs-right>.nav-tabs>li>a:focus{border-color:#eeeeee #eeeeee #eeeeee #dddddd;}
+.tabs-right>.nav-tabs .active>a,.tabs-right>.nav-tabs .active>a:hover,.tabs-right>.nav-tabs .active>a:focus{border-color:#ddd #ddd #ddd transparent;*border-left-color:#ffffff;}
+.nav>.disabled>a{color:#999999;}
+.nav>.disabled>a:hover,.nav>.disabled>a:focus{text-decoration:none;background-color:transparent;cursor:default;}
+.navbar{overflow:visible;margin-bottom:20px;*position:relative;*z-index:2;}
+.navbar-inner{min-height:40px;padding-left:20px;padding-right:20px;background-color:#fafafa;background-image:-moz-linear-gradient(top, #ffffff, #f2f2f2);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ffffff), to(#f2f2f2));background-image:-webkit-linear-gradient(top, #ffffff, #f2f2f2);background-image:-o-linear-gradient(top, #ffffff, #f2f2f2);background-image:linear-gradient(to bottom, #ffffff, #f2f2f2);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff', endColorstr='#fff2f2f2', GradientType=0);border:1px solid #d4d4d4;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 4px rgba(0, 0, 0, 0.065);-moz-box-shadow:0 1px 4px rgba(0, 0, 0, 0.065);box-shadow:0 1px 4px rgba(0, 0, 0, 0.065);*zoom:1;}.navbar-inner:before,.navbar-inner:after{display:table;content:"";line-height:0;}
+.navbar-inner:after{clear:both;}
+.navbar .container{width:auto;}
+.nav-collapse.collapse{height:auto;overflow:visible;}
+.navbar .brand{float:left;display:block;padding:10px 20px 10px;margin-left:-20px;font-size:20px;font-weight:200;color:#777777;text-shadow:0 1px 0 #ffffff;}.navbar .brand:hover,.navbar .brand:focus{text-decoration:none;}
+.navbar-text{margin-bottom:0;line-height:40px;color:#777777;}
+.navbar-link{color:#777777;}.navbar-link:hover,.navbar-link:focus{color:#333333;}
+.navbar .divider-vertical{height:40px;margin:0 9px;border-left:1px solid #f2f2f2;border-right:1px solid #ffffff;}
+.navbar .btn,.navbar .btn-group{margin-top:5px;}
+.navbar .btn-group .btn,.navbar .input-prepend .btn,.navbar .input-append .btn,.navbar .input-prepend .btn-group,.navbar .input-append .btn-group{margin-top:0;}
+.navbar-form{margin-bottom:0;*zoom:1;}.navbar-form:before,.navbar-form:after{display:table;content:"";line-height:0;}
+.navbar-form:after{clear:both;}
+.navbar-form input,.navbar-form select,.navbar-form .radio,.navbar-form .checkbox{margin-top:5px;}
+.navbar-form input,.navbar-form select,.navbar-form .btn{display:inline-block;margin-bottom:0;}
+.navbar-form input[type="image"],.navbar-form input[type="checkbox"],.navbar-form input[type="radio"]{margin-top:3px;}
+.navbar-form .input-append,.navbar-form .input-prepend{margin-top:5px;white-space:nowrap;}.navbar-form .input-append input,.navbar-form .input-prepend input{margin-top:0;}
+.navbar-search{position:relative;float:left;margin-top:5px;margin-bottom:0;}.navbar-search .search-query{margin-bottom:0;padding:4px 14px;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:13px;font-weight:normal;line-height:1;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px;}
+.navbar-static-top{position:static;margin-bottom:0;}.navbar-static-top .navbar-inner{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.navbar-fixed-top,.navbar-fixed-bottom{position:fixed;right:0;left:0;z-index:1030;margin-bottom:0;}
+.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{border-width:0 0 1px;}
+.navbar-fixed-bottom .navbar-inner{border-width:1px 0 0;}
+.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding-left:0;padding-right:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;}
+.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px;}
+.navbar-fixed-top{top:0;}
+.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{-webkit-box-shadow:0 1px 10px rgba(0,0,0,.1);-moz-box-shadow:0 1px 10px rgba(0,0,0,.1);box-shadow:0 1px 10px rgba(0,0,0,.1);}
+.navbar-fixed-bottom{bottom:0;}.navbar-fixed-bottom .navbar-inner{-webkit-box-shadow:0 -1px 10px rgba(0,0,0,.1);-moz-box-shadow:0 -1px 10px rgba(0,0,0,.1);box-shadow:0 -1px 10px rgba(0,0,0,.1);}
+.navbar .nav{position:relative;left:0;display:block;float:left;margin:0 10px 0 0;}
+.navbar .nav.pull-right{float:right;margin-right:0;}
+.navbar .nav>li{float:left;}
+.navbar .nav>li>a{float:none;padding:10px 15px 10px;color:#777777;text-decoration:none;text-shadow:0 1px 0 #ffffff;}
+.navbar .nav .dropdown-toggle .caret{margin-top:8px;}
+.navbar .nav>li>a:focus,.navbar .nav>li>a:hover{background-color:transparent;color:#333333;text-decoration:none;}
+.navbar .nav>.active>a,.navbar .nav>.active>a:hover,.navbar .nav>.active>a:focus{color:#555555;text-decoration:none;background-color:#e5e5e5;-webkit-box-shadow:inset 0 3px 8px rgba(0, 0, 0, 0.125);-moz-box-shadow:inset 0 3px 8px rgba(0, 0, 0, 0.125);box-shadow:inset 0 3px 8px rgba(0, 0, 0, 0.125);}
+.navbar .btn-navbar{display:none;float:right;padding:7px 10px;margin-left:5px;margin-right:5px;color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#ededed;background-image:-moz-linear-gradient(top, #f2f2f2, #e5e5e5);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#f2f2f2), to(#e5e5e5));background-image:-webkit-linear-gradient(top, #f2f2f2, #e5e5e5);background-image:-o-linear-gradient(top, #f2f2f2, #e5e5e5);background-image:linear-gradient(to bottom, #f2f2f2, #e5e5e5);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff2f2f2', endColorstr='#ffe5e5e5', GradientType=0);border-color:#e5e5e5 #e5e5e5 #bfbfbf;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#e5e5e5;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.075);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.075);box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.075);}.navbar .btn-navbar:hover,.navbar .btn-navbar:focus,.navbar .btn-navbar:active,.navbar .btn-navbar.active,.navbar .btn-navbar.disabled,.navbar .btn-navbar[disabled]{color:#ffffff;background-color:#e5e5e5;*background-color:#d9d9d9;}
+.navbar .btn-navbar:active,.navbar .btn-navbar.active{background-color:#cccccc \9;}
+.navbar .btn-navbar .icon-bar{display:block;width:18px;height:2px;background-color:#f5f5f5;-webkit-border-radius:1px;-moz-border-radius:1px;border-radius:1px;-webkit-box-shadow:0 1px 0 rgba(0, 0, 0, 0.25);-moz-box-shadow:0 1px 0 rgba(0, 0, 0, 0.25);box-shadow:0 1px 0 rgba(0, 0, 0, 0.25);}
+.btn-navbar .icon-bar+.icon-bar{margin-top:3px;}
+.navbar .nav>li>.dropdown-menu:before{content:'';display:inline-block;border-left:7px solid transparent;border-right:7px solid transparent;border-bottom:7px solid #ccc;border-bottom-color:rgba(0, 0, 0, 0.2);position:absolute;top:-7px;left:9px;}
+.navbar .nav>li>.dropdown-menu:after{content:'';display:inline-block;border-left:6px solid transparent;border-right:6px solid transparent;border-bottom:6px solid #ffffff;position:absolute;top:-6px;left:10px;}
+.navbar-fixed-bottom .nav>li>.dropdown-menu:before{border-top:7px solid #ccc;border-top-color:rgba(0, 0, 0, 0.2);border-bottom:0;bottom:-7px;top:auto;}
+.navbar-fixed-bottom .nav>li>.dropdown-menu:after{border-top:6px solid #ffffff;border-bottom:0;bottom:-6px;top:auto;}
+.navbar .nav li.dropdown>a:hover .caret,.navbar .nav li.dropdown>a:focus .caret{border-top-color:#333333;border-bottom-color:#333333;}
+.navbar .nav li.dropdown.open>.dropdown-toggle,.navbar .nav li.dropdown.active>.dropdown-toggle,.navbar .nav li.dropdown.open.active>.dropdown-toggle{background-color:#e5e5e5;color:#555555;}
+.navbar .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#777777;border-bottom-color:#777777;}
+.navbar .nav li.dropdown.open>.dropdown-toggle .caret,.navbar .nav li.dropdown.active>.dropdown-toggle .caret,.navbar .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#555555;border-bottom-color:#555555;}
+.navbar .pull-right>li>.dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right{left:auto;right:0;}.navbar .pull-right>li>.dropdown-menu:before,.navbar .nav>li>.dropdown-menu.pull-right:before{left:auto;right:12px;}
+.navbar .pull-right>li>.dropdown-menu:after,.navbar .nav>li>.dropdown-menu.pull-right:after{left:auto;right:13px;}
+.navbar .pull-right>li>.dropdown-menu .dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right .dropdown-menu{left:auto;right:100%;margin-left:0;margin-right:-1px;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px;}
+.navbar-inverse .navbar-inner{background-color:#1b1b1b;background-image:-moz-linear-gradient(top, #222222, #111111);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#222222), to(#111111));background-image:-webkit-linear-gradient(top, #222222, #111111);background-image:-o-linear-gradient(top, #222222, #111111);background-image:linear-gradient(to bottom, #222222, #111111);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff222222', endColorstr='#ff111111', GradientType=0);border-color:#252525;}
+.navbar-inverse .brand,.navbar-inverse .nav>li>a{color:#999999;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);}.navbar-inverse .brand:hover,.navbar-inverse .nav>li>a:hover,.navbar-inverse .brand:focus,.navbar-inverse .nav>li>a:focus{color:#ffffff;}
+.navbar-inverse .brand{color:#999999;}
+.navbar-inverse .navbar-text{color:#999999;}
+.navbar-inverse .nav>li>a:focus,.navbar-inverse .nav>li>a:hover{background-color:transparent;color:#ffffff;}
+.navbar-inverse .nav .active>a,.navbar-inverse .nav .active>a:hover,.navbar-inverse .nav .active>a:focus{color:#ffffff;background-color:#111111;}
+.navbar-inverse .navbar-link{color:#999999;}.navbar-inverse .navbar-link:hover,.navbar-inverse .navbar-link:focus{color:#ffffff;}
+.navbar-inverse .divider-vertical{border-left-color:#111111;border-right-color:#222222;}
+.navbar-inverse .nav li.dropdown.open>.dropdown-toggle,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle{background-color:#111111;color:#ffffff;}
+.navbar-inverse .nav li.dropdown>a:hover .caret,.navbar-inverse .nav li.dropdown>a:focus .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;}
+.navbar-inverse .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#999999;border-bottom-color:#999999;}
+.navbar-inverse .nav li.dropdown.open>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#ffffff;border-bottom-color:#ffffff;}
+.navbar-inverse .navbar-search .search-query{color:#ffffff;background-color:#515151;border-color:#111111;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,.1), 0 1px 0 rgba(255,255,255,.15);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,.1), 0 1px 0 rgba(255,255,255,.15);box-shadow:inset 0 1px 2px rgba(0,0,0,.1), 0 1px 0 rgba(255,255,255,.15);-webkit-transition:none;-moz-transition:none;-o-transition:none;transition:none;}.navbar-inverse .navbar-search .search-query:-moz-placeholder{color:#cccccc;}
+.navbar-inverse .navbar-search .search-query:-ms-input-placeholder{color:#cccccc;}
+.navbar-inverse .navbar-search .search-query::-webkit-input-placeholder{color:#cccccc;}
+.navbar-inverse .navbar-search .search-query:focus,.navbar-inverse .navbar-search .search-query.focused{padding:5px 15px;color:#333333;text-shadow:0 1px 0 #ffffff;background-color:#ffffff;border:0;-webkit-box-shadow:0 0 3px rgba(0, 0, 0, 0.15);-moz-box-shadow:0 0 3px rgba(0, 0, 0, 0.15);box-shadow:0 0 3px rgba(0, 0, 0, 0.15);outline:0;}
+.navbar-inverse .btn-navbar{color:#ffffff;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#0e0e0e;background-image:-moz-linear-gradient(top, #151515, #040404);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#151515), to(#040404));background-image:-webkit-linear-gradient(top, #151515, #040404);background-image:-o-linear-gradient(top, #151515, #040404);background-image:linear-gradient(to bottom, #151515, #040404);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff151515', endColorstr='#ff040404', GradientType=0);border-color:#040404 #040404 #000000;border-color:rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);*background-color:#040404;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);}.navbar-inverse .btn-navbar:hover,.navbar-inverse .btn-navbar:focus,.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active,.navbar-inverse .btn-navbar.disabled,.navbar-inverse .btn-navbar[disabled]{color:#ffffff;background-color:#040404;*background-color:#000000;}
+.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active{background-color:#000000 \9;}
+.breadcrumb{padding:8px 15px;margin:0 0 20px;list-style:none;background-color:#f5f5f5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}.breadcrumb>li{display:inline-block;*display:inline;*zoom:1;text-shadow:0 1px 0 #ffffff;}.breadcrumb>li>.divider{padding:0 5px;color:#ccc;}
+.breadcrumb>.active{color:#999999;}
+.pagination{margin:20px 0;}
+.pagination ul{display:inline-block;*display:inline;*zoom:1;margin-left:0;margin-bottom:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 2px rgba(0, 0, 0, 0.05);-moz-box-shadow:0 1px 2px rgba(0, 0, 0, 0.05);box-shadow:0 1px 2px rgba(0, 0, 0, 0.05);}
+.pagination ul>li{display:inline;}
+.pagination ul>li>a,.pagination ul>li>span{float:left;padding:4px 12px;line-height:20px;text-decoration:none;background-color:#ffffff;border:1px solid #dddddd;border-left-width:0;}
+.pagination ul>li>a:hover,.pagination ul>li>a:focus,.pagination ul>.active>a,.pagination ul>.active>span{background-color:#f5f5f5;}
+.pagination ul>.active>a,.pagination ul>.active>span{color:#999999;cursor:default;}
+.pagination ul>.disabled>span,.pagination ul>.disabled>a,.pagination ul>.disabled>a:hover,.pagination ul>.disabled>a:focus{color:#999999;background-color:transparent;cursor:default;}
+.pagination ul>li:first-child>a,.pagination ul>li:first-child>span{border-left-width:1px;-webkit-border-top-left-radius:4px;-moz-border-radius-topleft:4px;border-top-left-radius:4px;-webkit-border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px;border-bottom-left-radius:4px;}
+.pagination ul>li:last-child>a,.pagination ul>li:last-child>span{-webkit-border-top-right-radius:4px;-moz-border-radius-topright:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px;border-bottom-right-radius:4px;}
+.pagination-centered{text-align:center;}
+.pagination-right{text-align:right;}
+.pagination-large ul>li>a,.pagination-large ul>li>span{padding:11px 19px;font-size:17.5px;}
+.pagination-large ul>li:first-child>a,.pagination-large ul>li:first-child>span{-webkit-border-top-left-radius:6px;-moz-border-radius-topleft:6px;border-top-left-radius:6px;-webkit-border-bottom-left-radius:6px;-moz-border-radius-bottomleft:6px;border-bottom-left-radius:6px;}
+.pagination-large ul>li:last-child>a,.pagination-large ul>li:last-child>span{-webkit-border-top-right-radius:6px;-moz-border-radius-topright:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;-moz-border-radius-bottomright:6px;border-bottom-right-radius:6px;}
+.pagination-mini ul>li:first-child>a,.pagination-small ul>li:first-child>a,.pagination-mini ul>li:first-child>span,.pagination-small ul>li:first-child>span{-webkit-border-top-left-radius:3px;-moz-border-radius-topleft:3px;border-top-left-radius:3px;-webkit-border-bottom-left-radius:3px;-moz-border-radius-bottomleft:3px;border-bottom-left-radius:3px;}
+.pagination-mini ul>li:last-child>a,.pagination-small ul>li:last-child>a,.pagination-mini ul>li:last-child>span,.pagination-small ul>li:last-child>span{-webkit-border-top-right-radius:3px;-moz-border-radius-topright:3px;border-top-right-radius:3px;-webkit-border-bottom-right-radius:3px;-moz-border-radius-bottomright:3px;border-bottom-right-radius:3px;}
+.pagination-small ul>li>a,.pagination-small ul>li>span{padding:2px 10px;font-size:11.9px;}
+.pagination-mini ul>li>a,.pagination-mini ul>li>span{padding:0 6px;font-size:10.5px;}
+.pager{margin:20px 0;list-style:none;text-align:center;*zoom:1;}.pager:before,.pager:after{display:table;content:"";line-height:0;}
+.pager:after{clear:both;}
+.pager li{display:inline;}
+.pager li>a,.pager li>span{display:inline-block;padding:5px 14px;background-color:#fff;border:1px solid #ddd;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px;}
+.pager li>a:hover,.pager li>a:focus{text-decoration:none;background-color:#f5f5f5;}
+.pager .next>a,.pager .next>span{float:right;}
+.pager .previous>a,.pager .previous>span{float:left;}
+.pager .disabled>a,.pager .disabled>a:hover,.pager .disabled>a:focus,.pager .disabled>span{color:#999999;background-color:#fff;cursor:default;}
+.thumbnails{margin-left:-20px;list-style:none;*zoom:1;}.thumbnails:before,.thumbnails:after{display:table;content:"";line-height:0;}
+.thumbnails:after{clear:both;}
+.row-fluid .thumbnails{margin-left:0;}
+.thumbnails>li{float:left;margin-bottom:20px;margin-left:20px;}
+.thumbnail{display:block;padding:4px;line-height:20px;border:1px solid #ddd;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 3px rgba(0, 0, 0, 0.055);-moz-box-shadow:0 1px 3px rgba(0, 0, 0, 0.055);box-shadow:0 1px 3px rgba(0, 0, 0, 0.055);-webkit-transition:all 0.2s ease-in-out;-moz-transition:all 0.2s ease-in-out;-o-transition:all 0.2s ease-in-out;transition:all 0.2s ease-in-out;}
+a.thumbnail:hover,a.thumbnail:focus{border-color:#0088cc;-webkit-box-shadow:0 1px 4px rgba(0, 105, 214, 0.25);-moz-box-shadow:0 1px 4px rgba(0, 105, 214, 0.25);box-shadow:0 1px 4px rgba(0, 105, 214, 0.25);}
+.thumbnail>img{display:block;max-width:100%;margin-left:auto;margin-right:auto;}
+.thumbnail .caption{padding:9px;color:#555555;}
+.alert{padding:8px 35px 8px 14px;margin-bottom:20px;text-shadow:0 1px 0 rgba(255, 255, 255, 0.5);background-color:#fcf8e3;border:1px solid #fbeed5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}
+.alert,.alert h4{color:#c09853;}
+.alert h4{margin:0;}
+.alert .close{position:relative;top:-2px;right:-21px;line-height:20px;}
+.alert-success{background-color:#dff0d8;border-color:#d6e9c6;color:#468847;}
+.alert-success h4{color:#468847;}
+.alert-danger,.alert-error{background-color:#f2dede;border-color:#eed3d7;color:#b94a48;}
+.alert-danger h4,.alert-error h4{color:#b94a48;}
+.alert-info{background-color:#d9edf7;border-color:#bce8f1;color:#3a87ad;}
+.alert-info h4{color:#3a87ad;}
+.alert-block{padding-top:14px;padding-bottom:14px;}
+.alert-block>p,.alert-block>ul{margin-bottom:0;}
+.alert-block p+p{margin-top:5px;}
+@-webkit-keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}@-moz-keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}@-ms-keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}@-o-keyframes progress-bar-stripes{from{background-position:0 0;} to{background-position:40px 0;}}@keyframes progress-bar-stripes{from{background-position:40px 0;} to{background-position:0 0;}}.progress{overflow:hidden;height:20px;margin-bottom:20px;background-color:#f7f7f7;background-image:-moz-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#f5f5f5), to(#f9f9f9));background-image:-webkit-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:-o-linear-gradient(top, #f5f5f5, #f9f9f9);background-image:linear-gradient(to bottom, #f5f5f5, #f9f9f9);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff5f5f5', endColorstr='#fff9f9f9', GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.1);-moz-box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.1);box-shadow:inset 0 1px 2px rgba(0, 0, 0, 0.1);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}
+.progress .bar{width:0%;height:100%;color:#ffffff;float:left;font-size:12px;text-align:center;text-shadow:0 -1px 0 rgba(0, 0, 0, 0.25);background-color:#0e90d2;background-image:-moz-linear-gradient(top, #149bdf, #0480be);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#149bdf), to(#0480be));background-image:-webkit-linear-gradient(top, #149bdf, #0480be);background-image:-o-linear-gradient(top, #149bdf, #0480be);background-image:linear-gradient(to bottom, #149bdf, #0480be);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff149bdf', endColorstr='#ff0480be', GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0, 0, 0, 0.15);-moz-box-shadow:inset 0 -1px 0 rgba(0, 0, 0, 0.15);box-shadow:inset 0 -1px 0 rgba(0, 0, 0, 0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;-webkit-transition:width 0.6s ease;-moz-transition:width 0.6s ease;-o-transition:width 0.6s ease;transition:width 0.6s ease;}
+.progress .bar+.bar{-webkit-box-shadow:inset 1px 0 0 rgba(0,0,0,.15), inset 0 -1px 0 rgba(0,0,0,.15);-moz-box-shadow:inset 1px 0 0 rgba(0,0,0,.15), inset 0 -1px 0 rgba(0,0,0,.15);box-shadow:inset 1px 0 0 rgba(0,0,0,.15), inset 0 -1px 0 rgba(0,0,0,.15);}
+.progress-striped .bar{background-color:#149bdf;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);-webkit-background-size:40px 40px;-moz-background-size:40px 40px;-o-background-size:40px 40px;background-size:40px 40px;}
+.progress.active .bar{-webkit-animation:progress-bar-stripes 2s linear infinite;-moz-animation:progress-bar-stripes 2s linear infinite;-ms-animation:progress-bar-stripes 2s linear infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite;}
+.progress-danger .bar,.progress .bar-danger{background-color:#dd514c;background-image:-moz-linear-gradient(top, #ee5f5b, #c43c35);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#ee5f5b), to(#c43c35));background-image:-webkit-linear-gradient(top, #ee5f5b, #c43c35);background-image:-o-linear-gradient(top, #ee5f5b, #c43c35);background-image:linear-gradient(to bottom, #ee5f5b, #c43c35);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffee5f5b', endColorstr='#ffc43c35', GradientType=0);}
+.progress-danger.progress-striped .bar,.progress-striped .bar-danger{background-color:#ee5f5b;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);}
+.progress-success .bar,.progress .bar-success{background-color:#5eb95e;background-image:-moz-linear-gradient(top, #62c462, #57a957);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#62c462), to(#57a957));background-image:-webkit-linear-gradient(top, #62c462, #57a957);background-image:-o-linear-gradient(top, #62c462, #57a957);background-image:linear-gradient(to bottom, #62c462, #57a957);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff62c462', endColorstr='#ff57a957', GradientType=0);}
+.progress-success.progress-striped .bar,.progress-striped .bar-success{background-color:#62c462;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);}
+.progress-info .bar,.progress .bar-info{background-color:#4bb1cf;background-image:-moz-linear-gradient(top, #5bc0de, #339bb9);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#5bc0de), to(#339bb9));background-image:-webkit-linear-gradient(top, #5bc0de, #339bb9);background-image:-o-linear-gradient(top, #5bc0de, #339bb9);background-image:linear-gradient(to bottom, #5bc0de, #339bb9);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de', endColorstr='#ff339bb9', GradientType=0);}
+.progress-info.progress-striped .bar,.progress-striped .bar-info{background-color:#5bc0de;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);}
+.progress-warning .bar,.progress .bar-warning{background-color:#faa732;background-image:-moz-linear-gradient(top, #fbb450, #f89406);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#fbb450), to(#f89406));background-image:-webkit-linear-gradient(top, #fbb450, #f89406);background-image:-o-linear-gradient(top, #fbb450, #f89406);background-image:linear-gradient(to bottom, #fbb450, #f89406);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffbb450', endColorstr='#fff89406', GradientType=0);}
+.progress-warning.progress-striped .bar,.progress-striped .bar-warning{background-color:#fbb450;background-image:-webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));background-image:-webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:-o-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);}
+.hero-unit{padding:60px;margin-bottom:30px;font-size:18px;font-weight:200;line-height:30px;color:inherit;background-color:#eeeeee;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;}.hero-unit h1{margin-bottom:0;font-size:60px;line-height:1;color:inherit;letter-spacing:-1px;}
+.hero-unit li{line-height:30px;}
+.media,.media-body{overflow:hidden;*overflow:visible;zoom:1;}
+.media,.media .media{margin-top:15px;}
+.media:first-child{margin-top:0;}
+.media-object{display:block;}
+.media-heading{margin:0 0 5px;}
+.media>.pull-left{margin-right:10px;}
+.media>.pull-right{margin-left:10px;}
+.media-list{margin-left:0;list-style:none;}
+.tooltip{position:absolute;z-index:1030;display:block;visibility:visible;font-size:11px;line-height:1.4;opacity:0;filter:alpha(opacity=0);}.tooltip.in{opacity:0.8;filter:alpha(opacity=80);}
+.tooltip.top{margin-top:-3px;padding:5px 0;}
+.tooltip.right{margin-left:3px;padding:0 5px;}
+.tooltip.bottom{margin-top:3px;padding:5px 0;}
+.tooltip.left{margin-left:-3px;padding:0 5px;}
+.tooltip-inner{max-width:200px;padding:8px;color:#ffffff;text-align:center;text-decoration:none;background-color:#000000;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;}
+.tooltip-arrow{position:absolute;width:0;height:0;border-color:transparent;border-style:solid;}
+.tooltip.top .tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-width:5px 5px 0;border-top-color:#000000;}
+.tooltip.right .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-width:5px 5px 5px 0;border-right-color:#000000;}
+.tooltip.left .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-width:5px 0 5px 5px;border-left-color:#000000;}
+.tooltip.bottom .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-width:0 5px 5px;border-bottom-color:#000000;}
+.popover{position:absolute;top:0;left:0;z-index:1010;display:none;max-width:276px;padding:1px;text-align:left;background-color:#ffffff;-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box;border:1px solid #ccc;border:1px solid rgba(0, 0, 0, 0.2);-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);-moz-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);white-space:normal;}.popover.top{margin-top:-10px;}
+.popover.right{margin-left:10px;}
+.popover.bottom{margin-top:10px;}
+.popover.left{margin-left:-10px;}
+.popover-title{margin:0;padding:8px 14px;font-size:14px;font-weight:normal;line-height:18px;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;-webkit-border-radius:5px 5px 0 0;-moz-border-radius:5px 5px 0 0;border-radius:5px 5px 0 0;}.popover-title:empty{display:none;}
+.popover-content{padding:9px 14px;}
+.popover .arrow,.popover .arrow:after{position:absolute;display:block;width:0;height:0;border-color:transparent;border-style:solid;}
+.popover .arrow{border-width:11px;}
+.popover .arrow:after{border-width:10px;content:"";}
+.popover.top .arrow{left:50%;margin-left:-11px;border-bottom-width:0;border-top-color:#999;border-top-color:rgba(0, 0, 0, 0.25);bottom:-11px;}.popover.top .arrow:after{bottom:1px;margin-left:-10px;border-bottom-width:0;border-top-color:#ffffff;}
+.popover.right .arrow{top:50%;left:-11px;margin-top:-11px;border-left-width:0;border-right-color:#999;border-right-color:rgba(0, 0, 0, 0.25);}.popover.right .arrow:after{left:1px;bottom:-10px;border-left-width:0;border-right-color:#ffffff;}
+.popover.bottom .arrow{left:50%;margin-left:-11px;border-top-width:0;border-bottom-color:#999;border-bottom-color:rgba(0, 0, 0, 0.25);top:-11px;}.popover.bottom .arrow:after{top:1px;margin-left:-10px;border-top-width:0;border-bottom-color:#ffffff;}
+.popover.left .arrow{top:50%;right:-11px;margin-top:-11px;border-right-width:0;border-left-color:#999;border-left-color:rgba(0, 0, 0, 0.25);}.popover.left .arrow:after{right:1px;border-right-width:0;border-left-color:#ffffff;bottom:-10px;}
+.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000000;}.modal-backdrop.fade{opacity:0;}
+.modal-backdrop,.modal-backdrop.fade.in{opacity:0.8;filter:alpha(opacity=80);}
+.modal{position:fixed;top:10%;left:50%;z-index:1050;width:560px;margin-left:-280px;background-color:#ffffff;border:1px solid #999;border:1px solid rgba(0, 0, 0, 0.3);*border:1px solid #999;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);-moz-box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);box-shadow:0 3px 7px rgba(0, 0, 0, 0.3);-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box;outline:none;}.modal.fade{-webkit-transition:opacity .3s linear, top .3s ease-out;-moz-transition:opacity .3s linear, top .3s ease-out;-o-transition:opacity .3s linear, top .3s ease-out;transition:opacity .3s linear, top .3s ease-out;top:-25%;}
+.modal.fade.in{top:10%;}
+.modal-header{padding:9px 15px;border-bottom:1px solid #eee;}.modal-header .close{margin-top:2px;}
+.modal-header h3{margin:0;line-height:30px;}
+.modal-body{position:relative;overflow-y:auto;max-height:400px;padding:15px;} +.modal-form{margin-bottom:0;} +.modal-footer{padding:14px 15px 15px;margin-bottom:0;text-align:right;background-color:#f5f5f5;border-top:1px solid #ddd;-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;-webkit-box-shadow:inset 0 1px 0 #ffffff;-moz-box-shadow:inset 0 1px 0 #ffffff;box-shadow:inset 0 1px 0 #ffffff;*zoom:1;}.modal-footer:before,.modal-footer:after{display:table;content:"";line-height:0;} +.modal-footer:after{clear:both;} +.modal-footer .btn+.btn{margin-left:5px;margin-bottom:0;} +.modal-footer .btn-group .btn+.btn{margin-left:-1px;} +.modal-footer .btn-block+.btn-block{margin-left:0;} +.dropup,.dropdown{position:relative;} +.dropdown-toggle{*margin-bottom:-3px;} +.dropdown-toggle:active,.open .dropdown-toggle{outline:0;} +.caret{display:inline-block;width:0;height:0;vertical-align:top;border-top:4px solid #000000;border-right:4px solid transparent;border-left:4px solid transparent;content:"";} +.dropdown .caret{margin-top:8px;margin-left:2px;} +.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:160px;padding:5px 0;margin:2px 0 0;list-style:none;background-color:#ffffff;border:1px solid #ccc;border:1px solid rgba(0, 0, 0, 0.2);*border-right-width:2px;*border-bottom-width:2px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);-moz-box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);box-shadow:0 5px 10px rgba(0, 0, 0, 0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box;}.dropdown-menu.pull-right{right:0;left:auto;} +.dropdown-menu .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #ffffff;} +.dropdown-menu>li>a{display:block;padding:3px 20px;clear:both;font-weight:normal;line-height:20px;color:#333333;white-space:nowrap;} +.dropdown-menu>li>a:hover,.dropdown-menu>li>a:focus,.dropdown-submenu:hover>a,.dropdown-submenu:focus>a{text-decoration:none;color:#ffffff;background-color:#0081c2;background-image:-moz-linear-gradient(top, #0088cc, #0077b3);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#0088cc), to(#0077b3));background-image:-webkit-linear-gradient(top, #0088cc, #0077b3);background-image:-o-linear-gradient(top, #0088cc, #0077b3);background-image:linear-gradient(to bottom, #0088cc, #0077b3);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc', endColorstr='#ff0077b3', GradientType=0);} +.dropdown-menu>.active>a,.dropdown-menu>.active>a:hover,.dropdown-menu>.active>a:focus{color:#ffffff;text-decoration:none;outline:0;background-color:#0081c2;background-image:-moz-linear-gradient(top, #0088cc, #0077b3);background-image:-webkit-gradient(linear, 0 0, 0 100%, from(#0088cc), to(#0077b3));background-image:-webkit-linear-gradient(top, #0088cc, #0077b3);background-image:-o-linear-gradient(top, #0088cc, #0077b3);background-image:linear-gradient(to bottom, #0088cc, #0077b3);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc', endColorstr='#ff0077b3', GradientType=0);} +.dropdown-menu>.disabled>a,.dropdown-menu>.disabled>a:hover,.dropdown-menu>.disabled>a:focus{color:#999999;} 
+.dropdown-menu>.disabled>a:hover,.dropdown-menu>.disabled>a:focus{text-decoration:none;background-color:transparent;background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);cursor:default;} +.open{*z-index:1000;}.open>.dropdown-menu{display:block;} +.dropdown-backdrop{position:fixed;left:0;right:0;bottom:0;top:0;z-index:990;} +.pull-right>.dropdown-menu{right:0;left:auto;} +.dropup .caret,.navbar-fixed-bottom .dropdown .caret{border-top:0;border-bottom:4px solid #000000;content:"";} +.dropup .dropdown-menu,.navbar-fixed-bottom .dropdown .dropdown-menu{top:auto;bottom:100%;margin-bottom:1px;} +.dropdown-submenu{position:relative;} +.dropdown-submenu>.dropdown-menu{top:0;left:100%;margin-top:-6px;margin-left:-1px;-webkit-border-radius:0 6px 6px 6px;-moz-border-radius:0 6px 6px 6px;border-radius:0 6px 6px 6px;} +.dropdown-submenu:hover>.dropdown-menu{display:block;} +.dropup .dropdown-submenu>.dropdown-menu{top:auto;bottom:0;margin-top:0;margin-bottom:-2px;-webkit-border-radius:5px 5px 5px 0;-moz-border-radius:5px 5px 5px 0;border-radius:5px 5px 5px 0;} +.dropdown-submenu>a:after{display:block;content:" ";float:right;width:0;height:0;border-color:transparent;border-style:solid;border-width:5px 0 5px 5px;border-left-color:#cccccc;margin-top:5px;margin-right:-10px;} +.dropdown-submenu:hover>a:after{border-left-color:#ffffff;} +.dropdown-submenu.pull-left{float:none;}.dropdown-submenu.pull-left>.dropdown-menu{left:-100%;margin-left:10px;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px;} +.dropdown .dropdown-menu .nav-header{padding-left:20px;padding-right:20px;} +.typeahead{z-index:1051;margin-top:2px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} +.accordion{margin-bottom:20px;} +.accordion-group{margin-bottom:2px;border:1px solid #e5e5e5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} +.accordion-heading{border-bottom:0;} +.accordion-heading .accordion-toggle{display:block;padding:8px 15px;} +.accordion-toggle{cursor:pointer;} +.accordion-inner{padding:9px 15px;border-top:1px solid #e5e5e5;} +.carousel{position:relative;margin-bottom:20px;line-height:1;} +.carousel-inner{overflow:hidden;width:100%;position:relative;} +.carousel-inner>.item{display:none;position:relative;-webkit-transition:0.6s ease-in-out left;-moz-transition:0.6s ease-in-out left;-o-transition:0.6s ease-in-out left;transition:0.6s ease-in-out left;}.carousel-inner>.item>img,.carousel-inner>.item>a>img{display:block;line-height:1;} +.carousel-inner>.active,.carousel-inner>.next,.carousel-inner>.prev{display:block;} +.carousel-inner>.active{left:0;} +.carousel-inner>.next,.carousel-inner>.prev{position:absolute;top:0;width:100%;} +.carousel-inner>.next{left:100%;} +.carousel-inner>.prev{left:-100%;} +.carousel-inner>.next.left,.carousel-inner>.prev.right{left:0;} +.carousel-inner>.active.left{left:-100%;} +.carousel-inner>.active.right{left:100%;} +.carousel-control{position:absolute;top:40%;left:15px;width:40px;height:40px;margin-top:-20px;font-size:60px;font-weight:100;line-height:30px;color:#ffffff;text-align:center;background:#222222;border:3px solid #ffffff;-webkit-border-radius:23px;-moz-border-radius:23px;border-radius:23px;opacity:0.5;filter:alpha(opacity=50);}.carousel-control.right{left:auto;right:15px;} +.carousel-control:hover,.carousel-control:focus{color:#ffffff;text-decoration:none;opacity:0.9;filter:alpha(opacity=90);} 
+.carousel-indicators{position:absolute;top:15px;right:15px;z-index:5;margin:0;list-style:none;}.carousel-indicators li{display:block;float:left;width:10px;height:10px;margin-left:5px;text-indent:-999px;background-color:#ccc;background-color:rgba(255, 255, 255, 0.25);border-radius:5px;} +.carousel-indicators .active{background-color:#fff;} +.carousel-caption{position:absolute;left:0;right:0;bottom:0;padding:15px;background:#333333;background:rgba(0, 0, 0, 0.75);} +.carousel-caption h4,.carousel-caption p{color:#ffffff;line-height:20px;} +.carousel-caption h4{margin:0 0 5px;} +.carousel-caption p{margin-bottom:0;} +.well{min-height:20px;padding:19px;margin-bottom:20px;background-color:#f5f5f5;border:1px solid #e3e3e3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.05);-moz-box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.05);box-shadow:inset 0 1px 1px rgba(0, 0, 0, 0.05);}.well blockquote{border-color:#ddd;border-color:rgba(0, 0, 0, 0.15);} +.well-large{padding:24px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;} +.well-small{padding:9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;} +.close{float:right;font-size:20px;font-weight:bold;line-height:20px;color:#000000;text-shadow:0 1px 0 #ffffff;opacity:0.2;filter:alpha(opacity=20);}.close:hover,.close:focus{color:#000000;text-decoration:none;cursor:pointer;opacity:0.4;filter:alpha(opacity=40);} +button.close{padding:0;cursor:pointer;background:transparent;border:0;-webkit-appearance:none;} +.pull-right{float:right;} +.pull-left{float:left;} +.hide{display:none;} +.show{display:block;} +.invisible{visibility:hidden;} +.affix{position:fixed;} +.fade{opacity:0;-webkit-transition:opacity 0.15s linear;-moz-transition:opacity 0.15s linear;-o-transition:opacity 0.15s linear;transition:opacity 0.15s linear;}.fade.in{opacity:1;} +.collapse{position:relative;height:0;overflow:hidden;-webkit-transition:height 0.35s ease;-moz-transition:height 0.35s ease;-o-transition:height 0.35s ease;transition:height 0.35s ease;}.collapse.in{height:auto;} +@-ms-viewport{width:device-width;}.hidden{display:none;visibility:hidden;} +.visible-phone{display:none !important;} +.visible-tablet{display:none !important;} +.hidden-desktop{display:none !important;} +.visible-desktop{display:inherit !important;} +@media (min-width:768px) and (max-width:979px){.hidden-desktop{display:inherit !important;} .visible-desktop{display:none !important ;} .visible-tablet{display:inherit !important;} .hidden-tablet{display:none !important;}}@media (max-width:767px){.hidden-desktop{display:inherit !important;} .visible-desktop{display:none !important;} .visible-phone{display:inherit !important;} .hidden-phone{display:none !important;}}.visible-print{display:none !important;} +@media print{.visible-print{display:inherit !important;} .hidden-print{display:none !important;}}@media (max-width:767px){body{padding-left:20px;padding-right:20px;} .navbar-fixed-top,.navbar-fixed-bottom,.navbar-static-top{margin-left:-20px;margin-right:-20px;} .container-fluid{padding:0;} .dl-horizontal dt{float:none;clear:none;width:auto;text-align:left;} .dl-horizontal dd{margin-left:0;} .container{width:auto;} .row-fluid{width:100%;} .row,.thumbnails{margin-left:0;} .thumbnails>li{float:none;margin-left:0;} [class*="span"],.uneditable-input[class*="span"],.row-fluid 
[class*="span"]{float:none;display:block;width:100%;margin-left:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;} .span12,.row-fluid .span12{width:100%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;} .row-fluid [class*="offset"]:first-child{margin-left:0;} .input-large,.input-xlarge,.input-xxlarge,input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;} .input-prepend input,.input-append input,.input-prepend input[class*="span"],.input-append input[class*="span"]{display:inline-block;width:auto;} .controls-row [class*="span"]+[class*="span"]{margin-left:0;} .modal{position:fixed;top:20px;left:20px;right:20px;width:auto;margin:0;}.modal.fade{top:-100px;} .modal.fade.in{top:20px;}}@media (max-width:480px){.nav-collapse{-webkit-transform:translate3d(0, 0, 0);} .page-header h1 small{display:block;line-height:20px;} input[type="checkbox"],input[type="radio"]{border:1px solid #ccc;} .form-horizontal .control-label{float:none;width:auto;padding-top:0;text-align:left;} .form-horizontal .controls{margin-left:0;} .form-horizontal .control-list{padding-top:0;} .form-horizontal .form-actions{padding-left:10px;padding-right:10px;} .media .pull-left,.media .pull-right{float:none;display:block;margin-bottom:10px;} .media-object{margin-right:0;margin-left:0;} .modal{top:10px;left:10px;right:10px;} .modal-header .close{padding:10px;margin:-10px;} .carousel-caption{position:static;}}@media (min-width:768px) and (max-width:979px){.row{margin-left:-20px;*zoom:1;}.row:before,.row:after{display:table;content:"";line-height:0;} .row:after{clear:both;} [class*="span"]{float:left;min-height:1px;margin-left:20px;} .container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:724px;} .span12{width:724px;} .span11{width:662px;} .span10{width:600px;} .span9{width:538px;} .span8{width:476px;} .span7{width:414px;} .span6{width:352px;} .span5{width:290px;} .span4{width:228px;} .span3{width:166px;} .span2{width:104px;} .span1{width:42px;} .offset12{margin-left:764px;} .offset11{margin-left:702px;} .offset10{margin-left:640px;} .offset9{margin-left:578px;} .offset8{margin-left:516px;} .offset7{margin-left:454px;} .offset6{margin-left:392px;} .offset5{margin-left:330px;} .offset4{margin-left:268px;} .offset3{margin-left:206px;} .offset2{margin-left:144px;} .offset1{margin-left:82px;} .row-fluid{width:100%;*zoom:1;}.row-fluid:before,.row-fluid:after{display:table;content:"";line-height:0;} .row-fluid:after{clear:both;} .row-fluid [class*="span"]{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;float:left;margin-left:2.7624309392265194%;*margin-left:2.709239449864817%;} .row-fluid [class*="span"]:first-child{margin-left:0;} .row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.7624309392265194%;} .row-fluid .span12{width:100%;*width:99.94680851063829%;} .row-fluid .span11{width:91.43646408839778%;*width:91.38327259903608%;} .row-fluid .span10{width:82.87292817679558%;*width:82.81973668743387%;} .row-fluid .span9{width:74.30939226519337%;*width:74.25620077583166%;} .row-fluid .span8{width:65.74585635359117%;*width:65.69266486422946%;} .row-fluid .span7{width:57.18232044198895%;*width:57.12912895262725%;} .row-fluid .span6{width:48.61878453038674%;*width:48.56559304102504%;} 
.row-fluid .span5{width:40.05524861878453%;*width:40.00205712942283%;} .row-fluid .span4{width:31.491712707182323%;*width:31.43852121782062%;} .row-fluid .span3{width:22.92817679558011%;*width:22.87498530621841%;} .row-fluid .span2{width:14.3646408839779%;*width:14.311449394616199%;} .row-fluid .span1{width:5.801104972375691%;*width:5.747913483013988%;} .row-fluid .offset12{margin-left:105.52486187845304%;*margin-left:105.41847889972962%;} .row-fluid .offset12:first-child{margin-left:102.76243093922652%;*margin-left:102.6560479605031%;} .row-fluid .offset11{margin-left:96.96132596685082%;*margin-left:96.8549429881274%;} .row-fluid .offset11:first-child{margin-left:94.1988950276243%;*margin-left:94.09251204890089%;} .row-fluid .offset10{margin-left:88.39779005524862%;*margin-left:88.2914070765252%;} .row-fluid .offset10:first-child{margin-left:85.6353591160221%;*margin-left:85.52897613729868%;} .row-fluid .offset9{margin-left:79.8342541436464%;*margin-left:79.72787116492299%;} .row-fluid .offset9:first-child{margin-left:77.07182320441989%;*margin-left:76.96544022569647%;} .row-fluid .offset8{margin-left:71.2707182320442%;*margin-left:71.16433525332079%;} .row-fluid .offset8:first-child{margin-left:68.50828729281768%;*margin-left:68.40190431409427%;} .row-fluid .offset7{margin-left:62.70718232044199%;*margin-left:62.600799341718584%;} .row-fluid .offset7:first-child{margin-left:59.94475138121547%;*margin-left:59.838368402492065%;} .row-fluid .offset6{margin-left:54.14364640883978%;*margin-left:54.037263430116376%;} .row-fluid .offset6:first-child{margin-left:51.38121546961326%;*margin-left:51.27483249088986%;} .row-fluid .offset5{margin-left:45.58011049723757%;*margin-left:45.47372751851417%;} .row-fluid .offset5:first-child{margin-left:42.81767955801105%;*margin-left:42.71129657928765%;} .row-fluid .offset4{margin-left:37.01657458563536%;*margin-left:36.91019160691196%;} .row-fluid .offset4:first-child{margin-left:34.25414364640884%;*margin-left:34.14776066768544%;} .row-fluid .offset3{margin-left:28.45303867403315%;*margin-left:28.346655695309746%;} .row-fluid .offset3:first-child{margin-left:25.69060773480663%;*margin-left:25.584224756083227%;} .row-fluid .offset2{margin-left:19.88950276243094%;*margin-left:19.783119783707537%;} .row-fluid .offset2:first-child{margin-left:17.12707182320442%;*margin-left:17.02068884448102%;} .row-fluid .offset1{margin-left:11.32596685082873%;*margin-left:11.219583872105325%;} .row-fluid .offset1:first-child{margin-left:8.56353591160221%;*margin-left:8.457152932878806%;} input,textarea,.uneditable-input{margin-left:0;} .controls-row [class*="span"]+[class*="span"]{margin-left:20px;} input.span12,textarea.span12,.uneditable-input.span12{width:710px;} input.span11,textarea.span11,.uneditable-input.span11{width:648px;} input.span10,textarea.span10,.uneditable-input.span10{width:586px;} input.span9,textarea.span9,.uneditable-input.span9{width:524px;} input.span8,textarea.span8,.uneditable-input.span8{width:462px;} input.span7,textarea.span7,.uneditable-input.span7{width:400px;} input.span6,textarea.span6,.uneditable-input.span6{width:338px;} input.span5,textarea.span5,.uneditable-input.span5{width:276px;} input.span4,textarea.span4,.uneditable-input.span4{width:214px;} input.span3,textarea.span3,.uneditable-input.span3{width:152px;} input.span2,textarea.span2,.uneditable-input.span2{width:90px;} input.span1,textarea.span1,.uneditable-input.span1{width:28px;}}@media 
(min-width:1200px){.row{margin-left:-30px;*zoom:1;}.row:before,.row:after{display:table;content:"";line-height:0;} .row:after{clear:both;} [class*="span"]{float:left;min-height:1px;margin-left:30px;} .container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:1170px;} .span12{width:1170px;} .span11{width:1070px;} .span10{width:970px;} .span9{width:870px;} .span8{width:770px;} .span7{width:670px;} .span6{width:570px;} .span5{width:470px;} .span4{width:370px;} .span3{width:270px;} .span2{width:170px;} .span1{width:70px;} .offset12{margin-left:1230px;} .offset11{margin-left:1130px;} .offset10{margin-left:1030px;} .offset9{margin-left:930px;} .offset8{margin-left:830px;} .offset7{margin-left:730px;} .offset6{margin-left:630px;} .offset5{margin-left:530px;} .offset4{margin-left:430px;} .offset3{margin-left:330px;} .offset2{margin-left:230px;} .offset1{margin-left:130px;} .row-fluid{width:100%;*zoom:1;}.row-fluid:before,.row-fluid:after{display:table;content:"";line-height:0;} .row-fluid:after{clear:both;} .row-fluid [class*="span"]{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;float:left;margin-left:2.564102564102564%;*margin-left:2.5109110747408616%;} .row-fluid [class*="span"]:first-child{margin-left:0;} .row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.564102564102564%;} .row-fluid .span12{width:100%;*width:99.94680851063829%;} .row-fluid .span11{width:91.45299145299145%;*width:91.39979996362975%;} .row-fluid .span10{width:82.90598290598291%;*width:82.8527914166212%;} .row-fluid .span9{width:74.35897435897436%;*width:74.30578286961266%;} .row-fluid .span8{width:65.81196581196582%;*width:65.75877432260411%;} .row-fluid .span7{width:57.26495726495726%;*width:57.21176577559556%;} .row-fluid .span6{width:48.717948717948715%;*width:48.664757228587014%;} .row-fluid .span5{width:40.17094017094017%;*width:40.11774868157847%;} .row-fluid .span4{width:31.623931623931625%;*width:31.570740134569924%;} .row-fluid .span3{width:23.076923076923077%;*width:23.023731587561375%;} .row-fluid .span2{width:14.52991452991453%;*width:14.476723040552828%;} .row-fluid .span1{width:5.982905982905983%;*width:5.929714493544281%;} .row-fluid .offset12{margin-left:105.12820512820512%;*margin-left:105.02182214948171%;} .row-fluid .offset12:first-child{margin-left:102.56410256410257%;*margin-left:102.45771958537915%;} .row-fluid .offset11{margin-left:96.58119658119658%;*margin-left:96.47481360247316%;} .row-fluid .offset11:first-child{margin-left:94.01709401709402%;*margin-left:93.91071103837061%;} .row-fluid .offset10{margin-left:88.03418803418803%;*margin-left:87.92780505546462%;} .row-fluid .offset10:first-child{margin-left:85.47008547008548%;*margin-left:85.36370249136206%;} .row-fluid .offset9{margin-left:79.48717948717949%;*margin-left:79.38079650845607%;} .row-fluid .offset9:first-child{margin-left:76.92307692307693%;*margin-left:76.81669394435352%;} .row-fluid .offset8{margin-left:70.94017094017094%;*margin-left:70.83378796144753%;} .row-fluid .offset8:first-child{margin-left:68.37606837606839%;*margin-left:68.26968539734497%;} .row-fluid .offset7{margin-left:62.393162393162385%;*margin-left:62.28677941443899%;} .row-fluid .offset7:first-child{margin-left:59.82905982905982%;*margin-left:59.72267685033642%;} .row-fluid .offset6{margin-left:53.84615384615384%;*margin-left:53.739770867430444%;} .row-fluid 
.offset6:first-child{margin-left:51.28205128205128%;*margin-left:51.175668303327875%;} .row-fluid .offset5{margin-left:45.299145299145295%;*margin-left:45.1927623204219%;} .row-fluid .offset5:first-child{margin-left:42.73504273504273%;*margin-left:42.62865975631933%;} .row-fluid .offset4{margin-left:36.75213675213675%;*margin-left:36.645753773413354%;} .row-fluid .offset4:first-child{margin-left:34.18803418803419%;*margin-left:34.081651209310785%;} .row-fluid .offset3{margin-left:28.205128205128204%;*margin-left:28.0987452264048%;} .row-fluid .offset3:first-child{margin-left:25.641025641025642%;*margin-left:25.53464266230224%;} .row-fluid .offset2{margin-left:19.65811965811966%;*margin-left:19.551736679396257%;} .row-fluid .offset2:first-child{margin-left:17.094017094017094%;*margin-left:16.98763411529369%;} .row-fluid .offset1{margin-left:11.11111111111111%;*margin-left:11.004728132387708%;} .row-fluid .offset1:first-child{margin-left:8.547008547008547%;*margin-left:8.440625568285142%;} input,textarea,.uneditable-input{margin-left:0;} .controls-row [class*="span"]+[class*="span"]{margin-left:30px;} input.span12,textarea.span12,.uneditable-input.span12{width:1156px;} input.span11,textarea.span11,.uneditable-input.span11{width:1056px;} input.span10,textarea.span10,.uneditable-input.span10{width:956px;} input.span9,textarea.span9,.uneditable-input.span9{width:856px;} input.span8,textarea.span8,.uneditable-input.span8{width:756px;} input.span7,textarea.span7,.uneditable-input.span7{width:656px;} input.span6,textarea.span6,.uneditable-input.span6{width:556px;} input.span5,textarea.span5,.uneditable-input.span5{width:456px;} input.span4,textarea.span4,.uneditable-input.span4{width:356px;} input.span3,textarea.span3,.uneditable-input.span3{width:256px;} input.span2,textarea.span2,.uneditable-input.span2{width:156px;} input.span1,textarea.span1,.uneditable-input.span1{width:56px;} .thumbnails{margin-left:-30px;} .thumbnails>li{margin-left:30px;} .row-fluid .thumbnails{margin-left:0;}}@media (max-width:979px){body{padding-top:0;} .navbar-fixed-top,.navbar-fixed-bottom{position:static;} .navbar-fixed-top{margin-bottom:20px;} .navbar-fixed-bottom{margin-top:20px;} .navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding:5px;} .navbar .container{width:auto;padding:0;} .navbar .brand{padding-left:10px;padding-right:10px;margin:0 0 0 -5px;} .nav-collapse{clear:both;} .nav-collapse .nav{float:none;margin:0 0 10px;} .nav-collapse .nav>li{float:none;} .nav-collapse .nav>li>a{margin-bottom:2px;} .nav-collapse .nav>.divider-vertical{display:none;} .nav-collapse .nav .nav-header{color:#777777;text-shadow:none;} .nav-collapse .nav>li>a,.nav-collapse .dropdown-menu a{padding:9px 15px;font-weight:bold;color:#777777;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;} .nav-collapse .btn{padding:4px 10px 4px;font-weight:normal;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;} .nav-collapse .dropdown-menu li+li a{margin-bottom:2px;} .nav-collapse .nav>li>a:hover,.nav-collapse .nav>li>a:focus,.nav-collapse .dropdown-menu a:hover,.nav-collapse .dropdown-menu a:focus{background-color:#f2f2f2;} .navbar-inverse .nav-collapse .nav>li>a,.navbar-inverse .nav-collapse .dropdown-menu a{color:#999999;} .navbar-inverse .nav-collapse .nav>li>a:hover,.navbar-inverse .nav-collapse .nav>li>a:focus,.navbar-inverse .nav-collapse .dropdown-menu a:hover,.navbar-inverse .nav-collapse .dropdown-menu a:focus{background-color:#111111;} .nav-collapse.in 
.btn-group{margin-top:5px;padding:0;} .nav-collapse .dropdown-menu{position:static;top:auto;left:auto;float:none;display:none;max-width:none;margin:0 15px;padding:0;background-color:transparent;border:none;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none;} .nav-collapse .open>.dropdown-menu{display:block;} .nav-collapse .dropdown-menu:before,.nav-collapse .dropdown-menu:after{display:none;} .nav-collapse .dropdown-menu .divider{display:none;} .nav-collapse .nav>li>.dropdown-menu:before,.nav-collapse .nav>li>.dropdown-menu:after{display:none;} .nav-collapse .navbar-form,.nav-collapse .navbar-search{float:none;padding:10px 15px;margin:10px 0;border-top:1px solid #f2f2f2;border-bottom:1px solid #f2f2f2;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.1);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.1);box-shadow:inset 0 1px 0 rgba(255,255,255,.1), 0 1px 0 rgba(255,255,255,.1);} .navbar-inverse .nav-collapse .navbar-form,.navbar-inverse .nav-collapse .navbar-search{border-top-color:#111111;border-bottom-color:#111111;} .navbar .nav-collapse .nav.pull-right{float:none;margin-left:0;} .nav-collapse,.nav-collapse.collapse{overflow:hidden;height:0;} .navbar .btn-navbar{display:block;} .navbar-static .navbar-inner{padding-left:10px;padding-right:10px;}}@media (min-width:980px){.nav-collapse.collapse{height:auto !important;overflow:visible !important;}}
diff --git a/core/src/main/resources/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
index 7abb9011cc..7abb9011cc 100644
--- a/core/src/main/resources/spark/ui/static/sorttable.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
diff --git a/core/src/main/resources/spark/ui/static/spark-logo-77x50px-hd.png b/core/src/main/resources/org/apache/spark/ui/static/spark-logo-77x50px-hd.png
Binary files differ
index 6c5f0993c4..6c5f0993c4 100644
--- a/core/src/main/resources/spark/ui/static/spark-logo-77x50px-hd.png
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark-logo-77x50px-hd.png
diff --git a/core/src/main/resources/spark/ui/static/spark_logo.png b/core/src/main/resources/org/apache/spark/ui/static/spark_logo.png
Binary files differ
index 4b18734779..4b18734779 100644
--- a/core/src/main/resources/spark/ui/static/spark_logo.png
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark_logo.png
diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css
new file mode 100644
index 0000000000..a600c06c04
--- /dev/null
+++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css
@@ -0,0 +1,63 @@
+.navbar {
+  height: 50px;
+  font-size: 15px;
+  margin-bottom: 15px;
+}
+
+.navbar .navbar-inner {
+  height: 50px;
+}
+
+.navbar .brand {
+  margin-right: 20px;
+  margin-bottom: 0;
+  margin-top: 0;
+  margin-left: 10px;
+  padding: 0;
+}
+
+.navbar .nav > li {
+  height: 50px;
+}
+
+.navbar .nav > li a {
+  height: 30px;
+  line-height: 30px;
+}
+
+.navbar-text {
+  height: 50px;
+  line-height: 50px;
+}
+
+table.sortable thead {
+  cursor: pointer;
+}
+
+.progress {
+  margin-bottom: 0px; position: relative
+}
+
+.progress-completed .bar,
+.progress .bar-completed {
+  background-color: #3EC0FF;
+  background-image: -moz-linear-gradient(top, #44CBFF, #34B0EE);
+  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#44CBFF), to(#34B0EE));
+  background-image: -webkit-linear-gradient(top, #44CBFF, #34B0EE);
+  background-image: -o-linear-gradient(top, #44CBFF, #34B0EE);
+  background-image: linear-gradient(to bottom, #64CBFF, #54B0EE);
+  background-repeat: repeat-x;
+  filter: progid:dximagetransform.microsoft.gradient(startColorstr='#FF44CBFF', endColorstr='#FF34B0EE', GradientType=0);
+}
+
+.progress-running .bar,
+.progress .bar-running {
+  background-color: #A0DFFF;
+  background-image: -moz-linear-gradient(top, #A4EDFF, #94DDFF);
+  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#A4EDFF), to(#94DDFF));
+  background-image: -webkit-linear-gradient(top, #A4EDFF, #94DDFF);
+  background-image: -o-linear-gradient(top, #A4EDFF, #94DDFF);
+  background-image: linear-gradient(to bottom, #A4EDFF, #94DDFF);
+  background-repeat: repeat-x;
+  filter: progid:dximagetransform.microsoft.gradient(startColorstr='#FFA4EDFF', endColorstr='#FF94DDFF', GradientType=0);
+}
diff --git a/core/src/main/resources/spark/ui/static/bootstrap-responsive.min.css b/core/src/main/resources/spark/ui/static/bootstrap-responsive.min.css
deleted file mode 100644
index f4ede63f32..0000000000
--- a/core/src/main/resources/spark/ui/static/bootstrap-responsive.min.css
+++ /dev/null
@@ -1,9 +0,0 @@
-/*!
- * Bootstrap Responsive v2.3.2
- *
- * Copyright 2012 Twitter, Inc
- * Licensed under the Apache License v2.0
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Designed and built with all the love in the world @twitter by @mdo and @fat.
- */.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;line-height:0;content:""}.clearfix:after{clear:both}.hide-text{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.input-block-level{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}@-ms-viewport{width:device-width}.hidden{display:none;visibility:hidden}.visible-phone{display:none!important}.visible-tablet{display:none!important}.hidden-desktop{display:none!important}.visible-desktop{display:inherit!important}@media(min-width:768px) and (max-width:979px){.hidden-desktop{display:inherit!important}.visible-desktop{display:none!important}.visible-tablet{display:inherit!important}.hidden-tablet{display:none!important}}@media(max-width:767px){.hidden-desktop{display:inherit!important}.visible-desktop{display:none!important}.visible-phone{display:inherit!important}.hidden-phone{display:none!important}}.visible-print{display:none!important}@media print{.visible-print{display:inherit!important}.hidden-print{display:none!important}}@media(min-width:1200px){.row{margin-left:-30px;*zoom:1}.row:before,.row:after{display:table;line-height:0;content:""}.row:after{clear:both}[class*="span"]{float:left;min-height:1px;margin-left:30px}.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom
.container{width:1170px}.span12{width:1170px}.span11{width:1070px}.span10{width:970px}.span9{width:870px}.span8{width:770px}.span7{width:670px}.span6{width:570px}.span5{width:470px}.span4{width:370px}.span3{width:270px}.span2{width:170px}.span1{width:70px}.offset12{margin-left:1230px}.offset11{margin-left:1130px}.offset10{margin-left:1030px}.offset9{margin-left:930px}.offset8{margin-left:830px}.offset7{margin-left:730px}.offset6{margin-left:630px}.offset5{margin-left:530px}.offset4{margin-left:430px}.offset3{margin-left:330px}.offset2{margin-left:230px}.offset1{margin-left:130px}.row-fluid{width:100%;*zoom:1}.row-fluid:before,.row-fluid:after{display:table;line-height:0;content:""}.row-fluid:after{clear:both}.row-fluid [class*="span"]{display:block;float:left;width:100%;min-height:30px;margin-left:2.564102564102564%;*margin-left:2.5109110747408616%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="span"]:first-child{margin-left:0}.row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.564102564102564%}.row-fluid .span12{width:100%;*width:99.94680851063829%}.row-fluid .span11{width:91.45299145299145%;*width:91.39979996362975%}.row-fluid .span10{width:82.90598290598291%;*width:82.8527914166212%}.row-fluid .span9{width:74.35897435897436%;*width:74.30578286961266%}.row-fluid .span8{width:65.81196581196582%;*width:65.75877432260411%}.row-fluid .span7{width:57.26495726495726%;*width:57.21176577559556%}.row-fluid .span6{width:48.717948717948715%;*width:48.664757228587014%}.row-fluid .span5{width:40.17094017094017%;*width:40.11774868157847%}.row-fluid .span4{width:31.623931623931625%;*width:31.570740134569924%}.row-fluid .span3{width:23.076923076923077%;*width:23.023731587561375%}.row-fluid .span2{width:14.52991452991453%;*width:14.476723040552828%}.row-fluid .span1{width:5.982905982905983%;*width:5.929714493544281%}.row-fluid .offset12{margin-left:105.12820512820512%;*margin-left:105.02182214948171%}.row-fluid .offset12:first-child{margin-left:102.56410256410257%;*margin-left:102.45771958537915%}.row-fluid .offset11{margin-left:96.58119658119658%;*margin-left:96.47481360247316%}.row-fluid .offset11:first-child{margin-left:94.01709401709402%;*margin-left:93.91071103837061%}.row-fluid .offset10{margin-left:88.03418803418803%;*margin-left:87.92780505546462%}.row-fluid .offset10:first-child{margin-left:85.47008547008548%;*margin-left:85.36370249136206%}.row-fluid .offset9{margin-left:79.48717948717949%;*margin-left:79.38079650845607%}.row-fluid .offset9:first-child{margin-left:76.92307692307693%;*margin-left:76.81669394435352%}.row-fluid .offset8{margin-left:70.94017094017094%;*margin-left:70.83378796144753%}.row-fluid .offset8:first-child{margin-left:68.37606837606839%;*margin-left:68.26968539734497%}.row-fluid .offset7{margin-left:62.393162393162385%;*margin-left:62.28677941443899%}.row-fluid .offset7:first-child{margin-left:59.82905982905982%;*margin-left:59.72267685033642%}.row-fluid .offset6{margin-left:53.84615384615384%;*margin-left:53.739770867430444%}.row-fluid .offset6:first-child{margin-left:51.28205128205128%;*margin-left:51.175668303327875%}.row-fluid .offset5{margin-left:45.299145299145295%;*margin-left:45.1927623204219%}.row-fluid .offset5:first-child{margin-left:42.73504273504273%;*margin-left:42.62865975631933%}.row-fluid .offset4{margin-left:36.75213675213675%;*margin-left:36.645753773413354%}.row-fluid .offset4:first-child{margin-left:34.18803418803419%;*margin-left:34.081651209310785%}.row-fluid 
.offset3{margin-left:28.205128205128204%;*margin-left:28.0987452264048%}.row-fluid .offset3:first-child{margin-left:25.641025641025642%;*margin-left:25.53464266230224%}.row-fluid .offset2{margin-left:19.65811965811966%;*margin-left:19.551736679396257%}.row-fluid .offset2:first-child{margin-left:17.094017094017094%;*margin-left:16.98763411529369%}.row-fluid .offset1{margin-left:11.11111111111111%;*margin-left:11.004728132387708%}.row-fluid .offset1:first-child{margin-left:8.547008547008547%;*margin-left:8.440625568285142%}input,textarea,.uneditable-input{margin-left:0}.controls-row [class*="span"]+[class*="span"]{margin-left:30px}input.span12,textarea.span12,.uneditable-input.span12{width:1156px}input.span11,textarea.span11,.uneditable-input.span11{width:1056px}input.span10,textarea.span10,.uneditable-input.span10{width:956px}input.span9,textarea.span9,.uneditable-input.span9{width:856px}input.span8,textarea.span8,.uneditable-input.span8{width:756px}input.span7,textarea.span7,.uneditable-input.span7{width:656px}input.span6,textarea.span6,.uneditable-input.span6{width:556px}input.span5,textarea.span5,.uneditable-input.span5{width:456px}input.span4,textarea.span4,.uneditable-input.span4{width:356px}input.span3,textarea.span3,.uneditable-input.span3{width:256px}input.span2,textarea.span2,.uneditable-input.span2{width:156px}input.span1,textarea.span1,.uneditable-input.span1{width:56px}.thumbnails{margin-left:-30px}.thumbnails>li{margin-left:30px}.row-fluid .thumbnails{margin-left:0}}@media(min-width:768px) and (max-width:979px){.row{margin-left:-20px;*zoom:1}.row:before,.row:after{display:table;line-height:0;content:""}.row:after{clear:both}[class*="span"]{float:left;min-height:1px;margin-left:20px}.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:724px}.span12{width:724px}.span11{width:662px}.span10{width:600px}.span9{width:538px}.span8{width:476px}.span7{width:414px}.span6{width:352px}.span5{width:290px}.span4{width:228px}.span3{width:166px}.span2{width:104px}.span1{width:42px}.offset12{margin-left:764px}.offset11{margin-left:702px}.offset10{margin-left:640px}.offset9{margin-left:578px}.offset8{margin-left:516px}.offset7{margin-left:454px}.offset6{margin-left:392px}.offset5{margin-left:330px}.offset4{margin-left:268px}.offset3{margin-left:206px}.offset2{margin-left:144px}.offset1{margin-left:82px}.row-fluid{width:100%;*zoom:1}.row-fluid:before,.row-fluid:after{display:table;line-height:0;content:""}.row-fluid:after{clear:both}.row-fluid [class*="span"]{display:block;float:left;width:100%;min-height:30px;margin-left:2.7624309392265194%;*margin-left:2.709239449864817%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="span"]:first-child{margin-left:0}.row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.7624309392265194%}.row-fluid .span12{width:100%;*width:99.94680851063829%}.row-fluid .span11{width:91.43646408839778%;*width:91.38327259903608%}.row-fluid .span10{width:82.87292817679558%;*width:82.81973668743387%}.row-fluid .span9{width:74.30939226519337%;*width:74.25620077583166%}.row-fluid .span8{width:65.74585635359117%;*width:65.69266486422946%}.row-fluid .span7{width:57.18232044198895%;*width:57.12912895262725%}.row-fluid .span6{width:48.61878453038674%;*width:48.56559304102504%}.row-fluid .span5{width:40.05524861878453%;*width:40.00205712942283%}.row-fluid .span4{width:31.491712707182323%;*width:31.43852121782062%}.row-fluid 
.span3{width:22.92817679558011%;*width:22.87498530621841%}.row-fluid .span2{width:14.3646408839779%;*width:14.311449394616199%}.row-fluid .span1{width:5.801104972375691%;*width:5.747913483013988%}.row-fluid .offset12{margin-left:105.52486187845304%;*margin-left:105.41847889972962%}.row-fluid .offset12:first-child{margin-left:102.76243093922652%;*margin-left:102.6560479605031%}.row-fluid .offset11{margin-left:96.96132596685082%;*margin-left:96.8549429881274%}.row-fluid .offset11:first-child{margin-left:94.1988950276243%;*margin-left:94.09251204890089%}.row-fluid .offset10{margin-left:88.39779005524862%;*margin-left:88.2914070765252%}.row-fluid .offset10:first-child{margin-left:85.6353591160221%;*margin-left:85.52897613729868%}.row-fluid .offset9{margin-left:79.8342541436464%;*margin-left:79.72787116492299%}.row-fluid .offset9:first-child{margin-left:77.07182320441989%;*margin-left:76.96544022569647%}.row-fluid .offset8{margin-left:71.2707182320442%;*margin-left:71.16433525332079%}.row-fluid .offset8:first-child{margin-left:68.50828729281768%;*margin-left:68.40190431409427%}.row-fluid .offset7{margin-left:62.70718232044199%;*margin-left:62.600799341718584%}.row-fluid .offset7:first-child{margin-left:59.94475138121547%;*margin-left:59.838368402492065%}.row-fluid .offset6{margin-left:54.14364640883978%;*margin-left:54.037263430116376%}.row-fluid .offset6:first-child{margin-left:51.38121546961326%;*margin-left:51.27483249088986%}.row-fluid .offset5{margin-left:45.58011049723757%;*margin-left:45.47372751851417%}.row-fluid .offset5:first-child{margin-left:42.81767955801105%;*margin-left:42.71129657928765%}.row-fluid .offset4{margin-left:37.01657458563536%;*margin-left:36.91019160691196%}.row-fluid .offset4:first-child{margin-left:34.25414364640884%;*margin-left:34.14776066768544%}.row-fluid .offset3{margin-left:28.45303867403315%;*margin-left:28.346655695309746%}.row-fluid .offset3:first-child{margin-left:25.69060773480663%;*margin-left:25.584224756083227%}.row-fluid .offset2{margin-left:19.88950276243094%;*margin-left:19.783119783707537%}.row-fluid .offset2:first-child{margin-left:17.12707182320442%;*margin-left:17.02068884448102%}.row-fluid .offset1{margin-left:11.32596685082873%;*margin-left:11.219583872105325%}.row-fluid .offset1:first-child{margin-left:8.56353591160221%;*margin-left:8.457152932878806%}input,textarea,.uneditable-input{margin-left:0}.controls-row [class*="span"]+[class*="span"]{margin-left:20px}input.span12,textarea.span12,.uneditable-input.span12{width:710px}input.span11,textarea.span11,.uneditable-input.span11{width:648px}input.span10,textarea.span10,.uneditable-input.span10{width:586px}input.span9,textarea.span9,.uneditable-input.span9{width:524px}input.span8,textarea.span8,.uneditable-input.span8{width:462px}input.span7,textarea.span7,.uneditable-input.span7{width:400px}input.span6,textarea.span6,.uneditable-input.span6{width:338px}input.span5,textarea.span5,.uneditable-input.span5{width:276px}input.span4,textarea.span4,.uneditable-input.span4{width:214px}input.span3,textarea.span3,.uneditable-input.span3{width:152px}input.span2,textarea.span2,.uneditable-input.span2{width:90px}input.span1,textarea.span1,.uneditable-input.span1{width:28px}}@media(max-width:767px){body{padding-right:20px;padding-left:20px}.navbar-fixed-top,.navbar-fixed-bottom,.navbar-static-top{margin-right:-20px;margin-left:-20px}.container-fluid{padding:0}.dl-horizontal dt{float:none;width:auto;clear:none;text-align:left}.dl-horizontal 
dd{margin-left:0}.container{width:auto}.row-fluid{width:100%}.row,.thumbnails{margin-left:0}.thumbnails>li{float:none;margin-left:0}[class*="span"],.uneditable-input[class*="span"],.row-fluid [class*="span"]{display:block;float:none;width:100%;margin-left:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.span12,.row-fluid .span12{width:100%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="offset"]:first-child{margin-left:0}.input-large,.input-xlarge,.input-xxlarge,input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.input-prepend input,.input-append input,.input-prepend input[class*="span"],.input-append input[class*="span"]{display:inline-block;width:auto}.controls-row [class*="span"]+[class*="span"]{margin-left:0}.modal{position:fixed;top:20px;right:20px;left:20px;width:auto;margin:0}.modal.fade{top:-100px}.modal.fade.in{top:20px}}@media(max-width:480px){.nav-collapse{-webkit-transform:translate3d(0,0,0)}.page-header h1 small{display:block;line-height:20px}input[type="checkbox"],input[type="radio"]{border:1px solid #ccc}.form-horizontal .control-label{float:none;width:auto;padding-top:0;text-align:left}.form-horizontal .controls{margin-left:0}.form-horizontal .control-list{padding-top:0}.form-horizontal .form-actions{padding-right:10px;padding-left:10px}.media .pull-left,.media .pull-right{display:block;float:none;margin-bottom:10px}.media-object{margin-right:0;margin-left:0}.modal{top:10px;right:10px;left:10px}.modal-header .close{padding:10px;margin:-10px}.carousel-caption{position:static}}@media(max-width:979px){body{padding-top:0}.navbar-fixed-top,.navbar-fixed-bottom{position:static}.navbar-fixed-top{margin-bottom:20px}.navbar-fixed-bottom{margin-top:20px}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding:5px}.navbar .container{width:auto;padding:0}.navbar .brand{padding-right:10px;padding-left:10px;margin:0 0 0 -5px}.nav-collapse{clear:both}.nav-collapse .nav{float:none;margin:0 0 10px}.nav-collapse .nav>li{float:none}.nav-collapse .nav>li>a{margin-bottom:2px}.nav-collapse .nav>.divider-vertical{display:none}.nav-collapse .nav .nav-header{color:#777;text-shadow:none}.nav-collapse .nav>li>a,.nav-collapse .dropdown-menu a{padding:9px 15px;font-weight:bold;color:#777;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.nav-collapse .btn{padding:4px 10px 4px;font-weight:normal;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.nav-collapse .dropdown-menu li+li a{margin-bottom:2px}.nav-collapse .nav>li>a:hover,.nav-collapse .nav>li>a:focus,.nav-collapse .dropdown-menu a:hover,.nav-collapse .dropdown-menu a:focus{background-color:#f2f2f2}.navbar-inverse .nav-collapse .nav>li>a,.navbar-inverse .nav-collapse .dropdown-menu a{color:#999}.navbar-inverse .nav-collapse .nav>li>a:hover,.navbar-inverse .nav-collapse .nav>li>a:focus,.navbar-inverse .nav-collapse .dropdown-menu a:hover,.navbar-inverse .nav-collapse .dropdown-menu a:focus{background-color:#111}.nav-collapse.in .btn-group{padding:0;margin-top:5px}.nav-collapse .dropdown-menu{position:static;top:auto;left:auto;display:none;float:none;max-width:none;padding:0;margin:0 15px;background-color:transparent;border:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.nav-collapse 
.open>.dropdown-menu{display:block}.nav-collapse .dropdown-menu:before,.nav-collapse .dropdown-menu:after{display:none}.nav-collapse .dropdown-menu .divider{display:none}.nav-collapse .nav>li>.dropdown-menu:before,.nav-collapse .nav>li>.dropdown-menu:after{display:none}.nav-collapse .navbar-form,.nav-collapse .navbar-search{float:none;padding:10px 15px;margin:10px 0;border-top:1px solid #f2f2f2;border-bottom:1px solid #f2f2f2;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.1);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.1);box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.1)}.navbar-inverse .nav-collapse .navbar-form,.navbar-inverse .nav-collapse .navbar-search{border-top-color:#111;border-bottom-color:#111}.navbar .nav-collapse .nav.pull-right{float:none;margin-left:0}.nav-collapse,.nav-collapse.collapse{height:0;overflow:hidden}.navbar .btn-navbar{display:block}.navbar-static .navbar-inner{padding-right:10px;padding-left:10px}}@media(min-width:980px){.nav-collapse.collapse{height:auto!important;overflow:visible!important}}
diff --git a/core/src/main/resources/spark/ui/static/bootstrap.min.css b/core/src/main/resources/spark/ui/static/bootstrap.min.css
deleted file mode 100644
index b6428e6958..0000000000
--- a/core/src/main/resources/spark/ui/static/bootstrap.min.css
+++ /dev/null
@@ -1,9 +0,0 @@
-/*!
- * Bootstrap v2.3.2
- *
- * Copyright 2012 Twitter, Inc
- * Licensed under the Apache License v2.0
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Designed and built with all the love in the world @twitter by @mdo and @fat.
- */.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;line-height:0;content:""}.clearfix:after{clear:both}.hide-text{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.input-block-level{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}audio:not([controls]){display:none}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}a:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}a:hover,a:active{outline:0}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sup{top:-0.5em}sub{bottom:-0.25em}img{width:auto\9;height:auto;max-width:100%;vertical-align:middle;border:0;-ms-interpolation-mode:bicubic}#map_canvas img,.google-maps img{max-width:none}button,input,select,textarea{margin:0;font-size:100%;vertical-align:middle}button,input{*overflow:visible;line-height:normal}button::-moz-focus-inner,input::-moz-focus-inner{padding:0;border:0}button,html input[type="button"],input[type="reset"],input[type="submit"]{cursor:pointer;-webkit-appearance:button}label,select,button,input[type="button"],input[type="reset"],input[type="submit"],input[type="radio"],input[type="checkbox"]{cursor:pointer}input[type="search"]{-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;-webkit-appearance:textfield}input[type="search"]::-webkit-search-decoration,input[type="search"]::-webkit-search-cancel-button{-webkit-appearance:none}textarea{overflow:auto;vertical-align:top}@media
print{*{color:#000!important;text-shadow:none!important;background:transparent!important;box-shadow:none!important}a,a:visited{text-decoration:underline}a[href]:after{content:" (" attr(href) ")"}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100%!important}@page{margin:.5cm}p,h2,h3{orphans:3;widows:3}h2,h3{page-break-after:avoid}}body{margin:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;line-height:20px;color:#333;background-color:#fff}a{color:#08c;text-decoration:none}a:hover,a:focus{color:#005580;text-decoration:underline}.img-rounded{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.img-polaroid{padding:4px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.1);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.1);box-shadow:0 1px 3px rgba(0,0,0,0.1)}.img-circle{-webkit-border-radius:500px;-moz-border-radius:500px;border-radius:500px}.row{margin-left:-20px;*zoom:1}.row:before,.row:after{display:table;line-height:0;content:""}.row:after{clear:both}[class*="span"]{float:left;min-height:1px;margin-left:20px}.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.span12{width:940px}.span11{width:860px}.span10{width:780px}.span9{width:700px}.span8{width:620px}.span7{width:540px}.span6{width:460px}.span5{width:380px}.span4{width:300px}.span3{width:220px}.span2{width:140px}.span1{width:60px}.offset12{margin-left:980px}.offset11{margin-left:900px}.offset10{margin-left:820px}.offset9{margin-left:740px}.offset8{margin-left:660px}.offset7{margin-left:580px}.offset6{margin-left:500px}.offset5{margin-left:420px}.offset4{margin-left:340px}.offset3{margin-left:260px}.offset2{margin-left:180px}.offset1{margin-left:100px}.row-fluid{width:100%;*zoom:1}.row-fluid:before,.row-fluid:after{display:table;line-height:0;content:""}.row-fluid:after{clear:both}.row-fluid [class*="span"]{display:block;float:left;width:100%;min-height:30px;margin-left:2.127659574468085%;*margin-left:2.074468085106383%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="span"]:first-child{margin-left:0}.row-fluid .controls-row [class*="span"]+[class*="span"]{margin-left:2.127659574468085%}.row-fluid .span12{width:100%;*width:99.94680851063829%}.row-fluid .span11{width:91.48936170212765%;*width:91.43617021276594%}.row-fluid .span10{width:82.97872340425532%;*width:82.92553191489361%}.row-fluid .span9{width:74.46808510638297%;*width:74.41489361702126%}.row-fluid .span8{width:65.95744680851064%;*width:65.90425531914893%}.row-fluid .span7{width:57.44680851063829%;*width:57.39361702127659%}.row-fluid .span6{width:48.93617021276595%;*width:48.88297872340425%}.row-fluid .span5{width:40.42553191489362%;*width:40.37234042553192%}.row-fluid .span4{width:31.914893617021278%;*width:31.861702127659576%}.row-fluid .span3{width:23.404255319148934%;*width:23.351063829787233%}.row-fluid .span2{width:14.893617021276595%;*width:14.840425531914894%}.row-fluid .span1{width:6.382978723404255%;*width:6.329787234042553%}.row-fluid .offset12{margin-left:104.25531914893617%;*margin-left:104.14893617021275%}.row-fluid .offset12:first-child{margin-left:102.12765957446808%;*margin-left:102.02127659574467%}.row-fluid 
.offset11{margin-left:95.74468085106382%;*margin-left:95.6382978723404%}.row-fluid .offset11:first-child{margin-left:93.61702127659574%;*margin-left:93.51063829787232%}.row-fluid .offset10{margin-left:87.23404255319149%;*margin-left:87.12765957446807%}.row-fluid .offset10:first-child{margin-left:85.1063829787234%;*margin-left:84.99999999999999%}.row-fluid .offset9{margin-left:78.72340425531914%;*margin-left:78.61702127659572%}.row-fluid .offset9:first-child{margin-left:76.59574468085106%;*margin-left:76.48936170212764%}.row-fluid .offset8{margin-left:70.2127659574468%;*margin-left:70.10638297872339%}.row-fluid .offset8:first-child{margin-left:68.08510638297872%;*margin-left:67.9787234042553%}.row-fluid .offset7{margin-left:61.70212765957446%;*margin-left:61.59574468085106%}.row-fluid .offset7:first-child{margin-left:59.574468085106375%;*margin-left:59.46808510638297%}.row-fluid .offset6{margin-left:53.191489361702125%;*margin-left:53.085106382978715%}.row-fluid .offset6:first-child{margin-left:51.063829787234035%;*margin-left:50.95744680851063%}.row-fluid .offset5{margin-left:44.68085106382979%;*margin-left:44.57446808510638%}.row-fluid .offset5:first-child{margin-left:42.5531914893617%;*margin-left:42.4468085106383%}.row-fluid .offset4{margin-left:36.170212765957444%;*margin-left:36.06382978723405%}.row-fluid .offset4:first-child{margin-left:34.04255319148936%;*margin-left:33.93617021276596%}.row-fluid .offset3{margin-left:27.659574468085104%;*margin-left:27.5531914893617%}.row-fluid .offset3:first-child{margin-left:25.53191489361702%;*margin-left:25.425531914893618%}.row-fluid .offset2{margin-left:19.148936170212764%;*margin-left:19.04255319148936%}.row-fluid .offset2:first-child{margin-left:17.02127659574468%;*margin-left:16.914893617021278%}.row-fluid .offset1{margin-left:10.638297872340425%;*margin-left:10.53191489361702%}.row-fluid .offset1:first-child{margin-left:8.51063829787234%;*margin-left:8.404255319148938%}[class*="span"].hide,.row-fluid [class*="span"].hide{display:none}[class*="span"].pull-right,.row-fluid [class*="span"].pull-right{float:right}.container{margin-right:auto;margin-left:auto;*zoom:1}.container:before,.container:after{display:table;line-height:0;content:""}.container:after{clear:both}.container-fluid{padding-right:20px;padding-left:20px;*zoom:1}.container-fluid:before,.container-fluid:after{display:table;line-height:0;content:""}.container-fluid:after{clear:both}p{margin:0 0 10px}.lead{margin-bottom:20px;font-size:21px;font-weight:200;line-height:30px}small{font-size:85%}strong{font-weight:bold}em{font-style:italic}cite{font-style:normal}.muted{color:#999}a.muted:hover,a.muted:focus{color:#808080}.text-warning{color:#c09853}a.text-warning:hover,a.text-warning:focus{color:#a47e3c}.text-error{color:#b94a48}a.text-error:hover,a.text-error:focus{color:#953b39}.text-info{color:#3a87ad}a.text-info:hover,a.text-info:focus{color:#2d6987}.text-success{color:#468847}a.text-success:hover,a.text-success:focus{color:#356635}.text-left{text-align:left}.text-right{text-align:right}.text-center{text-align:center}h1,h2,h3,h4,h5,h6{margin:10px 0;font-family:inherit;font-weight:bold;line-height:20px;color:inherit;text-rendering:optimizelegibility}h1 small,h2 small,h3 small,h4 small,h5 small,h6 small{font-weight:normal;line-height:1;color:#999}h1,h2,h3{line-height:40px}h1{font-size:38.5px}h2{font-size:31.5px}h3{font-size:24.5px}h4{font-size:17.5px}h5{font-size:14px}h6{font-size:11.9px}h1 small{font-size:24.5px}h2 small{font-size:17.5px}h3 small{font-size:14px}h4 
small{font-size:14px}.page-header{padding-bottom:9px;margin:20px 0 30px;border-bottom:1px solid #eee}ul,ol{padding:0;margin:0 0 10px 25px}ul ul,ul ol,ol ol,ol ul{margin-bottom:0}li{line-height:20px}ul.unstyled,ol.unstyled{margin-left:0;list-style:none}ul.inline,ol.inline{margin-left:0;list-style:none}ul.inline>li,ol.inline>li{display:inline-block;*display:inline;padding-right:5px;padding-left:5px;*zoom:1}dl{margin-bottom:20px}dt,dd{line-height:20px}dt{font-weight:bold}dd{margin-left:10px}.dl-horizontal{*zoom:1}.dl-horizontal:before,.dl-horizontal:after{display:table;line-height:0;content:""}.dl-horizontal:after{clear:both}.dl-horizontal dt{float:left;width:160px;overflow:hidden;clear:left;text-align:right;text-overflow:ellipsis;white-space:nowrap}.dl-horizontal dd{margin-left:180px}hr{margin:20px 0;border:0;border-top:1px solid #eee;border-bottom:1px solid #fff}abbr[title],abbr[data-original-title]{cursor:help;border-bottom:1px dotted #999}abbr.initialism{font-size:90%;text-transform:uppercase}blockquote{padding:0 0 0 15px;margin:0 0 20px;border-left:5px solid #eee}blockquote p{margin-bottom:0;font-size:17.5px;font-weight:300;line-height:1.25}blockquote small{display:block;line-height:20px;color:#999}blockquote small:before{content:'\2014 \00A0'}blockquote.pull-right{float:right;padding-right:15px;padding-left:0;border-right:5px solid #eee;border-left:0}blockquote.pull-right p,blockquote.pull-right small{text-align:right}blockquote.pull-right small:before{content:''}blockquote.pull-right small:after{content:'\00A0 \2014'}q:before,q:after,blockquote:before,blockquote:after{content:""}address{display:block;margin-bottom:20px;font-style:normal;line-height:20px}code,pre{padding:0 3px 2px;font-family:Monaco,Menlo,Consolas,"Courier New",monospace;font-size:12px;color:#333;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}code{padding:2px 4px;color:#d14;white-space:nowrap;background-color:#f7f7f9;border:1px solid #e1e1e8}pre{display:block;padding:9.5px;margin:0 0 10px;font-size:13px;line-height:20px;word-break:break-all;word-wrap:break-word;white-space:pre;white-space:pre-wrap;background-color:#f5f5f5;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.15);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}pre.prettyprint{margin-bottom:20px}pre code{padding:0;color:inherit;white-space:pre;white-space:pre-wrap;background-color:transparent;border:0}.pre-scrollable{max-height:340px;overflow-y:scroll}form{margin:0 0 20px}fieldset{padding:0;margin:0;border:0}legend{display:block;width:100%;padding:0;margin-bottom:20px;font-size:21px;line-height:40px;color:#333;border:0;border-bottom:1px solid #e5e5e5}legend small{font-size:15px;color:#999}label,input,button,select,textarea{font-size:14px;font-weight:normal;line-height:20px}input,button,select,textarea{font-family:"Helvetica Neue",Helvetica,Arial,sans-serif}label{display:block;margin-bottom:5px}select,textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{display:inline-block;height:20px;padding:4px 
6px;margin-bottom:10px;font-size:14px;line-height:20px;color:#555;vertical-align:middle;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}input,textarea,.uneditable-input{width:206px}textarea{height:auto}textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{background-color:#fff;border:1px solid #ccc;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-webkit-transition:border linear .2s,box-shadow linear .2s;-moz-transition:border linear .2s,box-shadow linear .2s;-o-transition:border linear .2s,box-shadow linear .2s;transition:border linear .2s,box-shadow linear .2s}textarea:focus,input[type="text"]:focus,input[type="password"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus,.uneditable-input:focus{border-color:rgba(82,168,236,0.8);outline:0;outline:thin dotted \9;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6)}input[type="radio"],input[type="checkbox"]{margin:4px 0 0;margin-top:1px \9;*margin-top:0;line-height:normal}input[type="file"],input[type="image"],input[type="submit"],input[type="reset"],input[type="button"],input[type="radio"],input[type="checkbox"]{width:auto}select,input[type="file"]{height:30px;*margin-top:4px;line-height:30px}select{width:220px;background-color:#fff;border:1px solid #ccc}select[multiple],select[size]{height:auto}select:focus,input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.uneditable-input,.uneditable-textarea{color:#999;cursor:not-allowed;background-color:#fcfcfc;border-color:#ccc;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);box-shadow:inset 0 1px 2px rgba(0,0,0,0.025)}.uneditable-input{overflow:hidden;white-space:nowrap}.uneditable-textarea{width:auto;height:auto}input:-moz-placeholder,textarea:-moz-placeholder{color:#999}input:-ms-input-placeholder,textarea:-ms-input-placeholder{color:#999}input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#999}.radio,.checkbox{min-height:20px;padding-left:20px}.radio input[type="radio"],.checkbox input[type="checkbox"]{float:left;margin-left:-20px}.controls>.radio:first-child,.controls>.checkbox:first-child{padding-top:5px}.radio.inline,.checkbox.inline{display:inline-block;padding-top:5px;margin-bottom:0;vertical-align:middle}.radio.inline+.radio.inline,.checkbox.inline+.checkbox.inline{margin-left:10px}.input-mini{width:60px}.input-small{width:90px}.input-medium{width:150px}.input-large{width:210px}.input-xlarge{width:270px}.input-xxlarge{width:530px}input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid 
select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"]{float:none;margin-left:0}.input-append input[class*="span"],.input-append .uneditable-input[class*="span"],.input-prepend input[class*="span"],.input-prepend .uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"],.row-fluid .input-prepend [class*="span"],.row-fluid .input-append [class*="span"]{display:inline-block}input,textarea,.uneditable-input{margin-left:0}.controls-row [class*="span"]+[class*="span"]{margin-left:20px}input.span12,textarea.span12,.uneditable-input.span12{width:926px}input.span11,textarea.span11,.uneditable-input.span11{width:846px}input.span10,textarea.span10,.uneditable-input.span10{width:766px}input.span9,textarea.span9,.uneditable-input.span9{width:686px}input.span8,textarea.span8,.uneditable-input.span8{width:606px}input.span7,textarea.span7,.uneditable-input.span7{width:526px}input.span6,textarea.span6,.uneditable-input.span6{width:446px}input.span5,textarea.span5,.uneditable-input.span5{width:366px}input.span4,textarea.span4,.uneditable-input.span4{width:286px}input.span3,textarea.span3,.uneditable-input.span3{width:206px}input.span2,textarea.span2,.uneditable-input.span2{width:126px}input.span1,textarea.span1,.uneditable-input.span1{width:46px}.controls-row{*zoom:1}.controls-row:before,.controls-row:after{display:table;line-height:0;content:""}.controls-row:after{clear:both}.controls-row [class*="span"],.row-fluid .controls-row [class*="span"]{float:left}.controls-row .checkbox[class*="span"],.controls-row .radio[class*="span"]{padding-top:5px}input[disabled],select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#eee}input[type="radio"][disabled],input[type="checkbox"][disabled],input[type="radio"][readonly],input[type="checkbox"][readonly]{background-color:transparent}.control-group.warning .control-label,.control-group.warning .help-block,.control-group.warning .help-inline{color:#c09853}.control-group.warning .checkbox,.control-group.warning .radio,.control-group.warning input,.control-group.warning select,.control-group.warning textarea{color:#c09853}.control-group.warning input,.control-group.warning select,.control-group.warning textarea{border-color:#c09853;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.warning input:focus,.control-group.warning select:focus,.control-group.warning textarea:focus{border-color:#a47e3c;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e}.control-group.warning .input-prepend .add-on,.control-group.warning .input-append .add-on{color:#c09853;background-color:#fcf8e3;border-color:#c09853}.control-group.error .control-label,.control-group.error .help-block,.control-group.error .help-inline{color:#b94a48}.control-group.error .checkbox,.control-group.error .radio,.control-group.error input,.control-group.error select,.control-group.error textarea{color:#b94a48}.control-group.error input,.control-group.error select,.control-group.error textarea{border-color:#b94a48;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 
1px rgba(0,0,0,0.075)}.control-group.error input:focus,.control-group.error select:focus,.control-group.error textarea:focus{border-color:#953b39;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392}.control-group.error .input-prepend .add-on,.control-group.error .input-append .add-on{color:#b94a48;background-color:#f2dede;border-color:#b94a48}.control-group.success .control-label,.control-group.success .help-block,.control-group.success .help-inline{color:#468847}.control-group.success .checkbox,.control-group.success .radio,.control-group.success input,.control-group.success select,.control-group.success textarea{color:#468847}.control-group.success input,.control-group.success select,.control-group.success textarea{border-color:#468847;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.success input:focus,.control-group.success select:focus,.control-group.success textarea:focus{border-color:#356635;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b}.control-group.success .input-prepend .add-on,.control-group.success .input-append .add-on{color:#468847;background-color:#dff0d8;border-color:#468847}.control-group.info .control-label,.control-group.info .help-block,.control-group.info .help-inline{color:#3a87ad}.control-group.info .checkbox,.control-group.info .radio,.control-group.info input,.control-group.info select,.control-group.info textarea{color:#3a87ad}.control-group.info input,.control-group.info select,.control-group.info textarea{border-color:#3a87ad;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.info input:focus,.control-group.info select:focus,.control-group.info textarea:focus{border-color:#2d6987;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7ab5d3;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7ab5d3;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7ab5d3}.control-group.info .input-prepend .add-on,.control-group.info .input-append .add-on{color:#3a87ad;background-color:#d9edf7;border-color:#3a87ad}input:focus:invalid,textarea:focus:invalid,select:focus:invalid{color:#b94a48;border-color:#ee5f5b}input:focus:invalid:focus,textarea:focus:invalid:focus,select:focus:invalid:focus{border-color:#e9322d;-webkit-box-shadow:0 0 6px #f8b9b7;-moz-box-shadow:0 0 6px #f8b9b7;box-shadow:0 0 6px #f8b9b7}.form-actions{padding:19px 20px 20px;margin-top:20px;margin-bottom:20px;background-color:#f5f5f5;border-top:1px solid #e5e5e5;*zoom:1}.form-actions:before,.form-actions:after{display:table;line-height:0;content:""}.form-actions:after{clear:both}.help-block,.help-inline{color:#595959}.help-block{display:block;margin-bottom:10px}.help-inline{display:inline-block;*display:inline;padding-left:5px;vertical-align:middle;*zoom:1}.input-append,.input-prepend{display:inline-block;margin-bottom:10px;font-size:0;white-space:nowrap;vertical-align:middle}.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input,.input-append .dropdown-menu,.input-prepend 
.dropdown-menu,.input-append .popover,.input-prepend .popover{font-size:14px}.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input{position:relative;margin-bottom:0;*margin-left:0;vertical-align:top;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-append input:focus,.input-prepend input:focus,.input-append select:focus,.input-prepend select:focus,.input-append .uneditable-input:focus,.input-prepend .uneditable-input:focus{z-index:2}.input-append .add-on,.input-prepend .add-on{display:inline-block;width:auto;height:20px;min-width:16px;padding:4px 5px;font-size:14px;font-weight:normal;line-height:20px;text-align:center;text-shadow:0 1px 0 #fff;background-color:#eee;border:1px solid #ccc}.input-append .add-on,.input-prepend .add-on,.input-append .btn,.input-prepend .btn,.input-append .btn-group>.dropdown-toggle,.input-prepend .btn-group>.dropdown-toggle{vertical-align:top;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-append .active,.input-prepend .active{background-color:#a9dba9;border-color:#46a546}.input-prepend .add-on,.input-prepend .btn{margin-right:-1px}.input-prepend .add-on:first-child,.input-prepend .btn:first-child{-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.input-append input,.input-append select,.input-append .uneditable-input{-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.input-append input+.btn-group .btn:last-child,.input-append select+.btn-group .btn:last-child,.input-append .uneditable-input+.btn-group .btn:last-child{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-append .add-on,.input-append .btn,.input-append .btn-group{margin-left:-1px}.input-append .add-on:last-child,.input-append .btn:last-child,.input-append .btn-group:last-child>.dropdown-toggle{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-prepend.input-append input,.input-prepend.input-append select,.input-prepend.input-append .uneditable-input{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-prepend.input-append input+.btn-group .btn,.input-prepend.input-append select+.btn-group .btn,.input-prepend.input-append .uneditable-input+.btn-group .btn{-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-prepend.input-append .add-on:first-child,.input-prepend.input-append .btn:first-child{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.input-prepend.input-append .add-on:last-child,.input-prepend.input-append .btn:last-child{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.input-prepend.input-append .btn-group:first-child{margin-left:0}input.search-query{padding-right:14px;padding-right:4px \9;padding-left:14px;padding-left:4px \9;margin-bottom:0;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.form-search .input-append .search-query,.form-search .input-prepend .search-query{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.form-search .input-append .search-query{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px}.form-search .input-append .btn{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 
14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .search-query{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .btn{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px}.form-search input,.form-inline input,.form-horizontal input,.form-search textarea,.form-inline textarea,.form-horizontal textarea,.form-search select,.form-inline select,.form-horizontal select,.form-search .help-inline,.form-inline .help-inline,.form-horizontal .help-inline,.form-search .uneditable-input,.form-inline .uneditable-input,.form-horizontal .uneditable-input,.form-search .input-prepend,.form-inline .input-prepend,.form-horizontal .input-prepend,.form-search .input-append,.form-inline .input-append,.form-horizontal .input-append{display:inline-block;*display:inline;margin-bottom:0;vertical-align:middle;*zoom:1}.form-search .hide,.form-inline .hide,.form-horizontal .hide{display:none}.form-search label,.form-inline label,.form-search .btn-group,.form-inline .btn-group{display:inline-block}.form-search .input-append,.form-inline .input-append,.form-search .input-prepend,.form-inline .input-prepend{margin-bottom:0}.form-search .radio,.form-search .checkbox,.form-inline .radio,.form-inline .checkbox{padding-left:0;margin-bottom:0;vertical-align:middle}.form-search .radio input[type="radio"],.form-search .checkbox input[type="checkbox"],.form-inline .radio input[type="radio"],.form-inline .checkbox input[type="checkbox"]{float:left;margin-right:3px;margin-left:0}.control-group{margin-bottom:10px}legend+.control-group{margin-top:20px;-webkit-margin-top-collapse:separate}.form-horizontal .control-group{margin-bottom:20px;*zoom:1}.form-horizontal .control-group:before,.form-horizontal .control-group:after{display:table;line-height:0;content:""}.form-horizontal .control-group:after{clear:both}.form-horizontal .control-label{float:left;width:160px;padding-top:5px;text-align:right}.form-horizontal .controls{*display:inline-block;*padding-left:20px;margin-left:180px;*margin-left:0}.form-horizontal .controls:first-child{*padding-left:180px}.form-horizontal .help-block{margin-bottom:0}.form-horizontal input+.help-block,.form-horizontal select+.help-block,.form-horizontal textarea+.help-block,.form-horizontal .uneditable-input+.help-block,.form-horizontal .input-prepend+.help-block,.form-horizontal .input-append+.help-block{margin-top:10px}.form-horizontal .form-actions{padding-left:180px}table{max-width:100%;background-color:transparent;border-collapse:collapse;border-spacing:0}.table{width:100%;margin-bottom:20px}.table th,.table td{padding:8px;line-height:20px;text-align:left;vertical-align:top;border-top:1px solid #ddd}.table th{font-weight:bold}.table thead th{vertical-align:bottom}.table caption+thead tr:first-child th,.table caption+thead tr:first-child td,.table colgroup+thead tr:first-child th,.table colgroup+thead tr:first-child td,.table thead:first-child tr:first-child th,.table thead:first-child tr:first-child td{border-top:0}.table tbody+tbody{border-top:2px solid #ddd}.table .table{background-color:#fff}.table-condensed th,.table-condensed td{padding:4px 5px}.table-bordered{border:1px solid #ddd;border-collapse:separate;*border-collapse:collapse;border-left:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.table-bordered th,.table-bordered td{border-left:1px solid #ddd}.table-bordered caption+thead tr:first-child th,.table-bordered caption+tbody tr:first-child 
th,.table-bordered caption+tbody tr:first-child td,.table-bordered colgroup+thead tr:first-child th,.table-bordered colgroup+tbody tr:first-child th,.table-bordered colgroup+tbody tr:first-child td,.table-bordered thead:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child td{border-top:0}.table-bordered thead:first-child tr:first-child>th:first-child,.table-bordered tbody:first-child tr:first-child>td:first-child,.table-bordered tbody:first-child tr:first-child>th:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered thead:first-child tr:first-child>th:last-child,.table-bordered tbody:first-child tr:first-child>td:last-child,.table-bordered tbody:first-child tr:first-child>th:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-radius-topright:4px}.table-bordered thead:last-child tr:last-child>th:first-child,.table-bordered tbody:last-child tr:last-child>td:first-child,.table-bordered tbody:last-child tr:last-child>th:first-child,.table-bordered tfoot:last-child tr:last-child>td:first-child,.table-bordered tfoot:last-child tr:last-child>th:first-child{-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px}.table-bordered thead:last-child tr:last-child>th:last-child,.table-bordered tbody:last-child tr:last-child>td:last-child,.table-bordered tbody:last-child tr:last-child>th:last-child,.table-bordered tfoot:last-child tr:last-child>td:last-child,.table-bordered tfoot:last-child tr:last-child>th:last-child{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px}.table-bordered tfoot+tbody:last-child tr:last-child td:first-child{-webkit-border-bottom-left-radius:0;border-bottom-left-radius:0;-moz-border-radius-bottomleft:0}.table-bordered tfoot+tbody:last-child tr:last-child td:last-child{-webkit-border-bottom-right-radius:0;border-bottom-right-radius:0;-moz-border-radius-bottomright:0}.table-bordered caption+thead tr:first-child th:first-child,.table-bordered caption+tbody tr:first-child td:first-child,.table-bordered colgroup+thead tr:first-child th:first-child,.table-bordered colgroup+tbody tr:first-child td:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered caption+thead tr:first-child th:last-child,.table-bordered caption+tbody tr:first-child td:last-child,.table-bordered colgroup+thead tr:first-child th:last-child,.table-bordered colgroup+tbody tr:first-child td:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-radius-topright:4px}.table-striped tbody>tr:nth-child(odd)>td,.table-striped tbody>tr:nth-child(odd)>th{background-color:#f9f9f9}.table-hover tbody tr:hover>td,.table-hover tbody tr:hover>th{background-color:#f5f5f5}table td[class*="span"],table th[class*="span"],.row-fluid table td[class*="span"],.row-fluid table th[class*="span"]{display:table-cell;float:none;margin-left:0}.table td.span1,.table th.span1{float:none;width:44px;margin-left:0}.table td.span2,.table th.span2{float:none;width:124px;margin-left:0}.table td.span3,.table th.span3{float:none;width:204px;margin-left:0}.table td.span4,.table th.span4{float:none;width:284px;margin-left:0}.table td.span5,.table th.span5{float:none;width:364px;margin-left:0}.table td.span6,.table th.span6{float:none;width:444px;margin-left:0}.table td.span7,.table 
th.span7{float:none;width:524px;margin-left:0}.table td.span8,.table th.span8{float:none;width:604px;margin-left:0}.table td.span9,.table th.span9{float:none;width:684px;margin-left:0}.table td.span10,.table th.span10{float:none;width:764px;margin-left:0}.table td.span11,.table th.span11{float:none;width:844px;margin-left:0}.table td.span12,.table th.span12{float:none;width:924px;margin-left:0}.table tbody tr.success>td{background-color:#dff0d8}.table tbody tr.error>td{background-color:#f2dede}.table tbody tr.warning>td{background-color:#fcf8e3}.table tbody tr.info>td{background-color:#d9edf7}.table-hover tbody tr.success:hover>td{background-color:#d0e9c6}.table-hover tbody tr.error:hover>td{background-color:#ebcccc}.table-hover tbody tr.warning:hover>td{background-color:#faf2cc}.table-hover tbody tr.info:hover>td{background-color:#c4e3f3}[class^="icon-"],[class*=" icon-"]{display:inline-block;width:14px;height:14px;margin-top:1px;*margin-right:.3em;line-height:14px;vertical-align:text-top;background-image:url("../img/glyphicons-halflings.png");background-position:14px 14px;background-repeat:no-repeat}.icon-white,.nav-pills>.active>a>[class^="icon-"],.nav-pills>.active>a>[class*=" icon-"],.nav-list>.active>a>[class^="icon-"],.nav-list>.active>a>[class*=" icon-"],.navbar-inverse .nav>.active>a>[class^="icon-"],.navbar-inverse .nav>.active>a>[class*=" icon-"],.dropdown-menu>li>a:hover>[class^="icon-"],.dropdown-menu>li>a:focus>[class^="icon-"],.dropdown-menu>li>a:hover>[class*=" icon-"],.dropdown-menu>li>a:focus>[class*=" icon-"],.dropdown-menu>.active>a>[class^="icon-"],.dropdown-menu>.active>a>[class*=" icon-"],.dropdown-submenu:hover>a>[class^="icon-"],.dropdown-submenu:focus>a>[class^="icon-"],.dropdown-submenu:hover>a>[class*=" icon-"],.dropdown-submenu:focus>a>[class*=" icon-"]{background-image:url("../img/glyphicons-halflings-white.png")}.icon-glass{background-position:0 0}.icon-music{background-position:-24px 0}.icon-search{background-position:-48px 0}.icon-envelope{background-position:-72px 0}.icon-heart{background-position:-96px 0}.icon-star{background-position:-120px 0}.icon-star-empty{background-position:-144px 0}.icon-user{background-position:-168px 0}.icon-film{background-position:-192px 0}.icon-th-large{background-position:-216px 0}.icon-th{background-position:-240px 0}.icon-th-list{background-position:-264px 0}.icon-ok{background-position:-288px 0}.icon-remove{background-position:-312px 0}.icon-zoom-in{background-position:-336px 0}.icon-zoom-out{background-position:-360px 0}.icon-off{background-position:-384px 0}.icon-signal{background-position:-408px 0}.icon-cog{background-position:-432px 0}.icon-trash{background-position:-456px 0}.icon-home{background-position:0 -24px}.icon-file{background-position:-24px -24px}.icon-time{background-position:-48px -24px}.icon-road{background-position:-72px -24px}.icon-download-alt{background-position:-96px -24px}.icon-download{background-position:-120px -24px}.icon-upload{background-position:-144px -24px}.icon-inbox{background-position:-168px -24px}.icon-play-circle{background-position:-192px -24px}.icon-repeat{background-position:-216px -24px}.icon-refresh{background-position:-240px -24px}.icon-list-alt{background-position:-264px -24px}.icon-lock{background-position:-287px -24px}.icon-flag{background-position:-312px -24px}.icon-headphones{background-position:-336px -24px}.icon-volume-off{background-position:-360px -24px}.icon-volume-down{background-position:-384px -24px}.icon-volume-up{background-position:-408px 
-24px}.icon-qrcode{background-position:-432px -24px}.icon-barcode{background-position:-456px -24px}.icon-tag{background-position:0 -48px}.icon-tags{background-position:-25px -48px}.icon-book{background-position:-48px -48px}.icon-bookmark{background-position:-72px -48px}.icon-print{background-position:-96px -48px}.icon-camera{background-position:-120px -48px}.icon-font{background-position:-144px -48px}.icon-bold{background-position:-167px -48px}.icon-italic{background-position:-192px -48px}.icon-text-height{background-position:-216px -48px}.icon-text-width{background-position:-240px -48px}.icon-align-left{background-position:-264px -48px}.icon-align-center{background-position:-288px -48px}.icon-align-right{background-position:-312px -48px}.icon-align-justify{background-position:-336px -48px}.icon-list{background-position:-360px -48px}.icon-indent-left{background-position:-384px -48px}.icon-indent-right{background-position:-408px -48px}.icon-facetime-video{background-position:-432px -48px}.icon-picture{background-position:-456px -48px}.icon-pencil{background-position:0 -72px}.icon-map-marker{background-position:-24px -72px}.icon-adjust{background-position:-48px -72px}.icon-tint{background-position:-72px -72px}.icon-edit{background-position:-96px -72px}.icon-share{background-position:-120px -72px}.icon-check{background-position:-144px -72px}.icon-move{background-position:-168px -72px}.icon-step-backward{background-position:-192px -72px}.icon-fast-backward{background-position:-216px -72px}.icon-backward{background-position:-240px -72px}.icon-play{background-position:-264px -72px}.icon-pause{background-position:-288px -72px}.icon-stop{background-position:-312px -72px}.icon-forward{background-position:-336px -72px}.icon-fast-forward{background-position:-360px -72px}.icon-step-forward{background-position:-384px -72px}.icon-eject{background-position:-408px -72px}.icon-chevron-left{background-position:-432px -72px}.icon-chevron-right{background-position:-456px -72px}.icon-plus-sign{background-position:0 -96px}.icon-minus-sign{background-position:-24px -96px}.icon-remove-sign{background-position:-48px -96px}.icon-ok-sign{background-position:-72px -96px}.icon-question-sign{background-position:-96px -96px}.icon-info-sign{background-position:-120px -96px}.icon-screenshot{background-position:-144px -96px}.icon-remove-circle{background-position:-168px -96px}.icon-ok-circle{background-position:-192px -96px}.icon-ban-circle{background-position:-216px -96px}.icon-arrow-left{background-position:-240px -96px}.icon-arrow-right{background-position:-264px -96px}.icon-arrow-up{background-position:-289px -96px}.icon-arrow-down{background-position:-312px -96px}.icon-share-alt{background-position:-336px -96px}.icon-resize-full{background-position:-360px -96px}.icon-resize-small{background-position:-384px -96px}.icon-plus{background-position:-408px -96px}.icon-minus{background-position:-433px -96px}.icon-asterisk{background-position:-456px -96px}.icon-exclamation-sign{background-position:0 -120px}.icon-gift{background-position:-24px -120px}.icon-leaf{background-position:-48px -120px}.icon-fire{background-position:-72px -120px}.icon-eye-open{background-position:-96px -120px}.icon-eye-close{background-position:-120px -120px}.icon-warning-sign{background-position:-144px -120px}.icon-plane{background-position:-168px -120px}.icon-calendar{background-position:-192px -120px}.icon-random{width:16px;background-position:-216px -120px}.icon-comment{background-position:-240px -120px}.icon-magnet{background-position:-264px 
-120px}.icon-chevron-up{background-position:-288px -120px}.icon-chevron-down{background-position:-313px -119px}.icon-retweet{background-position:-336px -120px}.icon-shopping-cart{background-position:-360px -120px}.icon-folder-close{width:16px;background-position:-384px -120px}.icon-folder-open{width:16px;background-position:-408px -120px}.icon-resize-vertical{background-position:-432px -119px}.icon-resize-horizontal{background-position:-456px -118px}.icon-hdd{background-position:0 -144px}.icon-bullhorn{background-position:-24px -144px}.icon-bell{background-position:-48px -144px}.icon-certificate{background-position:-72px -144px}.icon-thumbs-up{background-position:-96px -144px}.icon-thumbs-down{background-position:-120px -144px}.icon-hand-right{background-position:-144px -144px}.icon-hand-left{background-position:-168px -144px}.icon-hand-up{background-position:-192px -144px}.icon-hand-down{background-position:-216px -144px}.icon-circle-arrow-right{background-position:-240px -144px}.icon-circle-arrow-left{background-position:-264px -144px}.icon-circle-arrow-up{background-position:-288px -144px}.icon-circle-arrow-down{background-position:-312px -144px}.icon-globe{background-position:-336px -144px}.icon-wrench{background-position:-360px -144px}.icon-tasks{background-position:-384px -144px}.icon-filter{background-position:-408px -144px}.icon-briefcase{background-position:-432px -144px}.icon-fullscreen{background-position:-456px -144px}.dropup,.dropdown{position:relative}.dropdown-toggle{*margin-bottom:-3px}.dropdown-toggle:active,.open .dropdown-toggle{outline:0}.caret{display:inline-block;width:0;height:0;vertical-align:top;border-top:4px solid #000;border-right:4px solid transparent;border-left:4px solid transparent;content:""}.dropdown .caret{margin-top:8px;margin-left:2px}.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:160px;padding:5px 0;margin:2px 0 0;list-style:none;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);*border-right-width:2px;*border-bottom-width:2px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.dropdown-menu.pull-right{right:0;left:auto}.dropdown-menu .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.dropdown-menu>li>a{display:block;padding:3px 20px;clear:both;font-weight:normal;line-height:20px;color:#333;white-space:nowrap}.dropdown-menu>li>a:hover,.dropdown-menu>li>a:focus,.dropdown-submenu:hover>a,.dropdown-submenu:focus>a{color:#fff;text-decoration:none;background-color:#0081c2;background-image:-moz-linear-gradient(top,#08c,#0077b3);background-image:-webkit-gradient(linear,0 0,0 100%,from(#08c),to(#0077b3));background-image:-webkit-linear-gradient(top,#08c,#0077b3);background-image:-o-linear-gradient(top,#08c,#0077b3);background-image:linear-gradient(to bottom,#08c,#0077b3);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0077b3',GradientType=0)}.dropdown-menu>.active>a,.dropdown-menu>.active>a:hover,.dropdown-menu>.active>a:focus{color:#fff;text-decoration:none;background-color:#0081c2;background-image:-moz-linear-gradient(top,#08c,#0077b3);background-image:-webkit-gradient(linear,0 0,0 
100%,from(#08c),to(#0077b3));background-image:-webkit-linear-gradient(top,#08c,#0077b3);background-image:-o-linear-gradient(top,#08c,#0077b3);background-image:linear-gradient(to bottom,#08c,#0077b3);background-repeat:repeat-x;outline:0;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0077b3',GradientType=0)}.dropdown-menu>.disabled>a,.dropdown-menu>.disabled>a:hover,.dropdown-menu>.disabled>a:focus{color:#999}.dropdown-menu>.disabled>a:hover,.dropdown-menu>.disabled>a:focus{text-decoration:none;cursor:default;background-color:transparent;background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.open{*z-index:1000}.open>.dropdown-menu{display:block}.dropdown-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:990}.pull-right>.dropdown-menu{right:0;left:auto}.dropup .caret,.navbar-fixed-bottom .dropdown .caret{border-top:0;border-bottom:4px solid #000;content:""}.dropup .dropdown-menu,.navbar-fixed-bottom .dropdown .dropdown-menu{top:auto;bottom:100%;margin-bottom:1px}.dropdown-submenu{position:relative}.dropdown-submenu>.dropdown-menu{top:0;left:100%;margin-top:-6px;margin-left:-1px;-webkit-border-radius:0 6px 6px 6px;-moz-border-radius:0 6px 6px 6px;border-radius:0 6px 6px 6px}.dropdown-submenu:hover>.dropdown-menu{display:block}.dropup .dropdown-submenu>.dropdown-menu{top:auto;bottom:0;margin-top:0;margin-bottom:-2px;-webkit-border-radius:5px 5px 5px 0;-moz-border-radius:5px 5px 5px 0;border-radius:5px 5px 5px 0}.dropdown-submenu>a:after{display:block;float:right;width:0;height:0;margin-top:5px;margin-right:-10px;border-color:transparent;border-left-color:#ccc;border-style:solid;border-width:5px 0 5px 5px;content:" "}.dropdown-submenu:hover>a:after{border-left-color:#fff}.dropdown-submenu.pull-left{float:none}.dropdown-submenu.pull-left>.dropdown-menu{left:-100%;margin-left:10px;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px}.dropdown .dropdown-menu .nav-header{padding-right:20px;padding-left:20px}.typeahead{z-index:1051;margin-top:2px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.well{min-height:20px;padding:19px;margin-bottom:20px;background-color:#f5f5f5;border:1px solid #e3e3e3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);box-shadow:inset 0 1px 1px rgba(0,0,0,0.05)}.well blockquote{border-color:#ddd;border-color:rgba(0,0,0,0.15)}.well-large{padding:24px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.well-small{padding:9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.fade{opacity:0;-webkit-transition:opacity .15s linear;-moz-transition:opacity .15s linear;-o-transition:opacity .15s linear;transition:opacity .15s linear}.fade.in{opacity:1}.collapse{position:relative;height:0;overflow:hidden;-webkit-transition:height .35s ease;-moz-transition:height .35s ease;-o-transition:height .35s ease;transition:height .35s ease}.collapse.in{height:auto}.close{float:right;font-size:20px;font-weight:bold;line-height:20px;color:#000;text-shadow:0 1px 0 #fff;opacity:.2;filter:alpha(opacity=20)}.close:hover,.close:focus{color:#000;text-decoration:none;cursor:pointer;opacity:.4;filter:alpha(opacity=40)}button.close{padding:0;cursor:pointer;background:transparent;border:0;-webkit-appearance:none}.btn{display:inline-block;*display:inline;padding:4px 
12px;margin-bottom:0;*margin-left:.3em;font-size:14px;line-height:20px;color:#333;text-align:center;text-shadow:0 1px 1px rgba(255,255,255,0.75);vertical-align:middle;cursor:pointer;background-color:#f5f5f5;*background-color:#e6e6e6;background-image:-moz-linear-gradient(top,#fff,#e6e6e6);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#e6e6e6));background-image:-webkit-linear-gradient(top,#fff,#e6e6e6);background-image:-o-linear-gradient(top,#fff,#e6e6e6);background-image:linear-gradient(to bottom,#fff,#e6e6e6);background-repeat:repeat-x;border:1px solid #ccc;*border:0;border-color:#e6e6e6 #e6e6e6 #bfbfbf;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);border-bottom-color:#b3b3b3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff',endColorstr='#ffe6e6e6',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);*zoom:1;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05)}.btn:hover,.btn:focus,.btn:active,.btn.active,.btn.disabled,.btn[disabled]{color:#333;background-color:#e6e6e6;*background-color:#d9d9d9}.btn:active,.btn.active{background-color:#ccc \9}.btn:first-child{*margin-left:0}.btn:hover,.btn:focus{color:#333;text-decoration:none;background-position:0 -15px;-webkit-transition:background-position .1s linear;-moz-transition:background-position .1s linear;-o-transition:background-position .1s linear;transition:background-position .1s linear}.btn:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.btn.active,.btn:active{background-image:none;outline:0;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn.disabled,.btn[disabled]{cursor:default;background-image:none;opacity:.65;filter:alpha(opacity=65);-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-large{padding:11px 19px;font-size:17.5px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.btn-large [class^="icon-"],.btn-large [class*=" icon-"]{margin-top:4px}.btn-small{padding:2px 10px;font-size:11.9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.btn-small [class^="icon-"],.btn-small [class*=" icon-"]{margin-top:0}.btn-mini [class^="icon-"],.btn-mini [class*=" icon-"]{margin-top:-1px}.btn-mini{padding:0 6px;font-size:10.5px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.btn-block{display:block;width:100%;padding-right:0;padding-left:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.btn-block+.btn-block{margin-top:5px}input[type="submit"].btn-block,input[type="reset"].btn-block,input[type="button"].btn-block{width:100%}.btn-primary.active,.btn-warning.active,.btn-danger.active,.btn-success.active,.btn-info.active,.btn-inverse.active{color:rgba(255,255,255,0.75)}.btn-primary{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#006dcc;*background-color:#04c;background-image:-moz-linear-gradient(top,#08c,#04c);background-image:-webkit-gradient(linear,0 0,0 
100%,from(#08c),to(#04c));background-image:-webkit-linear-gradient(top,#08c,#04c);background-image:-o-linear-gradient(top,#08c,#04c);background-image:linear-gradient(to bottom,#08c,#04c);background-repeat:repeat-x;border-color:#04c #04c #002a80;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0044cc',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-primary:hover,.btn-primary:focus,.btn-primary:active,.btn-primary.active,.btn-primary.disabled,.btn-primary[disabled]{color:#fff;background-color:#04c;*background-color:#003bb3}.btn-primary:active,.btn-primary.active{background-color:#039 \9}.btn-warning{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#faa732;*background-color:#f89406;background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-repeat:repeat-x;border-color:#f89406 #f89406 #ad6704;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-warning:hover,.btn-warning:focus,.btn-warning:active,.btn-warning.active,.btn-warning.disabled,.btn-warning[disabled]{color:#fff;background-color:#f89406;*background-color:#df8505}.btn-warning:active,.btn-warning.active{background-color:#c67605 \9}.btn-danger{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#da4f49;*background-color:#bd362f;background-image:-moz-linear-gradient(top,#ee5f5b,#bd362f);background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#bd362f));background-image:-webkit-linear-gradient(top,#ee5f5b,#bd362f);background-image:-o-linear-gradient(top,#ee5f5b,#bd362f);background-image:linear-gradient(to bottom,#ee5f5b,#bd362f);background-repeat:repeat-x;border-color:#bd362f #bd362f #802420;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffbd362f',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-danger:hover,.btn-danger:focus,.btn-danger:active,.btn-danger.active,.btn-danger.disabled,.btn-danger[disabled]{color:#fff;background-color:#bd362f;*background-color:#a9302a}.btn-danger:active,.btn-danger.active{background-color:#942a25 \9}.btn-success{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#5bb75b;*background-color:#51a351;background-image:-moz-linear-gradient(top,#62c462,#51a351);background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#51a351));background-image:-webkit-linear-gradient(top,#62c462,#51a351);background-image:-o-linear-gradient(top,#62c462,#51a351);background-image:linear-gradient(to bottom,#62c462,#51a351);background-repeat:repeat-x;border-color:#51a351 #51a351 #387038;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) 
rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff51a351',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-success:hover,.btn-success:focus,.btn-success:active,.btn-success.active,.btn-success.disabled,.btn-success[disabled]{color:#fff;background-color:#51a351;*background-color:#499249}.btn-success:active,.btn-success.active{background-color:#408140 \9}.btn-info{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#49afcd;*background-color:#2f96b4;background-image:-moz-linear-gradient(top,#5bc0de,#2f96b4);background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#2f96b4));background-image:-webkit-linear-gradient(top,#5bc0de,#2f96b4);background-image:-o-linear-gradient(top,#5bc0de,#2f96b4);background-image:linear-gradient(to bottom,#5bc0de,#2f96b4);background-repeat:repeat-x;border-color:#2f96b4 #2f96b4 #1f6377;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff2f96b4',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-info:hover,.btn-info:focus,.btn-info:active,.btn-info.active,.btn-info.disabled,.btn-info[disabled]{color:#fff;background-color:#2f96b4;*background-color:#2a85a0}.btn-info:active,.btn-info.active{background-color:#24748c \9}.btn-inverse{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#363636;*background-color:#222;background-image:-moz-linear-gradient(top,#444,#222);background-image:-webkit-gradient(linear,0 0,0 100%,from(#444),to(#222));background-image:-webkit-linear-gradient(top,#444,#222);background-image:-o-linear-gradient(top,#444,#222);background-image:linear-gradient(to bottom,#444,#222);background-repeat:repeat-x;border-color:#222 #222 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff444444',endColorstr='#ff222222',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.btn-inverse:hover,.btn-inverse:focus,.btn-inverse:active,.btn-inverse.active,.btn-inverse.disabled,.btn-inverse[disabled]{color:#fff;background-color:#222;*background-color:#151515}.btn-inverse:active,.btn-inverse.active{background-color:#080808 
\9}button.btn,input[type="submit"].btn{*padding-top:3px;*padding-bottom:3px}button.btn::-moz-focus-inner,input[type="submit"].btn::-moz-focus-inner{padding:0;border:0}button.btn.btn-large,input[type="submit"].btn.btn-large{*padding-top:7px;*padding-bottom:7px}button.btn.btn-small,input[type="submit"].btn.btn-small{*padding-top:3px;*padding-bottom:3px}button.btn.btn-mini,input[type="submit"].btn.btn-mini{*padding-top:1px;*padding-bottom:1px}.btn-link,.btn-link:active,.btn-link[disabled]{background-color:transparent;background-image:none;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-link{color:#08c;cursor:pointer;border-color:transparent;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-link:hover,.btn-link:focus{color:#005580;text-decoration:underline;background-color:transparent}.btn-link[disabled]:hover,.btn-link[disabled]:focus{color:#333;text-decoration:none}.btn-group{position:relative;display:inline-block;*display:inline;*margin-left:.3em;font-size:0;white-space:nowrap;vertical-align:middle;*zoom:1}.btn-group:first-child{*margin-left:0}.btn-group+.btn-group{margin-left:5px}.btn-toolbar{margin-top:10px;margin-bottom:10px;font-size:0}.btn-toolbar>.btn+.btn,.btn-toolbar>.btn-group+.btn,.btn-toolbar>.btn+.btn-group{margin-left:5px}.btn-group>.btn{position:relative;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group>.btn+.btn{margin-left:-1px}.btn-group>.btn,.btn-group>.dropdown-menu,.btn-group>.popover{font-size:14px}.btn-group>.btn-mini{font-size:10.5px}.btn-group>.btn-small{font-size:11.9px}.btn-group>.btn-large{font-size:17.5px}.btn-group>.btn:first-child{margin-left:0;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-bottomleft:4px;-moz-border-radius-topleft:4px}.btn-group>.btn:last-child,.btn-group>.dropdown-toggle{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-bottomright:4px}.btn-group>.btn.large:first-child{margin-left:0;-webkit-border-bottom-left-radius:6px;border-bottom-left-radius:6px;-webkit-border-top-left-radius:6px;border-top-left-radius:6px;-moz-border-radius-bottomleft:6px;-moz-border-radius-topleft:6px}.btn-group>.btn.large:last-child,.btn-group>.large.dropdown-toggle{-webkit-border-top-right-radius:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;border-bottom-right-radius:6px;-moz-border-radius-topright:6px;-moz-border-radius-bottomright:6px}.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active{z-index:2}.btn-group .dropdown-toggle:active,.btn-group.open .dropdown-toggle{outline:0}.btn-group>.btn+.dropdown-toggle{*padding-top:5px;padding-right:8px;*padding-bottom:5px;padding-left:8px;-webkit-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px 
rgba(0,0,0,0.05)}.btn-group>.btn-mini+.dropdown-toggle{*padding-top:2px;padding-right:5px;*padding-bottom:2px;padding-left:5px}.btn-group>.btn-small+.dropdown-toggle{*padding-top:5px;*padding-bottom:4px}.btn-group>.btn-large+.dropdown-toggle{*padding-top:7px;padding-right:12px;*padding-bottom:7px;padding-left:12px}.btn-group.open .dropdown-toggle{background-image:none;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn-group.open .btn.dropdown-toggle{background-color:#e6e6e6}.btn-group.open .btn-primary.dropdown-toggle{background-color:#04c}.btn-group.open .btn-warning.dropdown-toggle{background-color:#f89406}.btn-group.open .btn-danger.dropdown-toggle{background-color:#bd362f}.btn-group.open .btn-success.dropdown-toggle{background-color:#51a351}.btn-group.open .btn-info.dropdown-toggle{background-color:#2f96b4}.btn-group.open .btn-inverse.dropdown-toggle{background-color:#222}.btn .caret{margin-top:8px;margin-left:0}.btn-large .caret{margin-top:6px}.btn-large .caret{border-top-width:5px;border-right-width:5px;border-left-width:5px}.btn-mini .caret,.btn-small .caret{margin-top:8px}.dropup .btn-large .caret{border-bottom-width:5px}.btn-primary .caret,.btn-warning .caret,.btn-danger .caret,.btn-info .caret,.btn-success .caret,.btn-inverse .caret{border-top-color:#fff;border-bottom-color:#fff}.btn-group-vertical{display:inline-block;*display:inline;*zoom:1}.btn-group-vertical>.btn{display:block;float:none;max-width:100%;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group-vertical>.btn+.btn{margin-top:-1px;margin-left:0}.btn-group-vertical>.btn:first-child{-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.btn-group-vertical>.btn:last-child{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.btn-group-vertical>.btn-large:first-child{-webkit-border-radius:6px 6px 0 0;-moz-border-radius:6px 6px 0 0;border-radius:6px 6px 0 0}.btn-group-vertical>.btn-large:last-child{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.alert{padding:8px 35px 8px 14px;margin-bottom:20px;text-shadow:0 1px 0 rgba(255,255,255,0.5);background-color:#fcf8e3;border:1px solid #fbeed5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.alert,.alert h4{color:#c09853}.alert h4{margin:0}.alert .close{position:relative;top:-2px;right:-21px;line-height:20px}.alert-success{color:#468847;background-color:#dff0d8;border-color:#d6e9c6}.alert-success h4{color:#468847}.alert-danger,.alert-error{color:#b94a48;background-color:#f2dede;border-color:#eed3d7}.alert-danger h4,.alert-error h4{color:#b94a48}.alert-info{color:#3a87ad;background-color:#d9edf7;border-color:#bce8f1}.alert-info h4{color:#3a87ad}.alert-block{padding-top:14px;padding-bottom:14px}.alert-block>p,.alert-block>ul{margin-bottom:0}.alert-block p+p{margin-top:5px}.nav{margin-bottom:20px;margin-left:0;list-style:none}.nav>li>a{display:block}.nav>li>a:hover,.nav>li>a:focus{text-decoration:none;background-color:#eee}.nav>li>a>img{max-width:none}.nav>.pull-right{float:right}.nav-header{display:block;padding:3px 15px;font-size:11px;font-weight:bold;line-height:20px;color:#999;text-shadow:0 1px 0 rgba(255,255,255,0.5);text-transform:uppercase}.nav 
li+.nav-header{margin-top:9px}.nav-list{padding-right:15px;padding-left:15px;margin-bottom:0}.nav-list>li>a,.nav-list .nav-header{margin-right:-15px;margin-left:-15px;text-shadow:0 1px 0 rgba(255,255,255,0.5)}.nav-list>li>a{padding:3px 15px}.nav-list>.active>a,.nav-list>.active>a:hover,.nav-list>.active>a:focus{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.2);background-color:#08c}.nav-list [class^="icon-"],.nav-list [class*=" icon-"]{margin-right:2px}.nav-list .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.nav-tabs,.nav-pills{*zoom:1}.nav-tabs:before,.nav-pills:before,.nav-tabs:after,.nav-pills:after{display:table;line-height:0;content:""}.nav-tabs:after,.nav-pills:after{clear:both}.nav-tabs>li,.nav-pills>li{float:left}.nav-tabs>li>a,.nav-pills>li>a{padding-right:12px;padding-left:12px;margin-right:2px;line-height:14px}.nav-tabs{border-bottom:1px solid #ddd}.nav-tabs>li{margin-bottom:-1px}.nav-tabs>li>a{padding-top:8px;padding-bottom:8px;line-height:20px;border:1px solid transparent;-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.nav-tabs>li>a:hover,.nav-tabs>li>a:focus{border-color:#eee #eee #ddd}.nav-tabs>.active>a,.nav-tabs>.active>a:hover,.nav-tabs>.active>a:focus{color:#555;cursor:default;background-color:#fff;border:1px solid #ddd;border-bottom-color:transparent}.nav-pills>li>a{padding-top:8px;padding-bottom:8px;margin-top:2px;margin-bottom:2px;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px}.nav-pills>.active>a,.nav-pills>.active>a:hover,.nav-pills>.active>a:focus{color:#fff;background-color:#08c}.nav-stacked>li{float:none}.nav-stacked>li>a{margin-right:0}.nav-tabs.nav-stacked{border-bottom:0}.nav-tabs.nav-stacked>li>a{border:1px solid #ddd;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.nav-tabs.nav-stacked>li:first-child>a{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-topleft:4px}.nav-tabs.nav-stacked>li:last-child>a{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomright:4px;-moz-border-radius-bottomleft:4px}.nav-tabs.nav-stacked>li>a:hover,.nav-tabs.nav-stacked>li>a:focus{z-index:2;border-color:#ddd}.nav-pills.nav-stacked>li>a{margin-bottom:3px}.nav-pills.nav-stacked>li:last-child>a{margin-bottom:1px}.nav-tabs .dropdown-menu{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.nav-pills .dropdown-menu{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.nav .dropdown-toggle .caret{margin-top:6px;border-top-color:#08c;border-bottom-color:#08c}.nav .dropdown-toggle:hover .caret,.nav .dropdown-toggle:focus .caret{border-top-color:#005580;border-bottom-color:#005580}.nav-tabs .dropdown-toggle .caret{margin-top:8px}.nav .active .dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.nav-tabs .active .dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.nav>.dropdown.active>a:hover,.nav>.dropdown.active>a:focus{cursor:pointer}.nav-tabs .open .dropdown-toggle,.nav-pills .open .dropdown-toggle,.nav>li.dropdown.open.active>a:hover,.nav>li.dropdown.open.active>a:focus{color:#fff;background-color:#999;border-color:#999}.nav li.dropdown.open .caret,.nav li.dropdown.open.active .caret,.nav 
li.dropdown.open a:hover .caret,.nav li.dropdown.open a:focus .caret{border-top-color:#fff;border-bottom-color:#fff;opacity:1;filter:alpha(opacity=100)}.tabs-stacked .open>a:hover,.tabs-stacked .open>a:focus{border-color:#999}.tabbable{*zoom:1}.tabbable:before,.tabbable:after{display:table;line-height:0;content:""}.tabbable:after{clear:both}.tab-content{overflow:auto}.tabs-below>.nav-tabs,.tabs-right>.nav-tabs,.tabs-left>.nav-tabs{border-bottom:0}.tab-content>.tab-pane,.pill-content>.pill-pane{display:none}.tab-content>.active,.pill-content>.active{display:block}.tabs-below>.nav-tabs{border-top:1px solid #ddd}.tabs-below>.nav-tabs>li{margin-top:-1px;margin-bottom:0}.tabs-below>.nav-tabs>li>a{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.tabs-below>.nav-tabs>li>a:hover,.tabs-below>.nav-tabs>li>a:focus{border-top-color:#ddd;border-bottom-color:transparent}.tabs-below>.nav-tabs>.active>a,.tabs-below>.nav-tabs>.active>a:hover,.tabs-below>.nav-tabs>.active>a:focus{border-color:transparent #ddd #ddd #ddd}.tabs-left>.nav-tabs>li,.tabs-right>.nav-tabs>li{float:none}.tabs-left>.nav-tabs>li>a,.tabs-right>.nav-tabs>li>a{min-width:74px;margin-right:0;margin-bottom:3px}.tabs-left>.nav-tabs{float:left;margin-right:19px;border-right:1px solid #ddd}.tabs-left>.nav-tabs>li>a{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.tabs-left>.nav-tabs>li>a:hover,.tabs-left>.nav-tabs>li>a:focus{border-color:#eee #ddd #eee #eee}.tabs-left>.nav-tabs .active>a,.tabs-left>.nav-tabs .active>a:hover,.tabs-left>.nav-tabs .active>a:focus{border-color:#ddd transparent #ddd #ddd;*border-right-color:#fff}.tabs-right>.nav-tabs{float:right;margin-left:19px;border-left:1px solid #ddd}.tabs-right>.nav-tabs>li>a{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.tabs-right>.nav-tabs>li>a:hover,.tabs-right>.nav-tabs>li>a:focus{border-color:#eee #eee #eee #ddd}.tabs-right>.nav-tabs .active>a,.tabs-right>.nav-tabs .active>a:hover,.tabs-right>.nav-tabs .active>a:focus{border-color:#ddd #ddd #ddd transparent;*border-left-color:#fff}.nav>.disabled>a{color:#999}.nav>.disabled>a:hover,.nav>.disabled>a:focus{text-decoration:none;cursor:default;background-color:transparent}.navbar{*position:relative;*z-index:2;margin-bottom:20px;overflow:visible}.navbar-inner{min-height:40px;padding-right:20px;padding-left:20px;background-color:#fafafa;background-image:-moz-linear-gradient(top,#fff,#f2f2f2);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#f2f2f2));background-image:-webkit-linear-gradient(top,#fff,#f2f2f2);background-image:-o-linear-gradient(top,#fff,#f2f2f2);background-image:linear-gradient(to bottom,#fff,#f2f2f2);background-repeat:repeat-x;border:1px solid #d4d4d4;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff',endColorstr='#fff2f2f2',GradientType=0);*zoom:1;-webkit-box-shadow:0 1px 4px rgba(0,0,0,0.065);-moz-box-shadow:0 1px 4px rgba(0,0,0,0.065);box-shadow:0 1px 4px rgba(0,0,0,0.065)}.navbar-inner:before,.navbar-inner:after{display:table;line-height:0;content:""}.navbar-inner:after{clear:both}.navbar .container{width:auto}.nav-collapse.collapse{height:auto;overflow:visible}.navbar .brand{display:block;float:left;padding:10px 20px 10px;margin-left:-20px;font-size:20px;font-weight:200;color:#777;text-shadow:0 1px 0 #fff}.navbar .brand:hover,.navbar 
.brand:focus{text-decoration:none}.navbar-text{margin-bottom:0;line-height:40px;color:#777}.navbar-link{color:#777}.navbar-link:hover,.navbar-link:focus{color:#333}.navbar .divider-vertical{height:40px;margin:0 9px;border-right:1px solid #fff;border-left:1px solid #f2f2f2}.navbar .btn,.navbar .btn-group{margin-top:5px}.navbar .btn-group .btn,.navbar .input-prepend .btn,.navbar .input-append .btn,.navbar .input-prepend .btn-group,.navbar .input-append .btn-group{margin-top:0}.navbar-form{margin-bottom:0;*zoom:1}.navbar-form:before,.navbar-form:after{display:table;line-height:0;content:""}.navbar-form:after{clear:both}.navbar-form input,.navbar-form select,.navbar-form .radio,.navbar-form .checkbox{margin-top:5px}.navbar-form input,.navbar-form select,.navbar-form .btn{display:inline-block;margin-bottom:0}.navbar-form input[type="image"],.navbar-form input[type="checkbox"],.navbar-form input[type="radio"]{margin-top:3px}.navbar-form .input-append,.navbar-form .input-prepend{margin-top:5px;white-space:nowrap}.navbar-form .input-append input,.navbar-form .input-prepend input{margin-top:0}.navbar-search{position:relative;float:left;margin-top:5px;margin-bottom:0}.navbar-search .search-query{padding:4px 14px;margin-bottom:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:13px;font-weight:normal;line-height:1;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.navbar-static-top{position:static;margin-bottom:0}.navbar-static-top .navbar-inner{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-fixed-top,.navbar-fixed-bottom{position:fixed;right:0;left:0;z-index:1030;margin-bottom:0}.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{border-width:0 0 1px}.navbar-fixed-bottom .navbar-inner{border-width:1px 0 0}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding-right:0;padding-left:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.navbar-fixed-top{top:0}.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{-webkit-box-shadow:0 1px 10px rgba(0,0,0,0.1);-moz-box-shadow:0 1px 10px rgba(0,0,0,0.1);box-shadow:0 1px 10px rgba(0,0,0,0.1)}.navbar-fixed-bottom{bottom:0}.navbar-fixed-bottom .navbar-inner{-webkit-box-shadow:0 -1px 10px rgba(0,0,0,0.1);-moz-box-shadow:0 -1px 10px rgba(0,0,0,0.1);box-shadow:0 -1px 10px rgba(0,0,0,0.1)}.navbar .nav{position:relative;left:0;display:block;float:left;margin:0 10px 0 0}.navbar .nav.pull-right{float:right;margin-right:0}.navbar .nav>li{float:left}.navbar .nav>li>a{float:none;padding:10px 15px 10px;color:#777;text-decoration:none;text-shadow:0 1px 0 #fff}.navbar .nav .dropdown-toggle .caret{margin-top:8px}.navbar .nav>li>a:focus,.navbar .nav>li>a:hover{color:#333;text-decoration:none;background-color:transparent}.navbar .nav>.active>a,.navbar .nav>.active>a:hover,.navbar .nav>.active>a:focus{color:#555;text-decoration:none;background-color:#e5e5e5;-webkit-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);-moz-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);box-shadow:inset 0 3px 8px rgba(0,0,0,0.125)}.navbar .btn-navbar{display:none;float:right;padding:7px 10px;margin-right:5px;margin-left:5px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#ededed;*background-color:#e5e5e5;background-image:-moz-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:-webkit-gradient(linear,0 0,0 
100%,from(#f2f2f2),to(#e5e5e5));background-image:-webkit-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:-o-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:linear-gradient(to bottom,#f2f2f2,#e5e5e5);background-repeat:repeat-x;border-color:#e5e5e5 #e5e5e5 #bfbfbf;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff2f2f2',endColorstr='#ffe5e5e5',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075)}.navbar .btn-navbar:hover,.navbar .btn-navbar:focus,.navbar .btn-navbar:active,.navbar .btn-navbar.active,.navbar .btn-navbar.disabled,.navbar .btn-navbar[disabled]{color:#fff;background-color:#e5e5e5;*background-color:#d9d9d9}.navbar .btn-navbar:active,.navbar .btn-navbar.active{background-color:#ccc \9}.navbar .btn-navbar .icon-bar{display:block;width:18px;height:2px;background-color:#f5f5f5;-webkit-border-radius:1px;-moz-border-radius:1px;border-radius:1px;-webkit-box-shadow:0 1px 0 rgba(0,0,0,0.25);-moz-box-shadow:0 1px 0 rgba(0,0,0,0.25);box-shadow:0 1px 0 rgba(0,0,0,0.25)}.btn-navbar .icon-bar+.icon-bar{margin-top:3px}.navbar .nav>li>.dropdown-menu:before{position:absolute;top:-7px;left:9px;display:inline-block;border-right:7px solid transparent;border-bottom:7px solid #ccc;border-left:7px solid transparent;border-bottom-color:rgba(0,0,0,0.2);content:''}.navbar .nav>li>.dropdown-menu:after{position:absolute;top:-6px;left:10px;display:inline-block;border-right:6px solid transparent;border-bottom:6px solid #fff;border-left:6px solid transparent;content:''}.navbar-fixed-bottom .nav>li>.dropdown-menu:before{top:auto;bottom:-7px;border-top:7px solid #ccc;border-bottom:0;border-top-color:rgba(0,0,0,0.2)}.navbar-fixed-bottom .nav>li>.dropdown-menu:after{top:auto;bottom:-6px;border-top:6px solid #fff;border-bottom:0}.navbar .nav li.dropdown>a:hover .caret,.navbar .nav li.dropdown>a:focus .caret{border-top-color:#333;border-bottom-color:#333}.navbar .nav li.dropdown.open>.dropdown-toggle,.navbar .nav li.dropdown.active>.dropdown-toggle,.navbar .nav li.dropdown.open.active>.dropdown-toggle{color:#555;background-color:#e5e5e5}.navbar .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#777;border-bottom-color:#777}.navbar .nav li.dropdown.open>.dropdown-toggle .caret,.navbar .nav li.dropdown.active>.dropdown-toggle .caret,.navbar .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .pull-right>li>.dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right{right:0;left:auto}.navbar .pull-right>li>.dropdown-menu:before,.navbar .nav>li>.dropdown-menu.pull-right:before{right:12px;left:auto}.navbar .pull-right>li>.dropdown-menu:after,.navbar .nav>li>.dropdown-menu.pull-right:after{right:13px;left:auto}.navbar .pull-right>li>.dropdown-menu .dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right .dropdown-menu{right:100%;left:auto;margin-right:-1px;margin-left:0;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px}.navbar-inverse .navbar-inner{background-color:#1b1b1b;background-image:-moz-linear-gradient(top,#222,#111);background-image:-webkit-gradient(linear,0 0,0 
100%,from(#222),to(#111));background-image:-webkit-linear-gradient(top,#222,#111);background-image:-o-linear-gradient(top,#222,#111);background-image:linear-gradient(to bottom,#222,#111);background-repeat:repeat-x;border-color:#252525;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff222222',endColorstr='#ff111111',GradientType=0)}.navbar-inverse .brand,.navbar-inverse .nav>li>a{color:#999;text-shadow:0 -1px 0 rgba(0,0,0,0.25)}.navbar-inverse .brand:hover,.navbar-inverse .nav>li>a:hover,.navbar-inverse .brand:focus,.navbar-inverse .nav>li>a:focus{color:#fff}.navbar-inverse .brand{color:#999}.navbar-inverse .navbar-text{color:#999}.navbar-inverse .nav>li>a:focus,.navbar-inverse .nav>li>a:hover{color:#fff;background-color:transparent}.navbar-inverse .nav .active>a,.navbar-inverse .nav .active>a:hover,.navbar-inverse .nav .active>a:focus{color:#fff;background-color:#111}.navbar-inverse .navbar-link{color:#999}.navbar-inverse .navbar-link:hover,.navbar-inverse .navbar-link:focus{color:#fff}.navbar-inverse .divider-vertical{border-right-color:#222;border-left-color:#111}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle{color:#fff;background-color:#111}.navbar-inverse .nav li.dropdown>a:hover .caret,.navbar-inverse .nav li.dropdown>a:focus .caret{border-top-color:#fff;border-bottom-color:#fff}.navbar-inverse .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#999;border-bottom-color:#999}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.navbar-inverse .navbar-search .search-query{color:#fff;background-color:#515151;border-color:#111;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-webkit-transition:none;-moz-transition:none;-o-transition:none;transition:none}.navbar-inverse .navbar-search .search-query:-moz-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:-ms-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query::-webkit-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:focus,.navbar-inverse .navbar-search .search-query.focused{padding:5px 15px;color:#333;text-shadow:0 1px 0 #fff;background-color:#fff;border:0;outline:0;-webkit-box-shadow:0 0 3px rgba(0,0,0,0.15);-moz-box-shadow:0 0 3px rgba(0,0,0,0.15);box-shadow:0 0 3px rgba(0,0,0,0.15)}.navbar-inverse .btn-navbar{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e0e0e;*background-color:#040404;background-image:-moz-linear-gradient(top,#151515,#040404);background-image:-webkit-gradient(linear,0 0,0 100%,from(#151515),to(#040404));background-image:-webkit-linear-gradient(top,#151515,#040404);background-image:-o-linear-gradient(top,#151515,#040404);background-image:linear-gradient(to bottom,#151515,#040404);background-repeat:repeat-x;border-color:#040404 #040404 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff151515',endColorstr='#ff040404',GradientType=0);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false)}.navbar-inverse 
.btn-navbar:hover,.navbar-inverse .btn-navbar:focus,.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active,.navbar-inverse .btn-navbar.disabled,.navbar-inverse .btn-navbar[disabled]{color:#fff;background-color:#040404;*background-color:#000}.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active{background-color:#000 \9}.breadcrumb{padding:8px 15px;margin:0 0 20px;list-style:none;background-color:#f5f5f5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.breadcrumb>li{display:inline-block;*display:inline;text-shadow:0 1px 0 #fff;*zoom:1}.breadcrumb>li>.divider{padding:0 5px;color:#ccc}.breadcrumb>.active{color:#999}.pagination{margin:20px 0}.pagination ul{display:inline-block;*display:inline;margin-bottom:0;margin-left:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;*zoom:1;-webkit-box-shadow:0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:0 1px 2px rgba(0,0,0,0.05);box-shadow:0 1px 2px rgba(0,0,0,0.05)}.pagination ul>li{display:inline}.pagination ul>li>a,.pagination ul>li>span{float:left;padding:4px 12px;line-height:20px;text-decoration:none;background-color:#fff;border:1px solid #ddd;border-left-width:0}.pagination ul>li>a:hover,.pagination ul>li>a:focus,.pagination ul>.active>a,.pagination ul>.active>span{background-color:#f5f5f5}.pagination ul>.active>a,.pagination ul>.active>span{color:#999;cursor:default}.pagination ul>.disabled>span,.pagination ul>.disabled>a,.pagination ul>.disabled>a:hover,.pagination ul>.disabled>a:focus{color:#999;cursor:default;background-color:transparent}.pagination ul>li:first-child>a,.pagination ul>li:first-child>span{border-left-width:1px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-bottomleft:4px;-moz-border-radius-topleft:4px}.pagination ul>li:last-child>a,.pagination ul>li:last-child>span{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-bottomright:4px}.pagination-centered{text-align:center}.pagination-right{text-align:right}.pagination-large ul>li>a,.pagination-large ul>li>span{padding:11px 19px;font-size:17.5px}.pagination-large ul>li:first-child>a,.pagination-large ul>li:first-child>span{-webkit-border-bottom-left-radius:6px;border-bottom-left-radius:6px;-webkit-border-top-left-radius:6px;border-top-left-radius:6px;-moz-border-radius-bottomleft:6px;-moz-border-radius-topleft:6px}.pagination-large ul>li:last-child>a,.pagination-large ul>li:last-child>span{-webkit-border-top-right-radius:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;border-bottom-right-radius:6px;-moz-border-radius-topright:6px;-moz-border-radius-bottomright:6px}.pagination-mini ul>li:first-child>a,.pagination-small ul>li:first-child>a,.pagination-mini ul>li:first-child>span,.pagination-small ul>li:first-child>span{-webkit-border-bottom-left-radius:3px;border-bottom-left-radius:3px;-webkit-border-top-left-radius:3px;border-top-left-radius:3px;-moz-border-radius-bottomleft:3px;-moz-border-radius-topleft:3px}.pagination-mini ul>li:last-child>a,.pagination-small ul>li:last-child>a,.pagination-mini ul>li:last-child>span,.pagination-small 
ul>li:last-child>span{-webkit-border-top-right-radius:3px;border-top-right-radius:3px;-webkit-border-bottom-right-radius:3px;border-bottom-right-radius:3px;-moz-border-radius-topright:3px;-moz-border-radius-bottomright:3px}.pagination-small ul>li>a,.pagination-small ul>li>span{padding:2px 10px;font-size:11.9px}.pagination-mini ul>li>a,.pagination-mini ul>li>span{padding:0 6px;font-size:10.5px}.pager{margin:20px 0;text-align:center;list-style:none;*zoom:1}.pager:before,.pager:after{display:table;line-height:0;content:""}.pager:after{clear:both}.pager li{display:inline}.pager li>a,.pager li>span{display:inline-block;padding:5px 14px;background-color:#fff;border:1px solid #ddd;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.pager li>a:hover,.pager li>a:focus{text-decoration:none;background-color:#f5f5f5}.pager .next>a,.pager .next>span{float:right}.pager .previous>a,.pager .previous>span{float:left}.pager .disabled>a,.pager .disabled>a:hover,.pager .disabled>a:focus,.pager .disabled>span{color:#999;cursor:default;background-color:#fff}.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000}.modal-backdrop.fade{opacity:0}.modal-backdrop,.modal-backdrop.fade.in{opacity:.8;filter:alpha(opacity=80)}.modal{position:fixed;top:10%;left:50%;z-index:1050;width:560px;margin-left:-280px;background-color:#fff;border:1px solid #999;border:1px solid rgba(0,0,0,0.3);*border:1px solid #999;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;outline:0;-webkit-box-shadow:0 3px 7px rgba(0,0,0,0.3);-moz-box-shadow:0 3px 7px rgba(0,0,0,0.3);box-shadow:0 3px 7px rgba(0,0,0,0.3);-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box}.modal.fade{top:-25%;-webkit-transition:opacity .3s linear,top .3s ease-out;-moz-transition:opacity .3s linear,top .3s ease-out;-o-transition:opacity .3s linear,top .3s ease-out;transition:opacity .3s linear,top .3s ease-out}.modal.fade.in{top:10%}.modal-header{padding:9px 15px;border-bottom:1px solid #eee}.modal-header .close{margin-top:2px}.modal-header h3{margin:0;line-height:30px}.modal-body{position:relative;max-height:400px;padding:15px;overflow-y:auto}.modal-form{margin-bottom:0}.modal-footer{padding:14px 15px 15px;margin-bottom:0;text-align:right;background-color:#f5f5f5;border-top:1px solid #ddd;-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;*zoom:1;-webkit-box-shadow:inset 0 1px 0 #fff;-moz-box-shadow:inset 0 1px 0 #fff;box-shadow:inset 0 1px 0 #fff}.modal-footer:before,.modal-footer:after{display:table;line-height:0;content:""}.modal-footer:after{clear:both}.modal-footer .btn+.btn{margin-bottom:0;margin-left:5px}.modal-footer .btn-group .btn+.btn{margin-left:-1px}.modal-footer .btn-block+.btn-block{margin-left:0}.tooltip{position:absolute;z-index:1030;display:block;font-size:11px;line-height:1.4;opacity:0;filter:alpha(opacity=0);visibility:visible}.tooltip.in{opacity:.8;filter:alpha(opacity=80)}.tooltip.top{padding:5px 0;margin-top:-3px}.tooltip.right{padding:0 5px;margin-left:3px}.tooltip.bottom{padding:5px 0;margin-top:3px}.tooltip.left{padding:0 5px;margin-left:-3px}.tooltip-inner{max-width:200px;padding:8px;color:#fff;text-align:center;text-decoration:none;background-color:#000;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.tooltip-arrow{position:absolute;width:0;height:0;border-color:transparent;border-style:solid}.tooltip.top 
.tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-top-color:#000;border-width:5px 5px 0}.tooltip.right .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-right-color:#000;border-width:5px 5px 5px 0}.tooltip.left .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-left-color:#000;border-width:5px 0 5px 5px}.tooltip.bottom .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-bottom-color:#000;border-width:0 5px 5px}.popover{position:absolute;top:0;left:0;z-index:1010;display:none;max-width:276px;padding:1px;text-align:left;white-space:normal;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.popover.top{margin-top:-10px}.popover.right{margin-left:10px}.popover.bottom{margin-top:10px}.popover.left{margin-left:-10px}.popover-title{padding:8px 14px;margin:0;font-size:14px;font-weight:normal;line-height:18px;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;-webkit-border-radius:5px 5px 0 0;-moz-border-radius:5px 5px 0 0;border-radius:5px 5px 0 0}.popover-title:empty{display:none}.popover-content{padding:9px 14px}.popover .arrow,.popover .arrow:after{position:absolute;display:block;width:0;height:0;border-color:transparent;border-style:solid}.popover .arrow{border-width:11px}.popover .arrow:after{border-width:10px;content:""}.popover.top .arrow{bottom:-11px;left:50%;margin-left:-11px;border-top-color:#999;border-top-color:rgba(0,0,0,0.25);border-bottom-width:0}.popover.top .arrow:after{bottom:1px;margin-left:-10px;border-top-color:#fff;border-bottom-width:0}.popover.right .arrow{top:50%;left:-11px;margin-top:-11px;border-right-color:#999;border-right-color:rgba(0,0,0,0.25);border-left-width:0}.popover.right .arrow:after{bottom:-10px;left:1px;border-right-color:#fff;border-left-width:0}.popover.bottom .arrow{top:-11px;left:50%;margin-left:-11px;border-bottom-color:#999;border-bottom-color:rgba(0,0,0,0.25);border-top-width:0}.popover.bottom .arrow:after{top:1px;margin-left:-10px;border-bottom-color:#fff;border-top-width:0}.popover.left .arrow{top:50%;right:-11px;margin-top:-11px;border-left-color:#999;border-left-color:rgba(0,0,0,0.25);border-right-width:0}.popover.left .arrow:after{right:1px;bottom:-10px;border-left-color:#fff;border-right-width:0}.thumbnails{margin-left:-20px;list-style:none;*zoom:1}.thumbnails:before,.thumbnails:after{display:table;line-height:0;content:""}.thumbnails:after{clear:both}.row-fluid .thumbnails{margin-left:0}.thumbnails>li{float:left;margin-bottom:20px;margin-left:20px}.thumbnail{display:block;padding:4px;line-height:20px;border:1px solid #ddd;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.055);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.055);box-shadow:0 1px 3px rgba(0,0,0,0.055);-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;-o-transition:all .2s ease-in-out;transition:all .2s ease-in-out}a.thumbnail:hover,a.thumbnail:focus{border-color:#08c;-webkit-box-shadow:0 1px 4px rgba(0,105,214,0.25);-moz-box-shadow:0 1px 4px rgba(0,105,214,0.25);box-shadow:0 1px 4px rgba(0,105,214,0.25)}.thumbnail>img{display:block;max-width:100%;margin-right:auto;margin-left:auto}.thumbnail 
.caption{padding:9px;color:#555}.media,.media-body{overflow:hidden;*overflow:visible;zoom:1}.media,.media .media{margin-top:15px}.media:first-child{margin-top:0}.media-object{display:block}.media-heading{margin:0 0 5px}.media>.pull-left{margin-right:10px}.media>.pull-right{margin-left:10px}.media-list{margin-left:0;list-style:none}.label,.badge{display:inline-block;padding:2px 4px;font-size:11.844px;font-weight:bold;line-height:14px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);white-space:nowrap;vertical-align:baseline;background-color:#999}.label{-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.badge{padding-right:9px;padding-left:9px;-webkit-border-radius:9px;-moz-border-radius:9px;border-radius:9px}.label:empty,.badge:empty{display:none}a.label:hover,a.label:focus,a.badge:hover,a.badge:focus{color:#fff;text-decoration:none;cursor:pointer}.label-important,.badge-important{background-color:#b94a48}.label-important[href],.badge-important[href]{background-color:#953b39}.label-warning,.badge-warning{background-color:#f89406}.label-warning[href],.badge-warning[href]{background-color:#c67605}.label-success,.badge-success{background-color:#468847}.label-success[href],.badge-success[href]{background-color:#356635}.label-info,.badge-info{background-color:#3a87ad}.label-info[href],.badge-info[href]{background-color:#2d6987}.label-inverse,.badge-inverse{background-color:#333}.label-inverse[href],.badge-inverse[href]{background-color:#1a1a1a}.btn .label,.btn .badge{position:relative;top:-1px}.btn-mini .label,.btn-mini .badge{top:0}@-webkit-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-moz-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-ms-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-o-keyframes progress-bar-stripes{from{background-position:0 0}to{background-position:40px 0}}@keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}.progress{height:20px;margin-bottom:20px;overflow:hidden;background-color:#f7f7f7;background-image:-moz-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#f5f5f5),to(#f9f9f9));background-image:-webkit-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-o-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:linear-gradient(to bottom,#f5f5f5,#f9f9f9);background-repeat:repeat-x;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff5f5f5',endColorstr='#fff9f9f9',GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1)}.progress .bar{float:left;width:0;height:100%;font-size:12px;color:#fff;text-align:center;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e90d2;background-image:-moz-linear-gradient(top,#149bdf,#0480be);background-image:-webkit-gradient(linear,0 0,0 100%,from(#149bdf),to(#0480be));background-image:-webkit-linear-gradient(top,#149bdf,#0480be);background-image:-o-linear-gradient(top,#149bdf,#0480be);background-image:linear-gradient(to bottom,#149bdf,#0480be);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff149bdf',endColorstr='#ff0480be',GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 0 -1px 0 
rgba(0,0,0,0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;-webkit-transition:width .6s ease;-moz-transition:width .6s ease;-o-transition:width .6s ease;transition:width .6s ease}.progress .bar+.bar{-webkit-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15)}.progress-striped .bar{background-color:#149bdf;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);-webkit-background-size:40px 40px;-moz-background-size:40px 40px;-o-background-size:40px 40px;background-size:40px 40px}.progress.active .bar{-webkit-animation:progress-bar-stripes 2s linear infinite;-moz-animation:progress-bar-stripes 2s linear infinite;-ms-animation:progress-bar-stripes 2s linear infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite}.progress-danger .bar,.progress .bar-danger{background-color:#dd514c;background-image:-moz-linear-gradient(top,#ee5f5b,#c43c35);background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#c43c35));background-image:-webkit-linear-gradient(top,#ee5f5b,#c43c35);background-image:-o-linear-gradient(top,#ee5f5b,#c43c35);background-image:linear-gradient(to bottom,#ee5f5b,#c43c35);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffc43c35',GradientType=0)}.progress-danger.progress-striped .bar,.progress-striped .bar-danger{background-color:#ee5f5b;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 
75%,transparent 75%,transparent)}.progress-success .bar,.progress .bar-success{background-color:#5eb95e;background-image:-moz-linear-gradient(top,#62c462,#57a957);background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#57a957));background-image:-webkit-linear-gradient(top,#62c462,#57a957);background-image:-o-linear-gradient(top,#62c462,#57a957);background-image:linear-gradient(to bottom,#62c462,#57a957);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff57a957',GradientType=0)}.progress-success.progress-striped .bar,.progress-striped .bar-success{background-color:#62c462;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-info .bar,.progress .bar-info{background-color:#4bb1cf;background-image:-moz-linear-gradient(top,#5bc0de,#339bb9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#339bb9));background-image:-webkit-linear-gradient(top,#5bc0de,#339bb9);background-image:-o-linear-gradient(top,#5bc0de,#339bb9);background-image:linear-gradient(to bottom,#5bc0de,#339bb9);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff339bb9',GradientType=0)}.progress-info.progress-striped .bar,.progress-striped .bar-info{background-color:#5bc0de;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-warning .bar,.progress .bar-warning{background-color:#faa732;background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-image:-webkit-gradient(linear,0 0,0 
100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0)}.progress-warning.progress-striped .bar,.progress-striped .bar-warning{background-color:#fbb450;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.accordion{margin-bottom:20px}.accordion-group{margin-bottom:2px;border:1px solid #e5e5e5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.accordion-heading{border-bottom:0}.accordion-heading .accordion-toggle{display:block;padding:8px 15px}.accordion-toggle{cursor:pointer}.accordion-inner{padding:9px 15px;border-top:1px solid #e5e5e5}.carousel{position:relative;margin-bottom:20px;line-height:1}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel-inner>.item{position:relative;display:none;-webkit-transition:.6s ease-in-out left;-moz-transition:.6s ease-in-out left;-o-transition:.6s ease-in-out left;transition:.6s ease-in-out left}.carousel-inner>.item>img,.carousel-inner>.item>a>img{display:block;line-height:1}.carousel-inner>.active,.carousel-inner>.next,.carousel-inner>.prev{display:block}.carousel-inner>.active{left:0}.carousel-inner>.next,.carousel-inner>.prev{position:absolute;top:0;width:100%}.carousel-inner>.next{left:100%}.carousel-inner>.prev{left:-100%}.carousel-inner>.next.left,.carousel-inner>.prev.right{left:0}.carousel-inner>.active.left{left:-100%}.carousel-inner>.active.right{left:100%}.carousel-control{position:absolute;top:40%;left:15px;width:40px;height:40px;margin-top:-20px;font-size:60px;font-weight:100;line-height:30px;color:#fff;text-align:center;background:#222;border:3px solid #fff;-webkit-border-radius:23px;-moz-border-radius:23px;border-radius:23px;opacity:.5;filter:alpha(opacity=50)}.carousel-control.right{right:15px;left:auto}.carousel-control:hover,.carousel-control:focus{color:#fff;text-decoration:none;opacity:.9;filter:alpha(opacity=90)}.carousel-indicators{position:absolute;top:15px;right:15px;z-index:5;margin:0;list-style:none}.carousel-indicators li{display:block;float:left;width:10px;height:10px;margin-left:5px;text-indent:-999px;background-color:#ccc;background-color:rgba(255,255,255,0.25);border-radius:5px}.carousel-indicators 
.active{background-color:#fff}.carousel-caption{position:absolute;right:0;bottom:0;left:0;padding:15px;background:#333;background:rgba(0,0,0,0.75)}.carousel-caption h4,.carousel-caption p{line-height:20px;color:#fff}.carousel-caption h4{margin:0 0 5px}.carousel-caption p{margin-bottom:0}.hero-unit{padding:60px;margin-bottom:30px;font-size:18px;font-weight:200;line-height:30px;color:inherit;background-color:#eee;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.hero-unit h1{margin-bottom:0;font-size:60px;line-height:1;letter-spacing:-1px;color:inherit}.hero-unit li{line-height:30px}.pull-right{float:right}.pull-left{float:left}.hide{display:none}.show{display:block}.invisible{visibility:hidden}.affix{position:fixed}
diff --git a/core/src/main/resources/spark/ui/static/webui.css b/core/src/main/resources/spark/ui/static/webui.css
deleted file mode 100644
index f7537bb766..0000000000
--- a/core/src/main/resources/spark/ui/static/webui.css
+++ /dev/null
@@ -1,49 +0,0 @@
-.navbar .brand {
-  height: 50px;
-  width: 110px;
-  margin-left: 1px;
-  padding: 0;
-}
-
-.version {
-  line-height: 30px;
-  vertical-align: bottom;
-  font-size: 12px;
-  padding: 0;
-  margin: 0;
-  font-weight: bold;
-  color: #777;
-}
-
-.navbar-inner {
-  padding-top: 2px;
-  height: 50px;
-}
-
-.navbar-inner .nav {
-  margin-top: 5px;
-  font-size: 15px;
-}
-
-
-#infolist {
-  margin-left: 400px;
-  margin-top: 14px;
-}
-
-#infolist li {
-  display: inline;
-  list-style-type: none;
-  list-style-position: outside;
-  padding-right: 20px;
-  padding-top: 10px;
-  padding-bottom: 10px;
-}
-
-.progress-cell {
-  width: 134px;
-  border-right: 0;
-  padding: 0;
-  padding-top: 7px;
-  padding-left: 4px;
-}
diff --git a/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala b/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala
new file mode 100644
index 0000000000..f87460039b
--- /dev/null
+++ b/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred
+
+trait SparkHadoopMapRedUtil {
+  def newJobContext(conf: JobConf, jobId: JobID): JobContext = {
+    val klass = firstAvailableClass("org.apache.hadoop.mapred.JobContextImpl", "org.apache.hadoop.mapred.JobContext");
+    val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[org.apache.hadoop.mapreduce.JobID])
+    ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
+  }
+
+  def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = {
+    val klass = firstAvailableClass("org.apache.hadoop.mapred.TaskAttemptContextImpl", "org.apache.hadoop.mapred.TaskAttemptContext")
+    val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[TaskAttemptID])
+    ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
+  }
+
+  def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = {
+    new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, attemptId)
+  }
+
+  private def firstAvailableClass(first: String, second: String): Class[_] = {
+    try {
+      Class.forName(first)
+    } catch {
+      case e: ClassNotFoundException =>
+        Class.forName(second)
+    }
+  }
+}
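The new SparkHadoopMapRedUtil trait lets one Spark build bind to either Hadoop generation: Hadoop 1 ships org.apache.hadoop.mapred.JobContext and TaskAttemptContext as instantiable classes, while Hadoop 2 moved the implementations to JobContextImpl and TaskAttemptContextImpl, so the trait loads whichever name resolves on the classpath and constructs it reflectively. A minimal, self-contained sketch of that load-first-available pattern follows; the names it probes are JDK classes picked so the demo runs anywhere, not names from the patch.

    object FirstAvailableSketch {
      // Try to load the preferred class; fall back to the alternative if it is absent.
      def firstAvailableClass(first: String, second: String): Class[_] =
        try {
          Class.forName(first)
        } catch {
          case _: ClassNotFoundException => Class.forName(second)
        }

      def main(args: Array[String]): Unit = {
        // "java.util.NoSuchClass" does not exist, so this resolves to java.util.ArrayList.
        val klass = firstAvailableClass("java.util.NoSuchClass", "java.util.ArrayList")
        println(klass.getName)  // prints: java.util.ArrayList
      }
    }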
diff --git a/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala
new file mode 100644
index 0000000000..93180307fa
--- /dev/null
+++ b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce
+
+import org.apache.hadoop.conf.Configuration
+import java.lang.{Integer => JInteger, Boolean => JBoolean}
+
+trait SparkHadoopMapReduceUtil {
+  def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
+    val klass = firstAvailableClass(
+        "org.apache.hadoop.mapreduce.task.JobContextImpl",  // hadoop2, hadoop2-yarn
+        "org.apache.hadoop.mapreduce.JobContext")           // hadoop1
+    val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
+    ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
+  }
+
+  def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
+    val klass = firstAvailableClass(
+        "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl",  // hadoop2, hadoop2-yarn
+        "org.apache.hadoop.mapreduce.TaskAttemptContext")           // hadoop1
+    val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
+    ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
+  }
+
+  def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = {
+    val klass = Class.forName("org.apache.hadoop.mapreduce.TaskAttemptID");
+    try {
+      // first, attempt to use the old-style constructor that takes a boolean isMap (not available in YARN)
+      val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], classOf[Boolean],
+        classOf[Int], classOf[Int])
+      ctor.newInstance(jtIdentifier, new JInteger(jobId), new JBoolean(isMap), new JInteger(taskId), new
+        JInteger(attemptId)).asInstanceOf[TaskAttemptID]
+    } catch {
+      case exc: NoSuchMethodException => {
+        // failed, look for the new ctor that takes a TaskType (not available in 1.x)
+        val taskTypeClass = Class.forName("org.apache.hadoop.mapreduce.TaskType").asInstanceOf[Class[Enum[_]]]
+        val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke(taskTypeClass, if(isMap) "MAP" else "REDUCE")
+        val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass,
+          classOf[Int], classOf[Int])
+        ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId), new
+          JInteger(attemptId)).asInstanceOf[TaskAttemptID]
+      }
+    }
+  }
+
+  private def firstAvailableClass(first: String, second: String): Class[_] = {
+    try {
+      Class.forName(first)
+    } catch {
+      case e: ClassNotFoundException =>
+        Class.forName(second)
+    }
+  }
+}
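newTaskAttemptID goes one step beyond class lookup: the TaskAttemptID constructor itself changed shape between Hadoop generations (a boolean isMap in 1.x, a TaskType enum under YARN), so the trait probes for one constructor signature with getDeclaredConstructor and falls back on NoSuchMethodException. A sketch of the same probe-and-fall-back idea, run against java.lang.StringBuilder so it is verifiable without Hadoop on the classpath; this is an illustration of the technique, not the Hadoop API.

    object CtorProbeSketch {
      // Try a preferred constructor signature; fall back to another when it is missing.
      def newBuilder(seed: String): java.lang.StringBuilder = {
        val klass = Class.forName("java.lang.StringBuilder")
        val instance =
          try {
            klass.getDeclaredConstructor(classOf[CharSequence]).newInstance(seed)
          } catch {
            case _: NoSuchMethodException =>
              klass.getDeclaredConstructor(classOf[String]).newInstance(seed)
          }
        instance.asInstanceOf[java.lang.StringBuilder]
      }

      def main(args: Array[String]): Unit =
        println(newBuilder("probe").append('!'))  // prints: probe!
    }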
diff --git a/core/src/main/scala/spark/Accumulators.scala b/core/src/main/scala/org/apache/spark/Accumulators.scala
index 6ff92ce833..6e922a612a 100644
--- a/core/src/main/scala/spark/Accumulators.scala
+++ b/core/src/main/scala/org/apache/spark/Accumulators.scala
@@ -15,12 +15,13 @@
  * limitations under the License.
  */
 
-package spark
+package org.apache.spark
 
 import java.io._
 
 import scala.collection.mutable.Map
 import scala.collection.generic.Growable
+import org.apache.spark.serializer.JavaSerializer
 
 /**
  * A datatype that can be accumulated, i.e. has an commutative and associative "add" operation,
@@ -28,7 +29,7 @@ import scala.collection.generic.Growable
  *
  * You must define how to add data, and how to merge two of these together. For some datatypes,
  * such as a counter, these might be the same operation. In that case, you can use the simpler
- * [[spark.Accumulator]]. They won't always be the same, though -- e.g., imagine you are
+ * [[org.apache.spark.Accumulator]]. They won't always be the same, though -- e.g., imagine you are
  * accumulating a set. You will add items to the set, and you will union two sets together.
  *
  * @param initialValue initial value of accumulator
@@ -176,7 +177,7 @@ class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Ser
   def zero(initialValue: R): R = {
     // We need to clone initialValue, but it's hard to specify that R should also be Cloneable.
     // Instead we'll serialize it to a buffer and load it back.
-    val ser = (new spark.JavaSerializer).newInstance()
+    val ser = new JavaSerializer().newInstance()
     val copy = ser.deserialize[R](ser.serialize(initialValue))
     copy.clear()   // In case it contained stuff
     copy
@@ -184,7 +185,7 @@
 }
 
 /**
- * A simpler value of [[spark.Accumulable]] where the result type being accumulated is the same
+ * A simpler value of [[org.apache.spark.Accumulable]] where the result type being accumulated is the same
  * as the types of elements being merged.
  *
  * @param initialValue initial value of accumulator
@@ -195,7 +196,7 @@ class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T])
   extends Accumulable[T,T](initialValue, param)
 
 /**
- * A simpler version of [[spark.AccumulableParam]] where the only datatype you can add in is the same type
+ * A simpler version of [[org.apache.spark.AccumulableParam]] where the only datatype you can add in is the same type
  * as the accumulated value. An implicit AccumulatorParam object needs to be available when you create
  * Accumulators of a specific type.
  *
diff --git a/core/src/main/scala/spark/Aggregator.scala b/core/src/main/scala/org/apache/spark/Aggregator.scala
index 136b4da61e..3ef402926e 100644
--- a/core/src/main/scala/spark/Aggregator.scala
+++ b/core/src/main/scala/org/apache/spark/Aggregator.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package spark
+package org.apache.spark
 
 import java.util.{HashMap => JHashMap}
 
@@ -28,18 +28,18 @@ import scala.collection.JavaConversions._
  * @param mergeCombiners function to merge outputs from multiple mergeValue function.
  */
 case class Aggregator[K, V, C] (
-    val createCombiner: V => C,
-    val mergeValue: (C, V) => C,
-    val mergeCombiners: (C, C) => C) {
+    createCombiner: V => C,
+    mergeValue: (C, V) => C,
+    mergeCombiners: (C, C) => C) {
 
-  def combineValuesByKey(iter: Iterator[(K, V)]) : Iterator[(K, C)] = {
+  def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]) : Iterator[(K, C)] = {
     val combiners = new JHashMap[K, C]
-    for ((k, v) <- iter) {
-      val oldC = combiners.get(k)
+    for (kv <- iter) {
+      val oldC = combiners.get(kv._1)
       if (oldC == null) {
-        combiners.put(k, createCombiner(v))
+        combiners.put(kv._1, createCombiner(kv._2))
       } else {
-        combiners.put(k, mergeValue(oldC, v))
+        combiners.put(kv._1, mergeValue(oldC, kv._2))
      }
     }
     combiners.iterator
@@ -47,7 +47,7 @@ case class Aggregator[K, V, C] (
 
   def combineCombinersByKey(iter: Iterator[(K, C)]) : Iterator[(K, C)] = {
     val combiners = new JHashMap[K, C]
-    for ((k, c) <- iter) {
+    iter.foreach { case(k, c) =>
       val oldC = combiners.get(k)
       if (oldC == null) {
         combiners.put(k, c)
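The Aggregator change is purely at the type level: combineValuesByKey now accepts any Product2[K, V] (which Tuple2 already implements) instead of exactly tuples, and reads fields through _1/_2 rather than destructuring, so callers can pass reusable pair objects without allocating a tuple per record. A standalone word-count sketch of what the three Aggregator functions mean, under the same access pattern; this is an illustration, not Spark's class.

    object AggregatorSketch {
      def main(args: Array[String]): Unit = {
        val createCombiner: Int => Int    = v => v  // combiner from the first value seen for a key
        val mergeValue: (Int, Int) => Int = _ + _   // fold one more value into an existing combiner
        // mergeCombiners ((C, C) => C) would merge per-partition results; with a
        // single partition here it is never invoked.

        val pairs = Iterator("a" -> 1, "b" -> 1, "a" -> 1)
        val combiners = scala.collection.mutable.HashMap.empty[String, Int]
        for (kv <- pairs) {                         // same _1/_2 access as the patched loop
          combiners(kv._1) = combiners.get(kv._1) match {
            case None    => createCombiner(kv._2)
            case Some(c) => mergeValue(c, kv._2)
          }
        }
        println(combiners)  // HashMap(a -> 2, b -> 1)
      }
    }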
diff --git a/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala
index 8f6953b1f5..908ff56a6b 100644
--- a/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala
+++ b/core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala
@@ -15,21 +15,22 @@
  * limitations under the License.
  */
 
-package spark
+package org.apache.spark
 
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.HashMap
 
-import spark.executor.{ShuffleReadMetrics, TaskMetrics}
-import spark.serializer.Serializer
-import spark.storage.BlockManagerId
-import spark.util.CompletionIterator
+import org.apache.spark.executor.{ShuffleReadMetrics, TaskMetrics}
+import org.apache.spark.serializer.Serializer
+import org.apache.spark.storage.BlockManagerId
+import org.apache.spark.util.CompletionIterator
 
 private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Logging {
 
-  override def fetch[K, V](
-      shuffleId: Int, reduceId: Int, metrics: TaskMetrics, serializer: Serializer) = {
+  override def fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, serializer: Serializer)
+    : Iterator[T] =
+  {
     logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId))
     val blockManager = SparkEnv.get.blockManager
@@ -49,12 +50,12 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin
       (address, splits.map(s => ("shuffle_%d_%d_%d".format(shuffleId, s._1, reduceId), s._2)))
     }
 
-    def unpackBlock(blockPair: (String, Option[Iterator[Any]])) : Iterator[(K, V)] = {
+    def unpackBlock(blockPair: (String, Option[Iterator[Any]])) : Iterator[T] = {
       val blockId = blockPair._1
       val blockOption = blockPair._2
       blockOption match {
         case Some(block) => {
-          block.asInstanceOf[Iterator[(K, V)]]
+          block.asInstanceOf[Iterator[T]]
         }
         case None => {
           val regex = "shuffle_([0-9]*)_([0-9]*)_([0-9]*)".r
@@ -73,7 +74,7 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin
 
     val blockFetcherItr = blockManager.getMultiple(blocksByAddress, serializer)
     val itr = blockFetcherItr.flatMap(unpackBlock)
-    CompletionIterator[(K,V), Iterator[(K,V)]](itr, {
+    CompletionIterator[T, Iterator[T]](itr, {
       val shuffleMetrics = new ShuffleReadMetrics
       shuffleMetrics.shuffleFinishTime = System.currentTimeMillis
      shuffleMetrics.remoteFetchTime = blockFetcherItr.remoteFetchTime
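Note that fetch wraps the fetched-block iterator in a CompletionIterator so that shuffle-read metrics are recorded exactly once, at the moment the consumer exhausts the iterator rather than when fetch returns. A simplified stand-in for that wrapper to show the mechanism; this is not Spark's CompletionIterator implementation.

    // Runs `completion` once, when the wrapped iterator is exhausted.
    class OnCompletionIterator[A](sub: Iterator[A], completion: => Unit) extends Iterator[A] {
      private var fired = false
      def hasNext: Boolean = {
        val more = sub.hasNext
        if (!more && !fired) { fired = true; completion }
        more
      }
      def next(): A = sub.next()
    }

    object OnCompletionDemo {
      def main(args: Array[String]): Unit = {
        val itr = new OnCompletionIterator(Iterator(1, 2, 3), println("metrics recorded"))
        itr.foreach(println)  // prints 1, 2, 3, then "metrics recorded"
      }
    }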
- * @param shuffleId the shuffle id * @param rdd the parent RDD * @param partitioner partitioner used to partition the shuffle output * @param serializerClass class name of the serializer to use */ class ShuffleDependency[K, V]( - @transient rdd: RDD[(K, V)], + @transient rdd: RDD[_ <: Product2[K, V]], val partitioner: Partitioner, val serializerClass: String = null) - extends Dependency(rdd) { + extends Dependency(rdd.asInstanceOf[RDD[Product2[K, V]]]) { val shuffleId: Int = rdd.context.newShuffleId() } diff --git a/core/src/main/scala/spark/FetchFailedException.scala b/core/src/main/scala/org/apache/spark/FetchFailedException.scala index a2dae6cae9..d242047502 100644 --- a/core/src/main/scala/spark/FetchFailedException.scala +++ b/core/src/main/scala/org/apache/spark/FetchFailedException.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark +package org.apache.spark -import spark.storage.BlockManagerId +import org.apache.spark.storage.BlockManagerId private[spark] class FetchFailedException( taskEndReason: TaskEndReason, diff --git a/core/src/main/scala/spark/HttpFileServer.scala b/core/src/main/scala/org/apache/spark/HttpFileServer.scala index a13a7a2859..ad1ee20045 100644 --- a/core/src/main/scala/spark/HttpFileServer.scala +++ b/core/src/main/scala/org/apache/spark/HttpFileServer.scala @@ -15,10 +15,11 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io.{File} import com.google.common.io.Files +import org.apache.spark.util.Utils private[spark] class HttpFileServer extends Logging { diff --git a/core/src/main/scala/spark/HttpServer.scala b/core/src/main/scala/org/apache/spark/HttpServer.scala index c9dffbc631..cdfc9dd54e 100644 --- a/core/src/main/scala/spark/HttpServer.scala +++ b/core/src/main/scala/org/apache/spark/HttpServer.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io.File import java.net.InetAddress @@ -26,6 +26,7 @@ import org.eclipse.jetty.server.handler.DefaultHandler import org.eclipse.jetty.server.handler.HandlerList import org.eclipse.jetty.server.handler.ResourceHandler import org.eclipse.jetty.util.thread.QueuedThreadPool +import org.apache.spark.util.Utils /** * Exception type thrown by HttpServer when it is in the wrong state for an operation. diff --git a/core/src/main/scala/spark/Logging.scala b/core/src/main/scala/org/apache/spark/Logging.scala index 79b0362830..6a973ea495 100644 --- a/core/src/main/scala/spark/Logging.scala +++ b/core/src/main/scala/org/apache/spark/Logging.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.slf4j.Logger import org.slf4j.LoggerFactory diff --git a/core/src/main/scala/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 2c417e31db..ae7cf2a893 100644 --- a/core/src/main/scala/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -15,7 +15,7 @@ * limitations under the License. 
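Several signatures in this commit widen RDD[(K, V)] to RDD[_ <: Product2[K, V]]. Tuple2 extends Product2, so existing pair RDDs still type-check, while other pair-like classes are now admitted as well. A standalone sketch of the widening, with a hypothetical reusable pair class (Spark has a similar MutablePair utility, but this one is made up for illustration):

// ...and any other Product2 implementation, e.g. a reusable mutable pair.
class MutablePairSketch[K, V](var k: K, var v: V) extends Product2[K, V] {
  def _1 = k
  def _2 = v
  def canEqual(that: Any) = that.isInstanceOf[MutablePairSketch[_, _]]
}

object Product2Demo {
  // A method written against Product2 accepts ordinary tuples...
  def keys[K, V](iter: Iterator[_ <: Product2[K, V]]): Iterator[K] =
    iter.map(_._1)

  def main(args: Array[String]) {
    println(keys(Iterator(("a", 1), ("b", 2))).toList)            // List(a, b)
    println(keys(Iterator(new MutablePairSketch("c", 3))).toList) // List(c)
  }
}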
*/ -package spark +package org.apache.spark import java.io._ import java.util.zip.{GZIPInputStream, GZIPOutputStream} @@ -30,9 +30,9 @@ import akka.remote._ import akka.util.Duration -import spark.scheduler.MapStatus -import spark.storage.BlockManagerId -import spark.util.{MetadataCleaner, TimeStampedHashMap} +import org.apache.spark.scheduler.MapStatus +import org.apache.spark.storage.BlockManagerId +import org.apache.spark.util.{Utils, MetadataCleaner, TimeStampedHashMap} private[spark] sealed trait MapOutputTrackerMessage @@ -64,11 +64,11 @@ private[spark] class MapOutputTracker extends Logging { // Incremented every time a fetch fails so that client nodes know to clear // their cache of map output locations if this happens. - private var generation: Long = 0 - private val generationLock = new java.lang.Object + private var epoch: Long = 0 + private val epochLock = new java.lang.Object // Cache a serialized version of the output statuses for each shuffle to send them out faster - var cacheGeneration = generation + var cacheEpoch = epoch private val cachedSerializedStatuses = new TimeStampedHashMap[Int, Array[Byte]] val metadataCleaner = new MetadataCleaner("MapOutputTracker", this.cleanup) @@ -108,10 +108,10 @@ private[spark] class MapOutputTracker extends Logging { def registerMapOutputs( shuffleId: Int, statuses: Array[MapStatus], - changeGeneration: Boolean = false) { + changeEpoch: Boolean = false) { mapStatuses.put(shuffleId, Array[MapStatus]() ++ statuses) - if (changeGeneration) { - incrementGeneration() + if (changeEpoch) { + incrementEpoch() } } @@ -124,7 +124,7 @@ private[spark] class MapOutputTracker extends Logging { array(mapId) = null } } - incrementGeneration() + incrementEpoch() } else { throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID") } @@ -206,58 +206,58 @@ private[spark] class MapOutputTracker extends Logging { trackerActor = null } - // Called on master to increment the generation number - def incrementGeneration() { - generationLock.synchronized { - generation += 1 - logDebug("Increasing generation to " + generation) + // Called on master to increment the epoch number + def incrementEpoch() { + epochLock.synchronized { + epoch += 1 + logDebug("Increasing epoch to " + epoch) } } - // Called on master or workers to get current generation number - def getGeneration: Long = { - generationLock.synchronized { - return generation + // Called on master or workers to get current epoch number + def getEpoch: Long = { + epochLock.synchronized { + return epoch } } - // Called on workers to update the generation number, potentially clearing old outputs - // because of a fetch failure. (Each Mesos task calls this with the latest generation + // Called on workers to update the epoch number, potentially clearing old outputs + // because of a fetch failure. (Each worker task calls this with the latest epoch // number on the master at the time it was created.) 
- def updateGeneration(newGen: Long) { - generationLock.synchronized { - if (newGen > generation) { - logInfo("Updating generation to " + newGen + " and clearing cache") + def updateEpoch(newEpoch: Long) { + epochLock.synchronized { + if (newEpoch > epoch) { + logInfo("Updating epoch to " + newEpoch + " and clearing cache") // mapStatuses = new TimeStampedHashMap[Int, Array[MapStatus]] mapStatuses.clear() - generation = newGen + epoch = newEpoch } } } def getSerializedLocations(shuffleId: Int): Array[Byte] = { var statuses: Array[MapStatus] = null - var generationGotten: Long = -1 - generationLock.synchronized { - if (generation > cacheGeneration) { + var epochGotten: Long = -1 + epochLock.synchronized { + if (epoch > cacheEpoch) { cachedSerializedStatuses.clear() - cacheGeneration = generation + cacheEpoch = epoch } cachedSerializedStatuses.get(shuffleId) match { case Some(bytes) => return bytes case None => statuses = mapStatuses(shuffleId) - generationGotten = generation + epochGotten = epoch } } // If we got here, we failed to find the serialized locations in the cache, so we pulled // out a snapshot of the locations as "locs"; let's serialize and return that val bytes = serializeStatuses(statuses) logInfo("Size of output statuses for shuffle %d is %d bytes".format(shuffleId, bytes.length)) - // Add them into the table only if the generation hasn't changed while we were working - generationLock.synchronized { - if (generation == generationGotten) { + // Add them into the table only if the epoch hasn't changed while we were working + epochLock.synchronized { + if (epoch == epochGotten) { cachedSerializedStatuses(shuffleId) = bytes } } diff --git a/core/src/main/scala/spark/Partition.scala b/core/src/main/scala/org/apache/spark/Partition.scala index 2a4edcec98..87914a061f 100644 --- a/core/src/main/scala/spark/Partition.scala +++ b/core/src/main/scala/org/apache/spark/Partition.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark /** * A partition of an RDD. diff --git a/core/src/main/scala/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala index 660af70d52..0e2c987a59 100644 --- a/core/src/main/scala/spark/Partitioner.scala +++ b/core/src/main/scala/org/apache/spark/Partitioner.scala @@ -15,7 +15,10 @@ * limitations under the License. */ -package spark +package org.apache.spark + +import org.apache.spark.util.Utils +import org.apache.spark.rdd.RDD /** * An object that defines how the elements in a key-value pair RDD are partitioned by key. @@ -56,7 +59,7 @@ object Partitioner { } /** - * A [[spark.Partitioner]] that implements hash-based partitioning using Java's `Object.hashCode`. + * A [[org.apache.spark.Partitioner]] that implements hash-based partitioning using Java's `Object.hashCode`. 
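The generation-to-epoch rename above is mechanical, but the pattern behind it is worth spelling out: a counter bumped on every fetch failure, a lock protecting it, and a serialized-status cache that is discarded whenever the epoch moves. A stripped-down standalone sketch of that invalidation scheme (toy names, not Spark's classes):

import scala.collection.mutable

// Epoch-guarded cache: entries are valid only for the epoch they were
// built in; any failure bumps the epoch and implicitly drops the cache.
class EpochCacheSketch {
  private var epoch = 0L
  private var cacheEpoch = 0L
  private val epochLock = new java.lang.Object
  private val cache = mutable.HashMap.empty[Int, Array[Byte]]

  // Called on the master when a fetch failure invalidates map output.
  def incrementEpoch() { epochLock.synchronized { epoch += 1 } }

  def getOrCompute(id: Int)(compute: => Array[Byte]): Array[Byte] = {
    var epochGotten = -1L
    epochLock.synchronized {
      if (epoch > cacheEpoch) { cache.clear(); cacheEpoch = epoch }
      cache.get(id) match {
        case Some(bytes) => return bytes
        case None        => epochGotten = epoch
      }
    }
    val bytes = compute   // expensive work happens outside the lock
    epochLock.synchronized {
      // Publish only if no failure raced with the computation.
      if (epoch == epochGotten) cache(id) = bytes
    }
    bytes
  }
}

Computing outside the lock keeps serialization off the critical section, at the cost of occasionally recomputing when an epoch bump races with it, which is exactly the trade-off the surrounding comments describe.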
* * Java arrays have hashCodes that are based on the arrays' identities rather than their contents, * so attempting to partition an RDD[Array[_]] or RDD[(Array[_], _)] using a HashPartitioner will @@ -65,17 +68,9 @@ object Partitioner { class HashPartitioner(partitions: Int) extends Partitioner { def numPartitions = partitions - def getPartition(key: Any): Int = { - if (key == null) { - return 0 - } else { - val mod = key.hashCode % partitions - if (mod < 0) { - mod + partitions - } else { - mod // Guard against negative hash codes - } - } + def getPartition(key: Any): Int = key match { + case null => 0 + case _ => Utils.nonNegativeMod(key.hashCode, numPartitions) } override def equals(other: Any): Boolean = other match { @@ -87,12 +82,12 @@ class HashPartitioner(partitions: Int) extends Partitioner { } /** - * A [[spark.Partitioner]] that partitions sortable records by range into roughly equal ranges. + * A [[org.apache.spark.Partitioner]] that partitions sortable records by range into roughly equal ranges. * Determines the ranges by sampling the RDD passed in. */ class RangePartitioner[K <% Ordered[K]: ClassManifest, V]( partitions: Int, - @transient rdd: RDD[(K,V)], + @transient rdd: RDD[_ <: Product2[K,V]], private val ascending: Boolean = true) extends Partitioner { diff --git a/core/src/main/scala/spark/SerializableWritable.scala b/core/src/main/scala/org/apache/spark/SerializableWritable.scala index 0236611ef9..fdd4c24e23 100644 --- a/core/src/main/scala/spark/SerializableWritable.scala +++ b/core/src/main/scala/org/apache/spark/SerializableWritable.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io._ import org.apache.hadoop.io.ObjectWritable import org.apache.hadoop.io.Writable -import org.apache.hadoop.mapred.JobConf +import org.apache.hadoop.conf.Configuration class SerializableWritable[T <: Writable](@transient var t: T) extends Serializable { def value = t @@ -35,7 +35,7 @@ class SerializableWritable[T <: Writable](@transient var t: T) extends Serializa private def readObject(in: ObjectInputStream) { in.defaultReadObject() val ow = new ObjectWritable() - ow.setConf(new JobConf()) + ow.setConf(new Configuration()) ow.readFields(in) t = ow.get().asInstanceOf[T] } diff --git a/core/src/main/scala/spark/ShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/ShuffleFetcher.scala index dcced035e7..307c383a89 100644 --- a/core/src/main/scala/spark/ShuffleFetcher.scala +++ b/core/src/main/scala/org/apache/spark/ShuffleFetcher.scala @@ -15,19 +15,20 @@ * limitations under the License. */ -package spark +package org.apache.spark -import spark.executor.TaskMetrics -import spark.serializer.Serializer +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.serializer.Serializer private[spark] abstract class ShuffleFetcher { + /** * Fetch the shuffle outputs for a given ShuffleDependency. * @return An iterator over the elements of the fetched shuffle outputs. 
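Both getPartition rewrites in this commit funnel through Utils.nonNegativeMod, because the JVM's % operator preserves the sign of the dividend: a negative hash code would otherwise yield a negative partition index. A standalone sketch of the helper and the failure mode it guards against:

object ModSketch {
  // Equivalent of org.apache.spark.util.Utils.nonNegativeMod:
  // map any Int hash into [0, mod).
  def nonNegativeMod(x: Int, mod: Int): Int = {
    val raw = x % mod
    if (raw < 0) raw + mod else raw
  }

  def main(args: Array[String]) {
    val h = -7
    println(h % 3)                 // -1: % keeps the sign of x
    println(nonNegativeMod(h, 3))  // 2: always a valid partition index
  }
}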
*/ - def fetch[K, V](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, - serializer: Serializer = SparkEnv.get.serializerManager.default): Iterator[(K,V)] + def fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, + serializer: Serializer = SparkEnv.get.serializerManager.default): Iterator[T] /** Stop the fetcher */ def stop() {} diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 46b9935cb7..faf0c2362a 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -15,23 +15,19 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io._ import java.net.URI import java.util.Properties -import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicInteger -import scala.collection.JavaConversions._ import scala.collection.Map import scala.collection.generic.Growable -import scala.collection.mutable.HashMap import scala.collection.JavaConversions._ +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.HashMap import scala.util.DynamicVariable -import scala.collection.mutable.{ConcurrentMap, HashMap} - -import akka.actor.Actor._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -53,20 +49,23 @@ import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.hadoop.mapreduce.{Job => NewHadoopJob} import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFormat} -import org.apache.hadoop.security.UserGroupInformation import org.apache.mesos.MesosNativeLibrary -import spark.deploy.{LocalSparkCluster, SparkHadoopUtil} -import spark.partial.{ApproximateEvaluator, PartialResult} -import spark.rdd.{CheckpointRDD, HadoopRDD, NewHadoopRDD, UnionRDD, ParallelCollectionRDD} -import spark.scheduler.{DAGScheduler, ResultTask, ShuffleMapTask, SparkListener, SplitInfo, Stage, StageInfo, TaskScheduler} -import spark.scheduler.cluster.{StandaloneSchedulerBackend, SparkDeploySchedulerBackend, ClusterScheduler} -import spark.scheduler.local.LocalScheduler -import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} -import spark.storage.{StorageStatus, StorageUtils, RDDInfo} -import spark.util.{MetadataCleaner, TimeStampedHashMap} -import ui.{SparkUI} +import org.apache.spark.deploy.LocalSparkCluster +import org.apache.spark.partial.{ApproximateEvaluator, PartialResult} +import org.apache.spark.rdd._ +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster.{StandaloneSchedulerBackend, SparkDeploySchedulerBackend, + ClusterScheduler, Schedulable, SchedulingMode} +import org.apache.spark.scheduler.local.LocalScheduler +import org.apache.spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} +import org.apache.spark.storage.{StorageUtils, BlockManagerSource} +import org.apache.spark.ui.SparkUI +import org.apache.spark.util.{ClosureCleaner, Utils, MetadataCleaner, TimeStampedHashMap} +import org.apache.spark.scheduler.StageInfo +import org.apache.spark.storage.RDDInfo +import org.apache.spark.storage.StorageStatus /** * Main entry point for Spark functionality. 
A SparkContext represents the connection to a Spark @@ -101,7 +100,7 @@ class SparkContext( System.setProperty("spark.driver.port", "0") } - private val isLocal = (master == "local" || master.startsWith("local[")) + val isLocal = (master == "local" || master.startsWith("local[")) // Create the Spark execution environment (cache, map output tracker, etc) private[spark] val env = SparkEnv.createFromSystemProperties( @@ -124,6 +123,8 @@ class SparkContext( private[spark] val ui = new SparkUI(this) ui.bind() + val startTime = System.currentTimeMillis() + // Add each JAR given through the constructor if (jars != null) { jars.foreach { addJar(_) } @@ -235,7 +236,8 @@ class SparkContext( /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */ val hadoopConfiguration = { - val conf = SparkHadoopUtil.newConfiguration() + val env = SparkEnv.get + val conf = env.hadoop.newConfiguration() // Explicitly check for S3 environment variables if (System.getenv("AWS_ACCESS_KEY_ID") != null && System.getenv("AWS_SECRET_ACCESS_KEY") != null) { conf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) @@ -261,15 +263,35 @@ class SparkContext( localProperties.value = new Properties() } - def addLocalProperties(key: String, value: String) { - if(localProperties.value == null) { + def setLocalProperty(key: String, value: String) { + if (localProperties.value == null) { localProperties.value = new Properties() } - localProperties.value.setProperty(key,value) + if (value == null) { + localProperties.value.remove(key) + } else { + localProperties.value.setProperty(key, value) + } + } + + /** Set a human readable description of the current job. */ + def setJobDescription(value: String) { + setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, value) } + // Post init taskScheduler.postStartHook() + val dagSchedulerSource = new DAGSchedulerSource(this.dagScheduler) + val blockManagerSource = new BlockManagerSource(SparkEnv.get.blockManager) + + def initDriverMetrics() { + SparkEnv.get.metricsSystem.registerSource(dagSchedulerSource) + SparkEnv.get.metricsSystem.registerSource(blockManagerSource) + } + + initDriverMetrics() + // Methods for creating RDDs /** Distribute a local Scala collection to form an RDD. */ @@ -470,14 +492,14 @@ class SparkContext( // Methods for creating shared variables /** - * Create an [[spark.Accumulator]] variable of a given type, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add" values * to using the `+=` method. Only the driver can access the accumulator's `value`. */ def accumulator[T](initialValue: T)(implicit param: AccumulatorParam[T]) = new Accumulator(initialValue, param) /** - * Create an [[spark.Accumulable]] shared variable, to which tasks can add values with `+=`. + * Create an [[org.apache.spark.Accumulable]] shared variable, to which tasks can add values with `+=`. * Only the driver can access the accumuable's `value`. * @tparam T accumulator type * @tparam R type that can be added to the accumulator @@ -497,7 +519,7 @@ class SparkContext( } /** - * Broadcast a read-only variable to the cluster, returning a [[spark.broadcast.Broadcast]] object for + * Broadcast a read-only variable to the cluster, returning a [[org.apache.spark.broadcast.Broadcast]] object for * reading it in distributed functions. The variable will be sent to each cluster only once. 
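setLocalProperty replaces addLocalProperties and gains null-means-remove semantics; the properties live in a thread-local DynamicVariable, so values set by one user thread (a fair-scheduler pool name, a job description) do not leak into jobs submitted from another. A minimal sketch of just that behavior, mirroring the structure above:

import java.util.Properties
import scala.util.DynamicVariable

object LocalPropsSketch {
  // Mirrors SparkContext's thread-local per-job properties.
  private val localProperties = new DynamicVariable[Properties](null)

  def setLocalProperty(key: String, value: String) {
    if (localProperties.value == null) localProperties.value = new Properties()
    if (value == null) localProperties.value.remove(key)  // null now removes
    else localProperties.value.setProperty(key, value)
  }

  def main(args: Array[String]) {
    setLocalProperty("spark.job.description", "nightly ETL")
    println(localProperties.value.getProperty("spark.job.description")) // nightly ETL
    setLocalProperty("spark.job.description", null)
    println(localProperties.value.getProperty("spark.job.description")) // null: removed
  }
}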
*/ def broadcast[T](value: T) = env.broadcastManager.newBroadcast[T](value, isLocal) @@ -525,7 +547,7 @@ class SparkContext( } def addSparkListener(listener: SparkListener) { - dagScheduler.sparkListeners += listener + dagScheduler.addSparkListener(listener) } /** @@ -546,6 +568,12 @@ class SparkContext( StorageUtils.rddInfoFromStorageStatus(getExecutorStorageStatus, this) } + /** + * Returns an immutable map of RDDs that have marked themselves as persistent via cache() call. + * Note that this does not necessarily mean the caching or computation was successful. + */ + def getPersistentRDDs: Map[Int, RDD[_]] = persistentRdds.toMap + def getStageInfo: Map[Stage,StageInfo] = { dagScheduler.stageToInfos } @@ -558,6 +586,28 @@ class SparkContext( } /** + * Return pools for fair scheduler + * TODO(xiajunluan): We should take nested pools into account + */ + def getAllPools: ArrayBuffer[Schedulable] = { + taskScheduler.rootPool.schedulableQueue + } + + /** + * Return the pool associated with the given name, if one exists + */ + def getPoolForName(pool: String): Option[Schedulable] = { + taskScheduler.rootPool.schedulableNameToSchedulable.get(pool) + } + + /** + * Return current scheduling mode + */ + def getSchedulingMode: SchedulingMode.SchedulingMode = { + taskScheduler.schedulingMode + } + + /** * Clear the job's list of files added by `addFile` so that they do not get downloaded to * any new nodes. */ @@ -566,6 +616,16 @@ class SparkContext( } /** + * Gets the locality information associated with the partition in a particular rdd + * @param rdd of interest + * @param partition to be looked up for locality + * @return list of preferred locations for the partition + */ + private [spark] def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = { + dagScheduler.getPreferredLocs(rdd, partition) + } + + /** * Adds a JAR dependency for all tasks to be executed on this SparkContext in the future. * The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported * filesystems), or an HTTP, HTTPS or FTP URI. @@ -575,9 +635,15 @@ class SparkContext( logWarning("null specified as parameter to addJar", new SparkException("null specified as parameter to addJar")) } else { + val env = SparkEnv.get val uri = new URI(path) val key = uri.getScheme match { - case null | "file" => env.httpFileServer.addJar(new File(uri.getPath)) + case null | "file" => + if (env.hadoop.isYarnMode()) { + logWarning("local jar specified as parameter to addJar under Yarn mode") + return + } + env.httpFileServer.addJar(new File(uri.getPath)) case _ => path } addedJars(key) = System.currentTimeMillis @@ -756,8 +822,9 @@ class SparkContext( * prevent accidental overriding of checkpoint files in the existing directory. 
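addJar now special-cases YARN for local files, and the whole decision is a dispatch on the URI scheme. A standalone sketch of that branch structure; isYarnMode and the file-server URL below are stand-ins, not Spark's real values:

import java.io.File
import java.net.URI

object AddJarSketch {
  // Stand-ins for SparkEnv.hadoop.isYarnMode() and httpFileServer.addJar().
  def isYarnMode: Boolean = sys.props.get("SPARK_YARN_MODE").isDefined
  def serveLocalJar(f: File): String = "http://driver.example:33000/jars/" + f.getName

  // Resolve the key under which a jar dependency would be tracked.
  def resolveJarKey(path: String): Option[String] = new URI(path).getScheme match {
    case null | "file" =>
      if (isYarnMode) None  // on YARN the cluster distributes local jars itself
      else Some(serveLocalJar(new File(new URI(path).getPath)))
    case _ => Some(path)    // hdfs://, http:// etc. are usable by executors as-is
  }

  def main(args: Array[String]) {
    println(resolveJarKey("hdfs://nn/libs/app.jar"))  // Some(hdfs://nn/libs/app.jar)
    println(resolveJarKey("file:///tmp/app.jar"))     // Some(http://driver.example...)
  }
}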
*/ def setCheckpointDir(dir: String, useExisting: Boolean = false) { + val env = SparkEnv.get val path = new Path(dir) - val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) + val fs = path.getFileSystem(env.hadoop.newConfiguration()) if (!useExisting) { if (fs.exists(path)) { throw new Exception("Checkpoint directory '" + path + "' already exists.") @@ -774,11 +841,11 @@ class SparkContext( /** Default min number of partitions for Hadoop RDDs when not given by user */ def defaultMinSplits: Int = math.min(defaultParallelism, 2) - private var nextShuffleId = new AtomicInteger(0) + private val nextShuffleId = new AtomicInteger(0) private[spark] def newShuffleId(): Int = nextShuffleId.getAndIncrement() - private var nextRddId = new AtomicInteger(0) + private val nextRddId = new AtomicInteger(0) /** Register a new RDD, returning its RDD ID */ private[spark] def newRddId(): Int = nextRddId.getAndIncrement() @@ -794,6 +861,7 @@ class SparkContext( * various Spark features. */ object SparkContext { + val SPARK_JOB_DESCRIPTION = "spark.job.description" implicit object DoubleAccumulatorParam extends AccumulatorParam[Double] { def addInPlace(t1: Double, t2: Double): Double = t1 + t2 @@ -826,7 +894,7 @@ object SparkContext { implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( rdd: RDD[(K, V)]) = - new OrderedRDDFunctions(rdd) + new OrderedRDDFunctions[K, V, (K, V)](rdd) implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = new DoubleRDDFunctions(rdd) @@ -911,7 +979,6 @@ object SparkContext { } } - /** * A class encapsulating how to convert some type T to Writable. It stores both the Writable class * corresponding to T (e.g. IntWritable for Int) and a function for doing the conversion. @@ -923,3 +990,4 @@ private[spark] class WritableConverter[T]( val writableClass: ClassManifest[T] => Class[_ <: Writable], val convert: Writable => T) extends Serializable + diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index f2bdc11bdb..478e5a0aaf 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import collection.mutable import serializer.Serializer @@ -23,13 +23,14 @@ import serializer.Serializer import akka.actor.{Actor, ActorRef, Props, ActorSystemImpl, ActorSystem} import akka.remote.RemoteActorRefProvider -import spark.broadcast.BroadcastManager -import spark.storage.BlockManager -import spark.storage.BlockManagerMaster -import spark.network.ConnectionManager -import spark.serializer.{Serializer, SerializerManager} -import spark.util.AkkaUtils -import spark.api.python.PythonWorkerFactory +import org.apache.spark.broadcast.BroadcastManager +import org.apache.spark.metrics.MetricsSystem +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.storage.{BlockManagerMasterActor, BlockManager, BlockManagerMaster} +import org.apache.spark.network.ConnectionManager +import org.apache.spark.serializer.{Serializer, SerializerManager} +import org.apache.spark.util.{Utils, AkkaUtils} +import org.apache.spark.api.python.PythonWorkerFactory /** @@ -53,13 +54,23 @@ class SparkEnv ( val connectionManager: ConnectionManager, val httpFileServer: HttpFileServer, val sparkFilesDir: String, - // To be set only as part of initialization of SparkContext. 
- // (executorId, defaultHostPort) => executorHostPort - // If executorId is NOT found, return defaultHostPort - var executorIdToHostPort: Option[(String, String) => String]) { + val metricsSystem: MetricsSystem) { private val pythonWorkers = mutable.HashMap[(String, Map[String, String]), PythonWorkerFactory]() + val hadoop = { + val yarnMode = java.lang.Boolean.valueOf(System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE"))) + if(yarnMode) { + try { + Class.forName("spark.deploy.yarn.YarnSparkHadoopUtil").newInstance.asInstanceOf[SparkHadoopUtil] + } catch { + case th: Throwable => throw new SparkException("Unable to load YARN support", th) + } + } else { + new SparkHadoopUtil + } + } + def stop() { pythonWorkers.foreach { case(key, worker) => worker.stop() } httpFileServer.stop() @@ -68,6 +79,7 @@ class SparkEnv ( broadcastManager.stop() blockManager.stop() blockManager.master.stop() + metricsSystem.stop() actorSystem.shutdown() // Unfortunately Akka's awaitTermination doesn't actually wait for the Netty server to shut // down, but let's call it anyway in case it gets fixed in a later release @@ -80,27 +92,30 @@ class SparkEnv ( pythonWorkers.getOrElseUpdate(key, new PythonWorkerFactory(pythonExec, envVars)).create() } } - - def resolveExecutorIdToHostPort(executorId: String, defaultHostPort: String): String = { - val env = SparkEnv.get - if (env.executorIdToHostPort.isEmpty) { - // default to using host, not host port. Relevant to non cluster modes. - return defaultHostPort - } - - env.executorIdToHostPort.get(executorId, defaultHostPort) - } } object SparkEnv extends Logging { private val env = new ThreadLocal[SparkEnv] + @volatile private var lastSetSparkEnv : SparkEnv = _ def set(e: SparkEnv) { + lastSetSparkEnv = e env.set(e) } + /** + * Returns the ThreadLocal SparkEnv, if non-null. Else returns the SparkEnv + * previously set in any thread. + */ def get: SparkEnv = { - env.get() + Option(env.get()).getOrElse(lastSetSparkEnv) + } + + /** + * Returns the ThreadLocal SparkEnv. 
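The new SparkEnv.hadoop field picks an implementation at runtime: reflectively load the YARN variant when SPARK_YARN_MODE is set, fall back to the plain SparkHadoopUtil otherwise, and fail fast with a wrapped exception if the YARN class is absent from the classpath. The load-or-fallback idiom in a generic standalone form (class names below are only examples):

// Load an optional subclass by name when a flag is set, else use a default.
object PluggableLoadSketch {
  def loadImpl[T](flag: Boolean, className: String, default: => T): T =
    if (flag) {
      try Class.forName(className).newInstance.asInstanceOf[T]
      catch { case t: Throwable =>
        throw new RuntimeException("Unable to load " + className, t)
      }
    } else default

  def main(args: Array[String]) {
    // With flag = false the (possibly absent) class is never touched.
    val list = loadImpl(false, "java.util.LinkedList",
      new java.util.ArrayList[String])
    println(list.getClass.getName)  // java.util.ArrayList
  }
}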
+ */ + def getThreadLocal : SparkEnv = { + env.get() } def createFromSystemProperties( @@ -140,10 +155,10 @@ object SparkEnv extends Logging { val serializerManager = new SerializerManager val serializer = serializerManager.setDefault( - System.getProperty("spark.serializer", "spark.JavaSerializer")) + System.getProperty("spark.serializer", "org.apache.spark.serializer.JavaSerializer")) val closureSerializer = serializerManager.get( - System.getProperty("spark.closure.serializer", "spark.JavaSerializer")) + System.getProperty("spark.closure.serializer", "org.apache.spark.serializer.JavaSerializer")) def registerOrLookup(name: String, newActor: => Actor): ActorRef = { if (isDriver) { @@ -161,7 +176,7 @@ object SparkEnv extends Logging { val blockManagerMaster = new BlockManagerMaster(registerOrLookup( "BlockManagerMaster", - new spark.storage.BlockManagerMasterActor(isLocal))) + new BlockManagerMasterActor(isLocal))) val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster, serializer) val connectionManager = blockManager.connectionManager @@ -178,12 +193,19 @@ object SparkEnv extends Logging { new MapOutputTrackerActor(mapOutputTracker)) val shuffleFetcher = instantiateClass[ShuffleFetcher]( - "spark.shuffle.fetcher", "spark.BlockStoreShuffleFetcher") + "spark.shuffle.fetcher", "org.apache.spark.BlockStoreShuffleFetcher") val httpFileServer = new HttpFileServer() httpFileServer.initialize() System.setProperty("spark.fileserver.uri", httpFileServer.serverUri) + val metricsSystem = if (isDriver) { + MetricsSystem.createMetricsSystem("driver") + } else { + MetricsSystem.createMetricsSystem("executor") + } + metricsSystem.start() + // Set the sparkFiles directory, used when downloading dependencies. In local mode, // this is a temporary directory; in distributed mode, this is the executor's current working // directory. @@ -213,6 +235,6 @@ object SparkEnv extends Logging { connectionManager, httpFileServer, sparkFilesDir, - None) + metricsSystem) } } diff --git a/core/src/main/scala/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index b7045eea63..d34e47e8ca 100644 --- a/core/src/main/scala/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark class SparkException(message: String, cause: Throwable) extends Exception(message, cause) { diff --git a/core/src/main/scala/spark/SparkFiles.java b/core/src/main/scala/org/apache/spark/SparkFiles.java index f9b3f7965e..af9cf85e37 100644 --- a/core/src/main/scala/spark/SparkFiles.java +++ b/core/src/main/scala/org/apache/spark/SparkFiles.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark; +package org.apache.spark; import java.io.File; diff --git a/core/src/main/scala/spark/HadoopWriter.scala b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala index b1fe0075a3..2bab9d6e3d 100644 --- a/core/src/main/scala/spark/HadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala @@ -25,8 +25,8 @@ import java.text.NumberFormat import java.io.IOException import java.util.Date -import spark.Logging -import spark.SerializableWritable +import org.apache.spark.Logging +import org.apache.spark.SerializableWritable /** * Internal helper class that saves an RDD using a Hadoop OutputFormat. 
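SparkEnv.get now falls back to the env most recently set from any thread, which matters for threads (user callbacks, helper pools) that never called SparkEnv.set themselves. The pattern in isolation, as a sketch:

// ThreadLocal value with a volatile "last set anywhere" fallback,
// matching the shape of SparkEnv.get after this change.
class FallbackLocalSketch[T >: Null] {
  private val local = new ThreadLocal[T]
  @volatile private var lastSet: T = null

  def set(v: T) { lastSet = v; local.set(v) }
  def get: T = Option(local.get).getOrElse(lastSet)
}

object FallbackDemo {
  def main(args: Array[String]) {
    val env = new FallbackLocalSketch[String]
    env.set("driver-env")
    val t = new Thread { override def run() {
      // This thread never called set(), but still sees the fallback.
      println(env.get)  // driver-env
    }}
    t.start(); t.join()
  }
}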
This is only public @@ -36,7 +36,7 @@ import spark.SerializableWritable * Saves the RDD using a JobConf, which should contain an output key class, an output value class, * a filename to write to, etc, exactly like in a Hadoop MapReduce job. */ -class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRedUtil with Serializable { +class SparkHadoopWriter(@transient jobConf: JobConf) extends Logging with SparkHadoopMapRedUtil with Serializable { private val now = new Date() private val conf = new SerializableWritable(jobConf) @@ -165,7 +165,7 @@ class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRe splitID = splitid attemptID = attemptid - jID = new SerializableWritable[JobID](HadoopWriter.createJobID(now, jobid)) + jID = new SerializableWritable[JobID](SparkHadoopWriter.createJobID(now, jobid)) taID = new SerializableWritable[TaskAttemptID]( new TaskAttemptID(new TaskID(jID.value, true, splitID), attemptID)) } @@ -179,7 +179,7 @@ class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRe } } -object HadoopWriter { +object SparkHadoopWriter { def createJobID(time: Date, id: Int): JobID = { val formatter = new SimpleDateFormat("yyyyMMddHHmm") val jobtrackerID = formatter.format(new Date()) diff --git a/core/src/main/scala/spark/TaskContext.scala b/core/src/main/scala/org/apache/spark/TaskContext.scala index b79f4ca813..b2dd668330 100644 --- a/core/src/main/scala/spark/TaskContext.scala +++ b/core/src/main/scala/org/apache/spark/TaskContext.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import executor.TaskMetrics import scala.collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index 3ad665da34..03bf268863 100644 --- a/core/src/main/scala/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark +package org.apache.spark -import spark.executor.TaskMetrics -import spark.storage.BlockManagerId +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.storage.BlockManagerId /** * Various possible reasons why a task ended. The low-level TaskScheduler is supposed to retry diff --git a/core/src/main/scala/spark/TaskState.scala b/core/src/main/scala/org/apache/spark/TaskState.scala index 9df7d8277b..19ce8369d9 100644 --- a/core/src/main/scala/spark/TaskState.scala +++ b/core/src/main/scala/org/apache/spark/TaskState.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark +package org.apache.spark import org.apache.mesos.Protos.{TaskState => MesosTaskState} @@ -24,9 +24,11 @@ private[spark] object TaskState val LAUNCHING, RUNNING, FINISHED, FAILED, KILLED, LOST = Value + val FINISHED_STATES = Set(FINISHED, FAILED, KILLED, LOST) + type TaskState = Value - def isFinished(state: TaskState) = Seq(FINISHED, FAILED, LOST).contains(state) + def isFinished(state: TaskState) = FINISHED_STATES.contains(state) def toMesos(state: TaskState): MesosTaskState = state match { case LAUNCHING => MesosTaskState.TASK_STARTING diff --git a/core/src/main/scala/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala index 392556f261..5fd1fab580 100644 --- a/core/src/main/scala/spark/api/java/JavaDoubleRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala @@ -15,16 +15,16 @@ * limitations under the License. */ -package spark.api.java - -import spark.RDD -import spark.SparkContext.doubleRDDToDoubleRDDFunctions -import spark.api.java.function.{Function => JFunction} -import spark.util.StatCounter -import spark.partial.{BoundedDouble, PartialResult} -import spark.storage.StorageLevel +package org.apache.spark.api.java + +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext.doubleRDDToDoubleRDDFunctions +import org.apache.spark.api.java.function.{Function => JFunction} +import org.apache.spark.util.StatCounter +import org.apache.spark.partial.{BoundedDouble, PartialResult} +import org.apache.spark.storage.StorageLevel import java.lang.Double -import spark.Partitioner +import org.apache.spark.Partitioner class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, JavaDoubleRDD] { @@ -115,33 +115,48 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, Jav // Double RDD functions - /** Return the sum of the elements in this RDD. */ + /** Add up the elements in this RDD. */ def sum(): Double = srdd.sum() - /** Return a [[spark.StatCounter]] describing the elements in this RDD. */ + /** + * Return a [[org.apache.spark.util.StatCounter]] object that captures the mean, variance and count + * of the RDD's elements in one operation. + */ def stats(): StatCounter = srdd.stats() - /** Return the mean of the elements in this RDD. */ + /** Compute the mean of this RDD's elements. */ def mean(): Double = srdd.mean() - /** Return the variance of the elements in this RDD. */ + /** Compute the variance of this RDD's elements. */ def variance(): Double = srdd.variance() - /** Return the standard deviation of the elements in this RDD. */ + /** Compute the standard deviation of this RDD's elements. */ def stdev(): Double = srdd.stdev() + /** + * Compute the sample standard deviation of this RDD's elements (which corrects for bias in + * estimating the standard deviation by dividing by N-1 instead of N). + */ + def sampleStdev(): Double = srdd.sampleStdev() + + /** + * Compute the sample variance of this RDD's elements (which corrects for bias in + * estimating the standard variance by dividing by N-1 instead of N). + */ + def sampleVariance(): Double = srdd.sampleVariance() + /** Return the approximate mean of the elements in this RDD. */ def meanApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble] = srdd.meanApprox(timeout, confidence) - /** Return the approximate mean of the elements in this RDD. */ + /** (Experimental) Approximate operation to return the mean within a timeout. 
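The isFinished change above is more than a style tweak: the old Seq(FINISHED, FAILED, LOST) silently omitted KILLED, and the new precomputed FINISHED_STATES set both fixes that omission and turns the check into a hash lookup. In miniature:

object TaskStateSketch extends Enumeration {
  val LAUNCHING, RUNNING, FINISHED, FAILED, KILLED, LOST = Value

  // One shared Set: membership is O(1) and KILLED is no longer forgotten.
  val FINISHED_STATES = Set(FINISHED, FAILED, KILLED, LOST)
  def isFinished(state: Value) = FINISHED_STATES.contains(state)

  def main(args: Array[String]) {
    println(isFinished(KILLED))   // true (false under the old Seq check)
    println(isFinished(RUNNING))  // false
  }
}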
*/ def meanApprox(timeout: Long): PartialResult[BoundedDouble] = srdd.meanApprox(timeout) - /** Return the approximate sum of the elements in this RDD. */ + /** (Experimental) Approximate operation to return the sum within a timeout. */ def sumApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble] = srdd.sumApprox(timeout, confidence) - - /** Return the approximate sum of the elements in this RDD. */ + + /** (Experimental) Approximate operation to return the sum within a timeout. */ def sumApprox(timeout: Long): PartialResult[BoundedDouble] = srdd.sumApprox(timeout) } diff --git a/core/src/main/scala/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index ccc511dc5f..a6518abf45 100644 --- a/core/src/main/scala/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java +package org.apache.spark.api.java import java.util.{List => JList} import java.util.Comparator @@ -23,23 +23,25 @@ import java.util.Comparator import scala.Tuple2 import scala.collection.JavaConversions._ +import com.google.common.base.Optional import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.OutputFormat import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.hadoop.conf.Configuration -import spark.api.java.function.{Function2 => JFunction2} -import spark.api.java.function.{Function => JFunction} -import spark.partial.BoundedDouble -import spark.partial.PartialResult -import spark.OrderedRDDFunctions -import spark.storage.StorageLevel -import spark.HashPartitioner -import spark.Partitioner -import spark.Partitioner._ -import spark.RDD -import spark.SparkContext.rddToPairRDDFunctions +import org.apache.spark.HashPartitioner +import org.apache.spark.Partitioner +import org.apache.spark.Partitioner._ +import org.apache.spark.SparkContext.rddToPairRDDFunctions +import org.apache.spark.api.java.function.{Function2 => JFunction2} +import org.apache.spark.api.java.function.{Function => JFunction} +import org.apache.spark.partial.BoundedDouble +import org.apache.spark.partial.PartialResult +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.OrderedRDDFunctions +import org.apache.spark.storage.StorageLevel + class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManifest[K], implicit val vManifest: ClassManifest[V]) extends JavaRDDLike[(K, V), JavaPairRDD[K, V]] { @@ -252,11 +254,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif fromRDD(rdd.subtract(other, p)) /** - * Return a copy of the RDD partitioned using the specified partitioner. If `mapSideCombine` - * is true, Spark will group values of the same key together on the map side before the - * repartitioning, to only send each key over the network once. If a large number of - * duplicated keys are expected, and the size of the keys are large, `mapSideCombine` should - * be set to true. + * Return a copy of the RDD partitioned using the specified partitioner. */ def partitionBy(partitioner: Partitioner): JavaPairRDD[K, V] = fromRDD(rdd.partitionBy(partitioner)) @@ -276,8 +274,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * partition the output RDD. 
*/ def leftOuterJoin[W](other: JavaPairRDD[K, W], partitioner: Partitioner) - : JavaPairRDD[K, (V, Option[W])] = - fromRDD(rdd.leftOuterJoin(other, partitioner)) + : JavaPairRDD[K, (V, Optional[W])] = { + val joinResult = rdd.leftOuterJoin(other, partitioner) + fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))}) + } /** * Perform a right outer join of `this` and `other`. For each element (k, w) in `other`, the @@ -286,8 +286,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * partition the output RDD. */ def rightOuterJoin[W](other: JavaPairRDD[K, W], partitioner: Partitioner) - : JavaPairRDD[K, (Option[V], W)] = - fromRDD(rdd.rightOuterJoin(other, partitioner)) + : JavaPairRDD[K, (Optional[V], W)] = { + val joinResult = rdd.rightOuterJoin(other, partitioner) + fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)}) + } /** * Simplified version of combineByKey that hash-partitions the resulting RDD using the existing @@ -340,8 +342,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * pair (k, (v, None)) if no elements in `other` have key k. Hash-partitions the output * using the existing partitioner/parallelism level. */ - def leftOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (V, Option[W])] = - fromRDD(rdd.leftOuterJoin(other)) + def leftOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (V, Optional[W])] = { + val joinResult = rdd.leftOuterJoin(other) + fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))}) + } /** * Perform a left outer join of `this` and `other`. For each element (k, v) in `this`, the @@ -349,8 +353,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * pair (k, (v, None)) if no elements in `other` have key k. Hash-partitions the output * into `numPartitions` partitions. */ - def leftOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (V, Option[W])] = - fromRDD(rdd.leftOuterJoin(other, numPartitions)) + def leftOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (V, Optional[W])] = { + val joinResult = rdd.leftOuterJoin(other, numPartitions) + fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))}) + } /** * Perform a right outer join of `this` and `other`. For each element (k, w) in `other`, the @@ -358,8 +364,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * pair (k, (None, w)) if no elements in `this` have key k. Hash-partitions the resulting * RDD using the existing partitioner/parallelism level. */ - def rightOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Option[V], W)] = - fromRDD(rdd.rightOuterJoin(other)) + def rightOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Optional[V], W)] = { + val joinResult = rdd.rightOuterJoin(other) + fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)}) + } /** * Perform a right outer join of `this` and `other`. For each element (k, w) in `other`, the @@ -367,8 +375,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * pair (k, (None, w)) if no elements in `this` have key k. Hash-partitions the resulting * RDD into the given number of partitions. 
*/ - def rightOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (Option[V], W)] = - fromRDD(rdd.rightOuterJoin(other, numPartitions)) + def rightOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (Optional[V], W)] = { + val joinResult = rdd.rightOuterJoin(other, numPartitions) + fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)}) + } /** * Return the key-value pairs in this RDD to the master as a Map. @@ -554,7 +564,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif override def compare(b: K) = comp.compare(a, b) } implicit def toOrdered(x: K): Ordered[K] = new KeyOrdering(x) - fromRDD(new OrderedRDDFunctions(rdd).sortByKey(ascending)) + fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending)) } /** diff --git a/core/src/main/scala/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index c0bf2cf568..eec58abdd6 100644 --- a/core/src/main/scala/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -15,11 +15,12 @@ * limitations under the License. */ -package spark.api.java +package org.apache.spark.api.java -import spark._ -import spark.api.java.function.{Function => JFunction} -import spark.storage.StorageLevel +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.api.java.function.{Function => JFunction} +import org.apache.spark.storage.StorageLevel class JavaRDD[T](val rdd: RDD[T])(implicit val classManifest: ClassManifest[T]) extends JavaRDDLike[T, JavaRDD[T]] { diff --git a/core/src/main/scala/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 21b5abf053..7e6e691f11 100644 --- a/core/src/main/scala/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -15,19 +15,21 @@ * limitations under the License. */ -package spark.api.java +package org.apache.spark.api.java import java.util.{List => JList, Comparator} import scala.Tuple2 import scala.collection.JavaConversions._ -import org.apache.hadoop.io.compress.CompressionCodec -import spark.{SparkContext, Partition, RDD, TaskContext} -import spark.api.java.JavaPairRDD._ -import spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _} -import spark.partial.{PartialResult, BoundedDouble} -import spark.storage.StorageLevel import com.google.common.base.Optional +import org.apache.hadoop.io.compress.CompressionCodec + +import org.apache.spark.{SparkContext, Partition, TaskContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.api.java.JavaPairRDD._ +import org.apache.spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _} +import org.apache.spark.partial.{PartialResult, BoundedDouble} +import org.apache.spark.storage.StorageLevel trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { @@ -40,7 +42,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** Set of partitions in this RDD. */ def splits: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq) - /** The [[spark.SparkContext]] that this RDD was created on. */ + /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */ def context: SparkContext = rdd.context /** A unique ID for this RDD (within its SparkContext). 
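The KeyOrdering wrapper in sortByKey above bridges a java.util.Comparator supplied by a Java caller into the Ordered view bound that the Scala sortByKey expects. A standalone sketch of the analogous bridge, here via Ordering rather than an implicit Ordered conversion:

import java.util.Comparator

object ComparatorBridgeSketch {
  // Wrap a Java Comparator as a Scala Ordering, then sort with it.
  def orderingOf[K](comp: Comparator[K]): Ordering[K] = new Ordering[K] {
    def compare(a: K, b: K) = comp.compare(a, b)
  }

  def main(args: Array[String]) {
    val byLength = new Comparator[String] {
      def compare(a: String, b: String) = a.length - b.length
    }
    println(List("spark", "rdd", "java").sorted(orderingOf(byLength)))
    // List(rdd, java, spark)
  }
}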
*/ @@ -207,12 +209,12 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * of elements in each partition. */ def zipPartitions[U, V]( - f: FlatMapFunction2[java.util.Iterator[T], java.util.Iterator[U], V], - other: JavaRDDLike[U, _]): JavaRDD[V] = { + other: JavaRDDLike[U, _], + f: FlatMapFunction2[java.util.Iterator[T], java.util.Iterator[U], V]): JavaRDD[V] = { def fn = (x: Iterator[T], y: Iterator[U]) => asScalaIterator( f.apply(asJavaIterator(x), asJavaIterator(y)).iterator()) JavaRDD.fromRDD( - rdd.zipPartitions(fn, other.rdd)(other.classManifest, f.elementType()))(f.elementType()) + rdd.zipPartitions(other.rdd)(fn)(other.classManifest, f.elementType()))(f.elementType()) } // Actions (launch a job to return a value to the user program) @@ -366,10 +368,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Gets the name of the file to which this RDD was checkpointed */ def getCheckpointFile(): Optional[String] = { - rdd.getCheckpointFile match { - case Some(file) => Optional.of(file) - case _ => Optional.absent() - } + JavaUtils.optionToOptional(rdd.getCheckpointFile) } /** A description of this RDD and its recursive dependencies for debugging. */ diff --git a/core/src/main/scala/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index fe182e7ab6..8869e072bf 100644 --- a/core/src/main/scala/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java +package org.apache.spark.api.java import java.util.{Map => JMap} @@ -26,14 +26,16 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapred.InputFormat import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} +import com.google.common.base.Optional -import spark.{Accumulable, AccumulableParam, Accumulator, AccumulatorParam, RDD, SparkContext} -import spark.SparkContext.IntAccumulatorParam -import spark.SparkContext.DoubleAccumulatorParam -import spark.broadcast.Broadcast +import org.apache.spark.{Accumulable, AccumulableParam, Accumulator, AccumulatorParam, SparkContext} +import org.apache.spark.SparkContext.IntAccumulatorParam +import org.apache.spark.SparkContext.DoubleAccumulatorParam +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.rdd.RDD /** - * A Java-friendly version of [[spark.SparkContext]] that returns [[spark.api.java.JavaRDD]]s and + * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns [[org.apache.spark.api.java.JavaRDD]]s and * works with Java collections instead of Scala ones. */ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWorkaround { @@ -281,48 +283,48 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork } /** - * Create an [[spark.Accumulator]] integer variable, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. 
*/ def intAccumulator(initialValue: Int): Accumulator[java.lang.Integer] = sc.accumulator(initialValue)(IntAccumulatorParam).asInstanceOf[Accumulator[java.lang.Integer]] /** - * Create an [[spark.Accumulator]] double variable, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. */ def doubleAccumulator(initialValue: Double): Accumulator[java.lang.Double] = sc.accumulator(initialValue)(DoubleAccumulatorParam).asInstanceOf[Accumulator[java.lang.Double]] /** - * Create an [[spark.Accumulator]] integer variable, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. */ def accumulator(initialValue: Int): Accumulator[java.lang.Integer] = intAccumulator(initialValue) /** - * Create an [[spark.Accumulator]] double variable, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. */ def accumulator(initialValue: Double): Accumulator[java.lang.Double] = doubleAccumulator(initialValue) /** - * Create an [[spark.Accumulator]] variable of a given type, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. */ def accumulator[T](initialValue: T, accumulatorParam: AccumulatorParam[T]): Accumulator[T] = sc.accumulator(initialValue)(accumulatorParam) /** - * Create an [[spark.Accumulable]] shared variable of the given type, to which tasks can + * Create an [[org.apache.spark.Accumulable]] shared variable of the given type, to which tasks can * "add" values with `add`. Only the master can access the accumuable's `value`. */ def accumulable[T, R](initialValue: T, param: AccumulableParam[T, R]): Accumulable[T, R] = sc.accumulable(initialValue)(param) /** - * Broadcast a read-only variable to the cluster, returning a [[spark.Broadcast]] object for + * Broadcast a read-only variable to the cluster, returning a [[org.apache.spark.Broadcast]] object for * reading it in distributed functions. The variable will be sent to each cluster only once. */ def broadcast[T](value: T): Broadcast[T] = sc.broadcast(value) @@ -337,7 +339,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork * or the spark.home Java property, or the SPARK_HOME environment variable * (in that order of preference). If neither of these is set, return None. */ - def getSparkHome(): Option[String] = sc.getSparkHome() + def getSparkHome(): Optional[String] = JavaUtils.optionToOptional(sc.getSparkHome()) /** * Add a file to be downloaded with this Spark job on every node. diff --git a/core/src/main/scala/spark/api/java/JavaSparkContextVarargsWorkaround.java b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java index 42b1de01b1..c9cbce5624 100644 --- a/core/src/main/scala/spark/api/java/JavaSparkContextVarargsWorkaround.java +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.api.java; +package org.apache.spark.api.java; import java.util.Arrays; import java.util.ArrayList; diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala new file mode 100644 index 0000000000..ecbf18849a --- /dev/null +++ b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.java + +import com.google.common.base.Optional + +object JavaUtils { + def optionToOptional[T](option: Option[T]): Optional[T] = + option match { + case Some(value) => Optional.of(value) + case None => Optional.absent() + } +} diff --git a/core/src/main/scala/spark/api/java/StorageLevels.java b/core/src/main/scala/org/apache/spark/api/java/StorageLevels.java index f385636e83..0744269773 100644 --- a/core/src/main/scala/spark/api/java/StorageLevels.java +++ b/core/src/main/scala/org/apache/spark/api/java/StorageLevels.java @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.api.java; +package org.apache.spark.api.java; -import spark.storage.StorageLevel; +import org.apache.spark.storage.StorageLevel; /** * Expose some commonly useful storage level constants. diff --git a/core/src/main/scala/spark/api/java/function/DoubleFlatMapFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.java index 8bc88d757f..4830067f7a 100644 --- a/core/src/main/scala/spark/api/java/function/DoubleFlatMapFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.runtime.AbstractFunction1; diff --git a/core/src/main/scala/spark/api/java/function/DoubleFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.java index 1aa1e5dae0..db34cd190a 100644 --- a/core/src/main/scala/spark/api/java/function/DoubleFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.runtime.AbstractFunction1; diff --git a/core/src/main/scala/spark/api/java/function/FlatMapFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala index 9eb0cfe3f9..158539a846 100644 --- a/core/src/main/scala/spark/api/java/function/FlatMapFunction.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala @@ -15,7 +15,7 @@ * limitations under the License. 
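The new JavaUtils.optionToOptional is the single conversion point that lets the Java API return Guava's Optional (directly usable from Java) instead of Scala's Option; the outer-join and getCheckpointFile changes above all route through it. Usage of the same conversion, assuming Guava is on the classpath:

import com.google.common.base.Optional

object OptionalBridgeDemo {
  // Same shape as org.apache.spark.api.java.JavaUtils.optionToOptional.
  def optionToOptional[T](option: Option[T]): Optional[T] = option match {
    case Some(value) => Optional.of(value)
    case None        => Optional.absent()
  }

  def main(args: Array[String]) {
    println(optionToOptional(Some("/checkpoints/rdd-12")))     // Optional.of(...)
    println(optionToOptional(None: Option[String]).isPresent)  // false
  }
}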
*/ -package spark.api.java.function +package org.apache.spark.api.java.function /** * A function that returns zero or more output records from each input record. diff --git a/core/src/main/scala/spark/api/java/function/FlatMapFunction2.scala b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala index dda98710c2..5ef6a814f5 100644 --- a/core/src/main/scala/spark/api/java/function/FlatMapFunction2.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function +package org.apache.spark.api.java.function /** * A function that takes two inputs and returns zero or more output records. diff --git a/core/src/main/scala/spark/api/java/function/Function.java b/core/src/main/scala/org/apache/spark/api/java/function/Function.java index 2a2ea0aacf..b9070cfd83 100644 --- a/core/src/main/scala/spark/api/java/function/Function.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/Function.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.reflect.ClassManifest; import scala.reflect.ClassManifest$; diff --git a/core/src/main/scala/spark/api/java/function/Function2.java b/core/src/main/scala/org/apache/spark/api/java/function/Function2.java index 952d31ece4..d4c9154869 100644 --- a/core/src/main/scala/spark/api/java/function/Function2.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/Function2.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.reflect.ClassManifest; import scala.reflect.ClassManifest$; diff --git a/core/src/main/scala/spark/api/java/function/PairFlatMapFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java index 4aad602da3..c0e5544b7d 100644 --- a/core/src/main/scala/spark/api/java/function/PairFlatMapFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.Tuple2; import scala.reflect.ClassManifest; diff --git a/core/src/main/scala/spark/api/java/function/PairFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java index ccfe64ecf1..40480fe8e8 100644 --- a/core/src/main/scala/spark/api/java/function/PairFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.Tuple2; import scala.reflect.ClassManifest; diff --git a/core/src/main/scala/spark/api/java/function/VoidFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala index f6fc0b0f7d..ea94313a4a 100644 --- a/core/src/main/scala/spark/api/java/function/VoidFunction.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function +package org.apache.spark.api.java.function /** * A function with no return value. 
diff --git a/core/src/main/scala/spark/api/java/function/WrappedFunction1.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala index 1758a38c4e..cfe694f65d 100644 --- a/core/src/main/scala/spark/api/java/function/WrappedFunction1.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function +package org.apache.spark.api.java.function import scala.runtime.AbstractFunction1 diff --git a/core/src/main/scala/spark/api/java/function/WrappedFunction2.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala index b093567d2c..eb9277c6fb 100644 --- a/core/src/main/scala/spark/api/java/function/WrappedFunction2.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function +package org.apache.spark.api.java.function import scala.runtime.AbstractFunction2 diff --git a/core/src/main/scala/spark/api/python/PythonPartitioner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonPartitioner.scala index 31a719fbff..b090c6edf3 100644 --- a/core/src/main/scala/spark/api/python/PythonPartitioner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonPartitioner.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark.api.python - -import spark.Partitioner +package org.apache.spark.api.python +import org.apache.spark.Partitioner import java.util.Arrays +import org.apache.spark.util.Utils /** - * A [[spark.Partitioner]] that performs handling of byte arrays, for use by the Python API. + * A [[org.apache.spark.Partitioner]] that performs handling of byte arrays, for use by the Python API. * * Stores the unique id() of the Python-side partitioning function so that it is incorporated into * equality comparisons. Correctness requires that the id is a unique identifier for the @@ -35,25 +35,10 @@ private[spark] class PythonPartitioner( val pyPartitionFunctionId: Long) extends Partitioner { - override def getPartition(key: Any): Int = { - if (key == null) { - return 0 - } - else { - val hashCode = { - if (key.isInstanceOf[Array[Byte]]) { - Arrays.hashCode(key.asInstanceOf[Array[Byte]]) - } else { - key.hashCode() - } - } - val mod = hashCode % numPartitions - if (mod < 0) { - mod + numPartitions - } else { - mod // Guard against negative hash codes - } - } + override def getPartition(key: Any): Int = key match { + case null => 0 + case key: Array[Byte] => Utils.nonNegativeMod(Arrays.hashCode(key), numPartitions) + case _ => Utils.nonNegativeMod(key.hashCode(), numPartitions) } override def equals(other: Any): Boolean = other match { diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index af10822dbd..ccd3833964 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. 
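The PythonPartitioner rewrite above replaces the hand-rolled negative-modulo guard with Utils.nonNegativeMod. Java's % operator keeps the sign of the dividend, so a negative hash code would otherwise yield a negative partition index. A self-contained restatement of the arithmetic (the local helper is a stand-in for org.apache.spark.util.Utils.nonNegativeMod):

    object NonNegativeModDemo {
      // Fold a possibly negative remainder back into the range [0, mod).
      def nonNegativeMod(x: Int, mod: Int): Int = {
        val rawMod = x % mod
        rawMod + (if (rawMod < 0) mod else 0)
      }

      def main(args: Array[String]) {
        println(-7 % 3)                 // -1: plain % keeps the sign of x
        println(nonNegativeMod(-7, 3))  // 2: always a usable partition index
        println(nonNegativeMod("spark".hashCode, 4))
      }
    }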
*/ -package spark.api.python +package org.apache.spark.api.python import java.io._ import java.net._ @@ -23,16 +23,19 @@ import java.util.{List => JList, ArrayList => JArrayList, Map => JMap, Collectio import scala.collection.JavaConversions._ -import spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} -import spark.broadcast.Broadcast -import spark._ -import spark.rdd.PipedRDD +import org.apache.spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} +import org.apache.spark.broadcast.Broadcast +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.PipedRDD +import org.apache.spark.util.Utils private[spark] class PythonRDD[T: ClassManifest]( parent: RDD[T], command: Seq[String], envVars: JMap[String, String], + pythonIncludes: JList[String], preservePartitoning: Boolean, pythonExec: String, broadcastVars: JList[Broadcast[Array[Byte]]], @@ -44,10 +47,11 @@ private[spark] class PythonRDD[T: ClassManifest]( // Similar to Runtime.exec(), if we are given a single string, split it into words // using a standard StringTokenizer (i.e. by spaces) def this(parent: RDD[T], command: String, envVars: JMap[String, String], + pythonIncludes: JList[String], preservePartitoning: Boolean, pythonExec: String, broadcastVars: JList[Broadcast[Array[Byte]]], accumulator: Accumulator[JList[Array[Byte]]]) = - this(parent, PipedRDD.tokenize(command), envVars, preservePartitoning, pythonExec, + this(parent, PipedRDD.tokenize(command), envVars, pythonIncludes, preservePartitoning, pythonExec, broadcastVars, accumulator) override def getPartitions = parent.partitions @@ -63,34 +67,47 @@ private[spark] class PythonRDD[T: ClassManifest]( // Start a thread to feed the process input from our parent's iterator new Thread("stdin writer for " + pythonExec) { override def run() { - SparkEnv.set(env) - val stream = new BufferedOutputStream(worker.getOutputStream, bufferSize) - val dataOut = new DataOutputStream(stream) - val printOut = new PrintWriter(stream) - // Partition index - dataOut.writeInt(split.index) - // sparkFilesDir - PythonRDD.writeAsPickle(SparkFiles.getRootDirectory, dataOut) - // Broadcast variables - dataOut.writeInt(broadcastVars.length) - for (broadcast <- broadcastVars) { - dataOut.writeLong(broadcast.id) - dataOut.writeInt(broadcast.value.length) - dataOut.write(broadcast.value) - } - dataOut.flush() - // Serialized user code - for (elem <- command) { - printOut.println(elem) - } - printOut.flush() - // Data values - for (elem <- parent.iterator(split, context)) { - PythonRDD.writeAsPickle(elem, dataOut) + try { + SparkEnv.set(env) + val stream = new BufferedOutputStream(worker.getOutputStream, bufferSize) + val dataOut = new DataOutputStream(stream) + val printOut = new PrintWriter(stream) + // Partition index + dataOut.writeInt(split.index) + // sparkFilesDir + PythonRDD.writeAsPickle(SparkFiles.getRootDirectory, dataOut) + // Broadcast variables + dataOut.writeInt(broadcastVars.length) + for (broadcast <- broadcastVars) { + dataOut.writeLong(broadcast.id) + dataOut.writeInt(broadcast.value.length) + dataOut.write(broadcast.value) + } + // Python includes (*.zip and *.egg files) + dataOut.writeInt(pythonIncludes.length) + for (f <- pythonIncludes) { + PythonRDD.writeAsPickle(f, dataOut) + } + dataOut.flush() + // Serialized user code + for (elem <- command) { + printOut.println(elem) + } + printOut.flush() + // Data values + for (elem <- parent.iterator(split, context)) { + PythonRDD.writeAsPickle(elem, dataOut) + } + dataOut.flush() + printOut.flush() + 
worker.shutdownOutput() + } catch { + case e: IOException => + // This can happen for legitimate reasons if the Python code stops returning data before we are done + // passing elements through, e.g., for take(). Just log a message to say it happened. + logInfo("stdin writer to Python finished early") + logDebug("stdin writer to Python finished early", e) } - dataOut.flush() - printOut.flush() - worker.shutdownOutput() } }.start() @@ -283,7 +300,7 @@ private object Pickle { val APPENDS: Byte = 'e' } -private class BytesToString extends spark.api.java.function.Function[Array[Byte], String] { +private class BytesToString extends org.apache.spark.api.java.function.Function[Array[Byte], String] { override def call(arr: Array[Byte]) : String = new String(arr, "UTF-8") } @@ -297,7 +314,7 @@ class PythonAccumulatorParam(@transient serverHost: String, serverPort: Int) Utils.checkHost(serverHost, "Expected hostname") val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt - + override def zero(value: JList[Array[Byte]]): JList[Array[Byte]] = new JArrayList override def addInPlace(val1: JList[Array[Byte]], val2: JList[Array[Byte]]) diff --git a/core/src/main/scala/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala index 078ad45ce8..08e3f670f5 100644 --- a/core/src/main/scala/spark/api/python/PythonWorkerFactory.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark.api.python +package org.apache.spark.api.python -import java.io.{DataInputStream, IOException} +import java.io.{File, DataInputStream, IOException} import java.net.{Socket, SocketException, InetAddress} import scala.collection.JavaConversions._ -import spark._ +import org.apache.spark._ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String, String]) extends Logging { @@ -67,6 +67,8 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String val pb = new ProcessBuilder(Seq(pythonExec, sparkHome + "/python/pyspark/daemon.py")) val workerEnv = pb.environment() workerEnv.putAll(envVars) + val pythonPath = sparkHome + "/python/" + File.pathSeparator + workerEnv.get("PYTHONPATH") + workerEnv.put("PYTHONPATH", pythonPath) daemon = pb.start() // Redirect the stderr to ours diff --git a/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala index 6f7d385379..93e7815ab5 100644 --- a/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala @@ -15,7 +15,7 @@ * limitations under the License. 
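Two changes in PythonRDD are worth calling out: the stdin-writer thread now sends the pythonIncludes list (paths to .zip and .egg files) immediately after the broadcast variables, and its whole body sits in a try/catch because the Python side can legitimately stop reading early (e.g. take()), which surfaces here as an IOException. A standalone sketch of the frame order the writer produces, with writeAsPickle stubbed as a length-prefixed UTF-8 string purely for illustration:

    import java.io.{ByteArrayOutputStream, DataOutputStream}

    object StdinFrameSketch {
      // Illustrative stand-in for PythonRDD.writeAsPickle.
      def writePickleStub(s: String, out: DataOutputStream) {
        val bytes = s.getBytes("UTF-8")
        out.writeInt(bytes.length)
        out.write(bytes)
      }

      def main(args: Array[String]) {
        val buf = new ByteArrayOutputStream()
        val dataOut = new DataOutputStream(buf)
        dataOut.writeInt(0)                           // partition index
        writePickleStub("/tmp/spark-files", dataOut)  // sparkFilesDir
        dataOut.writeInt(0)                           // broadcast count
        val includes = Seq("deps.zip", "helpers.egg")
        dataOut.writeInt(includes.length)             // python includes
        includes.foreach(writePickleStub(_, dataOut))
        dataOut.flush()
        println("wrote " + buf.size + " bytes")
      }
    }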
*/ -package spark.broadcast +package org.apache.spark.broadcast import java.io._ import java.net._ @@ -25,8 +25,9 @@ import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{ListBuffer, Map, Set} import scala.math -import spark._ -import spark.storage.StorageLevel +import org.apache.spark._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long) extends Broadcast[T](id) diff --git a/core/src/main/scala/spark/broadcast/Broadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala index aba56a60ca..43c18294c5 100644 --- a/core/src/main/scala/spark/broadcast/Broadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.broadcast +package org.apache.spark.broadcast import java.io._ import java.util.concurrent.atomic.AtomicLong -import spark._ +import org.apache.spark._ abstract class Broadcast[T](private[spark] val id: Long) extends Serializable { def value: T @@ -28,7 +28,7 @@ abstract class Broadcast[T](private[spark] val id: Long) extends Serializable { // We cannot have an abstract readObject here due to some weird issues with // readObject having to be 'private' in sub-classes. - override def toString = "spark.Broadcast(" + id + ")" + override def toString = "Broadcast(" + id + ")" } private[spark] @@ -44,7 +44,7 @@ class BroadcastManager(val _isDriver: Boolean) extends Logging with Serializable synchronized { if (!initialized) { val broadcastFactoryClass = System.getProperty( - "spark.broadcast.factory", "spark.broadcast.HttpBroadcastFactory") + "spark.broadcast.factory", "org.apache.spark.broadcast.HttpBroadcastFactory") broadcastFactory = Class.forName(broadcastFactoryClass).newInstance.asInstanceOf[BroadcastFactory] diff --git a/core/src/main/scala/spark/broadcast/BroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala index d33d95c7d9..68bff75b90 100644 --- a/core/src/main/scala/spark/broadcast/BroadcastFactory.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.broadcast +package org.apache.spark.broadcast /** * An interface for all the broadcast implementations in Spark (to allow diff --git a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala index c565876950..9db26ae6de 100644 --- a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala @@ -15,23 +15,22 @@ * limitations under the License. 
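Note also that Broadcast.toString drops the old "spark." prefix, and the default for the spark.broadcast.factory property now names the relocated class. Anyone who sets the factory explicitly must update the value, since BroadcastManager resolves it with Class.forName:

    object BroadcastFactoryConfig {
      def main(args: Array[String]) {
        // The old value "spark.broadcast.HttpBroadcastFactory" would now
        // fail BroadcastManager's Class.forName lookup.
        System.setProperty("spark.broadcast.factory",
          "org.apache.spark.broadcast.HttpBroadcastFactory")
      }
    }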
*/ -package spark.broadcast +package org.apache.spark.broadcast -import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} - -import java.io._ -import java.net._ -import java.util.UUID +import java.io.{File, FileOutputStream, ObjectInputStream, OutputStream} +import java.net.URL import it.unimi.dsi.fastutil.io.FastBufferedInputStream import it.unimi.dsi.fastutil.io.FastBufferedOutputStream -import spark._ -import spark.storage.StorageLevel -import util.{MetadataCleaner, TimeStampedHashSet} +import org.apache.spark.{HttpServer, Logging, SparkEnv} +import org.apache.spark.io.CompressionCodec +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.{Utils, MetadataCleaner, TimeStampedHashSet} + private[spark] class HttpBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long) -extends Broadcast[T](id) with Logging with Serializable { + extends Broadcast[T](id) with Logging with Serializable { def value = value_ @@ -85,6 +84,7 @@ private object HttpBroadcast extends Logging { private val files = new TimeStampedHashSet[String] private val cleaner = new MetadataCleaner("HttpBroadcast", cleanup) + private lazy val compressionCodec = CompressionCodec.createCodec() def initialize(isDriver: Boolean) { synchronized { @@ -122,10 +122,12 @@ private object HttpBroadcast extends Logging { def write(id: Long, value: Any) { val file = new File(broadcastDir, "broadcast-" + id) - val out: OutputStream = if (compress) { - new LZFOutputStream(new FileOutputStream(file)) // Does its own buffering - } else { - new FastBufferedOutputStream(new FileOutputStream(file), bufferSize) + val out: OutputStream = { + if (compress) { + compressionCodec.compressedOutputStream(new FileOutputStream(file)) + } else { + new FastBufferedOutputStream(new FileOutputStream(file), bufferSize) + } } val ser = SparkEnv.get.serializer.newInstance() val serOut = ser.serializeStream(out) @@ -136,10 +138,12 @@ private object HttpBroadcast extends Logging { def read[T](id: Long): T = { val url = serverUri + "/broadcast-" + id - var in = if (compress) { - new LZFInputStream(new URL(url).openStream()) // Does its own buffering - } else { - new FastBufferedInputStream(new URL(url).openStream(), bufferSize) + val in = { + if (compress) { + compressionCodec.compressedInputStream(new URL(url).openStream()) + } else { + new FastBufferedInputStream(new URL(url).openStream(), bufferSize) + } } val ser = SparkEnv.get.serializer.newInstance() val serIn = ser.deserializeStream(in) diff --git a/core/src/main/scala/spark/broadcast/MultiTracker.scala b/core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala index 7855d44e9b..21ec94659e 100644 --- a/core/src/main/scala/spark/broadcast/MultiTracker.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.broadcast +package org.apache.spark.broadcast import java.io._ import java.net._ @@ -23,7 +23,8 @@ import java.util.Random import scala.collection.mutable.Map -import spark._ +import org.apache.spark._ +import org.apache.spark.util.Utils private object MultiTracker extends Logging { diff --git a/core/src/main/scala/spark/broadcast/SourceInfo.scala b/core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala index b17ae63b5c..baa1fd6da4 100644 --- a/core/src/main/scala/spark/broadcast/SourceInfo.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala @@ -15,11 +15,11 @@ * limitations under the License. 
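HttpBroadcast above stops hard-coding LZF streams and routes both write() and read() through a lazily created CompressionCodec, so whatever codec wraps the file on the way out also wraps the URL stream on the way back in. A sketch of that symmetry with a stand-in codec trait (the real one is org.apache.spark.io.CompressionCodec; GZIP here is only for the demo):

    import java.io._
    import java.util.zip.{GZIPInputStream, GZIPOutputStream}

    trait Codec {
      def compressedOutputStream(out: OutputStream): OutputStream
      def compressedInputStream(in: InputStream): InputStream
    }

    object GzipCodec extends Codec {
      def compressedOutputStream(out: OutputStream) = new GZIPOutputStream(out)
      def compressedInputStream(in: InputStream) = new GZIPInputStream(in)
    }

    object CodecSymmetryDemo {
      def main(args: Array[String]) {
        val buf = new ByteArrayOutputStream()
        val out = GzipCodec.compressedOutputStream(buf)
        out.write("broadcast payload".getBytes("UTF-8"))
        out.close()  // must close so the codec flushes its trailer

        val in = GzipCodec.compressedInputStream(
          new ByteArrayInputStream(buf.toByteArray))
        println(new BufferedReader(new InputStreamReader(in, "UTF-8")).readLine())
      }
    }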
*/ -package spark.broadcast +package org.apache.spark.broadcast import java.util.BitSet -import spark._ +import org.apache.spark._ /** * Used to keep and pass around information of peers involved in a broadcast diff --git a/core/src/main/scala/spark/broadcast/TreeBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala index ea1e9a12c1..80c97ca073 100644 --- a/core/src/main/scala/spark/broadcast/TreeBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.broadcast +package org.apache.spark.broadcast import java.io._ import java.net._ @@ -24,8 +24,9 @@ import java.util.{Comparator, Random, UUID} import scala.collection.mutable.{ListBuffer, Map, Set} import scala.math -import spark._ -import spark.storage.StorageLevel +import org.apache.spark._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils private[spark] class TreeBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long) extends Broadcast[T](id) with Logging with Serializable { diff --git a/core/src/main/scala/spark/deploy/ApplicationDescription.scala b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala index a8b22fbef8..19d393a0db 100644 --- a/core/src/main/scala/spark/deploy/ApplicationDescription.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy private[spark] class ApplicationDescription( val name: String, diff --git a/core/src/main/scala/spark/deploy/Command.scala b/core/src/main/scala/org/apache/spark/deploy/Command.scala index bad629e965..fa8af9a646 100644 --- a/core/src/main/scala/spark/deploy/Command.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Command.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy import scala.collection.Map diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala new file mode 100644 index 0000000000..c31619db27 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy + +import scala.collection.immutable.List + +import org.apache.spark.deploy.ExecutorState.ExecutorState +import org.apache.spark.deploy.master.{WorkerInfo, ApplicationInfo} +import org.apache.spark.deploy.worker.ExecutorRunner +import org.apache.spark.util.Utils + + +private[deploy] sealed trait DeployMessage extends Serializable + +private[deploy] object DeployMessages { + + // Worker to Master + + case class RegisterWorker( + id: String, + host: String, + port: Int, + cores: Int, + memory: Int, + webUiPort: Int, + publicAddress: String) + extends DeployMessage { + Utils.checkHost(host, "Required hostname") + assert (port > 0) + } + + case class ExecutorStateChanged( + appId: String, + execId: Int, + state: ExecutorState, + message: Option[String], + exitStatus: Option[Int]) + extends DeployMessage + + case class Heartbeat(workerId: String) extends DeployMessage + + // Master to Worker + + case class RegisteredWorker(masterWebUiUrl: String) extends DeployMessage + + case class RegisterWorkerFailed(message: String) extends DeployMessage + + case class KillExecutor(appId: String, execId: Int) extends DeployMessage + + case class LaunchExecutor( + appId: String, + execId: Int, + appDesc: ApplicationDescription, + cores: Int, + memory: Int, + sparkHome: String) + extends DeployMessage + + // Client to Master + + case class RegisterApplication(appDescription: ApplicationDescription) + extends DeployMessage + + // Master to Client + + case class RegisteredApplication(appId: String) extends DeployMessage + + // TODO(matei): replace hostPort with host + case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) { + Utils.checkHostPort(hostPort, "Required hostport") + } + + case class ExecutorUpdated(id: Int, state: ExecutorState, message: Option[String], + exitStatus: Option[Int]) + + case class ApplicationRemoved(message: String) + + // Internal message in Client + + case object StopClient + + // MasterWebUI To Master + + case object RequestMasterState + + // Master to MasterWebUI + + case class MasterStateResponse(host: String, port: Int, workers: Array[WorkerInfo], + activeApps: Array[ApplicationInfo], completedApps: Array[ApplicationInfo]) { + + Utils.checkHost(host, "Required hostname") + assert (port > 0) + + def uri = "spark://" + host + ":" + port + } + + // WorkerWebUI to Worker + + case object RequestWorkerState + + // Worker to WorkerWebUI + + case class WorkerStateResponse(host: String, port: Int, workerId: String, + executors: List[ExecutorRunner], finishedExecutors: List[ExecutorRunner], masterUrl: String, + cores: Int, memory: Int, coresUsed: Int, memoryUsed: Int, masterWebUiUrl: String) { + + Utils.checkHost(host, "Required hostname") + assert (port > 0) + } + + // Actor System to Master + + case object CheckForWorkerTimeOut + +} diff --git a/core/src/main/scala/spark/deploy/ExecutorState.scala b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala index 08c9a3b725..fcfea96ad6 100644 --- a/core/src/main/scala/spark/deploy/ExecutorState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala @@ -15,7 +15,7 @@ * limitations under the License. 
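Consolidating the deploy messages into one sealed trait plus a DeployMessages container gives the Master and Worker actors compiler-checked matches. A reduced sketch of the pattern, with just two of the messages from the new file:

    sealed trait DeployMessage extends Serializable

    object DeployMessages {
      case class Heartbeat(workerId: String) extends DeployMessage
      case class RegisterWorkerFailed(message: String) extends DeployMessage
    }

    object SealedTraitDemo {
      import DeployMessages._

      // Because the trait is sealed, the compiler warns on missing cases.
      def describe(msg: DeployMessage): String = msg match {
        case Heartbeat(id)             => "heartbeat from " + id
        case RegisterWorkerFailed(why) => "registration failed: " + why
      }

      def main(args: Array[String]) {
        println(describe(Heartbeat("worker-1")))
      }
    }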
*/ -package spark.deploy +package org.apache.spark.deploy private[spark] object ExecutorState extends Enumeration("LAUNCHING", "LOADING", "RUNNING", "KILLED", "FAILED", "LOST") { diff --git a/core/src/main/scala/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index 64f89623e1..a6be8efef1 100644 --- a/core/src/main/scala/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -15,11 +15,14 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy -import master.{ApplicationInfo, WorkerInfo} import net.liftweb.json.JsonDSL._ -import worker.ExecutorRunner + +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} +import org.apache.spark.deploy.master.{ApplicationInfo, WorkerInfo} +import org.apache.spark.deploy.worker.ExecutorRunner + private[spark] object JsonProtocol { def writeWorkerInfo(obj: WorkerInfo) = { @@ -30,7 +33,8 @@ private[spark] object JsonProtocol { ("cores" -> obj.cores) ~ ("coresused" -> obj.coresUsed) ~ ("memory" -> obj.memory) ~ - ("memoryused" -> obj.memoryUsed) + ("memoryused" -> obj.memoryUsed) ~ + ("state" -> obj.state.toString) } def writeApplicationInfo(obj: ApplicationInfo) = { @@ -57,7 +61,7 @@ private[spark] object JsonProtocol { ("appdesc" -> writeApplicationDescription(obj.appDesc)) } - def writeMasterState(obj: MasterState) = { + def writeMasterState(obj: MasterStateResponse) = { ("url" -> ("spark://" + obj.uri)) ~ ("workers" -> obj.workers.toList.map(writeWorkerInfo)) ~ ("cores" -> obj.workers.map(_.cores).sum) ~ @@ -68,7 +72,7 @@ private[spark] object JsonProtocol { ("completedapps" -> obj.completedApps.toList.map(writeApplicationInfo)) } - def writeWorkerState(obj: WorkerState) = { + def writeWorkerState(obj: WorkerStateResponse) = { ("id" -> obj.workerId) ~ ("masterurl" -> obj.masterUrl) ~ ("masterwebuiurl" -> obj.masterWebUiUrl) ~ diff --git a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala index 6b8e9f27af..78e3747ad8 100644 --- a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy import akka.actor.{ActorRef, Props, Actor, ActorSystem, Terminated} -import spark.deploy.worker.Worker -import spark.deploy.master.Master -import spark.util.AkkaUtils -import spark.{Logging, Utils} +import org.apache.spark.deploy.worker.Worker +import org.apache.spark.deploy.master.Master +import org.apache.spark.util.{Utils, AkkaUtils} +import org.apache.spark.{Logging} import scala.collection.mutable.ArrayBuffer diff --git a/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index df55be1254..0a5f4c368f 100644 --- a/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapred.JobConf @@ -23,22 +23,14 @@ import org.apache.hadoop.mapred.JobConf /** * Contains util methods to interact with Hadoop from spark. */ -object SparkHadoopUtil { - - def getUserNameFromEnvironment(): String = { - // defaulting to -D ... 
- System.getProperty("user.name") - } - - def runAsUser(func: (Product) => Unit, args: Product) { - - // Add support, if exists - for now, simply run func ! - func(args) - } +class SparkHadoopUtil { // Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems def newConfiguration(): Configuration = new Configuration() // add any user credentials to the job conf which are necessary for running on a secure Hadoop cluster def addCredentials(conf: JobConf) {} + + def isYarnMode(): Boolean = { false } + } diff --git a/core/src/main/scala/spark/deploy/WebUI.scala b/core/src/main/scala/org/apache/spark/deploy/WebUI.scala index 8ea7792ef4..ae258b58b9 100644 --- a/core/src/main/scala/spark/deploy/WebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/WebUI.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy import java.text.SimpleDateFormat import java.util.Date diff --git a/core/src/main/scala/spark/deploy/client/Client.scala b/core/src/main/scala/org/apache/spark/deploy/client/Client.scala index edefa0292d..a342dd724a 100644 --- a/core/src/main/scala/spark/deploy/client/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/Client.scala @@ -15,23 +15,25 @@ * limitations under the License. */ -package spark.deploy.client +package org.apache.spark.deploy.client + +import java.util.concurrent.TimeoutException -import spark.deploy._ import akka.actor._ +import akka.actor.Terminated import akka.pattern.ask import akka.util.Duration -import akka.util.duration._ -import akka.pattern.AskTimeoutException -import spark.{SparkException, Logging} +import akka.remote.RemoteClientDisconnected import akka.remote.RemoteClientLifeCycleEvent import akka.remote.RemoteClientShutdown -import spark.deploy.RegisterApplication -import spark.deploy.master.Master -import akka.remote.RemoteClientDisconnected -import akka.actor.Terminated import akka.dispatch.Await +import org.apache.spark.Logging +import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} +import org.apache.spark.deploy.DeployMessages._ +import org.apache.spark.deploy.master.Master + + /** * The main class used to talk to a Spark deploy cluster. Takes a master URL, an app description, * and a listener for cluster events, and calls back the listener when various events occur. @@ -134,7 +136,8 @@ private[spark] class Client( val future = actor.ask(StopClient)(timeout) Await.result(future, timeout) } catch { - case e: AskTimeoutException => // Ignore it, maybe master went away + case e: TimeoutException => + logInfo("Stop request to Master timed out; it may already be shut down.") } actor = null } diff --git a/core/src/main/scala/spark/deploy/client/ClientListener.scala b/core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala index 064024455e..4605368c11 100644 --- a/core/src/main/scala/spark/deploy/client/ClientListener.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.client +package org.apache.spark.deploy.client /** * Callbacks invoked by deploy client when various events happen. 
There are currently four events: diff --git a/core/src/main/scala/spark/deploy/client/TestClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala index 4f4daa141a..d5e9a0e095 100644 --- a/core/src/main/scala/spark/deploy/client/TestClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.deploy.client +package org.apache.spark.deploy.client -import spark.util.AkkaUtils -import spark.{Logging, Utils} -import spark.deploy.{Command, ApplicationDescription} +import org.apache.spark.util.{Utils, AkkaUtils} +import org.apache.spark.{Logging} +import org.apache.spark.deploy.{Command, ApplicationDescription} private[spark] object TestClient { diff --git a/core/src/main/scala/spark/deploy/client/TestExecutor.scala b/core/src/main/scala/org/apache/spark/deploy/client/TestExecutor.scala index 8a22b6b89f..c5ac45c673 100644 --- a/core/src/main/scala/spark/deploy/client/TestExecutor.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/TestExecutor.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.client +package org.apache.spark.deploy.client private[spark] object TestExecutor { def main(args: Array[String]) { diff --git a/core/src/main/scala/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala index 15ff919738..bd5327627a 100644 --- a/core/src/main/scala/spark/deploy/master/ApplicationInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master -import spark.deploy.ApplicationDescription +import org.apache.spark.deploy.ApplicationDescription import java.util.Date import akka.actor.ActorRef import scala.collection.mutable @@ -34,6 +34,7 @@ private[spark] class ApplicationInfo( var executors = new mutable.HashMap[Int, ExecutorInfo] var coresGranted = 0 var endTime = -1L + val appSource = new ApplicationSource(this) private var nextExecutorId = 0 @@ -51,8 +52,10 @@ private[spark] class ApplicationInfo( } def removeExecutor(exec: ExecutorInfo) { - executors -= exec.id - coresGranted -= exec.cores + if (executors.contains(exec.id)) { + executors -= exec.id + coresGranted -= exec.cores + } } def coresLeft: Int = desc.maxCores - coresGranted diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala new file mode 100644 index 0000000000..2d75ad5a2c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala @@ -0,0 +1,24 @@ +package org.apache.spark.deploy.master + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.spark.metrics.source.Source + +class ApplicationSource(val application: ApplicationInfo) extends Source { + val metricRegistry = new MetricRegistry() + val sourceName = "%s.%s.%s".format("application", application.desc.name, + System.currentTimeMillis()) + + metricRegistry.register(MetricRegistry.name("status"), new Gauge[String] { + override def getValue: String = application.state.toString + }) + + metricRegistry.register(MetricRegistry.name("runtime_ms"), new Gauge[Long] { + override def getValue: Long = application.duration + }) + + metricRegistry.register(MetricRegistry.name("cores", "number"), new Gauge[Int] { + override def getValue: Int = 
application.coresGranted + }) + +} diff --git a/core/src/main/scala/spark/deploy/master/ApplicationState.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala index 94f0ad8bae..7e804223cf 100644 --- a/core/src/main/scala/spark/deploy/master/ApplicationState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master private[spark] object ApplicationState extends Enumeration("WAITING", "RUNNING", "FINISHED", "FAILED") { diff --git a/core/src/main/scala/spark/deploy/master/ExecutorInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala index 99b60f7d09..cf384a985e 100644 --- a/core/src/main/scala/spark/deploy/master/ExecutorInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master -import spark.deploy.ExecutorState +import org.apache.spark.deploy.ExecutorState private[spark] class ExecutorInfo( val id: Int, diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index e5a7a87e2e..7cf0a7754f 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -15,28 +15,32 @@ * limitations under the License. */ -package spark.deploy.master - -import akka.actor._ -import akka.actor.Terminated -import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientDisconnected, RemoteClientShutdown} -import akka.util.duration._ +package org.apache.spark.deploy.master import java.text.SimpleDateFormat import java.util.Date import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} -import spark.deploy._ -import spark.{Logging, SparkException, Utils} -import spark.util.AkkaUtils -import ui.MasterWebUI +import akka.actor._ +import akka.actor.Terminated +import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientDisconnected, RemoteClientShutdown} +import akka.util.duration._ + +import org.apache.spark.{Logging, SparkException} +import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} +import org.apache.spark.deploy.DeployMessages._ +import org.apache.spark.deploy.master.ui.MasterWebUI +import org.apache.spark.metrics.MetricsSystem +import org.apache.spark.util.{Utils, AkkaUtils} private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Actor with Logging { val DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs val WORKER_TIMEOUT = System.getProperty("spark.worker.timeout", "60").toLong * 1000 - + val RETAINED_APPLICATIONS = System.getProperty("spark.deploy.retainedApplications", "200").toInt + val REAPER_ITERATIONS = System.getProperty("spark.dead.worker.persistence", "15").toInt + var nextAppNumber = 0 val workers = new HashSet[WorkerInfo] val idToWorker = new HashMap[String, WorkerInfo] @@ -53,10 +57,14 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act var firstApp: Option[ApplicationInfo] = None - val webUi = new MasterWebUI(self) - Utils.checkHost(host, "Expected hostname") + val masterMetricsSystem = MetricsSystem.createMetricsSystem("master") + val applicationMetricsSystem = MetricsSystem.createMetricsSystem("applications") + val masterSource = new MasterSource(this) + + val webUi = new MasterWebUI(this, 
webUiPort) + val masterPublicAddress = { val envVar = System.getenv("SPARK_PUBLIC_DNS") if (envVar != null) envVar else host @@ -72,17 +80,23 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act // Listen for remote client disconnection events, since they don't go through Akka's watch() context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) webUi.start() - context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis)(timeOutDeadWorkers()) + context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis, self, CheckForWorkerTimeOut) + + masterMetricsSystem.registerSource(masterSource) + masterMetricsSystem.start() + applicationMetricsSystem.start() } override def postStop() { webUi.stop() + masterMetricsSystem.stop() + applicationMetricsSystem.stop() } override def receive = { case RegisterWorker(id, host, workerPort, cores, memory, worker_webUiPort, publicAddress) => { logInfo("Registering worker %s:%d with %d cores, %s RAM".format( - host, workerPort, cores, Utils.memoryMegabytesToString(memory))) + host, workerPort, cores, Utils.megabytesToString(memory))) if (idToWorker.contains(id)) { sender ! RegisterWorkerFailed("Duplicate worker ID") } else { @@ -160,7 +174,11 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act } case RequestMasterState => { - sender ! MasterState(host, port, workers.toArray, apps.toArray, completedApps.toArray) + sender ! MasterStateResponse(host, port, workers.toArray, apps.toArray, completedApps.toArray) + } + + case CheckForWorkerTimeOut => { + timeOutDeadWorkers() } } @@ -225,20 +243,27 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo, sparkHome: String) { logInfo("Launching executor " + exec.fullId + " on worker " + worker.id) worker.addExecutor(exec) - worker.actor ! LaunchExecutor(exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory, sparkHome) - exec.application.driver ! ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory) + worker.actor ! LaunchExecutor( + exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory, sparkHome) + exec.application.driver ! ExecutorAdded( + exec.id, worker.id, worker.hostPort, exec.cores, exec.memory) } def addWorker(id: String, host: String, port: Int, cores: Int, memory: Int, webUiPort: Int, publicAddress: String): WorkerInfo = { - // There may be one or more refs to dead workers on this same node (w/ different ID's), remove them. - workers.filter(w => (w.host == host && w.port == port) && (w.state == WorkerState.DEAD)).foreach(workers -= _) + // There may be one or more refs to dead workers on this same node (w/ different ID's), + // remove them. + workers.filter { w => + (w.host == host && w.port == port) && (w.state == WorkerState.DEAD) + }.foreach { w => + workers -= w + } val worker = new WorkerInfo(id, host, port, cores, memory, sender, webUiPort, publicAddress) workers += worker idToWorker(worker.id) = worker actorToWorker(sender) = worker addressToWorker(sender.path.address) = worker - return worker + worker } def removeWorker(worker: WorkerInfo) { @@ -249,7 +274,8 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act addressToWorker -= worker.actor.path.address for (exec <- worker.executors.values) { logInfo("Telling app of lost executor: " + exec.id) - exec.application.driver ! 
ExecutorUpdated(exec.id, ExecutorState.LOST, Some("worker lost"), None) + exec.application.driver ! ExecutorUpdated( + exec.id, ExecutorState.LOST, Some("worker lost"), None) exec.application.removeExecutor(exec) } } @@ -258,6 +284,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act val now = System.currentTimeMillis() val date = new Date(now) val app = new ApplicationInfo(now, newApplicationId(date), desc, date, driver, desc.appUiUrl) + applicationMetricsSystem.registerSource(app.appSource) apps += app idToApp(app.id) = app actorToApp(driver) = app @@ -269,7 +296,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act if (workersAlive.size > 0 && !workersAlive.exists(_.memoryFree >= desc.memoryPerSlave)) { logWarning("Could not find any workers with enough memory for " + firstApp.get.id) } - return app + app } def finishApplication(app: ApplicationInfo) { @@ -283,7 +310,14 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act idToApp -= app.id actorToApp -= app.driver addressToApp -= app.driver.path.address - completedApps += app // Remember it in our history + if (completedApps.size >= RETAINED_APPLICATIONS) { + val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1) + completedApps.take(toRemove).foreach( a => { + applicationMetricsSystem.removeSource(a.appSource) + }) + completedApps.trimStart(toRemove) + } + completedApps += app // Remember it in our history waitingApps -= app for (exec <- app.executors.values) { exec.worker.removeExecutor(exec) @@ -308,12 +342,17 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act /** Check for, and remove, any timed-out workers */ def timeOutDeadWorkers() { // Copy the workers into an array so we don't modify the hashset while iterating through it - val expirationTime = System.currentTimeMillis() - WORKER_TIMEOUT - val toRemove = workers.filter(_.lastHeartbeat < expirationTime).toArray + val currentTime = System.currentTimeMillis() + val toRemove = workers.filter(_.lastHeartbeat < currentTime - WORKER_TIMEOUT).toArray for (worker <- toRemove) { - logWarning("Removing %s because we got no heartbeat in %d seconds".format( - worker.id, WORKER_TIMEOUT)) - removeWorker(worker) + if (worker.state != WorkerState.DEAD) { + logWarning("Removing %s because we got no heartbeat in %d seconds".format( + worker.id, WORKER_TIMEOUT/1000)) + removeWorker(worker) + } else { + if (worker.lastHeartbeat < currentTime - ((REAPER_ITERATIONS + 1) * WORKER_TIMEOUT)) + workers -= worker // we've seen this DEAD worker in the UI, etc. for long enough; cull it + } } } } diff --git a/core/src/main/scala/spark/deploy/master/MasterArguments.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala index d0ec3d5ea0..9d89b455fb 100644 --- a/core/src/main/scala/spark/deploy/master/MasterArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala @@ -15,10 +15,9 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master -import spark.util.IntParam -import spark.Utils +import org.apache.spark.util.{Utils, IntParam} /** * Command-line parser for the master. 
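The reworked timeOutDeadWorkers() distinguishes two thresholds: a worker is removed (marked DEAD) after one missed WORKER_TIMEOUT window, but the DEAD entry itself is only culled from the set once its last heartbeat is older than (REAPER_ITERATIONS + 1) * WORKER_TIMEOUT, so it stays visible in the UI for a while. Restating the arithmetic with the property names from the diff:

    object WorkerReaperMath {
      def main(args: Array[String]) {
        val workerTimeoutMs =
          System.getProperty("spark.worker.timeout", "60").toLong * 1000
        val reaperIterations =
          System.getProperty("spark.dead.worker.persistence", "15").toInt

        println("marked DEAD after: " + workerTimeoutMs + " ms of silence")
        println("culled entirely after: " +
          (reaperIterations + 1) * workerTimeoutMs + " ms")
      }
    }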
@@ -38,7 +37,10 @@ private[spark] class MasterArguments(args: Array[String]) { if (System.getenv("SPARK_MASTER_WEBUI_PORT") != null) { webUiPort = System.getenv("SPARK_MASTER_WEBUI_PORT").toInt } - + if (System.getProperty("master.ui.port") != null) { + webUiPort = System.getProperty("master.ui.port").toInt + } + parse(args.toList) def parse(args: List[String]): Unit = args match { diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala new file mode 100644 index 0000000000..8dd0a42f71 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala @@ -0,0 +1,25 @@ +package org.apache.spark.deploy.master + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.spark.metrics.source.Source + +private[spark] class MasterSource(val master: Master) extends Source { + val metricRegistry = new MetricRegistry() + val sourceName = "master" + + // Gauge for worker numbers in cluster + metricRegistry.register(MetricRegistry.name("workers","number"), new Gauge[Int] { + override def getValue: Int = master.workers.size + }) + + // Gauge for application numbers in cluster + metricRegistry.register(MetricRegistry.name("apps", "number"), new Gauge[Int] { + override def getValue: Int = master.apps.size + }) + + // Gauge for waiting application numbers in cluster + metricRegistry.register(MetricRegistry.name("waitingApps", "number"), new Gauge[Int] { + override def getValue: Int = master.waitingApps.size + }) +} diff --git a/core/src/main/scala/spark/deploy/master/WorkerInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala index 4135cfeb28..6219f11f2a 100644 --- a/core/src/main/scala/spark/deploy/master/WorkerInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master import akka.actor.ActorRef import scala.collection.mutable -import spark.Utils +import org.apache.spark.util.Utils private[spark] class WorkerInfo( val id: String, diff --git a/core/src/main/scala/spark/deploy/master/WorkerState.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala index 3e50b7748d..b5ee6dca79 100644 --- a/core/src/main/scala/spark/deploy/master/WorkerState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master private[spark] object WorkerState extends Enumeration("ALIVE", "DEAD", "DECOMMISSIONED") { type WorkerState = Value diff --git a/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 32264af393..f4e574d15d 100644 --- a/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -15,7 +15,9 @@ * limitations under the License. 
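MasterSource, like the ApplicationSource added earlier in this commit, follows one small pattern: a Source exposes a MetricRegistry and registers read-only Gauges over live master state. The registry/gauge half of that pattern, runnable against the Codahale metrics jar alone (the Source trait itself is Spark's):

    import com.codahale.metrics.{Gauge, MetricRegistry}

    object GaugeSketch {
      def main(args: Array[String]) {
        val registry = new MetricRegistry()
        var workers = 3  // stand-in for master.workers.size

        // A gauge is polled when read; it never stores a value itself.
        registry.register(MetricRegistry.name("workers", "number"),
          new Gauge[Int] { override def getValue: Int = workers })

        workers = 5
        println(registry.getGauges.get("workers.number").getValue)  // 5
      }
    }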
*/ -package spark.deploy.master.ui +package org.apache.spark.deploy.master.ui + +import scala.xml.Node import akka.dispatch.Await import akka.pattern.ask @@ -25,20 +27,20 @@ import javax.servlet.http.HttpServletRequest import net.liftweb.json.JsonAST.JValue -import scala.xml.Node - -import spark.deploy.{RequestMasterState, JsonProtocol, MasterState} -import spark.deploy.master.ExecutorInfo -import spark.ui.UIUtils +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.JsonProtocol +import org.apache.spark.deploy.master.ExecutorInfo +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils private[spark] class ApplicationPage(parent: MasterWebUI) { - val master = parent.master + val master = parent.masterActorRef implicit val timeout = parent.timeout /** Executor details for a particular application */ def renderJson(request: HttpServletRequest): JValue = { val appId = request.getParameter("appId") - val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterState] + val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] val state = Await.result(stateFuture, 30 seconds) val app = state.activeApps.find(_.id == appId).getOrElse({ state.completedApps.find(_.id == appId).getOrElse(null) @@ -49,7 +51,7 @@ private[spark] class ApplicationPage(parent: MasterWebUI) { /** Executor details for a particular application */ def render(request: HttpServletRequest): Seq[Node] = { val appId = request.getParameter("appId") - val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterState] + val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] val state = Await.result(stateFuture, 30 seconds) val app = state.activeApps.find(_.id == appId).getOrElse({ state.completedApps.find(_.id == appId).getOrElse(null) @@ -60,24 +62,26 @@ private[spark] class ApplicationPage(parent: MasterWebUI) { val executorTable = UIUtils.listingTable(executorHeaders, executorRow, executors) val content = - <hr /> - <div class="row"> + <div class="row-fluid"> <div class="span12"> <ul class="unstyled"> <li><strong>ID:</strong> {app.id}</li> - <li><strong>Description:</strong> {app.desc.name}</li> + <li><strong>Name:</strong> {app.desc.name}</li> <li><strong>User:</strong> {app.desc.user}</li> <li><strong>Cores:</strong> { if (app.desc.maxCores == Integer.MAX_VALUE) { - "Unlimited %s granted".format(app.coresGranted) + "Unlimited (%s granted)".format(app.coresGranted) } else { "%s (%s granted, %s left)".format( app.desc.maxCores, app.coresGranted, app.coresLeft) } } </li> - <li><strong>Memory per Slave:</strong> {app.desc.memoryPerSlave}</li> + <li> + <strong>Executor Memory:</strong> + {Utils.megabytesToString(app.desc.memoryPerSlave)} + </li> <li><strong>Submit Date:</strong> {app.submitDate}</li> <li><strong>State:</strong> {app.state}</li> <li><strong><a href={app.appUiUrl}>Application Detail UI</a></strong></li> @@ -85,16 +89,13 @@ private[spark] class ApplicationPage(parent: MasterWebUI) { </div> </div> - <hr/> - - <div class="row"> <!-- Executors --> + <div class="row-fluid"> <!-- Executors --> <div class="span12"> - <h3> Executor Summary </h3> - <br/> + <h4> Executor Summary </h4> {executorTable} </div> </div>; - UIUtils.basicSparkPage(content, "Application Info: " + app.desc.name) + UIUtils.basicSparkPage(content, "Application: " + app.desc.name) } def executorRow(executor: ExecutorInfo): Seq[Node] = { diff --git 
a/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala index b05197c1b9..d7a57229b0 100644 --- a/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala @@ -15,35 +15,45 @@ * limitations under the License. */ -package spark.deploy.master.ui +package org.apache.spark.deploy.master.ui + +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node import akka.dispatch.Await import akka.pattern.ask import akka.util.duration._ -import javax.servlet.http.HttpServletRequest - -import scala.xml.Node +import net.liftweb.json.JsonAST.JValue -import spark.deploy.{RequestMasterState, DeployWebUI, MasterState} -import spark.Utils -import spark.ui.UIUtils -import spark.deploy.master.{ApplicationInfo, WorkerInfo} +import org.apache.spark.deploy.DeployWebUI +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.JsonProtocol +import org.apache.spark.deploy.master.{ApplicationInfo, WorkerInfo} +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils private[spark] class IndexPage(parent: MasterWebUI) { - val master = parent.master + val master = parent.masterActorRef implicit val timeout = parent.timeout + def renderJson(request: HttpServletRequest): JValue = { + val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] + val state = Await.result(stateFuture, 30 seconds) + JsonProtocol.writeMasterState(state) + } + /** Index view listing applications and executors */ def render(request: HttpServletRequest): Seq[Node] = { - val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterState] + val stateFuture = (master ? 
RequestMasterState)(timeout).mapTo[MasterStateResponse] val state = Await.result(stateFuture, 30 seconds) val workerHeaders = Seq("Id", "Address", "State", "Cores", "Memory") val workers = state.workers.sortBy(_.id) val workerTable = UIUtils.listingTable(workerHeaders, workerRow, workers) - val appHeaders = Seq("ID", "Description", "Cores", "Memory per Node", "Submit Time", "User", + val appHeaders = Seq("ID", "Name", "Cores", "Memory per Node", "Submitted Time", "User", "State", "Duration") val activeApps = state.activeApps.sortBy(_.startTime).reverse val activeAppsTable = UIUtils.listingTable(appHeaders, appRow, activeApps) @@ -51,8 +61,7 @@ private[spark] class IndexPage(parent: MasterWebUI) { val completedAppsTable = UIUtils.listingTable(appHeaders, appRow, completedApps) val content = - <hr /> - <div class="row"> + <div class="row-fluid"> <div class="span12"> <ul class="unstyled"> <li><strong>URL:</strong> {state.uri}</li> @@ -60,8 +69,8 @@ private[spark] class IndexPage(parent: MasterWebUI) { <li><strong>Cores:</strong> {state.workers.map(_.cores).sum} Total, {state.workers.map(_.coresUsed).sum} Used</li> <li><strong>Memory:</strong> - {Utils.memoryMegabytesToString(state.workers.map(_.memory).sum)} Total, - {Utils.memoryMegabytesToString(state.workers.map(_.memoryUsed).sum)} Used</li> + {Utils.megabytesToString(state.workers.map(_.memory).sum)} Total, + {Utils.megabytesToString(state.workers.map(_.memoryUsed).sum)} Used</li> <li><strong>Applications:</strong> {state.activeApps.size} Running, {state.completedApps.size} Completed </li> @@ -69,34 +78,28 @@ private[spark] class IndexPage(parent: MasterWebUI) { </div> </div> - <div class="row"> + <div class="row-fluid"> <div class="span12"> - <h3> Workers </h3> - <br/> + <h4> Workers </h4> {workerTable} </div> </div> - <hr/> - - <div class="row"> + <div class="row-fluid"> <div class="span12"> - <h3> Running Applications </h3> - <br/> + <h4> Running Applications </h4> + {activeAppsTable} </div> </div> - <hr/> - - <div class="row"> + <div class="row-fluid"> <div class="span12"> - <h3> Completed Applications </h3> - <br/> + <h4> Completed Applications </h4> {completedAppsTable} </div> </div>; - UIUtils.basicSparkPage(content, "Spark Master: " + state.uri) + UIUtils.basicSparkPage(content, "Spark Master at " + state.uri) } def workerRow(worker: WorkerInfo): Seq[Node] = { @@ -108,8 +111,8 @@ private[spark] class IndexPage(parent: MasterWebUI) { <td>{worker.state}</td> <td>{worker.cores} ({worker.coresUsed} Used)</td> <td sorttable_customkey={"%s.%s".format(worker.memory, worker.memoryUsed)}> - {Utils.memoryMegabytesToString(worker.memory)} - ({Utils.memoryMegabytesToString(worker.memoryUsed)} Used) + {Utils.megabytesToString(worker.memory)} + ({Utils.megabytesToString(worker.memoryUsed)} Used) </td> </tr> } @@ -127,7 +130,7 @@ private[spark] class IndexPage(parent: MasterWebUI) { {app.coresGranted} </td> <td sorttable_customkey={app.desc.memoryPerSlave.toString}> - {Utils.memoryMegabytesToString(app.desc.memoryPerSlave)} + {Utils.megabytesToString(app.desc.memoryPerSlave)} </td> <td>{DeployWebUI.formatDate(app.submitDate)}</td> <td>{app.desc.user}</td> diff --git a/core/src/main/scala/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index 04b32c7968..f4df729e87 100644 --- a/core/src/main/scala/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -15,29 +15,31 @@ * limitations under the License. 
*/ -package spark.deploy.master.ui +package org.apache.spark.deploy.master.ui -import akka.actor.ActorRef import akka.util.Duration import javax.servlet.http.HttpServletRequest import org.eclipse.jetty.server.{Handler, Server} -import spark.{Logging, Utils} -import spark.ui.JettyUtils -import spark.ui.JettyUtils._ +import org.apache.spark.{Logging} +import org.apache.spark.deploy.master.Master +import org.apache.spark.ui.JettyUtils +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.util.Utils /** * Web UI server for the standalone master. */ private[spark] -class MasterWebUI(val master: ActorRef, requestedPort: Option[Int] = None) extends Logging { +class MasterWebUI(val master: Master, requestedPort: Int) extends Logging { implicit val timeout = Duration.create( System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") val host = Utils.localHostName() - val port = requestedPort.getOrElse( - System.getProperty("master.ui.port", MasterWebUI.DEFAULT_PORT).toInt) + val port = requestedPort + + val masterActorRef = master.self var server: Option[Server] = None var boundPort: Option[Int] = None @@ -58,10 +60,14 @@ class MasterWebUI(val master: ActorRef, requestedPort: Option[Int] = None) exten } } - val handlers = Array[(String, Handler)]( + val metricsHandlers = master.masterMetricsSystem.getServletHandlers ++ + master.applicationMetricsSystem.getServletHandlers + + val handlers = metricsHandlers ++ Array[(String, Handler)]( ("/static", createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR)), ("/app/json", (request: HttpServletRequest) => applicationPage.renderJson(request)), ("/app", (request: HttpServletRequest) => applicationPage.render(request)), + ("/json", (request: HttpServletRequest) => indexPage.renderJson(request)), ("*", (request: HttpServletRequest) => indexPage.render(request)) ) @@ -71,6 +77,5 @@ class MasterWebUI(val master: ActorRef, requestedPort: Option[Int] = None) exten } private[spark] object MasterWebUI { - val STATIC_RESOURCE_DIR = "spark/ui/static" - val DEFAULT_PORT = "8080" + val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static" } diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 8b51ff1c3a..e3dc30eefc 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -15,18 +15,20 @@ * limitations under the License. */ -package spark.deploy.worker +package org.apache.spark.deploy.worker import java.io._ import java.lang.System.getenv -import spark.deploy.{ExecutorState, ExecutorStateChanged, ApplicationDescription} + import akka.actor.ActorRef -import spark.{Utils, Logging} -import java.net.{URI, URL} -import org.apache.hadoop.fs.{Path, FileSystem} -import org.apache.hadoop.conf.Configuration -import scala.Some -import spark.deploy.ExecutorStateChanged + +import com.google.common.base.Charsets +import com.google.common.io.Files + +import org.apache.spark.{Logging} +import org.apache.spark.deploy.{ExecutorState, ApplicationDescription} +import org.apache.spark.deploy.DeployMessages.ExecutorStateChanged +import org.apache.spark.util.Utils /** * Manages the execution of one executor process. 
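MasterWebUI now takes the Master itself (so it can pull both metrics systems' servlet handlers), prepends those to the route table, and adds a /json endpoint for the full master state. The routes stay what they were: ordered (path, handler) pairs with the catch-all "*" last. A toy dispatcher showing why that ordering matters (names are illustrative, not Spark's JettyUtils API):

    object RouteTableSketch {
      type Route = (String, String => String)

      // Specific prefixes first, catch-all last, as in the handlers array.
      val handlers: Seq[Route] = Seq(
        ("/app/json", _ => "application as JSON"),
        ("/app",      _ => "application page"),
        ("/json",     _ => "master state as JSON"),
        ("*",         _ => "index page")
      )

      def dispatch(path: String): String =
        handlers.collectFirst {
          case (prefix, h) if prefix == "*" || path.startsWith(prefix) => h(path)
        }.get

      def main(args: Array[String]) {
        println(dispatch("/json"))     // master state as JSON
        println(dispatch("/workers"))  // index page
      }
    }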
@@ -39,18 +41,19 @@ private[spark] class ExecutorRunner( val memory: Int, val worker: ActorRef, val workerId: String, - val hostPort: String, + val host: String, val sparkHome: File, val workDir: File) extends Logging { - Utils.checkHostPort(hostPort, "Expected hostport") - val fullId = appId + "/" + execId var workerThread: Thread = null var process: Process = null var shutdownHook: Thread = null + private def getAppEnv(key: String): Option[String] = + appDesc.command.environment.get(key).orElse(Option(getenv(key))) + def start() { workerThread = new Thread("ExecutorRunner for " + fullId) { override def run() { fetchAndRunExecutor() } @@ -88,14 +91,14 @@ private[spark] class ExecutorRunner( /** Replace variables such as {{EXECUTOR_ID}} and {{CORES}} in a command argument passed to us */ def substituteVariables(argument: String): String = argument match { case "{{EXECUTOR_ID}}" => execId.toString - case "{{HOSTNAME}}" => Utils.parseHostPort(hostPort)._1 + case "{{HOSTNAME}}" => host case "{{CORES}}" => cores.toString case other => other } def buildCommandSeq(): Seq[String] = { val command = appDesc.command - val runner = Option(getenv("JAVA_HOME")).map(_ + "/bin/java").getOrElse("java") + val runner = getAppEnv("JAVA_HOME").map(_ + "/bin/java").getOrElse("java") // SPARK-698: do not call the run.cmd script, as process.destroy() // fails to kill a process tree on Windows Seq(runner) ++ buildJavaOpts() ++ Seq(command.mainClass) ++ @@ -107,10 +110,11 @@ private[spark] class ExecutorRunner( * the way the JAVA_OPTS are assembled there. */ def buildJavaOpts(): Seq[String] = { - val libraryOpts = Option(getenv("SPARK_LIBRARY_PATH")) + val libraryOpts = getAppEnv("SPARK_LIBRARY_PATH") .map(p => List("-Djava.library.path=" + p)) .getOrElse(Nil) - val userOpts = Option(getenv("SPARK_JAVA_OPTS")).map(Utils.splitCommandString).getOrElse(Nil) + val workerLocalOpts = Option(getenv("SPARK_JAVA_OPTS")).map(Utils.splitCommandString).getOrElse(Nil) + val userOpts = getAppEnv("SPARK_JAVA_OPTS").map(Utils.splitCommandString).getOrElse(Nil) val memoryOpts = Seq("-Xms" + memory + "M", "-Xmx" + memory + "M") // Figure out our classpath with the external compute-classpath script @@ -119,12 +123,12 @@ private[spark] class ExecutorRunner( Seq(sparkHome + "/bin/compute-classpath" + ext), extraEnvironment=appDesc.command.environment) - Seq("-cp", classPath) ++ libraryOpts ++ userOpts ++ memoryOpts + Seq("-cp", classPath) ++ libraryOpts ++ workerLocalOpts ++ userOpts ++ memoryOpts } /** Spawn a thread that will redirect a given stream to a file */ def redirectStream(in: InputStream, file: File) { - val out = new FileOutputStream(file) + val out = new FileOutputStream(file, true) new Thread("redirect output to " + file) { override def run() { try { @@ -150,6 +154,7 @@ private[spark] class ExecutorRunner( // Launch the process val command = buildCommandSeq() + logInfo("Launch command: " + command.mkString("\"", "\" \"", "\"")) val builder = new ProcessBuilder(command: _*).directory(executorDir) val env = builder.environment() for ((key, value) <- appDesc.command.environment) { @@ -160,9 +165,16 @@ private[spark] class ExecutorRunner( env.put("SPARK_LAUNCH_WITH_SCALA", "0") process = builder.start() + val header = "Spark Executor Command: %s\n%s\n\n".format( + command.mkString("\"", "\" \"", "\""), "=" * 40) + // Redirect its stdout and stderr to files - redirectStream(process.getInputStream, new File(executorDir, "stdout")) - redirectStream(process.getErrorStream, new File(executorDir, "stderr")) + val stdout = new 
File(executorDir, "stdout") + redirectStream(process.getInputStream, stdout) + + val stderr = new File(executorDir, "stderr") + Files.write(header, stderr, Charsets.UTF_8) + redirectStream(process.getErrorStream, stderr) // Wait for it to exit; this is actually a bad thing if it happens, because we expect to run // long-lived processes only. However, in the future, we might restart the executor a few diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 0bd88ea253..09530beb3b 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -15,23 +15,26 @@ * limitations under the License. */ -package spark.deploy.worker +package org.apache.spark.deploy.worker -import scala.collection.mutable.{ArrayBuffer, HashMap} -import akka.actor.{ActorRef, Props, Actor, ActorSystem, Terminated} -import akka.util.duration._ -import spark.{Logging, Utils} -import spark.util.AkkaUtils -import spark.deploy._ -import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} import java.text.SimpleDateFormat import java.util.Date -import spark.deploy.RegisterWorker -import spark.deploy.LaunchExecutor -import spark.deploy.RegisterWorkerFailed -import spark.deploy.master.Master import java.io.File -import ui.WorkerWebUI + +import scala.collection.mutable.HashMap + +import akka.actor.{ActorRef, Props, Actor, ActorSystem, Terminated} +import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} +import akka.util.duration._ + +import org.apache.spark.{Logging} +import org.apache.spark.deploy.ExecutorState +import org.apache.spark.deploy.DeployMessages._ +import org.apache.spark.deploy.master.Master +import org.apache.spark.deploy.worker.ui.WorkerWebUI +import org.apache.spark.metrics.MetricsSystem +import org.apache.spark.util.{Utils, AkkaUtils} + private[spark] class Worker( host: String, @@ -67,6 +70,9 @@ private[spark] class Worker( var coresUsed = 0 var memoryUsed = 0 + val metricsSystem = MetricsSystem.createMetricsSystem("worker") + val workerSource = new WorkerSource(this) + def coresFree: Int = cores - coresUsed def memoryFree: Int = memory - memoryUsed @@ -90,13 +96,17 @@ private[spark] class Worker( override def preStart() { logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format( - host, port, cores, Utils.memoryMegabytesToString(memory))) + host, port, cores, Utils.megabytesToString(memory))) sparkHome = new File(Option(System.getenv("SPARK_HOME")).getOrElse(".")) logInfo("Spark home: " + sparkHome) createWorkDir() webUi = new WorkerWebUI(this, workDir, Some(webUiPort)) + webUi.start() connectToMaster() + + metricsSystem.registerSource(workerSource) + metricsSystem.start() } def connectToMaster() { @@ -122,7 +132,7 @@ private[spark] class Worker( case LaunchExecutor(appId, execId, appDesc, cores_, memory_, execSparkHome_) => logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name)) val manager = new ExecutorRunner( - appId, execId, appDesc, cores_, memory_, self, workerId, host + ":" + port, new File(execSparkHome_), workDir) + appId, execId, appDesc, cores_, memory_, self, workerId, host, new File(execSparkHome_), workDir) executors(appId + "/" + execId) = manager manager.start() coresUsed += cores_ @@ -155,10 +165,10 @@ private[spark] class Worker( case Terminated(_) | RemoteClientDisconnected(_, _) | RemoteClientShutdown(_, _) => 
masterDisconnected() - + case RequestWorkerState => { - sender ! WorkerState(host, port, workerId, executors.values.toList, - finishedExecutors.values.toList, masterUrl, cores, memory, + sender ! WorkerStateResponse(host, port, workerId, executors.values.toList, + finishedExecutors.values.toList, masterUrl, cores, memory, coresUsed, memoryUsed, masterWebUiUrl) } } @@ -178,6 +188,7 @@ private[spark] class Worker( override def postStop() { executors.values.foreach(_.kill()) webUi.stop() + metricsSystem.stop() } } diff --git a/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala index 9fcd3260ca..0ae89a864f 100644 --- a/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala @@ -15,11 +15,9 @@ * limitations under the License. */ -package spark.deploy.worker +package org.apache.spark.deploy.worker -import spark.util.IntParam -import spark.util.MemoryParam -import spark.Utils +import org.apache.spark.util.{Utils, IntParam, MemoryParam} import java.lang.management.ManagementFactory /** diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala new file mode 100644 index 0000000000..6427c0178f --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala @@ -0,0 +1,34 @@ +package org.apache.spark.deploy.worker + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.spark.metrics.source.Source + +private[spark] class WorkerSource(val worker: Worker) extends Source { + val sourceName = "worker" + val metricRegistry = new MetricRegistry() + + metricRegistry.register(MetricRegistry.name("executors", "number"), new Gauge[Int] { + override def getValue: Int = worker.executors.size + }) + + // Gauge for cores used of this worker + metricRegistry.register(MetricRegistry.name("coresUsed", "number"), new Gauge[Int] { + override def getValue: Int = worker.coresUsed + }) + + // Gauge for memory used of this worker + metricRegistry.register(MetricRegistry.name("memUsed", "MBytes"), new Gauge[Int] { + override def getValue: Int = worker.memoryUsed + }) + + // Gauge for cores free of this worker + metricRegistry.register(MetricRegistry.name("coresFree", "number"), new Gauge[Int] { + override def getValue: Int = worker.coresFree + }) + + // Gauge for memory free of this worker + metricRegistry.register(MetricRegistry.name("memFree", "MBytes"), new Gauge[Int] { + override def getValue: Int = worker.memoryFree + }) +} diff --git a/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala index 7548a26c2e..d2d3617498 100644 --- a/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala @@ -15,22 +15,24 @@ * limitations under the License. 
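WorkerSource above follows the standard Codahale pattern: each metric is a read-only Gauge closing over the component it observes, registered under a dotted name. The same shape works for any component; a hypothetical sketch (QueueSource and its metric name are illustrative, not part of this patch):

    import com.codahale.metrics.{Gauge, MetricRegistry}

    import org.apache.spark.metrics.source.Source

    class QueueSource(queue: java.util.Queue[_]) extends Source {
      val sourceName = "queue"
      val metricRegistry = new MetricRegistry()

      // Gauge for the current queue depth; evaluated lazily on every poll.
      metricRegistry.register(MetricRegistry.name("size", "number"), new Gauge[Int] {
        override def getValue: Int = queue.size()
      })
    }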
*/ -package spark.deploy.worker.ui +package org.apache.spark.deploy.worker.ui + +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node import akka.dispatch.Await import akka.pattern.ask import akka.util.duration._ -import javax.servlet.http.HttpServletRequest - import net.liftweb.json.JsonAST.JValue -import scala.xml.Node +import org.apache.spark.deploy.JsonProtocol +import org.apache.spark.deploy.DeployMessages.{RequestWorkerState, WorkerStateResponse} +import org.apache.spark.deploy.worker.ExecutorRunner +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils -import spark.deploy.{RequestWorkerState, JsonProtocol, WorkerState} -import spark.deploy.worker.ExecutorRunner -import spark.Utils -import spark.ui.UIUtils private[spark] class IndexPage(parent: WorkerWebUI) { val workerActor = parent.worker.self @@ -38,13 +40,13 @@ private[spark] class IndexPage(parent: WorkerWebUI) { val timeout = parent.timeout def renderJson(request: HttpServletRequest): JValue = { - val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerState] + val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse] val workerState = Await.result(stateFuture, 30 seconds) JsonProtocol.writeWorkerState(workerState) } def render(request: HttpServletRequest): Seq[Node] = { - val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerState] + val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse] val workerState = Await.result(stateFuture, 30 seconds) val executorHeaders = Seq("ExecutorID", "Cores", "Memory", "Job Details", "Logs") @@ -54,8 +56,7 @@ private[spark] class IndexPage(parent: WorkerWebUI) { UIUtils.listingTable(executorHeaders, executorRow, workerState.finishedExecutors) val content = - <hr /> - <div class="row"> <!-- Worker Details --> + <div class="row-fluid"> <!-- Worker Details --> <div class="span12"> <ul class="unstyled"> <li><strong>ID:</strong> {workerState.workerId}</li> @@ -63,32 +64,29 @@ private[spark] class IndexPage(parent: WorkerWebUI) { Master URL:</strong> {workerState.masterUrl} </li> <li><strong>Cores:</strong> {workerState.cores} ({workerState.coresUsed} Used)</li> - <li><strong>Memory:</strong> {Utils.memoryMegabytesToString(workerState.memory)} - ({Utils.memoryMegabytesToString(workerState.memoryUsed)} Used)</li> + <li><strong>Memory:</strong> {Utils.megabytesToString(workerState.memory)} + ({Utils.megabytesToString(workerState.memoryUsed)} Used)</li> </ul> <p><a href={workerState.masterWebUiUrl}>Back to Master</a></p> </div> </div> - <hr/> - <div class="row"> <!-- Running Executors --> + <div class="row-fluid"> <!-- Running Executors --> <div class="span12"> - <h3> Running Executors {workerState.executors.size} </h3> - <br/> + <h4> Running Executors {workerState.executors.size} </h4> {runningExecutorTable} </div> </div> - <hr/> - <div class="row"> <!-- Finished Executors --> + <div class="row-fluid"> <!-- Finished Executors --> <div class="span12"> - <h3> Finished Executors </h3> - <br/> + <h4> Finished Executors </h4> {finishedExecutorTable} </div> </div>; - UIUtils.basicSparkPage(content, "Spark Worker on %s:%s".format(workerState.host, workerState.port)) + UIUtils.basicSparkPage(content, "Spark Worker at %s:%s".format( + workerState.host, workerState.port)) } def executorRow(executor: ExecutorRunner): Seq[Node] = { @@ -96,7 +94,7 @@ private[spark] class IndexPage(parent: WorkerWebUI) { <td>{executor.execId}</td> <td>{executor.cores}</td> <td 
sorttable_customkey={executor.memory.toString}> - {Utils.memoryMegabytesToString(executor.memory)} + {Utils.megabytesToString(executor.memory)} </td> <td> <ul class="unstyled"> diff --git a/core/src/main/scala/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala index 61d4cd6d99..95d6007f3b 100644 --- a/core/src/main/scala/spark/deploy/worker/ui/WorkerWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala @@ -15,9 +15,8 @@ * limitations under the License. */ -package spark.deploy.worker.ui +package org.apache.spark.deploy.worker.ui -import akka.actor.ActorRef import akka.util.{Duration, Timeout} import java.io.{FileInputStream, File} @@ -26,18 +25,19 @@ import javax.servlet.http.HttpServletRequest import org.eclipse.jetty.server.{Handler, Server} -import spark.deploy.worker.Worker -import spark.{Utils, Logging} -import spark.ui.JettyUtils -import spark.ui.JettyUtils._ -import spark.ui.UIUtils +import org.apache.spark.deploy.worker.Worker +import org.apache.spark.{Logging} +import org.apache.spark.ui.JettyUtils +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils /** * Web UI server for the standalone worker. */ private[spark] class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[Int] = None) - extends Logging { + extends Logging { implicit val timeout = Timeout( Duration.create(System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds")) val host = Utils.localHostName() @@ -49,7 +49,9 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I val indexPage = new IndexPage(this) - val handlers = Array[(String, Handler)]( + val metricsHandlers = worker.metricsSystem.getServletHandlers + + val handlers = metricsHandlers ++ Array[(String, Handler)]( ("/static", createStaticHandler(WorkerWebUI.STATIC_RESOURCE_DIR)), ("/log", (request: HttpServletRequest) => log(request)), ("/logPage", (request: HttpServletRequest) => logPage(request)), @@ -111,30 +113,37 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I if (startByte > 0) { <a href={"?appId=%s&executorId=%s&logType=%s&offset=%s&byteLength=%s" .format(appId, executorId, logType, math.max(startByte-byteLength, 0), - byteLength)}> - <button>Previous {Utils.memoryBytesToString(math.min(byteLength, startByte))}</button> + byteLength)}> + <button type="button" class="btn btn-default"> + Previous {Utils.bytesToString(math.min(byteLength, startByte))} + </button> </a> } else { - <button disabled="disabled">Previous 0 B</button> + <button type="button" class="btn btn-default" disabled="disabled"> + Previous 0 B + </button> } val nextButton = if (endByte < logLength) { <a href={"?appId=%s&executorId=%s&logType=%s&offset=%s&byteLength=%s". 
format(appId, executorId, logType, endByte, byteLength)}> - <button>Next {Utils.memoryBytesToString(math.min(byteLength, logLength-endByte))}</button> + <button type="button" class="btn btn-default"> + Next {Utils.bytesToString(math.min(byteLength, logLength-endByte))} + </button> </a> } else { - <button disabled="disabled">Next 0 B</button> + <button type="button" class="btn btn-default" disabled="disabled"> + Next 0 B + </button> } val content = <html> <body> {linkToMaster} - <hr /> <div> <div style="float:left;width:40%">{backButton}</div> <div style="float:left;">{range}</div> @@ -177,6 +186,6 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I } private[spark] object WorkerWebUI { - val STATIC_RESOURCE_DIR = "spark/ui/static" + val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static" val DEFAULT_PORT="8081" } diff --git a/core/src/main/scala/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 2e81151882..d365804994 100644 --- a/core/src/main/scala/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -15,26 +15,30 @@ * limitations under the License. */ -package spark.executor +package org.apache.spark.executor -import java.io.{File, FileOutputStream} -import java.net.{URI, URL, URLClassLoader} +import java.io.{File} +import java.lang.management.ManagementFactory +import java.nio.ByteBuffer import java.util.concurrent._ -import org.apache.hadoop.fs.FileUtil +import scala.collection.JavaConversions._ +import scala.collection.mutable.HashMap -import scala.collection.mutable.{ArrayBuffer, Map, HashMap} +import org.apache.spark.scheduler._ +import org.apache.spark._ +import org.apache.spark.util.Utils -import spark.broadcast._ -import spark.scheduler._ -import spark._ -import java.nio.ByteBuffer /** * The Mesos executor for Spark. */ -private[spark] class Executor(executorId: String, slaveHostname: String, properties: Seq[(String, String)]) extends Logging { - +private[spark] class Executor( + executorId: String, + slaveHostname: String, + properties: Seq[(String, String)]) + extends Logging +{ // Application dependencies (added through SparkContext) that we've fetched so far on this node. // Each map holds the master's timestamp for the version of that file or JAR we got. private val currentFiles: HashMap[String, Long] = new HashMap[String, Long]() @@ -57,6 +61,13 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert System.setProperty(key, value) } + // If we are in yarn mode, systems can have different disk layouts so we must set it + // to what Yarn on this system said was available. This will be used later when SparkEnv is + // created. + if (java.lang.Boolean.valueOf(System.getenv("SPARK_YARN_MODE"))) { + System.setProperty("spark.local.dir", getYarnLocalDirs()) + } + // Create our ClassLoader and set it on this thread private val urlClassLoader = createClassLoader() private val replClassLoader = addReplClassLoaderIfNeeded(urlClassLoader) @@ -69,7 +80,7 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert override def uncaughtException(thread: Thread, exception: Throwable) { try { logError("Uncaught exception in thread " + thread, exception) - + // We may have been called from a shutdown hook. If so, we must not call System.exit(). // (If we do, we will deadlock.)
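// (Aside, a sketch using only JDK calls: System.exit() triggers the JVM's shutdown
// sequence and then blocks until it finishes, so calling it from code that is already
// running as part of that sequence waits on itself forever. By contrast,
// Runtime.getRuntime.halt(status) terminates immediately without running hooks.
// The Utils.inShutdown() guard below simply avoids System.exit() in that case.)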
if (!Utils.inShutdown()) { @@ -87,9 +98,13 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert } ) + val executorSource = new ExecutorSource(this) + // Initialize Spark environment (using system properties read above) val env = SparkEnv.createFromSystemProperties(executorId, slaveHostname, 0, false, false) SparkEnv.set(env) + env.metricsSystem.registerSource(executorSource) + private val akkaFrameSize = env.actorSystem.settings.config.getBytes("akka.remote.netty.message-frame-size") // Start worker thread pool @@ -100,6 +115,21 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert threadPool.execute(new TaskRunner(context, taskId, serializedTask)) } + /** Get the Yarn approved local directories. */ + private def getYarnLocalDirs(): String = { + // Hadoop 0.23 and 2.x have different environment variable names for the + // local dirs, so let's check both. We assume one of the two is set. + // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X + val localDirs = Option(System.getenv("YARN_LOCAL_DIRS")) + .getOrElse(Option(System.getenv("LOCAL_DIRS")) + .getOrElse("")) + + if (localDirs.isEmpty()) { + throw new Exception("Yarn local dirs can't be empty") + } + return localDirs + } + class TaskRunner(context: ExecutorBackend, taskId: Long, serializedTask: ByteBuffer) extends Runnable { @@ -112,6 +142,9 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert context.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER) var attemptedTask: Option[Task[Any]] = None var taskStart: Long = 0 + def getTotalGCTime = ManagementFactory.getGarbageCollectorMXBeans.map(g => g.getCollectionTime).sum + val startGCTime = getTotalGCTime + try { SparkEnv.set(env) Accumulators.clear() @@ -119,15 +152,16 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert updateDependencies(taskFiles, taskJars) val task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader) attemptedTask = Some(task) - logInfo("Its generation is " + task.generation) - env.mapOutputTracker.updateGeneration(task.generation) + logInfo("Its epoch is " + task.epoch) + env.mapOutputTracker.updateEpoch(task.epoch) taskStart = System.currentTimeMillis() val value = task.run(taskId.toInt) val taskFinish = System.currentTimeMillis() - task.metrics.foreach{ m => + for (m <- task.metrics) { m.hostname = Utils.localHostName m.executorDeserializeTime = (taskStart - startTime).toInt m.executorRunTime = (taskFinish - taskStart).toInt + m.jvmGCTime = getTotalGCTime - startGCTime } //TODO I'd also like to track the time it takes to serialize the task results, but that is a huge headache, b/c // we need to serialize the task metrics first.
If TaskMetrics had a custom serialized format, we could @@ -151,7 +185,10 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert case t: Throwable => { val serviceTime = (System.currentTimeMillis() - taskStart).toInt val metrics = attemptedTask.flatMap(t => t.metrics) - metrics.foreach{m => m.executorRunTime = serviceTime} + for (m <- metrics) { + m.executorRunTime = serviceTime + m.jvmGCTime = getTotalGCTime - startGCTime + } val reason = ExceptionFailure(t.getClass.getName, t.toString, t.getStackTrace, metrics) context.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason)) @@ -189,13 +226,13 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert if (classUri != null) { logInfo("Using REPL class URI: " + classUri) try { - val klass = Class.forName("spark.repl.ExecutorClassLoader") + val klass = Class.forName("org.apache.spark.repl.ExecutorClassLoader") .asInstanceOf[Class[_ <: ClassLoader]] val constructor = klass.getConstructor(classOf[String], classOf[ClassLoader]) return constructor.newInstance(classUri, parent) } catch { case _: ClassNotFoundException => - logError("Could not find spark.repl.ExecutorClassLoader on classpath!") + logError("Could not find org.apache.spark.repl.ExecutorClassLoader on classpath!") System.exit(1) null } diff --git a/core/src/main/scala/spark/executor/ExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorBackend.scala index 33a6f8a824..ad7dd34c76 100644 --- a/core/src/main/scala/spark/executor/ExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorBackend.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.executor +package org.apache.spark.executor import java.nio.ByteBuffer -import spark.TaskState.TaskState +import org.apache.spark.TaskState.TaskState /** * A pluggable interface used by the Executor to send updates to the cluster scheduler. diff --git a/core/src/main/scala/spark/executor/ExecutorExitCode.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala index 64b9fb88f8..e5c9bbbe28 100644 --- a/core/src/main/scala/spark/executor/ExecutorExitCode.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala @@ -15,7 +15,7 @@ * limitations under the License. 
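The jvmGCTime bookkeeping added to TaskRunner above is a plain before/after delta over the JVM's collector MXBeans. A self-contained sketch of the same measurement (the timed work is a stand-in for task.run):

    import java.lang.management.ManagementFactory
    import scala.collection.JavaConversions._

    def totalGCTime: Long =
      ManagementFactory.getGarbageCollectorMXBeans.map(_.getCollectionTime).sum

    val startGCTime = totalGCTime
    val ignored = (1 to 1000000).map(_.toString)   // stand-in for task.run(...)
    val jvmGCTime = totalGCTime - startGCTime      // the value recorded in TaskMetrics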
*/ -package spark.executor +package org.apache.spark.executor /** * These are exit codes that executors should use to provide the master with information about diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala new file mode 100644 index 0000000000..17653cd560 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala @@ -0,0 +1,55 @@ +package org.apache.spark.executor + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.hdfs.DistributedFileSystem +import org.apache.hadoop.fs.LocalFileSystem + +import scala.collection.JavaConversions._ + +import org.apache.spark.metrics.source.Source + +class ExecutorSource(val executor: Executor) extends Source { + private def fileStats(scheme: String) : Option[FileSystem.Statistics] = + FileSystem.getAllStatistics().filter(s => s.getScheme.equals(scheme)).headOption + + private def registerFileSystemStat[T]( + scheme: String, name: String, f: FileSystem.Statistics => T, defaultValue: T) = { + metricRegistry.register(MetricRegistry.name("filesystem", scheme, name), new Gauge[T] { + override def getValue: T = fileStats(scheme).map(f).getOrElse(defaultValue) + }) + } + + val metricRegistry = new MetricRegistry() + val sourceName = "executor" + + // Gauge for the executor thread pool's number of actively executing tasks + metricRegistry.register(MetricRegistry.name("threadpool", "activeTask", "count"), new Gauge[Int] { + override def getValue: Int = executor.threadPool.getActiveCount() + }) + + // Gauge for the executor thread pool's approximate total number of tasks that have been completed + metricRegistry.register(MetricRegistry.name("threadpool", "completeTask", "count"), new Gauge[Long] { + override def getValue: Long = executor.threadPool.getCompletedTaskCount() + }) + + // Gauge for the executor thread pool's current number of threads + metricRegistry.register(MetricRegistry.name("threadpool", "currentPool", "size"), new Gauge[Int] { + override def getValue: Int = executor.threadPool.getPoolSize() + }) + + // Gauge for the executor thread pool's largest number of threads that have ever simultaneously been in the pool + metricRegistry.register(MetricRegistry.name("threadpool", "maxPool", "size"), new Gauge[Int] { + override def getValue: Int = executor.threadPool.getMaximumPoolSize() + }) + + // Gauges for the file system stats of this executor + for (scheme <- Array("hdfs", "file")) { + registerFileSystemStat(scheme, "bytesRead", _.getBytesRead(), 0L) + registerFileSystemStat(scheme, "bytesWritten", _.getBytesWritten(), 0L) + registerFileSystemStat(scheme, "readOps", _.getReadOps(), 0) + registerFileSystemStat(scheme, "largeReadOps", _.getLargeReadOps(), 0) + registerFileSystemStat(scheme, "writeOps", _.getWriteOps(), 0) + } +} diff --git a/core/src/main/scala/spark/executor/ExecutorURLClassLoader.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorURLClassLoader.scala index 09d12fb65b..f9bfe8ed2f 100644 --- a/core/src/main/scala/spark/executor/ExecutorURLClassLoader.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorURLClassLoader.scala @@ -15,7 +15,7 @@ * limitations under the License.
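MetricRegistry.name joins its arguments with dots, so the gauges above are published under names like threadpool.activeTask.count within the "executor" source. A quick check:

    import com.codahale.metrics.MetricRegistry

    // Prints "threadpool.activeTask.count"
    println(MetricRegistry.name("threadpool", "activeTask", "count"))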
*/ -package spark.executor +package org.apache.spark.executor import java.net.{URLClassLoader, URL} diff --git a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala index 4961c42fad..da62091980 100644 --- a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala @@ -15,15 +15,16 @@ * limitations under the License. */ -package spark.executor +package org.apache.spark.executor import java.nio.ByteBuffer import org.apache.mesos.{Executor => MesosExecutor, MesosExecutorDriver, MesosNativeLibrary, ExecutorDriver} import org.apache.mesos.Protos.{TaskState => MesosTaskState, TaskStatus => MesosTaskStatus, _} -import spark.TaskState.TaskState +import org.apache.spark.TaskState.TaskState import com.google.protobuf.ByteString -import spark.{Utils, Logging} -import spark.TaskState +import org.apache.spark.{Logging} +import org.apache.spark.TaskState +import org.apache.spark.util.Utils private[spark] class MesosExecutorBackend extends MesosExecutor diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/StandaloneExecutorBackend.scala index f4003da732..7839023868 100644 --- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/StandaloneExecutorBackend.scala @@ -15,22 +15,18 @@ * limitations under the License. */ -package spark.executor +package org.apache.spark.executor import java.nio.ByteBuffer -import spark.Logging -import spark.TaskState.TaskState -import spark.util.AkkaUtils + import akka.actor.{ActorRef, Actor, Props, Terminated} import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} -import java.util.concurrent.{TimeUnit, ThreadPoolExecutor, SynchronousQueue} -import spark.scheduler.cluster._ -import spark.scheduler.cluster.RegisteredExecutor -import spark.scheduler.cluster.LaunchTask -import spark.scheduler.cluster.RegisterExecutorFailed -import spark.scheduler.cluster.RegisterExecutor -import spark.Utils -import spark.deploy.SparkHadoopUtil + +import org.apache.spark.{Logging, SparkEnv} +import org.apache.spark.TaskState.TaskState +import org.apache.spark.scheduler.cluster.StandaloneClusterMessages._ +import org.apache.spark.util.{Utils, AkkaUtils} + private[spark] class StandaloneExecutorBackend( driverUrl: String, @@ -85,19 +81,6 @@ private[spark] class StandaloneExecutorBackend( private[spark] object StandaloneExecutorBackend { def run(driverUrl: String, executorId: String, hostname: String, cores: Int) { - SparkHadoopUtil.runAsUser(run0, Tuple4[Any, Any, Any, Any] (driverUrl, executorId, hostname, cores)) - } - - // This will be run 'as' the user - def run0(args: Product) { - assert(4 == args.productArity) - runImpl(args.productElement(0).asInstanceOf[String], - args.productElement(1).asInstanceOf[String], - args.productElement(2).asInstanceOf[String], - args.productElement(3).asInstanceOf[Int]) - } - - private def runImpl(driverUrl: String, executorId: String, hostname: String, cores: Int) { // Debug code Utils.checkHost(hostname) diff --git a/core/src/main/scala/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala index 3151627839..f311141148 100644 --- a/core/src/main/scala/spark/executor/TaskMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala @@ -15,7 +15,7 @@ * 
limitations under the License. */ -package spark.executor +package org.apache.spark.executor class TaskMetrics extends Serializable { /** @@ -31,7 +31,7 @@ class TaskMetrics extends Serializable { /** * Time the executor spends actually running the task (including fetching shuffle data) */ - var executorRunTime:Int = _ + var executorRunTime: Int = _ /** * The number of bytes this task transmitted back to the driver as the TaskResult */ var resultSize: Long = _ /** + * Amount of time the JVM spent in garbage collection while executing this task + */ + var jvmGCTime: Long = _ + + /** * If this task reads from shuffle output, metrics on getting shuffle data will be collected here */ var shuffleReadMetrics: Option[ShuffleReadMetrics] = None diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala new file mode 100644 index 0000000000..90a0420caf --- /dev/null +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.io + +import java.io.{InputStream, OutputStream} + +import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} + +import org.xerial.snappy.{SnappyInputStream, SnappyOutputStream} + + +/** + * CompressionCodec allows the customization of which compression implementation is used in + * block storage. + */ +trait CompressionCodec { + + def compressedOutputStream(s: OutputStream): OutputStream + + def compressedInputStream(s: InputStream): InputStream +} + + +private[spark] object CompressionCodec { + + def createCodec(): CompressionCodec = { + // Set the default codec to Snappy since the LZF implementation initializes a pretty large + // buffer for every stream, which results in a lot of memory overhead when the number of + // shuffle reduce buckets is large. + createCodec(classOf[SnappyCompressionCodec].getName) + } + + def createCodec(codecName: String): CompressionCodec = { + Class.forName( + System.getProperty("spark.io.compression.codec", codecName), + true, + Thread.currentThread.getContextClassLoader).newInstance().asInstanceOf[CompressionCodec] + } +} + + +/** + * LZF implementation of [[org.apache.spark.io.CompressionCodec]]. + */ +class LZFCompressionCodec extends CompressionCodec { + + override def compressedOutputStream(s: OutputStream): OutputStream = { + new LZFOutputStream(s).setFinishBlockOnFlush(true) + } + + override def compressedInputStream(s: InputStream): InputStream = new LZFInputStream(s) } + + +/** + * Snappy implementation of [[org.apache.spark.io.CompressionCodec]]. + * Block size can be configured by spark.io.compression.snappy.block.size.
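A minimal round trip through the codec API above (a sketch: with no spark.io.compression.codec property set, createCodec() resolves to the Snappy implementation):

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

    import org.apache.spark.io.CompressionCodec

    val codec = CompressionCodec.createCodec()

    // Compress a payload into an in-memory buffer.
    val buffer = new ByteArrayOutputStream()
    val out = codec.compressedOutputStream(buffer)
    out.write("hello, codec".getBytes("UTF-8"))
    out.close()

    // Decompress through the matching input stream.
    val in = codec.compressedInputStream(new ByteArrayInputStream(buffer.toByteArray))
    val bytes = Stream.continually(in.read()).takeWhile(_ != -1).map(_.toByte).toArray
    println(new String(bytes, "UTF-8"))   // "hello, codec"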
+ */ +class SnappyCompressionCodec extends CompressionCodec { + + override def compressedOutputStream(s: OutputStream): OutputStream = { + val blockSize = System.getProperty("spark.io.compression.snappy.block.size", "32768").toInt + new SnappyOutputStream(s, blockSize) + } + + override def compressedInputStream(s: InputStream): InputStream = new SnappyInputStream(s) +} diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala new file mode 100644 index 0000000000..0f9c4e00b1 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics + +import java.util.Properties +import java.io.{File, FileInputStream, InputStream, IOException} + +import scala.collection.mutable +import scala.util.matching.Regex + +import org.apache.spark.Logging + +private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging { + initLogging() + + val DEFAULT_PREFIX = "*" + val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r + val METRICS_CONF = "metrics.properties" + + val properties = new Properties() + var propertyCategories: mutable.HashMap[String, Properties] = null + + private def setDefaultProperties(prop: Properties) { + prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") + prop.setProperty("*.sink.servlet.uri", "/metrics/json") + prop.setProperty("*.sink.servlet.sample", "false") + prop.setProperty("master.sink.servlet.uri", "/metrics/master/json") + prop.setProperty("applications.sink.servlet.uri", "/metrics/applications/json") + } + + def initialize() { + // Add default properties in case there's no properties file + setDefaultProperties(properties) + + // If spark.metrics.conf is not set, try to get the file from the classpath + var is: InputStream = null + try { + is = configFile match { + case Some(f) => new FileInputStream(f) + case None => getClass.getClassLoader.getResourceAsStream(METRICS_CONF) + } + + if (is != null) { + properties.load(is) + } + } catch { + case e: Exception => logError("Error loading configuration file", e) + } finally { + if (is != null) is.close() + } + + propertyCategories = subProperties(properties, INSTANCE_REGEX) + if (propertyCategories.contains(DEFAULT_PREFIX)) { + import scala.collection.JavaConversions._ + + val defaultProperty = propertyCategories(DEFAULT_PREFIX) + for { (inst, prop) <- propertyCategories + if (inst != DEFAULT_PREFIX) + (k, v) <- defaultProperty + if (prop.getProperty(k) == null) } { + prop.setProperty(k, v) + } + } + } + + def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { + val subProperties = new
mutable.HashMap[String, Properties] + import scala.collection.JavaConversions._ + prop.foreach { kv => + if (regex.findPrefixOf(kv._1) != None) { + val regex(prefix, suffix) = kv._1 + subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2) + } + } + subProperties + } + + def getInstance(inst: String): Properties = { + propertyCategories.get(inst) match { + case Some(s) => s + case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties) + } + } +} + diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala new file mode 100644 index 0000000000..bec0c83be8 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics + +import com.codahale.metrics.{Metric, MetricFilter, MetricRegistry} + +import java.util.Properties +import java.util.concurrent.TimeUnit + +import scala.collection.mutable + +import org.apache.spark.Logging +import org.apache.spark.metrics.sink.{MetricsServlet, Sink} +import org.apache.spark.metrics.source.Source + +/** + * The Spark metrics system is created for a specific "instance" and combines sources and + * sinks: it periodically polls metrics data from its sources and pushes it to its sink + * destinations. + * + * "instance" specifies "who" (which role) uses the metrics system. In Spark there are several + * roles, such as master, worker, executor and client driver, and each creates a metrics system + * for monitoring, so an instance represents one of these roles. The instances currently + * implemented in Spark are: master, worker, executor, driver, applications. + * + * "source" specifies "where" (from which source) to collect metrics data. There are two kinds + * of source: + * 1. Spark internal sources, like MasterSource, WorkerSource, etc, which collect a Spark + * component's internal state. These sources are tied to an instance and are added after the + * specific metrics system is created. + * 2. Common sources, like JvmSource, which collect low-level state. These are set up via the + * configuration file and loaded through reflection. + * + * "sink" specifies "where" (to which destination) to output metrics data. Several sinks can + * coexist, and metrics are flushed to all of them. + * + * The metrics configuration format is: + * [instance].[sink|source].[name].[options] = xxxx + * + * [instance] can be "master", "worker", "executor", "driver" or "applications", meaning that + * only the specified instance has this property. + * The wild card "*" can be used in place of an instance name, meaning that all instances have + * this property. + * + * [sink|source] indicates whether this property belongs to a source or a sink; the field can + * only be "source" or "sink". + * + * [name] specifies the custom-defined name of the sink or source. + * + * [options] is the specific property of this source or sink. + */ +private[spark] class MetricsSystem private (val instance: String) extends Logging { + initLogging() + + val confFile = System.getProperty("spark.metrics.conf") + val metricsConfig = new MetricsConfig(Option(confFile)) + + val sinks = new mutable.ArrayBuffer[Sink] + val sources = new mutable.ArrayBuffer[Source] + val registry = new MetricRegistry() + + // Treat MetricsServlet as a special sink, since it needs to be exposed to add handlers to the web UI + private var metricsServlet: Option[MetricsServlet] = None + + /** Get any UI handlers used by this metrics system. */ + def getServletHandlers = metricsServlet.map(_.getHandlers).getOrElse(Array()) + + metricsConfig.initialize() + registerSources() + registerSinks() + + def start() { + sinks.foreach(_.start) + } + + def stop() { + sinks.foreach(_.stop) + } + + def registerSource(source: Source) { + sources += source + try { + registry.register(source.sourceName, source.metricRegistry) + } catch { + case e: IllegalArgumentException => logInfo("Metrics already registered", e) + } + } + + def removeSource(source: Source) { + sources -= source + registry.removeMatching(new MetricFilter { + def matches(name: String, metric: Metric): Boolean = name.startsWith(source.sourceName) + }) + } + + def registerSources() { + val instConfig = metricsConfig.getInstance(instance) + val sourceConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SOURCE_REGEX) + + // Register all the sources related to this instance + sourceConfigs.foreach { kv => + val classPath = kv._2.getProperty("class") + try { + val source = Class.forName(classPath).newInstance() + registerSource(source.asInstanceOf[Source]) + } catch { + case e: Exception => logError("Source class " + classPath + " cannot be instantiated", e) + } + } + } + + def registerSinks() { + val instConfig = metricsConfig.getInstance(instance) + val sinkConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SINK_REGEX) + + sinkConfigs.foreach { kv => + val classPath = kv._2.getProperty("class") + try { + val sink = Class.forName(classPath) + .getConstructor(classOf[Properties], classOf[MetricRegistry]) + .newInstance(kv._2, registry) + if (kv._1 == "servlet") { + metricsServlet = Some(sink.asInstanceOf[MetricsServlet]) + } else { + sinks += sink.asInstanceOf[Sink] + } + } catch { + case e: Exception => logError("Sink class " + classPath + " cannot be instantiated", e) + } + } + } +} + +private[spark] object MetricsSystem { + val SINK_REGEX = "^sink\\.(.+)\\.(.+)".r + val SOURCE_REGEX = "^source\\.(.+)\\.(.+)".r + + val MINIMAL_POLL_UNIT = TimeUnit.SECONDS + val MINIMAL_POLL_PERIOD = 1 + + def checkMinimalPollingPeriod(pollUnit: TimeUnit, pollPeriod: Int) { + val period = MINIMAL_POLL_UNIT.convert(pollPeriod, pollUnit) + if (period < MINIMAL_POLL_PERIOD) { + throw new IllegalArgumentException("Polling period " + pollPeriod + " " + pollUnit + + " is below the minimal polling period") + } + } + + def createMetricsSystem(instance: String): MetricsSystem = new MetricsSystem(instance) +} diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala new file mode 100644 index 0000000000..bce257d6e6 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.sink + +import com.codahale.metrics.{ConsoleReporter, MetricRegistry} + +import java.util.Properties +import java.util.concurrent.TimeUnit + +import org.apache.spark.metrics.MetricsSystem + +class ConsoleSink(val property: Properties, val registry: MetricRegistry) extends Sink { + val CONSOLE_DEFAULT_PERIOD = 10 + val CONSOLE_DEFAULT_UNIT = "SECONDS" + + val CONSOLE_KEY_PERIOD = "period" + val CONSOLE_KEY_UNIT = "unit" + + val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { + case Some(s) => s.toInt + case None => CONSOLE_DEFAULT_PERIOD + } + + val pollUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { + case Some(s) => TimeUnit.valueOf(s.toUpperCase()) + case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) + } + + MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) + + val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .convertRatesTo(TimeUnit.SECONDS) + .build() + + override def start() { + reporter.start(pollPeriod, pollUnit) + } + + override def stop() { + reporter.stop() + } +} + diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala new file mode 100644 index 0000000000..3d1a06a395 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
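Putting the pieces together, a metrics.properties entry that drives the ConsoleSink above could look like this (values are illustrative; "class", "period" and "unit" are the keys the code reads, and "*" applies the sink to every instance, per the configuration format described earlier):

    *.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink
    *.sink.console.period=10
    *.sink.console.unit=seconds
    # Optionally attach the JVM source to one instance only:
    worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource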
+ */ + +package org.apache.spark.metrics.sink + +import com.codahale.metrics.{CsvReporter, MetricRegistry} + +import java.io.File +import java.util.{Locale, Properties} +import java.util.concurrent.TimeUnit + +import org.apache.spark.metrics.MetricsSystem + +class CsvSink(val property: Properties, val registry: MetricRegistry) extends Sink { + val CSV_KEY_PERIOD = "period" + val CSV_KEY_UNIT = "unit" + val CSV_KEY_DIR = "directory" + + val CSV_DEFAULT_PERIOD = 10 + val CSV_DEFAULT_UNIT = "SECONDS" + val CSV_DEFAULT_DIR = "/tmp/" + + val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { + case Some(s) => s.toInt + case None => CSV_DEFAULT_PERIOD + } + + val pollUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { + case Some(s) => TimeUnit.valueOf(s.toUpperCase()) + case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) + } + + MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) + + val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { + case Some(s) => s + case None => CSV_DEFAULT_DIR + } + + val reporter: CsvReporter = CsvReporter.forRegistry(registry) + .formatFor(Locale.US) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .convertRatesTo(TimeUnit.SECONDS) + .build(new File(pollDir)) + + override def start() { + reporter.start(pollPeriod, pollUnit) + } + + override def stop() { + reporter.stop() + } +} + diff --git a/core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala index 25386b2796..621d086d41 100644 --- a/core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala @@ -15,13 +15,21 @@ * limitations under the License. */ -package org.apache.hadoop.mapred +package org.apache.spark.metrics.sink -trait HadoopMapRedUtil { - def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContext(conf, jobId) +import com.codahale.metrics.{JmxReporter, MetricRegistry} - def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContext(conf, attemptId) +import java.util.Properties + +class JmxSink(val property: Properties, val registry: MetricRegistry) extends Sink { + val reporter: JmxReporter = JmxReporter.forRegistry(registry).build() + + override def start() { + reporter.start() + } + + override def stop() { + reporter.stop() + } - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, - jobId, isMap, taskId, attemptId) } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala new file mode 100644 index 0000000000..4e90dd4323 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.sink + +import com.codahale.metrics.MetricRegistry +import com.codahale.metrics.json.MetricsModule + +import com.fasterxml.jackson.databind.ObjectMapper + +import java.util.Properties +import java.util.concurrent.TimeUnit +import javax.servlet.http.HttpServletRequest + +import org.eclipse.jetty.server.Handler + +import org.apache.spark.ui.JettyUtils + +class MetricsServlet(val property: Properties, val registry: MetricRegistry) extends Sink { + val SERVLET_KEY_URI = "uri" + val SERVLET_KEY_SAMPLE = "sample" + + val servletURI = property.getProperty(SERVLET_KEY_URI) + + val servletShowSample = property.getProperty(SERVLET_KEY_SAMPLE).toBoolean + + val mapper = new ObjectMapper().registerModule( + new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample)) + + def getHandlers = Array[(String, Handler)]( + (servletURI, JettyUtils.createHandler(request => getMetricsSnapshot(request), "text/json")) + ) + + def getMetricsSnapshot(request: HttpServletRequest): String = { + mapper.writeValueAsString(registry) + } + + override def start() { } + + override def stop() { } +} diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulingMode.scala b/core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala index 4b3e3e50e1..3a739aa563 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SchedulingMode.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala @@ -15,10 +15,9 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.metrics.sink -object SchedulingMode extends Enumeration("FAIR","FIFO"){ - - type SchedulingMode = Value - val FAIR,FIFO = Value +trait Sink { + def start: Unit + def stop: Unit } diff --git a/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala index 4b3d84670c..75cb2b8973 100644 --- a/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala +++ b/core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala @@ -15,13 +15,18 @@ * limitations under the License. 
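registerSinks instantiates every configured sink reflectively through a (Properties, MetricRegistry) constructor, so a custom sink only needs that constructor shape plus start/stop. A hypothetical sketch (NoOpSink is illustrative only, not part of this patch):

    package org.apache.spark.metrics.sink

    import java.util.Properties

    import com.codahale.metrics.MetricRegistry

    // Hypothetical sink: the two-argument constructor is the reflective contract.
    class NoOpSink(val property: Properties, val registry: MetricRegistry) extends Sink {
      override def start() { println("sink started; " + registry.getNames.size + " metrics registered") }
      override def stop() { println("sink stopped") }
    }

It would then be enabled with a line such as *.sink.noop.class=org.apache.spark.metrics.sink.NoOpSink.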
*/ -package org.apache.hadoop.mapred +package org.apache.spark.metrics.source -trait HadoopMapRedUtil { - def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) +import com.codahale.metrics.MetricRegistry +import com.codahale.metrics.jvm.{GarbageCollectorMetricSet, MemoryUsageGaugeSet} - def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) +class JvmSource extends Source { + val sourceName = "jvm" + val metricRegistry = new MetricRegistry() - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, - jobId, isMap, taskId, attemptId) + val gcMetricSet = new GarbageCollectorMetricSet + val memGaugeSet = new MemoryUsageGaugeSet + + metricRegistry.registerAll(gcMetricSet) + metricRegistry.registerAll(memGaugeSet) } diff --git a/core/src/main/scala/org/apache/spark/metrics/source/Source.scala b/core/src/main/scala/org/apache/spark/metrics/source/Source.scala new file mode 100644 index 0000000000..3fee55cc6d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/source/Source.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.source + +import com.codahale.metrics.MetricRegistry + +trait Source { + def sourceName: String + def metricRegistry: MetricRegistry +} diff --git a/core/src/main/scala/spark/network/BufferMessage.scala b/core/src/main/scala/org/apache/spark/network/BufferMessage.scala index e566aeac13..f736bb3713 100644 --- a/core/src/main/scala/spark/network/BufferMessage.scala +++ b/core/src/main/scala/org/apache/spark/network/BufferMessage.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer -import spark.storage.BlockManager +import org.apache.spark.storage.BlockManager private[spark] diff --git a/core/src/main/scala/spark/network/Connection.scala b/core/src/main/scala/org/apache/spark/network/Connection.scala index b66c00b58c..95cb0206ac 100644 --- a/core/src/main/scala/spark/network/Connection.scala +++ b/core/src/main/scala/org/apache/spark/network/Connection.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.network +package org.apache.spark.network -import spark._ +import org.apache.spark._ import scala.collection.mutable.{HashMap, Queue, ArrayBuffer} @@ -45,12 +45,15 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector, channel.socket.setKeepAlive(true) /*channel.socket.setReceiveBufferSize(32768) */ + @volatile private var closed = false var onCloseCallback: Connection => Unit = null var onExceptionCallback: (Connection, Exception) => Unit = null var onKeyInterestChangeCallback: (Connection, Int) => Unit = null val remoteAddress = getRemoteAddress() + def resetForceReregister(): Boolean + // Read channels typically do not register for write and write does not for read // Now, we do have write registering for read too (temporarily), but this is to detect // channel close NOT to actually read/consume data on it ! @@ -95,6 +98,7 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector, } def close() { + closed = true val k = key() if (k != null) { k.cancel() @@ -103,6 +107,8 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector, callOnCloseCallback() } + protected def isClosed: Boolean = closed + def onClose(callback: Connection => Unit) { onCloseCallback = callback } @@ -168,7 +174,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, remoteId_ : ConnectionManagerId) extends Connection(SocketChannel.open, selector_, remoteId_) { - class Outbox(fair: Int = 0) { + private class Outbox(fair: Int = 0) { val messages = new Queue[Message]() val defaultChunkSize = 65536 //32768 //16384 var nextMessageToBeUsed = 0 @@ -245,7 +251,17 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, } } + // outbox is used as a lock - ensure that it is always used as a leaf (since methods which + // lock it are invoked in context of other locks) private val outbox = new Outbox(1) + /* + This is orthogonal to whether we have pending bytes to write or not - and satisfies a slightly + different purpose. This flag is to see if we need to force reregister for write even when we + do not have any pending bytes to write to socket. + This can happen due to a race between adding pending buffers, and checking for the existence of + data as detailed in https://github.com/mesos/spark/pull/791 + */ + private var needForceReregister = false val currentBuffers = new ArrayBuffer[ByteBuffer]() /*channel.socket.setSendBufferSize(256 * 1024)*/ @@ -267,9 +283,19 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, def send(message: Message) { outbox.synchronized { outbox.addMessage(message) - if (channel.isConnected) { - registerInterest() - } + needForceReregister = true + } + if (channel.isConnected) { + registerInterest() + } + } + + // return the previous value after resetting it.
+ def resetForceReregister(): Boolean = { + outbox.synchronized { + val result = needForceReregister + needForceReregister = false + result } } @@ -322,7 +348,11 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, outbox.synchronized { outbox.getChunk() match { case Some(chunk) => { - currentBuffers ++= chunk.buffers + val buffers = chunk.buffers + // If we have 'seen' pending messages, then reset flag - since we handle that as normal + // registering of event (below) + if (needForceReregister && buffers.exists(_.remaining() > 0)) resetForceReregister() + currentBuffers ++= buffers } case None => { // changeConnectionKeyInterest(0) @@ -384,7 +414,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, override def changeInterestForRead(): Boolean = false - override def changeInterestForWrite(): Boolean = true + override def changeInterestForWrite(): Boolean = ! isClosed } @@ -534,6 +564,7 @@ private[spark] class ReceivingConnection(channel_ : SocketChannel, selector_ : S def onReceive(callback: (Connection, Message) => Unit) {onReceiveCallback = callback} + // override def changeInterestForRead(): Boolean = ! isClosed override def changeInterestForRead(): Boolean = true override def changeInterestForWrite(): Boolean = { @@ -549,4 +580,7 @@ private[spark] class ReceivingConnection(channel_ : SocketChannel, selector_ : S override def unregisterInterest() { changeConnectionKeyInterest(0) } + + // For read conn, always false. + override def resetForceReregister(): Boolean = false } diff --git a/core/src/main/scala/spark/network/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala index 6c4e7dc03e..e15a839c4e 100644 --- a/core/src/main/scala/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network -import spark._ +import org.apache.spark._ import java.nio._ import java.nio.channels._ @@ -34,6 +34,7 @@ import scala.collection.mutable.ArrayBuffer import akka.dispatch.{Await, Promise, ExecutionContext, Future} import akka.util.Duration import akka.util.duration._ +import org.apache.spark.util.Utils private[spark] class ConnectionManager(port: Int) extends Logging { @@ -123,7 +124,8 @@ private[spark] class ConnectionManager(port: Int) extends Logging { } finally { writeRunnableStarted.synchronized { writeRunnableStarted -= key - if (register && conn.changeInterestForWrite()) { + val needReregister = register || conn.resetForceReregister() + if (needReregister && conn.changeInterestForWrite()) { conn.registerInterest() } } diff --git a/core/src/main/scala/spark/network/ConnectionManagerId.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManagerId.scala index 9d5c518293..50dd9bc2d1 100644 --- a/core/src/main/scala/spark/network/ConnectionManagerId.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManagerId.scala @@ -15,11 +15,11 @@ * limitations under the License. 
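// A distilled sketch of the idiom introduced above (see the PR link in the comment):
// the sender marks that a write-interest registration is owed, and the write loop
// consumes the flag with an atomic read-then-clear so the registration happens exactly
// once even when no pending bytes are visible yet. Class and method names here are
// hypothetical stand-ins for the Connection fields.

import java.util.concurrent.atomic.AtomicBoolean

class ReregisterFlag {
  private val needForceReregister = new AtomicBoolean(false)

  // send() path: a message was queued, so a reregistration must eventually happen.
  def markPending() { needForceReregister.set(true) }

  // write-loop path: return the previous value and clear it in one atomic step.
  def resetForceReregister(): Boolean = needForceReregister.getAndSet(false)
}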
*/ -package spark.network +package org.apache.spark.network import java.net.InetSocketAddress -import spark.Utils +import org.apache.spark.util.Utils private[spark] case class ConnectionManagerId(host: String, port: Int) { diff --git a/core/src/main/scala/spark/network/ConnectionManagerTest.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala index 9e3827aaf5..8d9ad9604d 100644 --- a/core/src/main/scala/spark/network/ConnectionManagerTest.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network -import spark._ -import spark.SparkContext._ +import org.apache.spark._ +import org.apache.spark.SparkContext._ import scala.io.Source diff --git a/core/src/main/scala/spark/network/Message.scala b/core/src/main/scala/org/apache/spark/network/Message.scala index a25457ea35..f2ecc6d439 100644 --- a/core/src/main/scala/spark/network/Message.scala +++ b/core/src/main/scala/org/apache/spark/network/Message.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer import java.net.InetSocketAddress diff --git a/core/src/main/scala/spark/network/MessageChunk.scala b/core/src/main/scala/org/apache/spark/network/MessageChunk.scala index 784db5ab62..e0fe57b80d 100644 --- a/core/src/main/scala/spark/network/MessageChunk.scala +++ b/core/src/main/scala/org/apache/spark/network/MessageChunk.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer diff --git a/core/src/main/scala/spark/network/MessageChunkHeader.scala b/core/src/main/scala/org/apache/spark/network/MessageChunkHeader.scala index 18d0cbcc14..235fbc39b3 100644 --- a/core/src/main/scala/spark/network/MessageChunkHeader.scala +++ b/core/src/main/scala/org/apache/spark/network/MessageChunkHeader.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.net.InetAddress import java.net.InetSocketAddress diff --git a/core/src/main/scala/spark/network/ReceiverTest.scala b/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala index 2bbc736f40..781715108b 100644 --- a/core/src/main/scala/spark/network/ReceiverTest.scala +++ b/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer import java.net.InetAddress diff --git a/core/src/main/scala/spark/network/SenderTest.scala b/core/src/main/scala/org/apache/spark/network/SenderTest.scala index 542c54c36b..777574980f 100644 --- a/core/src/main/scala/spark/network/SenderTest.scala +++ b/core/src/main/scala/org/apache/spark/network/SenderTest.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer import java.net.InetAddress diff --git a/core/src/main/scala/spark/network/netty/FileHeader.scala b/core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala index bf46d32aa3..3c29700920 100644 --- a/core/src/main/scala/spark/network/netty/FileHeader.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala @@ -15,11 +15,11 @@ * limitations under the License. 
*/ -package spark.network.netty +package org.apache.spark.network.netty import io.netty.buffer._ -import spark.Logging +import org.apache.spark.Logging private[spark] class FileHeader ( val fileLen: Int, diff --git a/core/src/main/scala/spark/network/netty/ShuffleCopier.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala index b01f6369f6..9493ccffd9 100644 --- a/core/src/main/scala/spark/network/netty/ShuffleCopier.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network.netty +package org.apache.spark.network.netty import java.util.concurrent.Executors @@ -23,8 +23,8 @@ import io.netty.buffer.ByteBuf import io.netty.channel.ChannelHandlerContext import io.netty.util.CharsetUtil -import spark.Logging -import spark.network.ConnectionManagerId +import org.apache.spark.Logging +import org.apache.spark.network.ConnectionManagerId import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/spark/network/netty/ShuffleSender.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala index cdf88b03a0..537f225469 100644 --- a/core/src/main/scala/spark/network/netty/ShuffleSender.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.network.netty +package org.apache.spark.network.netty import java.io.File -import spark.Logging +import org.apache.spark.Logging private[spark] class ShuffleSender(portIn: Int, val pResolver: PathResolver) extends Logging { diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala new file mode 100644 index 0000000000..c0ec527339 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -0,0 +1,35 @@ +import org.apache.spark.rdd.{SequenceFileRDDFunctions, DoubleRDDFunctions, PairRDDFunctions} + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Core Spark functionality. [[org.apache.spark.SparkContext]] serves as the main entry point to + * Spark, while [[org.apache.spark.rdd.RDD]] is the data type representing a distributed collection, + * and provides most parallel operations. + * + * In addition, [[org.apache.spark.rdd.PairRDDFunctions]] contains operations available only on RDDs + * of key-value pairs, such as `groupByKey` and `join`; [[org.apache.spark.rdd.DoubleRDDFunctions]] + * contains operations available only on RDDs of Doubles; and + * [[org.apache.spark.rdd.SequenceFileRDDFunctions]] contains operations available on RDDs that can + * be saved as SequenceFiles. These operations are automatically available on any RDD of the right + * type (e.g. 
RDD[(Int, Int)]) through implicit conversions when you + * `import org.apache.spark.SparkContext._`. + */ +package object spark { + // For package docs only +} diff --git a/core/src/main/scala/spark/partial/ApproximateActionListener.scala b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala index 691d939150..d71069444a 100644 --- a/core/src/main/scala/spark/partial/ApproximateActionListener.scala +++ b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala @@ -15,10 +15,11 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial -import spark._ -import spark.scheduler.JobListener +import org.apache.spark._ +import org.apache.spark.scheduler.JobListener +import org.apache.spark.rdd.RDD /** * A JobListener for an approximate single-result action, such as count() or non-parallel reduce(). diff --git a/core/src/main/scala/spark/partial/ApproximateEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/ApproximateEvaluator.scala index 5eae144dfb..9c2859c8b9 100644 --- a/core/src/main/scala/spark/partial/ApproximateEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/ApproximateEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial /** * An object that computes a function incrementally by merging in results of type U from multiple diff --git a/core/src/main/scala/spark/partial/BoundedDouble.scala b/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala index 8bdbe6c012..5f4450859c 100644 --- a/core/src/main/scala/spark/partial/BoundedDouble.scala +++ b/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial /** * A Double with error bars on it. diff --git a/core/src/main/scala/spark/partial/CountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala index 6aa92094eb..3155dfe165 100644 --- a/core/src/main/scala/spark/partial/CountEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import cern.jet.stat.Probability diff --git a/core/src/main/scala/spark/partial/GroupedCountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala index ebe2e5a1e3..e519e3a548 100644 --- a/core/src/main/scala/spark/partial/GroupedCountEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import java.util.{HashMap => JHashMap} import java.util.{Map => JMap} diff --git a/core/src/main/scala/spark/partial/GroupedMeanEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedMeanEvaluator.scala index 2dadbbd5fb..cf8a5680b6 100644 --- a/core/src/main/scala/spark/partial/GroupedMeanEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/GroupedMeanEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License.
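// A usage sketch of the implicit conversions the package docs above describe. The
// local master URL and sample data are illustrative only.

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._ // brings the RDD function wrappers into scope

object ImplicitsExample {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "ImplicitsExample")
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))
    // reduceByKey lives in PairRDDFunctions; the import makes it available here.
    val counts = pairs.reduceByKey(_ + _)
    println(counts.collect().toSeq) // e.g. (a,4), (b,2) -- order may vary
    sc.stop()
  }
}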
*/ -package spark.partial +package org.apache.spark.partial import java.util.{HashMap => JHashMap} import java.util.{Map => JMap} @@ -24,7 +24,7 @@ import scala.collection.mutable.HashMap import scala.collection.Map import scala.collection.JavaConversions.mapAsScalaMap -import spark.util.StatCounter +import org.apache.spark.util.StatCounter /** * An ApproximateEvaluator for means by key. Returns a map of key to confidence interval. diff --git a/core/src/main/scala/spark/partial/GroupedSumEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedSumEvaluator.scala index ae2b63f7cb..8225a5d933 100644 --- a/core/src/main/scala/spark/partial/GroupedSumEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/GroupedSumEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import java.util.{HashMap => JHashMap} import java.util.{Map => JMap} @@ -24,7 +24,7 @@ import scala.collection.mutable.HashMap import scala.collection.Map import scala.collection.JavaConversions.mapAsScalaMap -import spark.util.StatCounter +import org.apache.spark.util.StatCounter /** * An ApproximateEvaluator for sums by key. Returns a map of key to confidence interval. diff --git a/core/src/main/scala/spark/partial/MeanEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala index 5ddcad7075..d24959cba8 100644 --- a/core/src/main/scala/spark/partial/MeanEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import cern.jet.stat.Probability -import spark.util.StatCounter +import org.apache.spark.util.StatCounter /** * An ApproximateEvaluator for means. diff --git a/core/src/main/scala/spark/partial/PartialResult.scala b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala index 922a9f9bc6..5ce49b8100 100644 --- a/core/src/main/scala/spark/partial/PartialResult.scala +++ b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial class PartialResult[R](initialVal: R, isFinal: Boolean) { private var finalValue: Option[R] = if (isFinal) Some(initialVal) else None diff --git a/core/src/main/scala/spark/partial/StudentTCacher.scala b/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala index f3bb987d46..92915ee66d 100644 --- a/core/src/main/scala/spark/partial/StudentTCacher.scala +++ b/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import cern.jet.stat.Probability diff --git a/core/src/main/scala/spark/partial/SumEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala index 4083abef03..a74f800944 100644 --- a/core/src/main/scala/spark/partial/SumEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import cern.jet.stat.Probability -import spark.util.StatCounter +import org.apache.spark.util.StatCounter /** * An ApproximateEvaluator for sums. 
It estimates the mean and the count and multiplies them diff --git a/core/src/main/scala/spark/rdd/BlockRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala index 0ebb722d73..bca6956a18 100644 --- a/core/src/main/scala/spark/rdd/BlockRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, SparkContext, SparkEnv, Partition, TaskContext} -import spark.storage.BlockManager +import org.apache.spark.{SparkContext, SparkEnv, Partition, TaskContext} +import org.apache.spark.storage.BlockManager private[spark] class BlockRDDPartition(val blockId: String, idx: Int) extends Partition { val index = idx @@ -28,13 +28,12 @@ private[spark] class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[String]) extends RDD[T](sc, Nil) { - @transient lazy val locations_ = BlockManager.blockIdsToExecutorLocations(blockIds, SparkEnv.get) + @transient lazy val locations_ = BlockManager.blockIdsToHosts(blockIds, SparkEnv.get) override def getPartitions: Array[Partition] = (0 until blockIds.size).map(i => { new BlockRDDPartition(blockIds(i), i).asInstanceOf[Partition] }).toArray - override def compute(split: Partition, context: TaskContext): Iterator[T] = { val blockManager = SparkEnv.get.blockManager val blockId = split.asInstanceOf[BlockRDDPartition].blockId @@ -45,8 +44,8 @@ class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[St } } - override def getPreferredLocations(split: Partition): Seq[String] = + override def getPreferredLocations(split: Partition): Seq[String] = { locations_(split.asInstanceOf[BlockRDDPartition].blockId) - + } } diff --git a/core/src/main/scala/spark/rdd/CartesianRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala index 150e5bca29..9b0c882481 100644 --- a/core/src/main/scala/spark/rdd/CartesianRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.io.{ObjectOutputStream, IOException} -import spark._ +import org.apache.spark._ private[spark] @@ -64,7 +64,7 @@ class CartesianRDD[T: ClassManifest, U:ClassManifest]( override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] - rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2) + (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext) = { diff --git a/core/src/main/scala/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala index 6794e0e201..3311757189 100644 --- a/core/src/main/scala/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala @@ -15,9 +15,9 @@ * limitations under the License.
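// A usage sketch tying the partial package together: approximate actions hand back a
// PartialResult whose value is a BoundedDouble (a mean with confidence bounds).
// Assumes a live SparkContext named sc; the timeout and confidence are illustrative.

val data = sc.parallelize(1 to 1000000)
// Wait at most 500 ms for a count; CountEvaluator produces the running estimate.
val approx = data.countApprox(timeout = 500, confidence = 0.95)
val bounded = approx.getFinalValue() // blocks until a value is available
println("count ~ " + bounded.mean + " in [" + bounded.low + ", " + bounded.high + "]")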
*/ -package spark.rdd +package org.apache.spark.rdd -import spark._ +import org.apache.spark._ import org.apache.hadoop.mapred.{FileInputFormat, SequenceFileInputFormat, JobConf, Reporter} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.{NullWritable, BytesWritable} @@ -25,7 +25,6 @@ import org.apache.hadoop.util.ReflectionUtils import org.apache.hadoop.fs.Path import java.io.{File, IOException, EOFException} import java.text.NumberFormat -import spark.deploy.SparkHadoopUtil private[spark] class CheckpointRDDPartition(val index: Int) extends Partition {} @@ -82,8 +81,9 @@ private[spark] object CheckpointRDD extends Logging { } def writeToFile[T](path: String, blockSize: Int = -1)(ctx: TaskContext, iterator: Iterator[T]) { + val env = SparkEnv.get val outputDir = new Path(path) - val fs = outputDir.getFileSystem(SparkHadoopUtil.newConfiguration()) + val fs = outputDir.getFileSystem(env.hadoop.newConfiguration()) val finalOutputName = splitIdToFile(ctx.splitId) val finalOutputPath = new Path(outputDir, finalOutputName) @@ -101,7 +101,7 @@ private[spark] object CheckpointRDD extends Logging { // This is mainly for testing purpose fs.create(tempOutputPath, false, bufferSize, fs.getDefaultReplication, blockSize) } - val serializer = SparkEnv.get.serializer.newInstance() + val serializer = env.serializer.newInstance() val serializeStream = serializer.serializeStream(fileOutputStream) serializeStream.writeAll(iterator) serializeStream.close() @@ -121,10 +121,11 @@ private[spark] object CheckpointRDD extends Logging { } def readFromFile[T](path: Path, context: TaskContext): Iterator[T] = { - val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) + val env = SparkEnv.get + val fs = path.getFileSystem(env.hadoop.newConfiguration()) val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt val fileInputStream = fs.open(path, bufferSize) - val serializer = SparkEnv.get.serializer.newInstance() + val serializer = env.serializer.newInstance() val deserializeStream = serializer.deserializeStream(fileInputStream) // Register an on-task-completion callback to close the input stream. @@ -137,13 +138,14 @@ private[spark] object CheckpointRDD extends Logging { // each split file having multiple blocks. This needs to be run on a // cluster (mesos or standalone) using HDFS. def main(args: Array[String]) { - import spark._ + import org.apache.spark._ val Array(cluster, hdfsPath) = args + val env = SparkEnv.get val sc = new SparkContext(cluster, "CheckpointRDD Test") val rdd = sc.makeRDD(1 to 10, 10).flatMap(x => 1 to 10000) val path = new Path(hdfsPath, "temp") - val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) + val fs = path.getFileSystem(env.hadoop.newConfiguration()) sc.runJob(rdd, CheckpointRDD.writeToFile(path.toString, 1024) _) val cpRDD = new CheckpointRDD[Int](sc, path.toString) assert(cpRDD.partitions.length == rdd.partitions.length, "Number of partitions is not the same") diff --git a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala index c540cd36eb..0187256a8e 100644 --- a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. 
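// A usage sketch for the checkpoint path exercised by writeToFile/readFromFile above.
// Assumes a live SparkContext named sc; the HDFS directory is illustrative and should
// be on a shared filesystem when running on a cluster.

sc.setCheckpointDir("hdfs:///tmp/spark-checkpoints")
val rdd = sc.makeRDD(1 to 10, 10).flatMap(x => 1 to 100)
rdd.checkpoint() // marks the RDD; CheckpointRDD.writeToFile persists it
rdd.count()      // the first job to run materializes the checkpoint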
*/ -package spark.rdd +package org.apache.spark.rdd import java.io.{ObjectOutputStream, IOException} import java.util.{HashMap => JHashMap} @@ -23,8 +23,8 @@ import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions import scala.collection.mutable.ArrayBuffer -import spark.{Aggregator, Partition, Partitioner, RDD, SparkEnv, TaskContext} -import spark.{Dependency, OneToOneDependency, ShuffleDependency} +import org.apache.spark.{Partition, Partitioner, SparkEnv, TaskContext} +import org.apache.spark.{Dependency, OneToOneDependency, ShuffleDependency} private[spark] sealed trait CoGroupSplitDep extends Serializable @@ -52,13 +52,6 @@ class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]) override def hashCode(): Int = idx } -private[spark] class CoGroupAggregator - extends Aggregator[Any, Any, ArrayBuffer[Any]]( - { x => ArrayBuffer(x) }, - { (b, x) => b += x }, - { (b1, b2) => b1 ++ b2 }) - with Serializable - /** * A RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a @@ -66,34 +59,25 @@ private[spark] class CoGroupAggregator * * @param rdds parent RDDs. * @param part partitioner used to partition the shuffle output. - * @param mapSideCombine flag indicating whether to merge values before shuffle step. If the flag - * is on, Spark does an extra pass over the data on the map side to merge - * all values belonging to the same key together. This can reduce the amount - * of data shuffled if and only if the number of distinct keys is very small, - * and the ratio of key size to value size is also very small. */ -class CoGroupedRDD[K]( - @transient var rdds: Seq[RDD[(K, _)]], - part: Partitioner, - val mapSideCombine: Boolean = false, - val serializerClass: String = null) +class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: Partitioner) extends RDD[(K, Seq[Seq[_]])](rdds.head.context, Nil) { - private val aggr = new CoGroupAggregator + private var serializerClass: String = null + + def setSerializer(cls: String): CoGroupedRDD[K] = { + serializerClass = cls + this + } override def getDependencies: Seq[Dependency[_]] = { - rdds.map { rdd => + rdds.map { rdd: RDD[_ <: Product2[K, _]] => if (rdd.partitioner == Some(part)) { - logInfo("Adding one-to-one dependency with " + rdd) + logDebug("Adding one-to-one dependency with " + rdd) new OneToOneDependency(rdd) } else { - logInfo("Adding shuffle dependency with " + rdd) - if (mapSideCombine) { - val mapSideCombinedRDD = rdd.mapPartitions(aggr.combineValuesByKey(_), true) - new ShuffleDependency[Any, ArrayBuffer[Any]](mapSideCombinedRDD, part, serializerClass) - } else { - new ShuffleDependency[Any, Any](rdd.asInstanceOf[RDD[(Any, Any)]], part, serializerClass) - } + logDebug("Adding shuffle dependency with " + rdd) + new ShuffleDependency[Any, Any](rdd, part, serializerClass) } } } @@ -138,23 +122,15 @@ class CoGroupedRDD[K]( for ((dep, depNum) <- split.deps.zipWithIndex) dep match { case NarrowCoGroupSplitDep(rdd, _, itsSplit) => { // Read them from the parent - for ((k, v) <- rdd.iterator(itsSplit, context)) { - getSeq(k.asInstanceOf[K])(depNum) += v + rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, Any]]].foreach { kv => + getSeq(kv._1)(depNum) += kv._2 } } case ShuffleCoGroupSplitDep(shuffleId) => { // Read map outputs of shuffle val fetcher = SparkEnv.get.shuffleFetcher - if (mapSideCombine) { - // With map side combine on, for each key, the shuffle fetcher returns a list of values. 
- fetcher.fetch[K, Seq[Any]](shuffleId, split.index, context.taskMetrics, ser).foreach { - case (key, values) => getSeq(key)(depNum) ++= values - } - } else { - // With map side combine off, for each key the shuffle fetcher returns a single value. - fetcher.fetch[K, Any](shuffleId, split.index, context.taskMetrics, ser).foreach { - case (key, value) => getSeq(key)(depNum) += value - } + fetcher.fetch[Product2[K, Any]](shuffleId, split.index, context.taskMetrics, ser).foreach { + kv => getSeq(kv._1)(depNum) += kv._2 } } } diff --git a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala new file mode 100644 index 0000000000..c5de6362a9 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rdd + +import org.apache.spark._ +import java.io.{ObjectOutputStream, IOException} +import scala.collection.mutable +import scala.Some +import scala.collection.mutable.ArrayBuffer + +/** + * Class that captures a coalesced RDD by essentially keeping track of parent partitions + * @param index of this coalesced partition + * @param rdd which it belongs to + * @param parentsIndices list of indices in the parent that have been coalesced into this partition + * @param preferredLocation the preferred location for this partition + */ +case class CoalescedRDDPartition( + index: Int, + @transient rdd: RDD[_], + parentsIndices: Array[Int], + @transient preferredLocation: String = "" + ) extends Partition { + var parents: Seq[Partition] = parentsIndices.map(rdd.partitions(_)) + + @throws(classOf[IOException]) + private def writeObject(oos: ObjectOutputStream) { + // Update the reference to parent partition at the time of task serialization + parents = parentsIndices.map(rdd.partitions(_)) + oos.defaultWriteObject() + } + + /** + * Computes how many of the parents partitions have getPreferredLocation + * as one of their preferredLocations + * @return locality of this coalesced partition between 0 and 1 + */ + def localFraction: Double = { + val loc = parents.count(p => + rdd.context.getPreferredLocs(rdd, p.index).map(tl => tl.host).contains(preferredLocation)) + + if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) + } +} + +/** + * Represents a coalesced RDD that has fewer partitions than its parent RDD + * This class uses the PartitionCoalescer class to find a good partitioning of the parent RDD + * so that each new partition has roughly the same number of parent partitions and that + * the preferred location of each new partition overlaps with as many preferred locations of its + * parent partitions + * @param prev RDD to be coalesced + * @param maxPartitions 
number of desired partitions in the coalesced RDD + * @param balanceSlack used to trade-off balance and locality. 1.0 is all locality, 0 is all balance + */ +class CoalescedRDD[T: ClassManifest]( + @transient var prev: RDD[T], + maxPartitions: Int, + balanceSlack: Double = 0.10) + extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies + + override def getPartitions: Array[Partition] = { + val pc = new PartitionCoalescer(maxPartitions, prev, balanceSlack) + + pc.run().zipWithIndex.map { + case (pg, i) => + val ids = pg.arr.map(_.index).toArray + new CoalescedRDDPartition(i, prev, ids, pg.prefLoc) + } + } + + override def compute(partition: Partition, context: TaskContext): Iterator[T] = { + partition.asInstanceOf[CoalescedRDDPartition].parents.iterator.flatMap { parentPartition => + firstParent[T].iterator(parentPartition, context) + } + } + + override def getDependencies: Seq[Dependency[_]] = { + Seq(new NarrowDependency(prev) { + def getParents(id: Int): Seq[Int] = + partitions(id).asInstanceOf[CoalescedRDDPartition].parentsIndices + }) + } + + override def clearDependencies() { + super.clearDependencies() + prev = null + } + + /** + * Returns the preferred machine for the partition. If split is of type CoalescedRDDPartition, + * then the preferred machine will be one which most parent splits prefer too. + * @param partition + * @return the machine most preferred by split + */ + override def getPreferredLocations(partition: Partition): Seq[String] = { + List(partition.asInstanceOf[CoalescedRDDPartition].preferredLocation) + } +} + +/** + * Coalesce the partitions of a parent RDD (`prev`) into fewer partitions, so that each partition of + * this RDD computes one or more of the parent ones. It will produce exactly `maxPartitions` if the + * parent had more than maxPartitions, or fewer if the parent had fewer. + * + * This transformation is useful when an RDD with many partitions gets filtered into a smaller one, + * or to avoid having a large number of small tasks when processing a directory with many files. + * + * If there is no locality information (no preferredLocations) in the parent, then the coalescing + * is very simple: chunk parents that are close in the Array in chunks. + * If there is locality information, it proceeds to pack them with the following four goals: + * + * (1) Balance the groups so they roughly have the same number of parent partitions + * (2) Achieve locality per partition, i.e. find one machine which most parent partitions prefer + * (3) Be efficient, i.e. O(n) algorithm for n parent partitions (problem is likely NP-hard) + * (4) Balance preferred machines, i.e. avoid as much as possible picking the same preferred machine + * + * Furthermore, it is assumed that the parent RDD may have many partitions, e.g. 100 000. + * We assume the final number of desired partitions is small, e.g. less than 1000. + * + * The algorithm tries to assign unique preferred machines to each partition. If the number of + * desired partitions is greater than the number of preferred machines (can happen), it needs to + * start picking duplicate preferred machines. This is determined using coupon collector estimation + * (2n log(n)). The load balancing is done using power-of-two randomized bins-balls with one twist: + * it tries to also achieve locality. This is done by allowing a slack (balanceSlack) between two + * bins. If two bins are within the slack in terms of balance, the algorithm will assign partitions + * according to locality. 
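// A usage sketch for the transformation described above. coalesce(n) is the public
// entry point that builds a CoalescedRDD; paths and partition counts are illustrative.

val manyParts = sc.textFile("hdfs:///logs", 10000).filter(_.contains("ERROR"))
// Pack the (now mostly small) partitions into 100, trading balance against
// locality with the default balanceSlack of 0.10.
val fewParts = manyParts.coalesce(100)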
(contact alig for questions) + * + */ + +private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { + + def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size + def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = + if (o1 == None) false else if (o2 == None) true else compare(o1.get, o2.get) + + val rnd = new scala.util.Random(7919) // keep this class deterministic + + // each element of groupArr represents one coalesced partition + val groupArr = ArrayBuffer[PartitionGroup]() + + // hash used to check whether some machine is already in groupArr + val groupHash = mutable.Map[String, ArrayBuffer[PartitionGroup]]() + + // hash used for the first maxPartitions (to avoid duplicates) + val initialHash = mutable.Set[Partition]() + + // determines the tradeoff between load-balancing the partitions' sizes and their locality + // e.g. balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality + val slack = (balanceSlack * prev.partitions.size).toInt + + var noLocality = true // true iff no preferredLocations exist for the parent RDD + + // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones) + def currPrefLocs(part: Partition): Seq[String] = { + prev.context.getPreferredLocs(prev, part.index).map(tl => tl.host) + } + + // this class just keeps iterating and rotating infinitely over the partitions of the RDD + // next() returns the next preferred machine that a partition is replicated on + // the rotator first goes through the first replica copy of each partition, then second, third + // the iterator's return type is a tuple: (replicaString, partition) + class LocationIterator(prev: RDD[_]) extends Iterator[(String, Partition)] { + + var it: Iterator[(String, Partition)] = resetIterator() + + override val isEmpty = !it.hasNext + + // initializes/resets to start iterating from the beginning + def resetIterator() = { + val iterators = (0 to 2).map( x => + prev.partitions.iterator.flatMap(p => { + if (currPrefLocs(p).size > x) Some((currPrefLocs(p)(x), p)) else None + } ) + ) + iterators.reduceLeft((x, y) => x ++ y) + } + + // hasNext() is false iff there are no preferredLocations for any of the partitions of the RDD + def hasNext(): Boolean = { !isEmpty } + + // return the next preferredLocation of some partition of the RDD + def next(): (String, Partition) = { + if (it.hasNext) + it.next() + else { + it = resetIterator() // ran out of preferred locations, reset and rotate to the beginning + it.next() + } + } + } + + /** + * Sorts and gets the least element of the list associated with key in groupHash + * The returned PartitionGroup is the least loaded of all groups that represent the machine "key" + * @param key string representing a partitioned group on preferred machine key + * @return Option of PartitionGroup that has least elements for key + */ + def getLeastGroupHash(key: String): Option[PartitionGroup] = { + groupHash.get(key).map(_.sortWith(compare).head) + } + + def addPartToPGroup(part: Partition, pgroup: PartitionGroup): Boolean = { + if (!initialHash.contains(part)) { + pgroup.arr += part // assign this element to the group + initialHash += part // needed to avoid assigning partitions to multiple buckets + true + } else { false } + } + + /** + * Initializes targetLen partition groups and assigns a preferredLocation + * This uses the coupon collector problem to estimate how many preferredLocations it must rotate through + * until it has seen most of the
preferred locations (2 * n log(n)) + * @param targetLen + */ + def setupGroups(targetLen: Int) { + val rotIt = new LocationIterator(prev) + + // deal with empty case, just create targetLen partition groups with no preferred location + if (!rotIt.hasNext()) { + (1 to targetLen).foreach(x => groupArr += PartitionGroup()) + return + } + + noLocality = false + + // number of iterations needed to be certain that we've seen most preferred locations + val expectedCoupons2 = 2 * (math.log(targetLen)*targetLen + targetLen + 0.5).toInt + var numCreated = 0 + var tries = 0 + + // rotate through until either targetLen unique/distinct preferred locations have been created + // OR we've rotated expectedCoupons2, in which case we have likely seen all preferred locations, + // i.e. likely targetLen >> number of preferred locations (more buckets than there are machines) + while (numCreated < targetLen && tries < expectedCoupons2) { + tries += 1 + val (nxt_replica, nxt_part) = rotIt.next() + if (!groupHash.contains(nxt_replica)) { + val pgroup = PartitionGroup(nxt_replica) + groupArr += pgroup + addPartToPGroup(nxt_part, pgroup) + groupHash += (nxt_replica -> (ArrayBuffer(pgroup))) // list in case we have multiple + numCreated += 1 + } + } + + while (numCreated < targetLen) { // if we don't have enough partition groups, create duplicates + var (nxt_replica, nxt_part) = rotIt.next() + val pgroup = PartitionGroup(nxt_replica) + groupArr += pgroup + groupHash.get(nxt_replica).get += pgroup + var tries = 0 + while (!addPartToPGroup(nxt_part, pgroup) && tries < targetLen) { // ensure at least one part + nxt_part = rotIt.next()._2 + tries += 1 + } + numCreated += 1 + } + + } + + /** + * Takes a parent RDD partition and decides which of the partition groups to put it in + * Takes locality into account, but also uses power of 2 choices to load balance + * It strikes a balance between the two using the balanceSlack variable + * @param p partition (ball to be thrown) + * @return partition group (bin to be put in) + */ + def pickBin(p: Partition): PartitionGroup = { + val pref = currPrefLocs(p).map(getLeastGroupHash(_)).sortWith(compare) // least loaded pref locs + val prefPart = if (pref == Nil) None else pref.head + + val r1 = rnd.nextInt(groupArr.size) + val r2 = rnd.nextInt(groupArr.size) + val minPowerOfTwo = if (groupArr(r1).size < groupArr(r2).size) groupArr(r1) else groupArr(r2) + if (prefPart == None) // if no preferred locations, just use basic power of two + return minPowerOfTwo + + val prefPartActual = prefPart.get + + if (minPowerOfTwo.size + slack <= prefPartActual.size) // more imbalance than the slack allows + return minPowerOfTwo // prefer balance over locality + else { + return prefPartActual // prefer locality over balance + } + } + + def throwBalls() { + if (noLocality) { // no preferredLocations in parent RDD, no randomization needed + if (maxPartitions > groupArr.size) { // just return prev.partitions + for ((p,i) <- prev.partitions.zipWithIndex) { + groupArr(i).arr += p + } + } else { // no locality available, then simply split partitions based on positions in array + for(i <- 0 until maxPartitions) { + val rangeStart = ((i.toLong * prev.partitions.length) / maxPartitions).toInt + val rangeEnd = (((i.toLong + 1) * prev.partitions.length) / maxPartitions).toInt + (rangeStart until rangeEnd).foreach{ j => groupArr(i).arr += prev.partitions(j) } + } + } + } else { + for (p <- prev.partitions if (!initialHash.contains(p))) { // throw every partition into group + pickBin(p).arr += p + } + } + } + + def
getPartitions: Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray + + /** + * Runs the packing algorithm and returns an array of PartitionGroups that if possible are + * load balanced and grouped by locality + * @return array of partition groups + */ + def run(): Array[PartitionGroup] = { + setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) + throwBalls() // assign partitions (balls) to each group (bins) + getPartitions + } +} + +private[spark] case class PartitionGroup(prefLoc: String = "") { + var arr = mutable.ArrayBuffer[Partition]() + + def size = arr.size +} diff --git a/core/src/main/scala/spark/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala index 93ef097702..a4bec41752 100644 --- a/core/src/main/scala/spark/DoubleRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala @@ -15,17 +15,18 @@ * limitations under the License. */ -package spark +package org.apache.spark.rdd -import spark.partial.BoundedDouble -import spark.partial.MeanEvaluator -import spark.partial.PartialResult -import spark.partial.SumEvaluator -import spark.util.StatCounter +import org.apache.spark.partial.BoundedDouble +import org.apache.spark.partial.MeanEvaluator +import org.apache.spark.partial.PartialResult +import org.apache.spark.partial.SumEvaluator +import org.apache.spark.util.StatCounter +import org.apache.spark.{TaskContext, Logging} /** * Extra functions available on RDDs of Doubles through an implicit conversion. - * Import `spark.SparkContext._` at the top of your program to use these functions. + * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { /** Add up the elements in this RDD. */ @@ -34,7 +35,7 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { } /** - * Return a [[spark.util.StatCounter]] object that captures the mean, variance and count + * Return a [[org.apache.spark.util.StatCounter]] object that captures the mean, variance and count * of the RDD's elements in one operation. */ def stats(): StatCounter = { @@ -54,7 +55,13 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { * Compute the sample standard deviation of this RDD's elements (which corrects for bias in * estimating the standard deviation by dividing by N-1 instead of N). */ - def sampleStdev(): Double = stats().stdev + def sampleStdev(): Double = stats().sampleStdev + + /** + * Compute the sample variance of this RDD's elements (which corrects for bias in + * estimating the variance by dividing by N-1 instead of N). + */ + def sampleVariance(): Double = stats().sampleVariance /** (Experimental) Approximate operation to return the mean within a timeout. */ def meanApprox(timeout: Long, confidence: Double = 0.95): PartialResult[BoundedDouble] = { diff --git a/core/src/main/scala/spark/rdd/EmptyRDD.scala b/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala index d7d4db5d30..c8900d1a93 100644 --- a/core/src/main/scala/spark/rdd/EmptyRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. 
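// A usage sketch for DoubleRDDFunctions above, available on any RDD[Double] through
// the SparkContext._ implicits. Assumes a live SparkContext named sc.

val xs = sc.parallelize(Seq(1.0, 2.0, 3.0, 4.0))
val st = xs.stats()          // one pass over the data: count, mean, variance
println(st.mean)             // 2.5
println(xs.sampleStdev())    // divides by N-1, as the docs above note
println(xs.sampleVariance())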
*/ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, SparkContext, SparkEnv, Partition, TaskContext} +import org.apache.spark.{SparkContext, SparkEnv, Partition, TaskContext} /** diff --git a/core/src/main/scala/spark/rdd/FilteredRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala index 783508cfd1..5312dc0b59 100644 --- a/core/src/main/scala/spark/rdd/FilteredRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{OneToOneDependency, RDD, Partition, TaskContext} +import org.apache.spark.{OneToOneDependency, Partition, TaskContext} private[spark] class FilteredRDD[T: ClassManifest]( prev: RDD[T], diff --git a/core/src/main/scala/spark/rdd/FlatMappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala index ed75eac3ff..cbdf6d84c0 100644 --- a/core/src/main/scala/spark/rdd/FlatMappedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] diff --git a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala index 0f972b7a0b..82000bac09 100644 --- a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala +++ b/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,15 +15,22 @@ * limitations under the License. */ -package org.apache.hadoop.mapred +package org.apache.spark.rdd + +import org.apache.spark.{TaskContext, Partition} + -import org.apache.hadoop.mapreduce.TaskType +private[spark] +class FlatMappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => TraversableOnce[U]) + extends RDD[(K, U)](prev) { -trait HadoopMapRedUtil { - def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) + override def getPartitions = firstParent[Product2[K, V]].partitions - def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) + override val partitioner = firstParent[Product2[K, V]].partitioner - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = - new TaskAttemptID(jtIdentifier, jobId, if (isMap) TaskType.MAP else TaskType.REDUCE, taskId, attemptId) + override def compute(split: Partition, context: TaskContext) = { + firstParent[Product2[K, V]].iterator(split, context).flatMap { case Product2(k, v) => + f(v).map(x => (k, x)) + } + } } diff --git a/core/src/main/scala/spark/rdd/GlommedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala index 1573f8a289..829545d7b0 100644 --- a/core/src/main/scala/spark/rdd/GlommedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] class GlommedRDD[T: ClassManifest](prev: RDD[T]) extends RDD[Array[T]](prev) { diff --git a/core/src/main/scala/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index d0fdeb741e..2cb6734e41 100644 --- a/core/src/main/scala/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -15,27 +15,20 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.io.EOFException -import java.util.NoSuchElementException -import org.apache.hadoop.io.LongWritable -import org.apache.hadoop.io.NullWritable -import org.apache.hadoop.io.Text -import org.apache.hadoop.mapred.FileInputFormat import org.apache.hadoop.mapred.InputFormat import org.apache.hadoop.mapred.InputSplit import org.apache.hadoop.mapred.JobConf -import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.mapred.RecordReader import org.apache.hadoop.mapred.Reporter import org.apache.hadoop.util.ReflectionUtils -import spark.deploy.SparkHadoopUtil -import spark.{Dependency, Logging, Partition, RDD, SerializableWritable, SparkContext, TaskContext} -import spark.util.NextIterator -import org.apache.hadoop.conf.Configurable +import org.apache.spark.{Logging, Partition, SerializableWritable, SparkContext, SparkEnv, TaskContext} +import org.apache.spark.util.NextIterator +import org.apache.hadoop.conf.{Configuration, Configurable} /** @@ -68,7 +61,8 @@ class HadoopRDD[K, V]( private val confBroadcast = sc.broadcast(new SerializableWritable(conf)) override def getPartitions: Array[Partition] = { - SparkHadoopUtil.addCredentials(conf); + val env = SparkEnv.get + env.hadoop.addCredentials(conf) val inputFormat = createInputFormat(conf) if (inputFormat.isInstanceOf[Configurable]) { inputFormat.asInstanceOf[Configurable].setConf(conf) @@ -88,6 +82,7 @@ class HadoopRDD[K, V]( override def compute(theSplit: Partition, context: TaskContext) = new NextIterator[(K, V)] { val split = theSplit.asInstanceOf[HadoopPartition] + logInfo("Input split: " + split.inputSplit) var reader: RecordReader[K, V] = null val conf = confBroadcast.value.value @@ -131,4 +126,6 @@ class HadoopRDD[K, V]( override def checkpoint() { // Do nothing. Hadoop RDD should not be checkpointed. } + + def getConf: Configuration = confBroadcast.value.value } diff --git a/core/src/main/scala/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala index 59132437d2..aca0146884 100644 --- a/core/src/main/scala/spark/rdd/JdbcRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala @@ -15,12 +15,12 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd import java.sql.{Connection, ResultSet} -import spark.{Logging, Partition, RDD, SparkContext, TaskContext} -import spark.util.NextIterator +import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} +import org.apache.spark.util.NextIterator private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) extends Partition { override def index = idx diff --git a/core/src/main/scala/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala index af8f0a112f..203179c4ea 100644 --- a/core/src/main/scala/spark/rdd/MapPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] diff --git a/core/src/main/scala/spark/rdd/MapPartitionsWithIndexRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithIndexRDD.scala index 3b4e9518fd..3ed8339010 100644 --- a/core/src/main/scala/spark/rdd/MapPartitionsWithIndexRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithIndexRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} /** diff --git a/core/src/main/scala/spark/rdd/MappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala index 8b411dd85d..e8be1c4816 100644 --- a/core/src/main/scala/spark/rdd/MappedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] class MappedRDD[U: ClassManifest, T: ClassManifest](prev: RDD[T], f: T => U) diff --git a/core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala index b1002e0cac..d33c1af581 100644 --- a/core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala @@ -15,15 +15,20 @@ * limitations under the License. 
*/ -package org.apache.hadoop.mapreduce +package org.apache.spark.rdd -import org.apache.hadoop.conf.Configuration -trait HadoopMapReduceUtil { - def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContext(conf, jobId) +import org.apache.spark.{TaskContext, Partition} - def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContext(conf, attemptId) +private[spark] +class MappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => U) + extends RDD[(K, U)](prev) { - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, - jobId, isMap, taskId, attemptId) + override def getPartitions = firstParent[Product2[K, U]].partitions + + override val partitioner = firstParent[Product2[K, U]].partitioner + + override def compute(split: Partition, context: TaskContext): Iterator[(K, U)] = { + firstParent[Product2[K, V]].iterator(split, context).map { case Product2(k ,v) => (k, f(v)) } + } } diff --git a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 17fe805fd4..7b3a89f7e0 100644 --- a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.text.SimpleDateFormat import java.util.Date @@ -24,7 +24,7 @@ import org.apache.hadoop.conf.{Configurable, Configuration} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce._ -import spark.{Dependency, Logging, Partition, RDD, SerializableWritable, SparkContext, TaskContext} +import org.apache.spark.{Dependency, Logging, Partition, SerializableWritable, SparkContext, TaskContext} private[spark] @@ -43,7 +43,7 @@ class NewHadoopRDD[K, V]( valueClass: Class[V], @transient conf: Configuration) extends RDD[(K, V)](sc, Nil) - with HadoopMapReduceUtil + with SparkHadoopMapReduceUtil with Logging { // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it @@ -73,6 +73,7 @@ class NewHadoopRDD[K, V]( override def compute(theSplit: Partition, context: TaskContext) = new Iterator[(K, V)] { val split = theSplit.asInstanceOf[NewHadoopPartition] + logInfo("Input split: " + split.serializableHadoopSplit) val conf = confBroadcast.value.value val attemptId = newTaskAttemptID(jobtrackerId, id, true, split.index, 0) val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) @@ -119,4 +120,7 @@ class NewHadoopRDD[K, V]( val theSplit = split.asInstanceOf[NewHadoopPartition] theSplit.serializableHadoopSplit.value.getLocations.filter(_ != "localhost") } + + def getConf: Configuration = confBroadcast.value.value } + diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala new file mode 100644 index 0000000000..697be8b997 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rdd + +import org.apache.spark.{RangePartitioner, Logging} + +/** + * Extra functions available on RDDs of (key, value) pairs where the key is sortable through + * an implicit conversion. Import `org.apache.spark.SparkContext._` at the top of your program to + * use these functions. They will work with any key type that has a `scala.math.Ordered` + * implementation. + */ +class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, + V: ClassManifest, + P <: Product2[K, V] : ClassManifest]( + self: RDD[P]) + extends Logging with Serializable { + + /** + * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling + * `collect` or `save` on the resulting RDD will return or output an ordered list of records + * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in + * order of the keys). + */ + def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size): RDD[P] = { + val part = new RangePartitioner(numPartitions, self, ascending) + val shuffled = new ShuffledRDD[K, V, P](self, part) + shuffled.mapPartitions(iter => { + val buf = iter.toArray + if (ascending) { + buf.sortWith((x, y) => x._1 < y._1).iterator + } else { + buf.sortWith((x, y) => x._1 > y._1).iterator + } + }, preservesPartitioning = true) + } +} diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 6b0cc2fbf1..a47c512275 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -15,45 +15,44 @@ * limitations under the License. 
*/ -package spark +package org.apache.spark.rdd import java.nio.ByteBuffer -import java.util.{Date, HashMap => JHashMap} +import java.util.Date import java.text.SimpleDateFormat +import java.util.{HashMap => JHashMap} -import scala.collection.Map +import scala.collection.{mutable, Map} import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap import scala.collection.JavaConversions._ +import org.apache.hadoop.mapred._ +import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.io.SequenceFile.CompressionType -import org.apache.hadoop.mapred.FileOutputCommitter import org.apache.hadoop.mapred.FileOutputFormat -import org.apache.hadoop.mapred.HadoopWriter -import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.OutputFormat - +import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat => NewFileOutputFormat} -import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat, RecordWriter => NewRecordWriter, Job => NewAPIHadoopJob, HadoopMapReduceUtil} -import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.mapreduce.SparkHadoopMapReduceUtil +import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob} +import org.apache.hadoop.mapreduce.{RecordWriter => NewRecordWriter} -import spark.partial.BoundedDouble -import spark.partial.PartialResult -import spark.rdd._ -import spark.SparkContext._ -import spark.Partitioner._ +import org.apache.spark._ +import org.apache.spark.SparkContext._ +import org.apache.spark.partial.{BoundedDouble, PartialResult} +import org.apache.spark.Aggregator +import org.apache.spark.Partitioner +import org.apache.spark.Partitioner.defaultPartitioner /** * Extra functions available on RDDs of (key, value) pairs through an implicit conversion. - * Import `spark.SparkContext._` at the top of your program to use these functions. + * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ -class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( - self: RDD[(K, V)]) +class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) extends Logging - with HadoopMapReduceUtil + with SparkHadoopMapReduceUtil with Serializable { /** @@ -85,17 +84,18 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( } val aggregator = new Aggregator[K, V, C](createCombiner, mergeValue, mergeCombiners) if (self.partitioner == Some(partitioner)) { - self.mapPartitions(aggregator.combineValuesByKey(_), true) + self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) } else if (mapSideCombine) { - val mapSideCombined = self.mapPartitions(aggregator.combineValuesByKey(_), true) - val partitioned = new ShuffledRDD[K, C](mapSideCombined, partitioner, serializerClass) - partitioned.mapPartitions(aggregator.combineCombinersByKey(_), true) + val combined = self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) + val partitioned = new ShuffledRDD[K, C, (K, C)](combined, partitioner) + .setSerializer(serializerClass) + partitioned.mapPartitions(aggregator.combineCombinersByKey, preservesPartitioning = true) } else { // Don't apply map-side combiner. // A sanity check to make sure mergeCombiners is not defined. 
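// A minimal usage sketch of the combineByKey being reworked in this hunk: computing
// per-key (sum, count) pairs and then averages. The SparkContext `sc`, the sample
// data, and the SparkContext._ import are illustrative assumptions, not part of the diff.
import org.apache.spark.SparkContext._
val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))
val sumCounts = pairs.combineByKey(
  (v: Int) => (v, 1),                                           // createCombiner
  (acc: (Int, Int), v: Int) => (acc._1 + v, acc._2 + 1),        // mergeValue
  (a: (Int, Int), b: (Int, Int)) => (a._1 + b._1, a._2 + b._2)) // mergeCombiners
val averages = sumCounts.mapValues(p => p._1.toDouble / p._2)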
assert(mergeCombiners == null) - val values = new ShuffledRDD[K, V](self, partitioner, serializerClass) - values.mapPartitions(aggregator.combineValuesByKey(_), true) + val values = new ShuffledRDD[K, V, (K, V)](self, partitioner).setSerializer(serializerClass) + values.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) } } @@ -167,7 +167,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( def reducePartition(iter: Iterator[(K, V)]): Iterator[JHashMap[K, V]] = { val map = new JHashMap[K, V] - for ((k, v) <- iter) { + iter.foreach { case (k, v) => val old = map.get(k) map.put(k, if (old == null) v else func(old, v)) } @@ -175,11 +175,11 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( } def mergeMaps(m1: JHashMap[K, V], m2: JHashMap[K, V]): JHashMap[K, V] = { - for ((k, v) <- m2) { + m2.foreach { case (k, v) => val old = m1.get(k) m1.put(k, if (old == null) v else func(old, v)) } - return m1 + m1 } self.mapPartitions(reducePartition).reduce(mergeMaps) @@ -233,31 +233,13 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( } /** - * Return a copy of the RDD partitioned using the specified partitioner. If `mapSideCombine` - * is true, Spark will group values of the same key together on the map side before the - * repartitioning, to only send each key over the network once. If a large number of - * duplicated keys are expected, and the size of the keys are large, `mapSideCombine` should - * be set to true. + * Return a copy of the RDD partitioned using the specified partitioner. */ - def partitionBy(partitioner: Partitioner, mapSideCombine: Boolean = false): RDD[(K, V)] = { - if (getKeyClass().isArray) { - if (mapSideCombine) { - throw new SparkException("Cannot use map-side combining with array keys.") - } - if (partitioner.isInstanceOf[HashPartitioner]) { - throw new SparkException("Default partitioner cannot partition array keys.") - } - } - if (mapSideCombine) { - def createCombiner(v: V) = ArrayBuffer(v) - def mergeValue(buf: ArrayBuffer[V], v: V) = buf += v - def mergeCombiners(b1: ArrayBuffer[V], b2: ArrayBuffer[V]) = b1 ++= b2 - val bufs = combineByKey[ArrayBuffer[V]]( - createCombiner _, mergeValue _, mergeCombiners _, partitioner) - bufs.flatMapValues(buf => buf) - } else { - new ShuffledRDD[K, V](self, partitioner) + def partitionBy(partitioner: Partitioner): RDD[(K, V)] = { + if (getKeyClass().isArray && partitioner.isInstanceOf[HashPartitioner]) { + throw new SparkException("Default partitioner cannot partition array keys.") } + new ShuffledRDD[K, V, (K, V)](self, partitioner) } /** @@ -266,9 +248,8 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( * (k, v2) is in `other`. Uses the given Partitioner to partition the output RDD. */ def join[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, W))] = { - this.cogroup(other, partitioner).flatMapValues { - case (vs, ws) => - for (v <- vs.iterator; w <- ws.iterator) yield (v, w) + this.cogroup(other, partitioner).flatMapValues { case (vs, ws) => + for (v <- vs.iterator; w <- ws.iterator) yield (v, w) } } @@ -279,13 +260,12 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( * partition the output RDD. 
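// Hedged sketch: with mapSideCombine gone (removed just above), partitionBy reduces
// to a plain shuffle with the given partitioner. `pairs` is an assumed RDD[(String, Int)].
import org.apache.spark.HashPartitioner
val repartitioned = pairs.partitionBy(new HashPartitioner(8)) // same pairs, 8 hash partitions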
*/ def leftOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, Option[W]))] = { - this.cogroup(other, partitioner).flatMapValues { - case (vs, ws) => - if (ws.isEmpty) { - vs.iterator.map(v => (v, None)) - } else { - for (v <- vs.iterator; w <- ws.iterator) yield (v, Some(w)) - } + this.cogroup(other, partitioner).flatMapValues { case (vs, ws) => + if (ws.isEmpty) { + vs.iterator.map(v => (v, None)) + } else { + for (v <- vs.iterator; w <- ws.iterator) yield (v, Some(w)) + } } } @@ -297,13 +277,12 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( */ def rightOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner) : RDD[(K, (Option[V], W))] = { - this.cogroup(other, partitioner).flatMapValues { - case (vs, ws) => - if (vs.isEmpty) { - ws.iterator.map(w => (None, w)) - } else { - for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), w) - } + this.cogroup(other, partitioner).flatMapValues { case (vs, ws) => + if (vs.isEmpty) { + ws.iterator.map(w => (None, w)) + } else { + for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), w) + } } } @@ -395,7 +374,13 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( /** * Return the key-value pairs in this RDD to the master as a Map. */ - def collectAsMap(): Map[K, V] = HashMap(self.collect(): _*) + def collectAsMap(): Map[K, V] = { + val data = self.toArray() + val map = new mutable.HashMap[K, V] + map.sizeHint(data.length) + data.foreach { case (k, v) => map.put(k, v) } + map + } /** * Pass each value in the key-value pair RDD through a map function without changing the keys; @@ -423,13 +408,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) { throw new SparkException("Default partitioner cannot partition array keys.") } - val cg = new CoGroupedRDD[K]( - Seq(self.asInstanceOf[RDD[(K, _)]], other.asInstanceOf[RDD[(K, _)]]), - partitioner) + val cg = new CoGroupedRDD[K](Seq(self, other), partitioner) val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest) - prfs.mapValues { - case Seq(vs, ws) => - (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]]) + prfs.mapValues { case Seq(vs, ws) => + (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]]) } } @@ -442,15 +424,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) { throw new SparkException("Default partitioner cannot partition array keys.") } - val cg = new CoGroupedRDD[K]( - Seq(self.asInstanceOf[RDD[(K, _)]], - other1.asInstanceOf[RDD[(K, _)]], - other2.asInstanceOf[RDD[(K, _)]]), - partitioner) + val cg = new CoGroupedRDD[K](Seq(self, other1, other2), partitioner) val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest) - prfs.mapValues { - case Seq(vs, w1s, w2s) => - (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]]) + prfs.mapValues { case Seq(vs, w1s, w2s) => + (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]]) } } @@ -582,7 +559,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( val formatter = new SimpleDateFormat("yyyyMMddHHmm") val jobtrackerID = formatter.format(new Date()) val stageId = self.id - def writeShard(context: spark.TaskContext, iter: Iterator[(K,V)]): Int = { + def writeShard(context: TaskContext, iter: Iterator[(K,V)]): Int = { // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll 
it // around by taking a mod. We expect that no task will be attempted 2 billion times. val attemptNumber = (context.attemptId % Int.MaxValue).toInt @@ -594,7 +571,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( committer.setupTask(hadoopContext) val writer = format.getRecordWriter(hadoopContext).asInstanceOf[NewRecordWriter[K,V]] while (iter.hasNext) { - val (k, v) = iter.next + val (k, v) = iter.next() writer.write(k, v) } writer.close(hadoopContext) @@ -652,7 +629,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( conf.set("mapred.output.compression.type", CompressionType.BLOCK.toString) } conf.setOutputCommitter(classOf[FileOutputCommitter]) - FileOutputFormat.setOutputPath(conf, HadoopWriter.createPathFromString(path, conf)) + FileOutputFormat.setOutputPath(conf, SparkHadoopWriter.createPathFromString(path, conf)) saveAsHadoopDataset(conf) } @@ -678,10 +655,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( logInfo("Saving as hadoop file of type (" + keyClass.getSimpleName+ ", " + valueClass.getSimpleName+ ")") - val writer = new HadoopWriter(conf) + val writer = new SparkHadoopWriter(conf) writer.preSetup() - def writeToFile(context: TaskContext, iter: Iterator[(K,V)]) { + def writeToFile(context: TaskContext, iter: Iterator[(K, V)]) { // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it // around by taking a mod. We expect that no task will be attempted 2 billion times. val attemptNumber = (context.attemptId % Int.MaxValue).toInt @@ -720,55 +697,6 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( private[spark] def getValueClass() = implicitly[ClassManifest[V]].erasure } -/** - * Extra functions available on RDDs of (key, value) pairs where the key is sortable through - * an implicit conversion. Import `spark.SparkContext._` at the top of your program to use these - * functions. They will work with any key type that has a `scala.math.Ordered` implementation. - */ -class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( - self: RDD[(K, V)]) - extends Logging - with Serializable { - - /** - * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling - * `collect` or `save` on the resulting RDD will return or output an ordered list of records - * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in - * order of the keys). 
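// The block deleted below is the old in-file OrderedRDDFunctions; sortByKey now lives
// in the new OrderedRDDFunctions.scala added earlier in this diff. A hedged usage
// sketch, assuming a SparkContext `sc` and the SparkContext._ implicits:
import org.apache.spark.SparkContext._
val sorted = sc.parallelize(Seq((3, "c"), (1, "a"), (2, "b")))
  .sortByKey(ascending = true, numPartitions = 2) // RDD[(Int, String)] in key order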
- */ - def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size): RDD[(K,V)] = { - val shuffled = - new ShuffledRDD[K, V](self, new RangePartitioner(numPartitions, self, ascending)) - shuffled.mapPartitions(iter => { - val buf = iter.toArray - if (ascending) { - buf.sortWith((x, y) => x._1 < y._1).iterator - } else { - buf.sortWith((x, y) => x._1 > y._1).iterator - } - }, true) - } -} - -private[spark] -class MappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => U) extends RDD[(K, U)](prev) { - override def getPartitions = firstParent[(K, V)].partitions - override val partitioner = firstParent[(K, V)].partitioner - override def compute(split: Partition, context: TaskContext) = - firstParent[(K, V)].iterator(split, context).map{ case (k, v) => (k, f(v)) } -} - -private[spark] -class FlatMappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => TraversableOnce[U]) - extends RDD[(K, U)](prev) { - - override def getPartitions = firstParent[(K, V)].partitions - override val partitioner = firstParent[(K, V)].partitioner - override def compute(split: Partition, context: TaskContext) = { - firstParent[(K, V)].iterator(split, context).flatMap { case (k, v) => f(v).map(x => (k, x)) } - } -} - private[spark] object Manifests { val seqSeqManifest = classManifest[Seq[Seq[_]]] } diff --git a/core/src/main/scala/spark/rdd/ParallelCollectionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala index 16ba0c26f8..6dbd4309aa 100644 --- a/core/src/main/scala/spark/rdd/ParallelCollectionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala @@ -15,18 +15,22 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import scala.collection.immutable.NumericRange import scala.collection.mutable.ArrayBuffer import scala.collection.Map -import spark.{RDD, TaskContext, SparkContext, Partition} +import org.apache.spark._ +import java.io._ +import scala.Serializable +import org.apache.spark.serializer.JavaSerializer +import org.apache.spark.util.Utils private[spark] class ParallelCollectionPartition[T: ClassManifest]( - val rddId: Long, - val slice: Int, - values: Seq[T]) - extends Partition with Serializable { + var rddId: Long, + var slice: Int, + var values: Seq[T]) + extends Partition with Serializable { def iterator: Iterator[T] = values.iterator @@ -37,15 +41,49 @@ private[spark] class ParallelCollectionPartition[T: ClassManifest]( case _ => false } - override val index: Int = slice + override def index: Int = slice + + @throws(classOf[IOException]) + private def writeObject(out: ObjectOutputStream): Unit = { + + val sfactory = SparkEnv.get.serializer + + // Treat java serializer with default action rather than going thru serialization, to avoid a + // separate serialization header. 
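// For reference, a self-contained sketch of the readObject/writeObject hook pattern this
// partition class relies on (plain JDK serialization; the Box class and its fields are
// illustrative assumptions, not Spark types):
import java.io.{IOException, ObjectInputStream, ObjectOutputStream}
class Box(var id: Long, var values: Seq[Int]) extends Serializable {
  @throws(classOf[IOException])
  private def writeObject(out: ObjectOutputStream) {
    out.writeLong(id)       // write the fields manually...
    out.writeObject(values) // ...instead of calling out.defaultWriteObject()
  }
  @throws(classOf[IOException])
  private def readObject(in: ObjectInputStream) {
    id = in.readLong()
    values = in.readObject().asInstanceOf[Seq[Int]]
  }
}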
+ + sfactory match { + case js: JavaSerializer => out.defaultWriteObject() + case _ => + out.writeLong(rddId) + out.writeInt(slice) + + val ser = sfactory.newInstance() + Utils.serializeViaNestedStream(out, ser)(_.writeObject(values)) + } + } + + @throws(classOf[IOException]) + private def readObject(in: ObjectInputStream): Unit = { + + val sfactory = SparkEnv.get.serializer + sfactory match { + case js: JavaSerializer => in.defaultReadObject() + case _ => + rddId = in.readLong() + slice = in.readInt() + + val ser = sfactory.newInstance() + Utils.deserializeViaNestedStream(in, ser)(ds => values = ds.readObject()) + } + } } private[spark] class ParallelCollectionRDD[T: ClassManifest]( @transient sc: SparkContext, @transient data: Seq[T], numSlices: Int, - locationPrefs: Map[Int,Seq[String]]) - extends RDD[T](sc, Nil) { + locationPrefs: Map[Int, Seq[String]]) + extends RDD[T](sc, Nil) { // TODO: Right now, each split sends along its full data, even if later down the RDD chain it gets // cached. It might be worthwhile to write the data to a file in the DFS and read it in the split // instead. @@ -82,16 +120,17 @@ private object ParallelCollectionRDD { 1 } slice(new Range( - r.start, r.end + sign, r.step).asInstanceOf[Seq[T]], numSlices) + r.start, r.end + sign, r.step).asInstanceOf[Seq[T]], numSlices) } case r: Range => { (0 until numSlices).map(i => { val start = ((i * r.length.toLong) / numSlices).toInt - val end = (((i+1) * r.length.toLong) / numSlices).toInt + val end = (((i + 1) * r.length.toLong) / numSlices).toInt new Range(r.start + start * r.step, r.start + end * r.step, r.step) }).asInstanceOf[Seq[Seq[T]]] } - case nr: NumericRange[_] => { // For ranges of Long, Double, BigInteger, etc + case nr: NumericRange[_] => { + // For ranges of Long, Double, BigInteger, etc val slices = new ArrayBuffer[Seq[T]](numSlices) val sliceSize = (nr.size + numSlices - 1) / numSlices // Round up to catch everything var r = nr @@ -102,10 +141,10 @@ private object ParallelCollectionRDD { slices } case _ => { - val array = seq.toArray // To prevent O(n^2) operations for List etc + val array = seq.toArray // To prevent O(n^2) operations for List etc (0 until numSlices).map(i => { val start = ((i * array.length.toLong) / numSlices).toInt - val end = (((i+1) * array.length.toLong) / numSlices).toInt + val end = (((i + 1) * array.length.toLong) / numSlices).toInt array.slice(start, end).toSeq }) } diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala index 191cfde565..165cd412fc 100644 --- a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd -import spark.{NarrowDependency, RDD, SparkEnv, Partition, TaskContext} +import org.apache.spark.{NarrowDependency, SparkEnv, Partition, TaskContext} class PartitionPruningRDDPartition(idx: Int, val parentSplit: Partition) extends Partition { @@ -33,8 +33,9 @@ class PruneDependency[T](rdd: RDD[T], @transient partitionFilterFunc: Int => Boo extends NarrowDependency[T](rdd) { @transient - val partitions: Array[Partition] = rdd.partitions.filter(s => partitionFilterFunc(s.index)) - .zipWithIndex.map { case(split, idx) => new PartitionPruningRDDPartition(idx, split) : Partition } + val partitions: Array[Partition] = rdd.partitions.zipWithIndex + .filter(s => partitionFilterFunc(s._2)) + .map { case(split, idx) => new PartitionPruningRDDPartition(idx, split) : Partition } override def getParents(partitionId: Int) = List(partitions(partitionId).index) } diff --git a/core/src/main/scala/spark/rdd/PipedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala index 2cefdc78b0..d5304ab0ae 100644 --- a/core/src/main/scala/spark/rdd/PipedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.io.PrintWriter import java.util.StringTokenizer @@ -25,8 +25,8 @@ import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer import scala.io.Source -import spark.{RDD, SparkEnv, Partition, TaskContext} -import spark.broadcast.Broadcast +import org.apache.spark.{SparkEnv, Partition, TaskContext} +import org.apache.spark.broadcast.Broadcast /** diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index ca7cdd622a..e143ecd096 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark.rdd import java.util.Random @@ -31,43 +31,28 @@ import org.apache.hadoop.mapred.TextOutputFormat import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap} -import spark.broadcast.Broadcast -import spark.Partitioner._ -import spark.partial.BoundedDouble -import spark.partial.CountEvaluator -import spark.partial.GroupedCountEvaluator -import spark.partial.PartialResult -import spark.rdd.CoalescedRDD -import spark.rdd.CartesianRDD -import spark.rdd.FilteredRDD -import spark.rdd.FlatMappedRDD -import spark.rdd.GlommedRDD -import spark.rdd.MappedRDD -import spark.rdd.MapPartitionsRDD -import spark.rdd.MapPartitionsWithIndexRDD -import spark.rdd.PipedRDD -import spark.rdd.SampledRDD -import spark.rdd.ShuffledRDD -import spark.rdd.UnionRDD -import spark.rdd.ZippedRDD -import spark.rdd.ZippedPartitionsRDD2 -import spark.rdd.ZippedPartitionsRDD3 -import spark.rdd.ZippedPartitionsRDD4 -import spark.storage.StorageLevel -import spark.util.BoundedPriorityQueue - -import SparkContext._ +import org.apache.spark.Partitioner._ +import org.apache.spark.api.java.JavaRDD +import org.apache.spark.partial.BoundedDouble +import org.apache.spark.partial.CountEvaluator +import org.apache.spark.partial.GroupedCountEvaluator +import org.apache.spark.partial.PartialResult +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.{Utils, BoundedPriorityQueue} + +import org.apache.spark.SparkContext._ +import org.apache.spark._ /** * A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. 
Represents an immutable, * partitioned collection of elements that can be operated on in parallel. This class contains the * basic operations available on all RDDs, such as `map`, `filter`, and `persist`. In addition, - * [[spark.PairRDDFunctions]] contains operations available only on RDDs of key-value pairs, such - * as `groupByKey` and `join`; [[spark.DoubleRDDFunctions]] contains operations available only on - * RDDs of Doubles; and [[spark.SequenceFileRDDFunctions]] contains operations available on RDDs - * that can be saved as SequenceFiles. These operations are automatically available on any RDD of - * the right type (e.g. RDD[(Int, Int)] through implicit conversions when you - * `import spark.SparkContext._`. + * [[org.apache.spark.rdd.PairRDDFunctions]] contains operations available only on RDDs of key-value + * pairs, such as `groupByKey` and `join`; [[org.apache.spark.rdd.DoubleRDDFunctions]] contains + * operations available only on RDDs of Doubles; and [[org.apache.spark.rdd.SequenceFileRDDFunctions]] + * contains operations available on RDDs that can be saved as SequenceFiles. These operations are + * automatically available on any RDD of the right type (e.g. RDD[(Int, Int)] through implicit + * conversions when you `import org.apache.spark.SparkContext._`. * * Internally, each RDD is characterized by five main properties: * @@ -220,8 +205,8 @@ abstract class RDD[T: ClassManifest]( } /** - * Get the preferred location of a split, taking into account whether the - * RDD is checkpointed or not. + * Get the preferred locations of a partition (as hostnames), taking into account whether the + * RDD is checkpointed. */ final def preferredLocations(split: Partition): Seq[String] = { checkpointRDD.map(_.getPreferredLocations(split)).getOrElse { @@ -286,7 +271,10 @@ abstract class RDD[T: ClassManifest]( def coalesce(numPartitions: Int, shuffle: Boolean = false): RDD[T] = { if (shuffle) { // include a shuffle step so that our upstream tasks are still distributed - new CoalescedRDD(new ShuffledRDD(map(x => (x, null)), new HashPartitioner(numPartitions)), numPartitions).keys + new CoalescedRDD( + new ShuffledRDD[T, Null, (T, Null)](map(x => (x, null)), + new HashPartitioner(numPartitions)), + numPartitions).keys } else { new CoalescedRDD(this, numPartitions) } @@ -301,8 +289,8 @@ abstract class RDD[T: ClassManifest]( def takeSample(withReplacement: Boolean, num: Int, seed: Int): Array[T] = { var fraction = 0.0 var total = 0 - var multiplier = 3.0 - var initialCount = this.count() + val multiplier = 3.0 + val initialCount = this.count() var maxSelected = 0 if (num < 0) { @@ -514,22 +502,19 @@ abstract class RDD[T: ClassManifest]( * *same number of partitions*, but does *not* require them to have the same number * of elements in each partition. 
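// A hedged usage sketch of the curried zipPartitions signature introduced just below
// (RDD arguments first, then the function in its own parameter list); `sc` is assumed:
val nums = sc.parallelize(1 to 6, 2)
val words = sc.parallelize(Seq("a", "b", "c", "d", "e", "f"), 2)
val zipped = nums.zipPartitions(words) { (xs, ys) => xs.zip(ys) } // RDD[(Int, String)]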
*/ - def zipPartitions[B: ClassManifest, V: ClassManifest]( - f: (Iterator[T], Iterator[B]) => Iterator[V], - rdd2: RDD[B]): RDD[V] = + def zipPartitions[B: ClassManifest, V: ClassManifest] + (rdd2: RDD[B]) + (f: (Iterator[T], Iterator[B]) => Iterator[V]): RDD[V] = new ZippedPartitionsRDD2(sc, sc.clean(f), this, rdd2) - def zipPartitions[B: ClassManifest, C: ClassManifest, V: ClassManifest]( - f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V], - rdd2: RDD[B], - rdd3: RDD[C]): RDD[V] = + def zipPartitions[B: ClassManifest, C: ClassManifest, V: ClassManifest] + (rdd2: RDD[B], rdd3: RDD[C]) + (f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V]): RDD[V] = new ZippedPartitionsRDD3(sc, sc.clean(f), this, rdd2, rdd3) - def zipPartitions[B: ClassManifest, C: ClassManifest, D: ClassManifest, V: ClassManifest]( - f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], - rdd2: RDD[B], - rdd3: RDD[C], - rdd4: RDD[D]): RDD[V] = + def zipPartitions[B: ClassManifest, C: ClassManifest, D: ClassManifest, V: ClassManifest] + (rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D]) + (f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V]): RDD[V] = new ZippedPartitionsRDD4(sc, sc.clean(f), this, rdd2, rdd3, rdd4) @@ -893,7 +878,7 @@ abstract class RDD[T: ClassManifest]( dependencies.head.rdd.asInstanceOf[RDD[U]] } - /** The [[spark.SparkContext]] that this RDD was created on. */ + /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */ def context = sc // Avoid handling doCheckpoint multiple times to prevent excessive recursion @@ -929,7 +914,7 @@ abstract class RDD[T: ClassManifest]( * Clears the dependencies of this RDD. This method must ensure that all references * to the original parent RDDs is removed to enable the parent RDDs to be garbage * collected. Subclasses of RDD may override this method for implementing their own cleaning - * logic. See [[spark.rdd.UnionRDD]] for an example. + * logic. See [[org.apache.spark.rdd.UnionRDD]] for an example. */ protected def clearDependencies() { dependencies_ = null @@ -950,4 +935,8 @@ abstract class RDD[T: ClassManifest]( id, origin) + def toJavaRDD() : JavaRDD[T] = { + new JavaRDD(this)(elementClassManifest) + } + } diff --git a/core/src/main/scala/spark/RDDCheckpointData.scala b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala index b615f820eb..6009a41570 100644 --- a/core/src/main/scala/spark/RDDCheckpointData.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala @@ -15,12 +15,13 @@ * limitations under the License. */ -package spark +package org.apache.spark.rdd import org.apache.hadoop.fs.Path import org.apache.hadoop.conf.Configuration -import rdd.{CheckpointRDD, CoalescedRDD} -import scheduler.{ResultTask, ShuffleMapTask} + +import org.apache.spark.{Partition, SparkException, Logging} +import org.apache.spark.scheduler.{ResultTask, ShuffleMapTask} /** * Enumeration to manage state transitions of an RDD through checkpointing diff --git a/core/src/main/scala/spark/rdd/SampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala index 574c9b141d..2c5253ae30 100644 --- a/core/src/main/scala/spark/rdd/SampledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala @@ -15,14 +15,14 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd import java.util.Random import cern.jet.random.Poisson import cern.jet.random.engine.DRand -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] class SampledRDDPartition(val prev: Partition, val seed: Int) extends Partition with Serializable { diff --git a/core/src/main/scala/spark/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 9f30b7f22f..5fe4676029 100644 --- a/core/src/main/scala/spark/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -15,40 +15,22 @@ * limitations under the License. */ -package spark - -import java.io.EOFException -import java.net.URL -import java.io.ObjectInputStream -import java.util.concurrent.atomic.AtomicLong -import java.util.HashSet -import java.util.Random -import java.util.Date - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.Map -import scala.collection.mutable.HashMap +package org.apache.spark.rdd import org.apache.hadoop.mapred.JobConf -import org.apache.hadoop.mapred.OutputFormat -import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapred.SequenceFileOutputFormat -import org.apache.hadoop.mapred.OutputCommitter -import org.apache.hadoop.mapred.FileOutputCommitter import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.io.Writable -import org.apache.hadoop.io.NullWritable -import org.apache.hadoop.io.BytesWritable -import org.apache.hadoop.io.Text -import spark.SparkContext._ +import org.apache.spark.SparkContext._ +import org.apache.spark.Logging /** * Extra functions available on RDDs of (key, value) pairs to create a Hadoop SequenceFile, * through an implicit conversion. Note that this can't be part of PairRDDFunctions because * we need more implicit parameters to convert our keys and values to Writable. * - * Users should import `spark.SparkContext._` at the top of their program to use these functions. + * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ class SequenceFileRDDFunctions[K <% Writable: ClassManifest, V <% Writable : ClassManifest]( self: RDD[(K, V)]) diff --git a/core/src/main/scala/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala index 0137f80953..9537152335 100644 --- a/core/src/main/scala/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala @@ -15,10 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{Partitioner, RDD, SparkEnv, ShuffleDependency, Partition, TaskContext} -import spark.SparkContext._ +import org.apache.spark.{Dependency, Partitioner, SparkEnv, ShuffleDependency, Partition, TaskContext} private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { @@ -30,15 +29,24 @@ private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { * The resulting RDD from a shuffle (e.g. repartitioning of data). * @param prev the parent RDD. * @param part the partitioner used to partition the RDD - * @param serializerClass class name of the serializer to use. * @tparam K the key class. * @tparam V the value class.
*/ -class ShuffledRDD[K, V]( - @transient prev: RDD[(K, V)], - part: Partitioner, - serializerClass: String = null) - extends RDD[(K, V)](prev.context, List(new ShuffleDependency(prev, part, serializerClass))) { +class ShuffledRDD[K, V, P <: Product2[K, V] : ClassManifest]( + @transient var prev: RDD[P], + part: Partitioner) + extends RDD[P](prev.context, Nil) { + + private var serializerClass: String = null + + def setSerializer(cls: String): ShuffledRDD[K, V, P] = { + serializerClass = cls + this + } + + override def getDependencies: Seq[Dependency[_]] = { + List(new ShuffleDependency(prev, part, serializerClass)) + } override val partitioner = Some(part) @@ -46,9 +54,14 @@ class ShuffledRDD[K, V]( Array.tabulate[Partition](part.numPartitions)(i => new ShuffledRDDPartition(i)) } - override def compute(split: Partition, context: TaskContext): Iterator[(K, V)] = { + override def compute(split: Partition, context: TaskContext): Iterator[P] = { val shuffledId = dependencies.head.asInstanceOf[ShuffleDependency[K, V]].shuffleId - SparkEnv.get.shuffleFetcher.fetch[K, V](shuffledId, split.index, context.taskMetrics, + SparkEnv.get.shuffleFetcher.fetch[P](shuffledId, split.index, context.taskMetrics, SparkEnv.get.serializerManager.get(serializerClass)) } + + override def clearDependencies() { + super.clearDependencies() + prev = null + } } diff --git a/core/src/main/scala/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala index 0402b9f250..8c1a29dfff 100644 --- a/core/src/main/scala/spark/rdd/SubtractedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala @@ -15,19 +15,18 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer -import spark.RDD -import spark.Partitioner -import spark.Dependency -import spark.TaskContext -import spark.Partition -import spark.SparkEnv -import spark.ShuffleDependency -import spark.OneToOneDependency +import org.apache.spark.Partitioner +import org.apache.spark.Dependency +import org.apache.spark.TaskContext +import org.apache.spark.Partition +import org.apache.spark.SparkEnv +import org.apache.spark.ShuffleDependency +import org.apache.spark.OneToOneDependency /** @@ -47,20 +46,26 @@ import spark.OneToOneDependency * out of memory because of the size of `rdd2`. 
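// Hedged sketch of constructing the three-type-parameter ShuffledRDD defined above,
// with the builder-style setSerializer replacing the old constructor argument. The
// input RDD `pairs` and the serializer class name are illustrative assumptions.
import org.apache.spark.HashPartitioner
val shuffled = new ShuffledRDD[String, Int, (String, Int)](pairs, new HashPartitioner(4))
  .setSerializer("org.apache.spark.serializer.KryoSerializer")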
*/ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassManifest]( - @transient var rdd1: RDD[(K, V)], - @transient var rdd2: RDD[(K, W)], - part: Partitioner, - val serializerClass: String = null) + @transient var rdd1: RDD[_ <: Product2[K, V]], + @transient var rdd2: RDD[_ <: Product2[K, W]], + part: Partitioner) extends RDD[(K, V)](rdd1.context, Nil) { + private var serializerClass: String = null + + def setSerializer(cls: String): SubtractedRDD[K, V, W] = { + serializerClass = cls + this + } + override def getDependencies: Seq[Dependency[_]] = { Seq(rdd1, rdd2).map { rdd => if (rdd.partitioner == Some(part)) { - logInfo("Adding one-to-one dependency with " + rdd) + logDebug("Adding one-to-one dependency with " + rdd) new OneToOneDependency(rdd) } else { - logInfo("Adding shuffle dependency with " + rdd) - new ShuffleDependency(rdd.asInstanceOf[RDD[(K, Any)]], part, serializerClass) + logDebug("Adding shuffle dependency with " + rdd) + new ShuffleDependency(rdd, part, serializerClass) } } } @@ -97,16 +102,14 @@ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassM seq } } - def integrate(dep: CoGroupSplitDep, op: ((K, V)) => Unit) = dep match { + def integrate(dep: CoGroupSplitDep, op: Product2[K, V] => Unit) = dep match { case NarrowCoGroupSplitDep(rdd, _, itsSplit) => { - for (t <- rdd.iterator(itsSplit, context)) - op(t.asInstanceOf[(K, V)]) + rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, V]]].foreach(op) } case ShuffleCoGroupSplitDep(shuffleId) => { - val iter = SparkEnv.get.shuffleFetcher.fetch(shuffleId, partition.index, + val iter = SparkEnv.get.shuffleFetcher.fetch[Product2[K, V]](shuffleId, partition.index, context.taskMetrics, serializer) - for (t <- iter) - op(t.asInstanceOf[(K, V)]) + iter.foreach(op) } } // the first dep is rdd1; add all values to the map diff --git a/core/src/main/scala/spark/rdd/UnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala index 2776826f18..ae8a9f36a6 100644 --- a/core/src/main/scala/spark/rdd/UnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import scala.collection.mutable.ArrayBuffer -import spark.{Dependency, RangeDependency, RDD, SparkContext, Partition, TaskContext} +import org.apache.spark.{Dependency, RangeDependency, SparkContext, Partition, TaskContext} import java.io.{ObjectOutputStream, IOException} private[spark] class UnionPartition[T: ClassManifest](idx: Int, rdd: RDD[T], splitIndex: Int) diff --git a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala index 6a4fa13ad6..31e6fd519d 100644 --- a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd -import spark.{Utils, OneToOneDependency, RDD, SparkContext, Partition, TaskContext} +import org.apache.spark.{OneToOneDependency, SparkContext, Partition, TaskContext} import java.io.{ObjectOutputStream, IOException} private[spark] class ZippedPartitionsPartition( @@ -55,27 +55,15 @@ abstract class ZippedPartitionsBaseRDD[V: ClassManifest]( } override def getPreferredLocations(s: Partition): Seq[String] = { - // Note that as number of rdd's increase and/or number of slaves in cluster increase, the computed preferredLocations below - // become diminishingly small : so we might need to look at alternate strategies to alleviate this. - // If there are no (or very small number of preferred locations), we will end up transferred the blocks to 'any' node in the - // cluster - paying with n/w and cache cost. - // Maybe pick a node which figures max amount of time ? - // Choose node which is hosting 'larger' of some subset of blocks ? - // Look at rack locality to ensure chosen host is atleast rack local to both hosting node ?, etc (would be good to defer this if possible) - val splits = s.asInstanceOf[ZippedPartitionsPartition].partitions - val rddSplitZip = rdds.zip(splits) - - // exact match. - val exactMatchPreferredLocations = rddSplitZip.map(x => x._1.preferredLocations(x._2)) - val exactMatchLocations = exactMatchPreferredLocations.reduce((x, y) => x.intersect(y)) - - // Remove exact match and then do host local match. - val exactMatchHosts = exactMatchLocations.map(Utils.parseHostPort(_)._1) - val matchPreferredHosts = exactMatchPreferredLocations.map(locs => locs.map(Utils.parseHostPort(_)._1)) - .reduce((x, y) => x.intersect(y)) - val otherNodeLocalLocations = matchPreferredHosts.filter { s => !exactMatchHosts.contains(s) } - - otherNodeLocalLocations ++ exactMatchLocations + val parts = s.asInstanceOf[ZippedPartitionsPartition].partitions + val prefs = rdds.zip(parts).map { case (rdd, p) => rdd.preferredLocations(p) } + // Check whether there are any hosts that match all RDDs; otherwise return the union + val exactMatchLocations = prefs.reduce((x, y) => x.intersect(y)) + if (!exactMatchLocations.isEmpty) { + exactMatchLocations + } else { + prefs.flatten.distinct + } } override def clearDependencies() { diff --git a/core/src/main/scala/spark/rdd/ZippedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala index b1c43b3195..567b67dfee 100644 --- a/core/src/main/scala/spark/rdd/ZippedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{Utils, OneToOneDependency, RDD, SparkContext, Partition, TaskContext} +import org.apache.spark.{OneToOneDependency, SparkContext, Partition, TaskContext} import java.io.{ObjectOutputStream, IOException} @@ -65,27 +65,16 @@ class ZippedRDD[T: ClassManifest, U: ClassManifest]( } override def getPreferredLocations(s: Partition): Seq[String] = { - // Note that as number of slaves in cluster increase, the computed preferredLocations can become small : so we might need - // to look at alternate strategies to alleviate this. (If there are no (or very small number of preferred locations), we - // will end up transferred the blocks to 'any' node in the cluster - paying with n/w and cache cost. - // Maybe pick one or the other ? (so that atleast one block is local ?). - // Choose node which is hosting 'larger' of the blocks ? 
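// A standalone sketch of the simplified preference rule adopted above for
// ZippedPartitionsBaseRDD (and below for ZippedRDD): hosts preferred by every parent
// partition win; otherwise fall back to the distinct union of all preferences.
def mergePreferences(prefs: Seq[Seq[String]]): Seq[String] = {
  val exactMatches = prefs.reduce((x, y) => x.intersect(y))
  if (!exactMatches.isEmpty) exactMatches else prefs.flatten.distinct
}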
- // Look at rack locality to ensure chosen host is atleast rack local to both hosting node ?, etc (would be good to defer this if possible) val (partition1, partition2) = s.asInstanceOf[ZippedPartition[T, U]].partitions val pref1 = rdd1.preferredLocations(partition1) val pref2 = rdd2.preferredLocations(partition2) - - // exact match - instance local and host local. + // Check whether there are any hosts that match both RDDs; otherwise return the union val exactMatchLocations = pref1.intersect(pref2) - - // remove locations which are already handled via exactMatchLocations, and intersect where both partitions are node local. - val otherNodeLocalPref1 = pref1.filter(loc => ! exactMatchLocations.contains(loc)).map(loc => Utils.parseHostPort(loc)._1) - val otherNodeLocalPref2 = pref2.filter(loc => ! exactMatchLocations.contains(loc)).map(loc => Utils.parseHostPort(loc)._1) - val otherNodeLocalLocations = otherNodeLocalPref1.intersect(otherNodeLocalPref2) - - - // Can have mix of instance local (hostPort) and node local (host) locations as preference ! - exactMatchLocations ++ otherNodeLocalLocations + if (!exactMatchLocations.isEmpty) { + exactMatchLocations + } else { + (pref1 ++ pref2).distinct + } } override def clearDependencies() { diff --git a/core/src/main/scala/spark/scheduler/ActiveJob.scala b/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala index 71cc94edb6..0b04607d01 100644 --- a/core/src/main/scala/spark/scheduler/ActiveJob.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.TaskContext +import org.apache.spark.TaskContext import java.util.Properties @@ -25,7 +25,7 @@ import java.util.Properties * Tracks information about an active job in the DAGScheduler. */ private[spark] class ActiveJob( - val runId: Int, + val jobId: Int, val finalStage: Stage, val func: (TaskContext, Iterator[_]) => _, val partitions: Array[Int], diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 29e879aa42..92add5b073 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -15,29 +15,40 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler -import cluster.TaskInfo -import java.util.concurrent.atomic.AtomicInteger -import java.util.concurrent.LinkedBlockingQueue -import java.util.concurrent.TimeUnit +import java.io.NotSerializableException import java.util.Properties +import java.util.concurrent.{LinkedBlockingQueue, TimeUnit} +import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} -import spark._ -import spark.executor.TaskMetrics -import spark.partial.ApproximateActionListener -import spark.partial.ApproximateEvaluator -import spark.partial.PartialResult -import spark.storage.{BlockManager, BlockManagerMaster} -import spark.util.{MetadataCleaner, TimeStampedHashMap} +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.partial.{ApproximateActionListener, ApproximateEvaluator, PartialResult} +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.storage.{BlockManager, BlockManagerMaster} +import org.apache.spark.util.{MetadataCleaner, TimeStampedHashMap} /** - * A Scheduler subclass that implements stage-oriented scheduling. It computes a DAG of stages for - * each job, keeps track of which RDDs and stage outputs are materialized, and computes a minimal - * schedule to run the job. Subclasses only need to implement the code to send a task to the cluster - * and to report fetch failures (the submitTasks method, and code to add CompletionEvents). + * The high-level scheduling layer that implements stage-oriented scheduling. It computes a DAG of + * stages for each job, keeps track of which RDDs and stage outputs are materialized, and finds a + * minimal schedule to run the job. It then submits stages as TaskSets to an underlying + * TaskScheduler implementation that runs them on the cluster. + * + * In addition to coming up with a DAG of stages, this class also determines the preferred + * locations to run each task on, based on the current cache status, and passes these to the + * low-level TaskScheduler. Furthermore, it handles failures due to shuffle output files being + * lost, in which case old stages may need to be resubmitted. Failures *within* a stage that are + * not caused by shuffle file loss are handled by the TaskScheduler, which will retry each task + * a small number of times before cancelling the whole stage. + * + * THREADING: This class runs all its logic in a single thread executing the run() method, to which + * events are submitted using a synchronized queue (eventQueue). The public API methods, such as + * runJob, taskEnded and executorLost, post events asynchronously to this queue. All other methods + * should be private. */ private[spark] class DAGScheduler( @@ -52,6 +63,11 @@ class DAGScheduler( } taskSched.setListener(this) + // Called by TaskScheduler to report a task's start. + override def taskStarted(task: Task[_], taskInfo: TaskInfo) { + eventQueue.put(BeginEvent(task, taskInfo)) + } + // Called by TaskScheduler to report task completions or failures.
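// A minimal sketch of the post-to-queue pattern used by taskStarted above (and by
// taskEnded just below): callers never mutate scheduler state directly, they only
// enqueue events for the single scheduler thread to drain. All names are assumptions.
import java.util.concurrent.LinkedBlockingQueue
sealed trait SchedulerEvent
case class TaskStarted(taskId: Long) extends SchedulerEvent
val eventQueue = new LinkedBlockingQueue[SchedulerEvent]
def onTaskStarted(taskId: Long) { eventQueue.put(TaskStarted(taskId)) } // asynchronous post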
override def taskEnded( task: Task[_], @@ -69,8 +85,8 @@ class DAGScheduler( } // Called by TaskScheduler when a host is added - override def executorGained(execId: String, hostPort: String) { - eventQueue.put(ExecutorGained(execId, hostPort)) + override def executorGained(execId: String, host: String) { + eventQueue.put(ExecutorGained(execId, host)) } // Called by TaskScheduler to cancel an entire TaskSet due to repeated failures. @@ -89,27 +105,28 @@ class DAGScheduler( private val eventQueue = new LinkedBlockingQueue[DAGSchedulerEvent] - val nextRunId = new AtomicInteger(0) + val nextJobId = new AtomicInteger(0) val nextStageId = new AtomicInteger(0) - val idToStage = new TimeStampedHashMap[Int, Stage] + val stageIdToStage = new TimeStampedHashMap[Int, Stage] val shuffleToMapStage = new TimeStampedHashMap[Int, Stage] private[spark] val stageToInfos = new TimeStampedHashMap[Stage, StageInfo] - private[spark] val sparkListeners = ArrayBuffer[SparkListener]() + private val listenerBus = new SparkListenerBus() - var cacheLocs = new HashMap[Int, Array[List[String]]] + // Contains the locations that each RDD's partitions are cached on + private val cacheLocs = new HashMap[Int, Array[Seq[TaskLocation]]] - // For tracking failed nodes, we use the MapOutputTracker's generation number, which is - // sent with every task. When we detect a node failing, we note the current generation number - // and failed executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask - // results. - // TODO: Garbage collect information about failure generations when we know there are no more + // For tracking failed nodes, we use the MapOutputTracker's epoch number, which is sent with + // every task. When we detect a node failing, we note the current epoch number and failed + // executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask results. + // + // TODO: Garbage collect information about failure epochs when we know there are no more // stray messages to detect. - val failedGeneration = new HashMap[String, Long] + val failedEpoch = new HashMap[String, Long] val idToActiveJob = new HashMap[Int, ActiveJob] @@ -134,11 +151,17 @@ class DAGScheduler( }.start() } - private def getCacheLocs(rdd: RDD[_]): Array[List[String]] = { + def addSparkListener(listener: SparkListener) { + listenerBus.addListener(listener) + } + + private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = { if (!cacheLocs.contains(rdd.id)) { val blockIds = rdd.partitions.indices.map(index=> "rdd_%d_%d".format(rdd.id, index)).toArray - val locs = BlockManager.blockIdsToExecutorLocations(blockIds, env, blockManagerMaster) - cacheLocs(rdd.id) = blockIds.map(locs.getOrElse(_, Nil)) + val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster) + cacheLocs(rdd.id) = blockIds.map { id => + locs.getOrElse(id, Nil).map(bm => TaskLocation(bm.host, bm.executorId)) + } } cacheLocs(rdd.id) } @@ -149,14 +172,14 @@ class DAGScheduler( /** * Get or create a shuffle map stage for the given shuffle dependency's map side. - * The priority value passed in will be used if the stage doesn't already exist with - * a lower priority (we assume that priorities always increase across jobs for now). + * The jobId value passed in will be used if the stage doesn't already exist with + * a lower jobId (jobId always increases across jobs.) 
*/ - private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], priority: Int): Stage = { + private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], jobId: Int): Stage = { shuffleToMapStage.get(shuffleDep.shuffleId) match { case Some(stage) => stage case None => - val stage = newStage(shuffleDep.rdd, Some(shuffleDep), priority) + val stage = newStage(shuffleDep.rdd, Some(shuffleDep), jobId) shuffleToMapStage(shuffleDep.shuffleId) = stage stage } @@ -164,13 +187,13 @@ class DAGScheduler( /** * Create a Stage for the given RDD, either as a shuffle map stage (for a ShuffleDependency) or - * as a result stage for the final RDD used directly in an action. The stage will also be given - * the provided priority. + * as a result stage for the final RDD used directly in an action. The stage will also be + * associated with the provided jobId. */ private def newStage( rdd: RDD[_], shuffleDep: Option[ShuffleDependency[_,_]], - priority: Int, + jobId: Int, callSite: Option[String] = None) : Stage = { @@ -181,17 +204,17 @@ class DAGScheduler( mapOutputTracker.registerShuffle(shuffleDep.get.shuffleId, rdd.partitions.size) } val id = nextStageId.getAndIncrement() - val stage = new Stage(id, rdd, shuffleDep, getParentStages(rdd, priority), priority, callSite) - idToStage(id) = stage + val stage = new Stage(id, rdd, shuffleDep, getParentStages(rdd, jobId), jobId, callSite) + stageIdToStage(id) = stage stageToInfos(stage) = StageInfo(stage) stage } /** * Get or create the list of parent stages for a given RDD. The stages will be assigned the - * provided priority if they haven't already been created with a lower priority. + * provided jobId if they haven't already been created with a lower jobId. */ - private def getParentStages(rdd: RDD[_], priority: Int): List[Stage] = { + private def getParentStages(rdd: RDD[_], jobId: Int): List[Stage] = { val parents = new HashSet[Stage] val visited = new HashSet[RDD[_]] def visit(r: RDD[_]) { @@ -202,7 +225,7 @@ class DAGScheduler( for (dep <- r.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => - parents += getShuffleMapStage(shufDep, priority) + parents += getShuffleMapStage(shufDep, jobId) case _ => visit(dep.rdd) } @@ -223,7 +246,7 @@ class DAGScheduler( for (dep <- rdd.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => - val mapStage = getShuffleMapStage(shufDep, stage.priority) + val mapStage = getShuffleMapStage(shufDep, stage.jobId) if (!mapStage.isAvailable) { missing += mapStage } @@ -258,8 +281,9 @@ class DAGScheduler( assert(partitions.size > 0) val waiter = new JobWaiter(partitions.size, resultHandler) val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] - val toSubmit = JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter, properties) - return (toSubmit, waiter) + val toSubmit = JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter, + properties) + (toSubmit, waiter) } def runJob[T, U: ClassManifest]( @@ -283,7 +307,7 @@ class DAGScheduler( "Total number of partitions: " + maxPartitions) } - val (toSubmit, waiter) = prepareJob( + val (toSubmit: JobSubmitted, waiter: JobWaiter[_]) = prepareJob( finalRdd, func, partitions, callSite, allowLocal, resultHandler, properties) eventQueue.put(toSubmit) waiter.awaitResult() match { @@ -306,8 +330,8 @@ class DAGScheduler( val listener = new ApproximateActionListener(rdd, func, evaluator, timeout) val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] val partitions = (0 until 
rdd.partitions.size).toArray - eventQueue.put(JobSubmitted(rdd, func2, partitions, false, callSite, listener, properties)) - return listener.awaitResult() // Will throw an exception if the job fails + eventQueue.put(JobSubmitted(rdd, func2, partitions, allowLocal = false, callSite, listener, properties)) + listener.awaitResult() // Will throw an exception if the job fails } /** @@ -317,11 +341,11 @@ class DAGScheduler( private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = { event match { case JobSubmitted(finalRDD, func, partitions, allowLocal, callSite, listener, properties) => - val runId = nextRunId.getAndIncrement() - val finalStage = newStage(finalRDD, None, runId, Some(callSite)) - val job = new ActiveJob(runId, finalStage, func, partitions, callSite, listener, properties) + val jobId = nextJobId.getAndIncrement() + val finalStage = newStage(finalRDD, None, jobId, Some(callSite)) + val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties) clearCacheLocs() - logInfo("Got job " + job.runId + " (" + callSite + ") with " + partitions.length + + logInfo("Got job " + job.jobId + " (" + callSite + ") with " + partitions.length + " output partitions (allowLocal=" + allowLocal + ")") logInfo("Final stage: " + finalStage + " (" + finalStage.name + ")") logInfo("Parents of final stage: " + finalStage.parents) @@ -330,37 +354,40 @@ class DAGScheduler( // Compute very short actions like first() or take() with no parent stages locally. runLocally(job) } else { - sparkListeners.foreach(_.onJobStart(SparkListenerJobStart(job, properties))) - idToActiveJob(runId) = job + listenerBus.post(SparkListenerJobStart(job, properties)) + idToActiveJob(jobId) = job activeJobs += job resultStageToJob(finalStage) = job submitStage(finalStage) } - case ExecutorGained(execId, hostPort) => - handleExecutorGained(execId, hostPort) + case ExecutorGained(execId, host) => + handleExecutorGained(execId, host) case ExecutorLost(execId) => handleExecutorLost(execId) + case begin: BeginEvent => + listenerBus.post(SparkListenerTaskStart(begin.task, begin.taskInfo)) + case completion: CompletionEvent => - sparkListeners.foreach(_.onTaskEnd(SparkListenerTaskEnd(completion.task, - completion.reason, completion.taskInfo, completion.taskMetrics))) + listenerBus.post(SparkListenerTaskEnd( + completion.task, completion.reason, completion.taskInfo, completion.taskMetrics)) handleTaskCompletion(completion) case TaskSetFailed(taskSet, reason) => - abortStage(idToStage(taskSet.stageId), reason) + abortStage(stageIdToStage(taskSet.stageId), reason) case StopDAGScheduler => // Cancel any active jobs for (job <- activeJobs) { val error = new SparkException("Job cancelled because SparkContext was shut down") job.listener.jobFailed(error) - sparkListeners.foreach(_.onJobEnd(SparkListenerJobEnd(job, JobFailed(error, None)))) + listenerBus.post(SparkListenerJobEnd(job, JobFailed(error, None))) } return true } - return false + false } /** @@ -372,7 +399,7 @@ class DAGScheduler( clearCacheLocs() val failed2 = failed.toArray failed.clear() - for (stage <- failed2.sortBy(_.priority)) { + for (stage <- failed2.sortBy(_.jobId)) { submitStage(stage) } } @@ -390,7 +417,7 @@ class DAGScheduler( logTrace("failed: " + failed) val waiting2 = waiting.toArray waiting.clear() - for (stage <- waiting2.sortBy(_.priority)) { + for (stage <- waiting2.sortBy(_.jobId)) { submitStage(stage) } } @@ -409,23 +436,24 @@ class DAGScheduler( if (event != null) { logDebug("Got event of type " + 
event.getClass.getName)
       }
-
-      if (event != null) {
-        if (processEvent(event)) {
-          return
+      this.synchronized { // needed in case other threads make calls into methods of this class
+        if (event != null) {
+          if (processEvent(event)) {
+            return
+          }
         }
-      }
-      val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability
-      // Periodically resubmit failed stages if some map output fetches have failed and we have
-      // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails,
-      // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at
-      // the same time, so we want to make sure we've identified all the reduce tasks that depend
-      // on the failed node.
-      if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) {
-        resubmitFailedStages()
-      } else {
-        submitWaitingStages()
+        val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability
+        // Periodically resubmit failed stages if some map output fetches have failed and we have
+        // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails,
+        // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at
+        // the same time, so we want to make sure we've identified all the reduce tasks that depend
+        // on the failed node.
+        if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) {
+          resubmitFailedStages()
+        } else {
+          submitWaitingStages()
+        }
       }
     }
   }
@@ -437,7 +465,7 @@ class DAGScheduler(
    */
   protected def runLocally(job: ActiveJob) {
     logInfo("Computing the requested partition locally")
-    new Thread("Local computation of job " + job.runId) {
+    new Thread("Local computation of job " + job.jobId) {
       override def run() {
         runLocallyWithinThread(job)
       }
@@ -497,20 +525,36 @@ class DAGScheduler(
     } else {
       // This is a final stage; figure out its job's missing partitions
       val job = resultStageToJob(stage)
-      for (id <- 0 until job.numPartitions if (!job.finished(id))) {
+      for (id <- 0 until job.numPartitions if !job.finished(id)) {
        val partition = job.partitions(id)
        val locs = getPreferredLocs(stage.rdd, partition)
        tasks += new ResultTask(stage.id, stage.rdd, job.func, partition, locs, id)
       }
     }
+
+    // Post to the listeners before the task-serialization check below can throw a
+    // NotSerializableException, so that "StageSubmitted" is always seen before "JobEnded"
+    val properties = idToActiveJob(stage.jobId).properties
+    listenerBus.post(SparkListenerStageSubmitted(stage, tasks.size, properties))
+
     if (tasks.size > 0) {
-      sparkListeners.foreach(_.onStageSubmitted(SparkListenerStageSubmitted(stage, tasks.size)))
+      // Preemptively serialize a task to make sure it can be serialized. We are catching this
+      // exception here because it would be fairly hard to catch the non-serializable exception
+      // down the road, where we have several different implementations for local scheduler and
+      // cluster schedulers.
+ try { + SparkEnv.get.closureSerializer.newInstance().serialize(tasks.head) + } catch { + case e: NotSerializableException => + abortStage(stage, e.toString) + running -= stage + return + } + logInfo("Submitting " + tasks.size + " missing tasks from " + stage + " (" + stage.rdd + ")") myPending ++= tasks logDebug("New pending tasks: " + myPending) - val properties = idToActiveJob(stage.priority).properties taskSched.submitTasks( - new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.priority, properties)) + new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.jobId, properties)) if (!stage.submissionTime.isDefined) { stage.submissionTime = Some(System.currentTimeMillis()) } @@ -527,7 +571,7 @@ class DAGScheduler( */ private def handleTaskCompletion(event: CompletionEvent) { val task = event.task - val stage = idToStage(task.stageId) + val stage = stageIdToStage(task.stageId) def markStageAsFinished(stage: Stage) = { val serviceTime = stage.submissionTime match { @@ -536,8 +580,7 @@ class DAGScheduler( } logInfo("%s (%s) finished in %s s".format(stage, stage.name, serviceTime)) stage.completionTime = Some(System.currentTimeMillis) - val stageComp = StageCompleted(stageToInfos(stage)) - sparkListeners.foreach{_.onStageCompleted(stageComp)} + listenerBus.post(StageCompleted(stageToInfos(stage))) running -= stage } event.reason match { @@ -557,11 +600,11 @@ class DAGScheduler( job.numFinished += 1 // If the whole job has finished, remove it if (job.numFinished == job.numPartitions) { - idToActiveJob -= stage.priority + idToActiveJob -= stage.jobId activeJobs -= job resultStageToJob -= stage markStageAsFinished(stage) - sparkListeners.foreach(_.onJobEnd(SparkListenerJobEnd(job, JobSucceeded))) + listenerBus.post(SparkListenerJobEnd(job, JobSucceeded)) } job.listener.taskSucceeded(rt.outputId, event.result) } @@ -573,7 +616,7 @@ class DAGScheduler( val status = event.result.asInstanceOf[MapStatus] val execId = status.location.executorId logDebug("ShuffleMapTask finished on " + execId) - if (failedGeneration.contains(execId) && smt.generation <= failedGeneration(execId)) { + if (failedEpoch.contains(execId) && smt.epoch <= failedEpoch(execId)) { logInfo("Ignoring possibly bogus ShuffleMapTask completion from " + execId) } else { stage.addOutputLoc(smt.partition, status) @@ -585,16 +628,16 @@ class DAGScheduler( logInfo("waiting: " + waiting) logInfo("failed: " + failed) if (stage.shuffleDep != None) { - // We supply true to increment the generation number here in case this is a + // We supply true to increment the epoch number here in case this is a // recomputation of the map outputs. In that case, some nodes may have cached // locations with holes (from when we detected the error) and will need the - // generation incremented to refetch them. - // TODO: Only increment the generation number if this is not the first time + // epoch incremented to refetch them. + // TODO: Only increment the epoch number if this is not the first time // we registered these map outputs. 
mapOutputTracker.registerMapOutputs( stage.shuffleDep.get.shuffleId, stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray, - true) + changeEpoch = true) } clearCacheLocs() if (stage.outputLocs.count(_ == Nil) != 0) { @@ -628,7 +671,7 @@ class DAGScheduler( case FetchFailed(bmAddress, shuffleId, mapId, reduceId) => // Mark the stage that the reducer was in as unrunnable - val failedStage = idToStage(task.stageId) + val failedStage = stageIdToStage(task.stageId) running -= failedStage failed += failedStage // TODO: Cancel running tasks in the stage @@ -648,7 +691,7 @@ class DAGScheduler( lastFetchFailureTime = System.currentTimeMillis() // TODO: Use pluggable clock // TODO: mark the executor as failed only if there were lots of fetch failures on it if (bmAddress != null) { - handleExecutorLost(bmAddress.executorId, Some(task.generation)) + handleExecutorLost(bmAddress.executorId, Some(task.epoch)) } case ExceptionFailure(className, description, stackTrace, metrics) => @@ -656,7 +699,7 @@ class DAGScheduler( case other => // Unrecognized failure - abort all jobs depending on this stage - abortStage(idToStage(task.stageId), task + " failed: " + other) + abortStage(stageIdToStage(task.stageId), task + " failed: " + other) } } @@ -664,36 +707,36 @@ class DAGScheduler( * Responds to an executor being lost. This is called inside the event loop, so it assumes it can * modify the scheduler's internal state. Use executorLost() to post a loss event from outside. * - * Optionally the generation during which the failure was caught can be passed to avoid allowing + * Optionally the epoch during which the failure was caught can be passed to avoid allowing * stray fetch failures from possibly retriggering the detection of a node as lost. */ - private def handleExecutorLost(execId: String, maybeGeneration: Option[Long] = None) { - val currentGeneration = maybeGeneration.getOrElse(mapOutputTracker.getGeneration) - if (!failedGeneration.contains(execId) || failedGeneration(execId) < currentGeneration) { - failedGeneration(execId) = currentGeneration - logInfo("Executor lost: %s (generation %d)".format(execId, currentGeneration)) + private def handleExecutorLost(execId: String, maybeEpoch: Option[Long] = None) { + val currentEpoch = maybeEpoch.getOrElse(mapOutputTracker.getEpoch) + if (!failedEpoch.contains(execId) || failedEpoch(execId) < currentEpoch) { + failedEpoch(execId) = currentEpoch + logInfo("Executor lost: %s (epoch %d)".format(execId, currentEpoch)) blockManagerMaster.removeExecutor(execId) // TODO: This will be really slow if we keep accumulating shuffle map stages for ((shuffleId, stage) <- shuffleToMapStage) { stage.removeOutputsOnExecutor(execId) val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray - mapOutputTracker.registerMapOutputs(shuffleId, locs, true) + mapOutputTracker.registerMapOutputs(shuffleId, locs, changeEpoch = true) } if (shuffleToMapStage.isEmpty) { - mapOutputTracker.incrementGeneration() + mapOutputTracker.incrementEpoch() } clearCacheLocs() } else { logDebug("Additional executor lost message for " + execId + - "(generation " + currentGeneration + ")") + "(epoch " + currentEpoch + ")") } } - private def handleExecutorGained(execId: String, hostPort: String) { - // remove from failedGeneration(execId) ? 
- if (failedGeneration.contains(execId)) { - logInfo("Host gained which was in lost list earlier: " + hostPort) - failedGeneration -= execId + private def handleExecutorGained(execId: String, host: String) { + // remove from failedEpoch(execId) ? + if (failedEpoch.contains(execId)) { + logInfo("Host gained which was in lost list earlier: " + host) + failedEpoch -= execId } } @@ -708,8 +751,8 @@ class DAGScheduler( val job = resultStageToJob(resultStage) val error = new SparkException("Job failed: " + reason) job.listener.jobFailed(error) - sparkListeners.foreach(_.onJobEnd(SparkListenerJobEnd(job, JobFailed(error, Some(failedStage))))) - idToActiveJob -= resultStage.priority + listenerBus.post(SparkListenerJobEnd(job, JobFailed(error, Some(failedStage)))) + idToActiveJob -= resultStage.jobId activeJobs -= job resultStageToJob -= resultStage } @@ -733,7 +776,7 @@ class DAGScheduler( for (dep <- rdd.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => - val mapStage = getShuffleMapStage(shufDep, stage.priority) + val mapStage = getShuffleMapStage(shufDep, stage.jobId) if (!mapStage.isAvailable) { visitedStages += mapStage visit(mapStage.rdd) @@ -748,16 +791,23 @@ class DAGScheduler( visitedRdds.contains(target.rdd) } - private def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = { + /** + * Synchronized method that might be called from other threads. + * @param rdd whose partitions are to be looked at + * @param partition to lookup locality information for + * @return list of machines that are preferred by the partition + */ + private[spark] + def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = synchronized { // If the partition is cached, return the cache locations val cached = getCacheLocs(rdd)(partition) - if (cached != Nil) { + if (!cached.isEmpty) { return cached } // If the RDD has some placement preferences (as is the case for input RDDs), get those val rddPrefs = rdd.preferredLocations(rdd.partitions(partition)).toList - if (rddPrefs != Nil) { - return rddPrefs + if (!rddPrefs.isEmpty) { + return rddPrefs.map(host => TaskLocation(host)) } // If the RDD has narrow dependencies, pick the first partition of the first narrow dep // that has any placement preferences. Ideally we would choose based on transfer sizes, @@ -771,13 +821,13 @@ class DAGScheduler( } case _ => }) - return Nil + Nil } private def cleanup(cleanupTime: Long) { - var sizeBefore = idToStage.size - idToStage.clearOldValues(cleanupTime) - logInfo("idToStage " + sizeBefore + " --> " + idToStage.size) + var sizeBefore = stageIdToStage.size + stageIdToStage.clearOldValues(cleanupTime) + logInfo("stageIdToStage " + sizeBefore + " --> " + stageIdToStage.size) sizeBefore = shuffleToMapStage.size shuffleToMapStage.clearOldValues(cleanupTime) diff --git a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala index 506c87f65b..0d99670648 100644 --- a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala @@ -15,15 +15,16 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler import java.util.Properties -import spark.scheduler.cluster.TaskInfo +import org.apache.spark.scheduler.cluster.TaskInfo import scala.collection.mutable.Map -import spark._ -import spark.executor.TaskMetrics +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.executor.TaskMetrics /** * Types of events that can be handled by the DAGScheduler. The DAGScheduler uses an event queue @@ -43,6 +44,8 @@ private[spark] case class JobSubmitted( properties: Properties = null) extends DAGSchedulerEvent +private[spark] case class BeginEvent(task: Task[_], taskInfo: TaskInfo) extends DAGSchedulerEvent + private[spark] case class CompletionEvent( task: Task[_], reason: TaskEndReason, @@ -52,9 +55,7 @@ private[spark] case class CompletionEvent( taskMetrics: TaskMetrics) extends DAGSchedulerEvent -private[spark] case class ExecutorGained(execId: String, hostPort: String) extends DAGSchedulerEvent { - Utils.checkHostPort(hostPort, "Required hostport") -} +private[spark] case class ExecutorGained(execId: String, host: String) extends DAGSchedulerEvent private[spark] case class ExecutorLost(execId: String) extends DAGSchedulerEvent diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala new file mode 100644 index 0000000000..ce0dc9093d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala @@ -0,0 +1,30 @@ +package org.apache.spark.scheduler + +import com.codahale.metrics.{Gauge,MetricRegistry} + +import org.apache.spark.metrics.source.Source + +private[spark] class DAGSchedulerSource(val dagScheduler: DAGScheduler) extends Source { + val metricRegistry = new MetricRegistry() + val sourceName = "DAGScheduler" + + metricRegistry.register(MetricRegistry.name("stage", "failedStages", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.failed.size + }) + + metricRegistry.register(MetricRegistry.name("stage", "runningStages", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.running.size + }) + + metricRegistry.register(MetricRegistry.name("stage", "waitingStages", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.waiting.size + }) + + metricRegistry.register(MetricRegistry.name("job", "allJobs", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.nextJobId.get() + }) + + metricRegistry.register(MetricRegistry.name("job", "activeJobs", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.activeJobs.size + }) +} diff --git a/core/src/main/scala/spark/scheduler/InputFormatInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala index 65f8c3200e..370ccd183c 100644 --- a/core/src/main/scala/spark/scheduler/InputFormatInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler -import spark.Logging +import org.apache.spark.{Logging, SparkEnv} import scala.collection.immutable.Set import org.apache.hadoop.mapred.{FileInputFormat, JobConf} import org.apache.hadoop.security.UserGroupInformation @@ -26,7 +26,6 @@ import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.conf.Configuration import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.collection.JavaConversions._ -import spark.deploy.SparkHadoopUtil /** @@ -88,8 +87,9 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl // This method does not expect failures, since validate has already passed ... private def prefLocsFromMapreduceInputFormat(): Set[SplitInfo] = { + val env = SparkEnv.get val conf = new JobConf(configuration) - SparkHadoopUtil.addCredentials(conf); + env.hadoop.addCredentials(conf) FileInputFormat.setInputPaths(conf, path) val instance: org.apache.hadoop.mapreduce.InputFormat[_, _] = @@ -108,8 +108,9 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl // This method does not expect failures, since validate has already passed ... private def prefLocsFromMapredInputFormat(): Set[SplitInfo] = { + val env = SparkEnv.get val jobConf = new JobConf(configuration) - SparkHadoopUtil.addCredentials(jobConf); + env.hadoop.addCredentials(jobConf) FileInputFormat.setInputPaths(jobConf, path) val instance: org.apache.hadoop.mapred.InputFormat[_, _] = diff --git a/core/src/main/scala/spark/scheduler/JobListener.scala b/core/src/main/scala/org/apache/spark/scheduler/JobListener.scala index af108b8fec..50c2b9acd6 100644 --- a/core/src/main/scala/spark/scheduler/JobListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobListener.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler /** * Interface used to listen for job completion or failure events after submitting a job to the diff --git a/core/src/main/scala/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala index 85b5ddd4a8..c8b78bf00a 100644 --- a/core/src/main/scala/spark/scheduler/JobLogger.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala @@ -15,7 +15,7 @@ * limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import java.io.PrintWriter
import java.io.File
@@ -23,11 +23,14 @@ import java.io.FileNotFoundException
import java.text.SimpleDateFormat
import java.util.{Date, Properties}
import java.util.concurrent.LinkedBlockingQueue
+
import scala.collection.mutable.{Map, HashMap, ListBuffer}
import scala.io.Source
-import spark._
-import spark.executor.TaskMetrics
-import spark.scheduler.cluster.TaskInfo
+
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.scheduler.cluster.TaskInfo
// Used to record runtime information for each job, including RDD graph
// tasks' start/stop shuffle information and information from outside
@@ -53,29 +56,6 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
def getJobIDToStages = jobIDToStages
def getEventQueue = eventQueue
- new Thread("JobLogger") {
- setDaemon(true)
- override def run() {
- while (true) {
- val event = eventQueue.take
- logDebug("Got event of type " + event.getClass.getName)
- event match {
- case SparkListenerJobStart(job, properties) =>
- processJobStartEvent(job, properties)
- case SparkListenerStageSubmitted(stage, taskSize) =>
- processStageSubmittedEvent(stage, taskSize)
- case StageCompleted(stageInfo) =>
- processStageCompletedEvent(stageInfo)
- case SparkListenerJobEnd(job, result) =>
- processJobEndEvent(job, result)
- case SparkListenerTaskEnd(task, reason, taskInfo, taskMetrics) =>
- processTaskEndEvent(task, reason, taskInfo, taskMetrics)
- case _ =>
- }
- }
- }
- }.start()
-
// Create a folder for log files, the folder's name is the creation time of the jobLogger
protected def createLogDir() {
val dir = new File(logDir + "/" + logDirName + "/")
@@ -123,7 +103,7 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
stageIDToJobID.get(stageID).foreach(jobID => jobLogInfo(jobID, info, withTime))
protected def buildJobDep(jobID: Int, stage: Stage) {
- if (stage.priority == jobID) {
+ if (stage.jobId == jobID) {
jobIDToStages.get(jobID) match {
case Some(stageList) => stageList += stage
case None => val stageList = new ListBuffer[Stage]
@@ -199,12 +179,12 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
}else{
stageInfo = "STAGE_ID=" + stage.id + " RESULT_STAGE"
}
- if (stage.priority == jobID) {
+ if (stage.jobId == jobID) {
jobLogInfo(jobID, indentString(indent) + stageInfo, false)
recordRddInStageGraph(jobID, stage.rdd, indent)
stage.parents.foreach(recordStageDepGraph(jobID, _, indent + 2))
} else
- jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.priority, false)
+ jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.jobId, false)
}
// Record task metrics into job log files
@@ -236,37 +216,32 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
}
override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) {
- eventQueue.put(stageSubmitted)
- }
-
- protected def processStageSubmittedEvent(stage: Stage, taskSize: Int) {
- stageLogInfo(stage.id, "STAGE_ID=" + stage.id + " STATUS=SUBMITTED" + " TASK_SIZE=" + taskSize)
+ stageLogInfo(
+ stageSubmitted.stage.id,
+ "STAGE_ID=%d STATUS=SUBMITTED TASK_SIZE=%d".format(
+ stageSubmitted.stage.id, stageSubmitted.taskSize))
}
override def onStageCompleted(stageCompleted: StageCompleted) {
- eventQueue.put(stageCompleted)
- }
-
- protected def processStageCompletedEvent(stageInfo: StageInfo) {
- stageLogInfo(stageInfo.stage.id, "STAGE_ID=" +
- stageInfo.stage.id + " STATUS=COMPLETED")
+ stageLogInfo(
+ stageCompleted.stageInfo.stage.id,
+ "STAGE_ID=%d STATUS=COMPLETED".format(stageCompleted.stageInfo.stage.id))
}
-
- override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
- eventQueue.put(taskEnd)
- }
- protected def processTaskEndEvent(task: Task[_], reason: TaskEndReason,
- taskInfo: TaskInfo, taskMetrics: TaskMetrics) {
+ override def onTaskStart(taskStart: SparkListenerTaskStart) { }
+
+ override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
+ val task = taskEnd.task
+ val taskInfo = taskEnd.taskInfo
var taskStatus = ""
task match {
case resultTask: ResultTask[_, _] => taskStatus = "TASK_TYPE=RESULT_TASK"
case shuffleMapTask: ShuffleMapTask => taskStatus = "TASK_TYPE=SHUFFLE_MAP_TASK"
}
- reason match {
+ taskEnd.reason match {
case Success => taskStatus += " STATUS=SUCCESS"
- recordTaskMetrics(task.stageId, taskStatus, taskInfo, taskMetrics)
+ recordTaskMetrics(task.stageId, taskStatus, taskInfo, taskEnd.taskMetrics)
case Resubmitted =>
taskStatus += " STATUS=RESUBMITTED TID=" + taskInfo.taskId +
" STAGE_ID=" + task.stageId
@@ -285,39 +260,34 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
}
override def onJobEnd(jobEnd: SparkListenerJobEnd) {
- eventQueue.put(jobEnd)
- }
-
- protected def processJobEndEvent(job: ActiveJob, reason: JobResult) {
- var info = "JOB_ID=" + job.runId
- reason match {
+ val job = jobEnd.job
+ var info = "JOB_ID=" + job.jobId
+ jobEnd.jobResult match {
case JobSucceeded => info += " STATUS=SUCCESS"
case JobFailed(exception, _) =>
info += " STATUS=FAILED REASON="
exception.getMessage.split("\\s+").foreach(info += _ + "_")
case _ =>
}
- jobLogInfo(job.runId, info.substring(0, info.length - 1).toUpperCase)
- closeLogWriter(job.runId)
+ jobLogInfo(job.jobId, info.substring(0, info.length - 1).toUpperCase)
+ closeLogWriter(job.jobId)
}
protected def recordJobProperties(jobID: Int, properties: Properties) {
if(properties != null) {
- val annotation = properties.getProperty("spark.job.annotation", "")
- jobLogInfo(jobID, annotation, false)
+ val description = properties.getProperty(SparkContext.SPARK_JOB_DESCRIPTION, "")
+ jobLogInfo(jobID, description, false)
}
}
override def onJobStart(jobStart: SparkListenerJobStart) {
- eventQueue.put(jobStart)
- }
-
- protected def processJobStartEvent(job: ActiveJob, properties: Properties) {
- createLogWriter(job.runId)
- recordJobProperties(job.runId, properties)
- buildJobDep(job.runId, job.finalStage)
- recordStageDep(job.runId)
- recordStageDepGraph(job.runId, job.finalStage)
- jobLogInfo(job.runId, "JOB_ID=" + job.runId + " STATUS=STARTED")
+ val job = jobStart.job
+ val properties = jobStart.properties
+ createLogWriter(job.jobId)
+ recordJobProperties(job.jobId, properties)
+ buildJobDep(job.jobId, job.finalStage)
+ recordStageDep(job.jobId)
+ recordStageDepGraph(job.jobId, job.finalStage)
+ jobLogInfo(job.jobId, "JOB_ID=" + job.jobId + " STATUS=STARTED")
}
}
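
With its private event queue and logging thread gone, the JobLogger above is now an ordinary SparkListener whose callbacks are invoked synchronously by the listener bus. For reference, a minimal sketch of a listener written against the same callbacks this diff introduces; the class name and timing logic are hypothetical, and the code assumes it lives under org.apache.spark.scheduler, since the event classes expose package-private types such as Stage.

    import scala.collection.mutable.HashMap

    import org.apache.spark.Logging

    class StageTimingListener extends SparkListener with Logging {
      // Stage id -> wall-clock time at which SparkListenerStageSubmitted arrived.
      private val submitTimes = new HashMap[Int, Long]

      override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) {
        submitTimes(stageSubmitted.stage.id) = System.currentTimeMillis
      }

      override def onStageCompleted(stageCompleted: StageCompleted) {
        val id = stageCompleted.stageInfo.stage.id
        for (start <- submitTimes.remove(id)) {
          logInfo("Stage %d: %d ms from submission to completion".format(
            id, System.currentTimeMillis - start))
        }
      }
    }
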
diff --git a/core/src/main/scala/spark/scheduler/JobResult.scala b/core/src/main/scala/org/apache/spark/scheduler/JobResult.scala index a61b335152..c381348a8d 100644 --- a/core/src/main/scala/spark/scheduler/JobResult.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobResult.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler /** * A result of a job in the DAGScheduler. diff --git a/core/src/main/scala/spark/scheduler/JobWaiter.scala b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala index 69cd161c1f..200d881799 100644 --- a/core/src/main/scala/spark/scheduler/JobWaiter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler import scala.collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index 2f6a68ee85..1c61687f28 100644 --- a/core/src/main/scala/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.storage.BlockManagerId +import org.apache.spark.storage.BlockManagerId import java.io.{ObjectOutput, ObjectInput, Externalizable} /** diff --git a/core/src/main/scala/spark/scheduler/ResultTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala index 361b1e6b91..2b007cbe82 100644 --- a/core/src/main/scala/spark/scheduler/ResultTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala @@ -15,13 +15,16 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark._ import java.io._ -import util.{MetadataCleaner, TimeStampedHashMap} import java.util.zip.{GZIPInputStream, GZIPOutputStream} +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.RDDCheckpointData +import org.apache.spark.util.{MetadataCleaner, TimeStampedHashMap} + private[spark] object ResultTask { // A simple map between the stage id to the serialized byte array of a task. 
@@ -51,15 +54,13 @@ private[spark] object ResultTask { } def deserializeInfo(stageId: Int, bytes: Array[Byte]): (RDD[_], (TaskContext, Iterator[_]) => _) = { - synchronized { - val loader = Thread.currentThread.getContextClassLoader - val in = new GZIPInputStream(new ByteArrayInputStream(bytes)) - val ser = SparkEnv.get.closureSerializer.newInstance - val objIn = ser.deserializeStream(in) - val rdd = objIn.readObject().asInstanceOf[RDD[_]] - val func = objIn.readObject().asInstanceOf[(TaskContext, Iterator[_]) => _] - return (rdd, func) - } + val loader = Thread.currentThread.getContextClassLoader + val in = new GZIPInputStream(new ByteArrayInputStream(bytes)) + val ser = SparkEnv.get.closureSerializer.newInstance + val objIn = ser.deserializeStream(in) + val rdd = objIn.readObject().asInstanceOf[RDD[_]] + val func = objIn.readObject().asInstanceOf[(TaskContext, Iterator[_]) => _] + return (rdd, func) } def clearCache() { @@ -75,7 +76,7 @@ private[spark] class ResultTask[T, U]( var rdd: RDD[T], var func: (TaskContext, Iterator[T]) => U, var partition: Int, - @transient locs: Seq[String], + @transient locs: Seq[TaskLocation], val outputId: Int) extends Task[U](stageId) with Externalizable { @@ -87,11 +88,8 @@ private[spark] class ResultTask[T, U]( rdd.partitions(partition) } - private val preferredLocs: Seq[String] = if (locs == null) Nil else locs.toSet.toSeq - - { - // DEBUG code - preferredLocs.foreach (hostPort => Utils.checkHost(Utils.parseHostPort(hostPort)._1, "preferredLocs : " + preferredLocs)) + @transient private val preferredLocs: Seq[TaskLocation] = { + if (locs == null) Nil else locs.toSet.toSeq } override def run(attemptId: Long): U = { @@ -104,7 +102,7 @@ private[spark] class ResultTask[T, U]( } } - override def preferredLocations: Seq[String] = preferredLocs + override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString = "ResultTask(" + stageId + ", " + partition + ")" @@ -118,6 +116,7 @@ private[spark] class ResultTask[T, U]( out.write(bytes) out.writeInt(partition) out.writeInt(outputId) + out.writeLong(epoch) out.writeObject(split) } } @@ -132,6 +131,7 @@ private[spark] class ResultTask[T, U]( func = func_.asInstanceOf[(TaskContext, Iterator[T]) => U] partition = in.readInt() val outputId = in.readInt() + epoch = in.readLong() split = in.readObject().asInstanceOf[Partition] } } diff --git a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index 1c25605f75..764775fede 100644 --- a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -15,24 +15,19 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler import java.io._ -import java.util.{HashMap => JHashMap} import java.util.zip.{GZIPInputStream, GZIPOutputStream} -import scala.collection.mutable.{ArrayBuffer, HashMap} -import scala.collection.JavaConversions._ +import scala.collection.mutable.HashMap -import it.unimi.dsi.fastutil.io.FastBufferedOutputStream - -import com.ning.compress.lzf.LZFInputStream -import com.ning.compress.lzf.LZFOutputStream - -import spark._ -import spark.executor.ShuffleWriteMetrics -import spark.storage._ -import spark.util.{TimeStampedHashMap, MetadataCleaner} +import org.apache.spark._ +import org.apache.spark.executor.ShuffleWriteMetrics +import org.apache.spark.storage._ +import org.apache.spark.util.{TimeStampedHashMap, MetadataCleaner} +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.RDDCheckpointData private[spark] object ShuffleMapTask { @@ -95,25 +90,18 @@ private[spark] class ShuffleMapTask( var rdd: RDD[_], var dep: ShuffleDependency[_,_], var partition: Int, - @transient private var locs: Seq[String]) + @transient private var locs: Seq[TaskLocation]) extends Task[MapStatus](stageId) with Externalizable with Logging { protected def this() = this(0, null, null, 0, null) - @transient private val preferredLocs: Seq[String] = if (locs == null) Nil else locs.toSet.toSeq - - { - // DEBUG code - preferredLocs.foreach (hostPort => Utils.checkHost(Utils.parseHostPort(hostPort)._1, "preferredLocs : " + preferredLocs)) + @transient private val preferredLocs: Seq[TaskLocation] = { + if (locs == null) Nil else locs.toSet.toSeq } - var split = if (rdd == null) { - null - } else { - rdd.partitions(partition) - } + var split = if (rdd == null) null else rdd.partitions(partition) override def writeExternal(out: ObjectOutput) { RDDCheckpointData.synchronized { @@ -123,7 +111,7 @@ private[spark] class ShuffleMapTask( out.writeInt(bytes.length) out.write(bytes) out.writeInt(partition) - out.writeLong(generation) + out.writeLong(epoch) out.writeObject(split) } } @@ -137,7 +125,7 @@ private[spark] class ShuffleMapTask( rdd = rdd_ dep = dep_ partition = in.readInt() - generation = in.readLong() + epoch = in.readLong() split = in.readObject().asInstanceOf[Partition] } @@ -159,7 +147,7 @@ private[spark] class ShuffleMapTask( // Write the map output to its associated buckets. for (elem <- rdd.iterator(split, taskContext)) { - val pair = elem.asInstanceOf[(Any, Any)] + val pair = elem.asInstanceOf[Product2[Any, Any]] val bucketId = dep.partitioner.getPartition(pair._1) buckets.writers(bucketId).write(pair) } @@ -197,7 +185,7 @@ private[spark] class ShuffleMapTask( } } - override def preferredLocations: Seq[String] = preferredLocs + override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString = "ShuffleMapTask(%d, %d)".format(stageId, partition) } diff --git a/core/src/main/scala/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala index 4fb1c5d42d..c3cf4b8907 100644 --- a/core/src/main/scala/spark/scheduler/SparkListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala @@ -15,27 +15,30 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler import java.util.Properties -import spark.scheduler.cluster.TaskInfo -import spark.util.Distribution -import spark.{Logging, SparkContext, TaskEndReason, Utils} -import spark.executor.TaskMetrics +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.util.{Utils, Distribution} +import org.apache.spark.{Logging, SparkContext, TaskEndReason} +import org.apache.spark.executor.TaskMetrics sealed trait SparkListenerEvents -case class SparkListenerStageSubmitted(stage: Stage, taskSize: Int) extends SparkListenerEvents +case class SparkListenerStageSubmitted(stage: Stage, taskSize: Int, properties: Properties) + extends SparkListenerEvents case class StageCompleted(val stageInfo: StageInfo) extends SparkListenerEvents +case class SparkListenerTaskStart(task: Task[_], taskInfo: TaskInfo) extends SparkListenerEvents + case class SparkListenerTaskEnd(task: Task[_], reason: TaskEndReason, taskInfo: TaskInfo, taskMetrics: TaskMetrics) extends SparkListenerEvents -case class SparkListenerJobStart(job: ActiveJob, properties: Properties = null) +case class SparkListenerJobStart(job: ActiveJob, properties: Properties = null) extends SparkListenerEvents -case class SparkListenerJobEnd(job: ActiveJob, jobResult: JobResult) +case class SparkListenerJobEnd(job: ActiveJob, jobResult: JobResult) extends SparkListenerEvents trait SparkListener { @@ -43,12 +46,17 @@ trait SparkListener { * Called when a stage is completed, with information on the completed stage */ def onStageCompleted(stageCompleted: StageCompleted) { } - + /** * Called when a stage is submitted */ def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) { } - + + /** + * Called when a task starts + */ + def onTaskStart(taskEnd: SparkListenerTaskStart) { } + /** * Called when a task ends */ @@ -58,12 +66,12 @@ trait SparkListener { * Called when a job starts */ def onJobStart(jobStart: SparkListenerJobStart) { } - + /** * Called when a job ends */ def onJobEnd(jobEnd: SparkListenerJobEnd) { } - + } /** @@ -71,7 +79,7 @@ trait SparkListener { */ class StatsReportListener extends SparkListener with Logging { override def onStageCompleted(stageCompleted: StageCompleted) { - import spark.scheduler.StatsReportListener._ + import org.apache.spark.scheduler.StatsReportListener._ implicit val sc = stageCompleted this.logInfo("Finished stage: " + stageCompleted.stageInfo) showMillisDistribution("task runtime:", (info, _) => Some(info.duration)) @@ -145,7 +153,7 @@ object StatsReportListener extends Logging { } def showBytesDistribution(heading: String, dist: Distribution) { - showDistribution(heading, dist, (d => Utils.memoryBytesToString(d.toLong)): Double => String) + showDistribution(heading, dist, (d => Utils.bytesToString(d.toLong)): Double => String) } def showMillisDistribution(heading: String, dOpt: Option[Distribution]) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala new file mode 100644 index 0000000000..a65e1ecd6d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala @@ -0,0 +1,74 @@ +/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler
+
+import java.util.concurrent.LinkedBlockingQueue
+
+import scala.collection.mutable.{ArrayBuffer, SynchronizedBuffer}
+
+import org.apache.spark.Logging
+
+/** Asynchronously passes SparkListenerEvents to registered SparkListeners. */
+private[spark] class SparkListenerBus() extends Logging {
+ private val sparkListeners = new ArrayBuffer[SparkListener]() with SynchronizedBuffer[SparkListener]
+
+ /* Cap the capacity of the SparkListenerEvent queue so we get an explicit error (rather than
+ * an OOM exception) if it's perpetually being added to more quickly than it's being drained. */
+ private val EVENT_QUEUE_CAPACITY = 10000
+ private val eventQueue = new LinkedBlockingQueue[SparkListenerEvents](EVENT_QUEUE_CAPACITY)
+ private var queueFullErrorMessageLogged = false
+
+ new Thread("SparkListenerBus") {
+ setDaemon(true)
+ override def run() {
+ while (true) {
+ val event = eventQueue.take
+ event match {
+ case stageSubmitted: SparkListenerStageSubmitted =>
+ sparkListeners.foreach(_.onStageSubmitted(stageSubmitted))
+ case stageCompleted: StageCompleted =>
+ sparkListeners.foreach(_.onStageCompleted(stageCompleted))
+ case jobStart: SparkListenerJobStart =>
+ sparkListeners.foreach(_.onJobStart(jobStart))
+ case jobEnd: SparkListenerJobEnd =>
+ sparkListeners.foreach(_.onJobEnd(jobEnd))
+ case taskStart: SparkListenerTaskStart =>
+ sparkListeners.foreach(_.onTaskStart(taskStart))
+ case taskEnd: SparkListenerTaskEnd =>
+ sparkListeners.foreach(_.onTaskEnd(taskEnd))
+ case _ =>
+ }
+ }
+ }
+ }.start()
+
+ def addListener(listener: SparkListener) {
+ sparkListeners += listener
+ }
+
+ def post(event: SparkListenerEvents) {
+ val eventAdded = eventQueue.offer(event)
+ if (!eventAdded && !queueFullErrorMessageLogged) {
+ logError("Dropping SparkListenerEvent because no remaining room in event queue. " +
+ "This likely means one of the SparkListeners is too slow and cannot keep up with the " +
+ "rate at which tasks are being started by the scheduler.")
+ queueFullErrorMessageLogged = true
+ }
+ }
+}
+
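
The bus above decouples event production from delivery: post() is a non-blocking offer() into the bounded queue, the daemon thread drains the queue, and each event is fanned out to every registered listener in order. A minimal usage sketch, assuming code inside the org.apache.spark package (the class is private[spark]); the `job` value stands in for a real ActiveJob constructed by the DAGScheduler:

    val bus = new SparkListenerBus()
    bus.addListener(new StatsReportListener)  // listeners can be registered at any time
    bus.post(SparkListenerJobStart(job))      // returns immediately; delivery is asynchronous
    // Once the 10000-event queue fills up, further events are dropped and a single
    // error is logged, rather than blocking the poster.

Note the trade-off this buys: a slow listener can no longer stall the scheduler's event loop, at the cost of dropped events under sustained overload.
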
diff --git a/core/src/main/scala/spark/scheduler/SplitInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala index 4e3661ec5d..5b40a3eb29 100644 --- a/core/src/main/scala/spark/scheduler/SplitInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler import collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/spark/scheduler/Stage.scala b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala index 5428daeb94..aa293dc6b3 100644 --- a/core/src/main/scala/spark/scheduler/Stage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala @@ -15,12 +15,11 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import java.net.URI - -import spark._ -import spark.storage.BlockManagerId +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.storage.BlockManagerId /** * A stage is a set of independent tasks all computing the same function that need to run as part @@ -33,15 +32,16 @@ import spark.storage.BlockManagerId * initiated a job (e.g. count(), save(), etc). For shuffle map stages, we also track the nodes * that each output partition is on. * - * Each Stage also has a priority, which is (by default) based on the job it was submitted in. - * This allows Stages from earlier jobs to be computed first or recovered faster on failure. + * Each Stage also has a jobId, identifying the job that first submitted the stage. When FIFO + * scheduling is used, this allows Stages from earlier jobs to be computed first or recovered + * faster on failure. */ private[spark] class Stage( val id: Int, val rdd: RDD[_], val shuffleDep: Option[ShuffleDependency[_,_]], // Output shuffle if stage is a map stage val parents: List[Stage], - val priority: Int, + val jobId: Int, callSite: Option[String]) extends Logging { diff --git a/core/src/main/scala/spark/scheduler/StageInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala index c4026f995a..72cb1c9ce8 100644 --- a/core/src/main/scala/spark/scheduler/StageInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.scheduler.cluster.TaskInfo +import org.apache.spark.scheduler.cluster.TaskInfo import scala.collection._ -import spark.executor.TaskMetrics +import org.apache.spark.executor.TaskMetrics case class StageInfo( val stage: Stage, diff --git a/core/src/main/scala/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala index 50768d43e0..598d91752a 100644 --- a/core/src/main/scala/spark/scheduler/Task.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala @@ -15,24 +15,24 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.serializer.SerializerInstance +import org.apache.spark.serializer.SerializerInstance import java.io.{DataInputStream, DataOutputStream} import java.nio.ByteBuffer import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream -import spark.util.ByteBufferInputStream +import org.apache.spark.util.ByteBufferInputStream import scala.collection.mutable.HashMap -import spark.executor.TaskMetrics +import org.apache.spark.executor.TaskMetrics /** * A task to execute on a worker node. 
*/ private[spark] abstract class Task[T](val stageId: Int) extends Serializable { def run(attemptId: Long): T - def preferredLocations: Seq[String] = Nil + def preferredLocations: Seq[TaskLocation] = Nil - var generation: Long = -1 // Map output tracker generation. Will be set by TaskScheduler. + var epoch: Long = -1 // Map output tracker epoch. Will be set by TaskScheduler. var metrics: Option[TaskMetrics] = None diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala new file mode 100644 index 0000000000..67c9a6760b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler + +/** + * A location where a task should run. This can either be a host or a (host, executorID) pair. + * In the latter case, we will prefer to launch the task on that executorID, but our next level + * of preference will be executors on the same host if this is not possible. + */ +private[spark] +class TaskLocation private (val host: String, val executorId: Option[String]) extends Serializable { + override def toString: String = "TaskLocation(" + host + ", " + executorId + ")" +} + +private[spark] object TaskLocation { + def apply(host: String, executorId: String) = new TaskLocation(host, Some(executorId)) + + def apply(host: String) = new TaskLocation(host, None) +} diff --git a/core/src/main/scala/spark/scheduler/TaskResult.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala index dc0621ea7b..5c7e5bb977 100644 --- a/core/src/main/scala/spark/scheduler/TaskResult.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala @@ -15,22 +15,33 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler import java.io._ import scala.collection.mutable.Map -import spark.executor.TaskMetrics +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.{SparkEnv} +import java.nio.ByteBuffer +import org.apache.spark.util.Utils // Task result. Also contains updates to accumulator variables. 
// TODO: Use of distributed cache to return result is a hack to get around // what seems to be a bug with messages over 60KB in libprocess; fix it private[spark] -class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics) extends Externalizable { +class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics) + extends Externalizable +{ def this() = this(null.asInstanceOf[T], null, null) override def writeExternal(out: ObjectOutput) { - out.writeObject(value) + + val objectSer = SparkEnv.get.serializer.newInstance() + val bb = objectSer.serialize(value) + + out.writeInt(bb.remaining()) + Utils.writeByteBuffer(bb, out) + out.writeInt(accumUpdates.size) for ((key, value) <- accumUpdates) { out.writeLong(key) @@ -40,7 +51,14 @@ class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: } override def readExternal(in: ObjectInput) { - value = in.readObject().asInstanceOf[T] + + val objectSer = SparkEnv.get.serializer.newInstance() + + val blen = in.readInt() + val byteVal = new Array[Byte](blen) + in.readFully(byteVal) + value = objectSer.deserialize(ByteBuffer.wrap(byteVal)) + val numUpdates = in.readInt if (numUpdates == 0) { accumUpdates = null diff --git a/core/src/main/scala/spark/scheduler/TaskScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala index 5188308006..63be8ba3f5 100644 --- a/core/src/main/scala/spark/scheduler/TaskScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala @@ -15,8 +15,10 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler +import org.apache.spark.scheduler.cluster.Pool +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode /** * Low-level task scheduler interface, implemented by both ClusterScheduler and LocalScheduler. * These schedulers get sets of tasks submitted to them from the DAGScheduler for each stage, @@ -25,6 +27,11 @@ package spark.scheduler * the TaskSchedulerListener interface. */ private[spark] trait TaskScheduler { + + def rootPool: Pool + + def schedulingMode: SchedulingMode + def start(): Unit // Invoked after system has successfully initialized (typically in spark context). diff --git a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerListener.scala index 245e7ccb52..83be051c1a 100644 --- a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerListener.scala @@ -15,24 +15,27 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.scheduler.cluster.TaskInfo +import org.apache.spark.scheduler.cluster.TaskInfo import scala.collection.mutable.Map -import spark.TaskEndReason -import spark.executor.TaskMetrics +import org.apache.spark.TaskEndReason +import org.apache.spark.executor.TaskMetrics /** * Interface for getting events back from the TaskScheduler. */ private[spark] trait TaskSchedulerListener { + // A task has started. + def taskStarted(task: Task[_], taskInfo: TaskInfo) + // A task has finished or failed. def taskEnded(task: Task[_], reason: TaskEndReason, result: Any, accumUpdates: Map[Long, Any], taskInfo: TaskInfo, taskMetrics: TaskMetrics): Unit // A node was added to the cluster. 
-  def executorGained(execId: String, hostPort: String): Unit
+  def executorGained(execId: String, host: String): Unit

   // A node was lost from the cluster.
   def executorLost(execId: String): Unit
diff --git a/core/src/main/scala/spark/scheduler/TaskSet.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
index dc3550dd0b..c3ad325156 100644
--- a/core/src/main/scala/spark/scheduler/TaskSet.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */

-package spark.scheduler
+package org.apache.spark.scheduler

 import java.util.Properties

diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
new file mode 100644
index 0000000000..3196ab5022
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler.cluster
+
+import java.lang.{Boolean => JBoolean}
+
+import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.HashMap
+import scala.collection.mutable.HashSet
+
+import org.apache.spark._
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.scheduler._
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
+import java.nio.ByteBuffer
+import java.util.concurrent.atomic.AtomicLong
+import java.util.{TimerTask, Timer}
+
+/**
+ * The main TaskScheduler implementation, for running tasks on a cluster. Clients should first
+ * call initialize() and start(), then submit task sets through the submitTasks method.
+ *
+ * This class can work with multiple types of clusters by acting through a SchedulerBackend.
+ * It handles common logic, like determining a scheduling order across jobs, waking up to launch
+ * speculative tasks, etc.
+ *
+ * THREADING: SchedulerBackends and task-submitting clients can call this class from multiple
+ * threads, so it needs locks in public API methods to maintain its state. In addition, some
+ * SchedulerBackends synchronize on themselves when they want to send events here, and then
+ * acquire a lock on us, so we need to make sure that we don't try to lock the backend while
+ * we are holding a lock on ourselves.
+ */ +private[spark] class ClusterScheduler(val sc: SparkContext) + extends TaskScheduler + with Logging +{ + // How often to check for speculative tasks + val SPECULATION_INTERVAL = System.getProperty("spark.speculation.interval", "100").toLong + + // Threshold above which we warn user initial TaskSet may be starved + val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong + + val activeTaskSets = new HashMap[String, TaskSetManager] + + val taskIdToTaskSetId = new HashMap[Long, String] + val taskIdToExecutorId = new HashMap[Long, String] + val taskSetTaskIds = new HashMap[String, HashSet[Long]] + + @volatile private var hasReceivedTask = false + @volatile private var hasLaunchedTask = false + private val starvationTimer = new Timer(true) + + // Incrementing Mesos task IDs + val nextTaskId = new AtomicLong(0) + + // Which executor IDs we have executors on + val activeExecutorIds = new HashSet[String] + + // The set of executors we have on each host; this is used to compute hostsAlive, which + // in turn is used to decide when we can attain data locality on a given host + private val executorsByHost = new HashMap[String, HashSet[String]] + + private val executorIdToHost = new HashMap[String, String] + + // JAR server, if any JARs were added by the user to the SparkContext + var jarServer: HttpServer = null + + // URIs of JARs to pass to executor + var jarUris: String = "" + + // Listener object to pass upcalls into + var listener: TaskSchedulerListener = null + + var backend: SchedulerBackend = null + + val mapOutputTracker = SparkEnv.get.mapOutputTracker + + var schedulableBuilder: SchedulableBuilder = null + var rootPool: Pool = null + // default scheduler is FIFO + val schedulingMode: SchedulingMode = SchedulingMode.withName( + System.getProperty("spark.cluster.schedulingmode", "FIFO")) + + override def setListener(listener: TaskSchedulerListener) { + this.listener = listener + } + + def initialize(context: SchedulerBackend) { + backend = context + // temporarily set rootPool name to empty + rootPool = new Pool("", schedulingMode, 0, 0) + schedulableBuilder = { + schedulingMode match { + case SchedulingMode.FIFO => + new FIFOSchedulableBuilder(rootPool) + case SchedulingMode.FAIR => + new FairSchedulableBuilder(rootPool) + } + } + schedulableBuilder.buildPools() + } + + def newTaskId(): Long = nextTaskId.getAndIncrement() + + override def start() { + backend.start() + + if (System.getProperty("spark.speculation", "false").toBoolean) { + new Thread("ClusterScheduler speculation check") { + setDaemon(true) + + override def run() { + logInfo("Starting speculative execution thread") + while (true) { + try { + Thread.sleep(SPECULATION_INTERVAL) + } catch { + case e: InterruptedException => {} + } + checkSpeculatableTasks() + } + } + }.start() + } + } + + override def submitTasks(taskSet: TaskSet) { + val tasks = taskSet.tasks + logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks") + this.synchronized { + val manager = new ClusterTaskSetManager(this, taskSet) + activeTaskSets(taskSet.id) = manager + schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties) + taskSetTaskIds(taskSet.id) = new HashSet[Long]() + + if (!hasReceivedTask) { + starvationTimer.scheduleAtFixedRate(new TimerTask() { + override def run() { + if (!hasLaunchedTask) { + logWarning("Initial job has not accepted any resources; " + + "check your cluster UI to ensure that workers are registered " + + "and have sufficient memory") + } else { + this.cancel() + 
} + } + }, STARVATION_TIMEOUT, STARVATION_TIMEOUT) + } + hasReceivedTask = true + } + backend.reviveOffers() + } + + def taskSetFinished(manager: TaskSetManager) { + this.synchronized { + activeTaskSets -= manager.taskSet.id + manager.parent.removeSchedulable(manager) + logInfo("Remove TaskSet %s from pool %s".format(manager.taskSet.id, manager.parent.name)) + taskIdToTaskSetId --= taskSetTaskIds(manager.taskSet.id) + taskIdToExecutorId --= taskSetTaskIds(manager.taskSet.id) + taskSetTaskIds.remove(manager.taskSet.id) + } + } + + /** + * Called by cluster manager to offer resources on slaves. We respond by asking our active task + * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so + * that tasks are balanced across the cluster. + */ + def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = synchronized { + SparkEnv.set(sc.env) + + // Mark each slave as alive and remember its hostname + for (o <- offers) { + executorIdToHost(o.executorId) = o.host + if (!executorsByHost.contains(o.host)) { + executorsByHost(o.host) = new HashSet[String]() + executorGained(o.executorId, o.host) + } + } + + // Build a list of tasks to assign to each worker + val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores)) + val availableCpus = offers.map(o => o.cores).toArray + val sortedTaskSets = rootPool.getSortedTaskSetQueue() + for (taskSet <- sortedTaskSets) { + logDebug("parentName: %s, name: %s, runningTasks: %s".format( + taskSet.parent.name, taskSet.name, taskSet.runningTasks)) + } + + // Take each TaskSet in our scheduling order, and then offer it each node in increasing order + // of locality levels so that it gets a chance to launch local tasks on all of them. + var launchedTask = false + for (taskSet <- sortedTaskSets; maxLocality <- TaskLocality.values) { + do { + launchedTask = false + for (i <- 0 until offers.size) { + val execId = offers(i).executorId + val host = offers(i).host + for (task <- taskSet.resourceOffer(execId, host, availableCpus(i), maxLocality)) { + tasks(i) += task + val tid = task.taskId + taskIdToTaskSetId(tid) = taskSet.taskSet.id + taskSetTaskIds(taskSet.taskSet.id) += tid + taskIdToExecutorId(tid) = execId + activeExecutorIds += execId + executorsByHost(host) += execId + availableCpus(i) -= 1 + launchedTask = true + } + } + } while (launchedTask) + } + + if (tasks.size > 0) { + hasLaunchedTask = true + } + return tasks + } + + def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { + var taskSetToUpdate: Option[TaskSetManager] = None + var failedExecutor: Option[String] = None + var taskFailed = false + synchronized { + try { + if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) { + // We lost this entire executor, so remember that it's gone + val execId = taskIdToExecutorId(tid) + if (activeExecutorIds.contains(execId)) { + removeExecutor(execId) + failedExecutor = Some(execId) + } + } + taskIdToTaskSetId.get(tid) match { + case Some(taskSetId) => + if (activeTaskSets.contains(taskSetId)) { + taskSetToUpdate = Some(activeTaskSets(taskSetId)) + } + if (TaskState.isFinished(state)) { + taskIdToTaskSetId.remove(tid) + if (taskSetTaskIds.contains(taskSetId)) { + taskSetTaskIds(taskSetId) -= tid + } + taskIdToExecutorId.remove(tid) + } + if (state == TaskState.FAILED) { + taskFailed = true + } + case None => + logInfo("Ignoring update from TID " + tid + " because its task set is gone") + } + } catch { + case e: Exception => logError("Exception in statusUpdate", e) + } + } + 
// Update the task set and DAGScheduler without holding a lock on this, since that can deadlock
+    if (taskSetToUpdate != None) {
+      taskSetToUpdate.get.statusUpdate(tid, state, serializedData)
+    }
+    if (failedExecutor != None) {
+      listener.executorLost(failedExecutor.get)
+      backend.reviveOffers()
+    }
+    if (taskFailed) {
+      // Also revive offers if a task had failed for some reason other than host lost
+      backend.reviveOffers()
+    }
+  }
+
+  def error(message: String) {
+    synchronized {
+      if (activeTaskSets.size > 0) {
+        // Have each task set throw a SparkException with the error
+        for ((taskSetId, manager) <- activeTaskSets) {
+          try {
+            manager.error(message)
+          } catch {
+            case e: Exception => logError("Exception in error callback", e)
+          }
+        }
+      } else {
+        // No task sets are active but we still got an error. Just exit since this
+        // must mean the error is during registration.
+        // It might be good to do something smarter here in the future.
+        logError("Exiting due to error from cluster scheduler: " + message)
+        System.exit(1)
+      }
+    }
+  }
+
+  override def stop() {
+    if (backend != null) {
+      backend.stop()
+    }
+    if (jarServer != null) {
+      jarServer.stop()
+    }
+
+    // Sleep for an arbitrary 5 seconds to ensure that messages are sent out.
+    // TODO: Do something better!
+    Thread.sleep(5000L)
+  }
+
+  override def defaultParallelism() = backend.defaultParallelism()
+
+
+  // Check for speculatable tasks in all our active jobs.
+  def checkSpeculatableTasks() {
+    var shouldRevive = false
+    synchronized {
+      shouldRevive = rootPool.checkSpeculatableTasks()
+    }
+    if (shouldRevive) {
+      backend.reviveOffers()
+    }
+  }
+
+  // Check for pending tasks in all our active jobs.
+  def hasPendingTasks: Boolean = {
+    synchronized {
+      rootPool.hasPendingTasks()
+    }
+  }
+
+  def executorLost(executorId: String, reason: ExecutorLossReason) {
+    var failedExecutor: Option[String] = None
+
+    synchronized {
+      if (activeExecutorIds.contains(executorId)) {
+        val hostPort = executorIdToHost(executorId)
+        logError("Lost executor %s on %s: %s".format(executorId, hostPort, reason))
+        removeExecutor(executorId)
+        failedExecutor = Some(executorId)
+      } else {
+        // We may get multiple executorLost() calls with different loss reasons. For example, one
+        // may be triggered by a dropped connection from the slave while another may be a report
+        // of executor termination from Mesos. We produce log messages for both so we eventually
+        // report the termination reason.
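+        // Only the first report for an executor performs cleanup; later reports are only logged.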
+ logError("Lost an executor " + executorId + " (already removed): " + reason) + } + } + // Call listener.executorLost without holding the lock on this to prevent deadlock + if (failedExecutor != None) { + listener.executorLost(failedExecutor.get) + backend.reviveOffers() + } + } + + /** Remove an executor from all our data structures and mark it as lost */ + private def removeExecutor(executorId: String) { + activeExecutorIds -= executorId + val host = executorIdToHost(executorId) + val execs = executorsByHost.getOrElse(host, new HashSet) + execs -= executorId + if (execs.isEmpty) { + executorsByHost -= host + } + executorIdToHost -= executorId + rootPool.executorLost(executorId, host) + } + + def executorGained(execId: String, host: String) { + listener.executorGained(execId, host) + } + + def getExecutorsAliveOnHost(host: String): Option[Set[String]] = synchronized { + executorsByHost.get(host).map(_.toSet) + } + + def hasExecutorsAliveOnHost(host: String): Boolean = synchronized { + executorsByHost.contains(host) + } + + def isExecutorAlive(execId: String): Boolean = synchronized { + activeExecutorIds.contains(execId) + } + + // By default, rack is unknown + def getRackForHost(value: String): Option[String] = None +} + + +object ClusterScheduler { + /** + * Used to balance containers across hosts. + * + * Accepts a map of hosts to resource offers for that host, and returns a prioritized list of + * resource offers representing the order in which the offers should be used. The resource + * offers are ordered such that we'll allocate one container on each host before allocating a + * second container on any host, and so on, in order to reduce the damage if a host fails. + * + * For example, given <h1, [o1, o2, o3]>, <h2, [o4]>, <h1, [o5, o6]>, returns + * [o1, o5, o4, 02, o6, o3] + */ + def prioritizeContainers[K, T] (map: HashMap[K, ArrayBuffer[T]]): List[T] = { + val _keyList = new ArrayBuffer[K](map.size) + _keyList ++= map.keys + + // order keyList based on population of value in map + val keyList = _keyList.sortWith( + (left, right) => map(left).size > map(right).size + ) + + val retval = new ArrayBuffer[T](keyList.size * 2) + var index = 0 + var found = true + + while (found) { + found = false + for (key <- keyList) { + val containerList: ArrayBuffer[T] = map.get(key).getOrElse(null) + assert(containerList != null) + // Get the index'th entry for this host - if present + if (index < containerList.size){ + retval += containerList.apply(index) + found = true + } + } + index += 1 + } + + retval.toList + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala new file mode 100644 index 0000000000..1b31c8c57e --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala @@ -0,0 +1,712 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler.cluster
+
+import java.nio.ByteBuffer
+import java.util.{Arrays, NoSuchElementException}
+
+import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.HashMap
+import scala.collection.mutable.HashSet
+import scala.math.max
+import scala.math.min
+
+import org.apache.spark.{FetchFailed, Logging, Resubmitted, SparkEnv, Success, TaskEndReason, TaskState}
+import org.apache.spark.{ExceptionFailure, SparkException, TaskResultTooBigFailure}
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.scheduler._
+import org.apache.spark.util.{SystemClock, Clock}
+
+
+/**
+ * Schedules the tasks within a single TaskSet in the ClusterScheduler. This class keeps track of
+ * the status of each task, retries tasks if they fail (up to a limited number of times), and
+ * handles locality-aware scheduling for this TaskSet via delay scheduling. The main interfaces
+ * to it are resourceOffer, which asks the TaskSet whether it wants to run a task on one node,
+ * and statusUpdate, which tells it that one of its tasks changed state (e.g. finished).
+ *
+ * THREADING: This class is designed to only be called from code with a lock on the
+ * ClusterScheduler (e.g. its event handlers). It should not be called from other threads.
+ */
+private[spark] class ClusterTaskSetManager(
+    sched: ClusterScheduler,
+    val taskSet: TaskSet,
+    clock: Clock = SystemClock)
+  extends TaskSetManager
+  with Logging
+{
+  // CPUs to request per task
+  val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toInt
+
+  // Maximum times a task is allowed to fail before failing the job
+  val MAX_TASK_FAILURES = System.getProperty("spark.task.maxFailures", "4").toInt
+
+  // Quantile of tasks at which to start speculation
+  val SPECULATION_QUANTILE = System.getProperty("spark.speculation.quantile", "0.75").toDouble
+  val SPECULATION_MULTIPLIER = System.getProperty("spark.speculation.multiplier", "1.5").toDouble
+
+  // Serializer for closures and tasks.
+  val env = SparkEnv.get
+  val ser = env.closureSerializer.newInstance()
+
+  val tasks = taskSet.tasks
+  val numTasks = tasks.length
+  val copiesRunning = new Array[Int](numTasks)
+  val finished = new Array[Boolean](numTasks)
+  val numFailures = new Array[Int](numTasks)
+  val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil)
+  var tasksFinished = 0
+
+  var weight = 1
+  var minShare = 0
+  var runningTasks = 0
+  var priority = taskSet.priority
+  var stageId = taskSet.stageId
+  var name = "TaskSet_" + taskSet.stageId.toString
+  var parent: Schedulable = null
+
+  // Set of pending tasks for each executor. These collections are actually
+  // treated as stacks, in which new tasks are added to the end of the
+  // ArrayBuffer and removed from the end. This makes it faster to detect
+  // tasks that repeatedly fail because whenever a task fails, it is put
+  // back at the head of the stack.
They are also only cleaned up lazily; + // when a task is launched, it remains in all the pending lists except + // the one that it was launched from, but gets removed from them later. + private val pendingTasksForExecutor = new HashMap[String, ArrayBuffer[Int]] + + // Set of pending tasks for each host. Similar to pendingTasksForExecutor, + // but at host level. + private val pendingTasksForHost = new HashMap[String, ArrayBuffer[Int]] + + // Set of pending tasks for each rack -- similar to the above. + private val pendingTasksForRack = new HashMap[String, ArrayBuffer[Int]] + + // Set containing pending tasks with no locality preferences. + val pendingTasksWithNoPrefs = new ArrayBuffer[Int] + + // Set containing all pending tasks (also used as a stack, as above). + val allPendingTasks = new ArrayBuffer[Int] + + // Tasks that can be speculated. Since these will be a small fraction of total + // tasks, we'll just hold them in a HashSet. + val speculatableTasks = new HashSet[Int] + + // Task index, start and finish time for each task attempt (indexed by task ID) + val taskInfos = new HashMap[Long, TaskInfo] + + // Did the TaskSet fail? + var failed = false + var causeOfFailure = "" + + // How frequently to reprint duplicate exceptions in full, in milliseconds + val EXCEPTION_PRINT_INTERVAL = + System.getProperty("spark.logging.exceptionPrintInterval", "10000").toLong + + // Map of recent exceptions (identified by string representation and top stack frame) to + // duplicate count (how many times the same exception has appeared) and time the full exception + // was printed. This should ideally be an LRU map that can drop old exceptions automatically. + val recentExceptions = HashMap[String, (Int, Long)]() + + // Figure out the current map output tracker epoch and set it on all tasks + val epoch = sched.mapOutputTracker.getEpoch + logDebug("Epoch for " + taskSet + ": " + epoch) + for (t <- tasks) { + t.epoch = epoch + } + + // Add all our tasks to the pending lists. We do this in reverse order + // of task index so that tasks with low indices get launched first. + for (i <- (0 until numTasks).reverse) { + addPendingTask(i) + } + + // Figure out which locality levels we have in our TaskSet, so we can do delay scheduling + val myLocalityLevels = computeValidLocalityLevels() + val localityWaits = myLocalityLevels.map(getLocalityWait) // Time to wait at each level + + // Delay scheduling variables: we keep track of our current locality level and the time we + // last launched a task at that level, and move up a level when localityWaits[curLevel] expires. + // We then move down if we manage to launch a "more local" task. + var currentLocalityIndex = 0 // Index of our current locality level in validLocalityLevels + var lastLaunchTime = clock.getTime() // Time we last launched a task at this level + + /** + * Add a task to all the pending-task lists that it should be on. If readding is set, we are + * re-adding the task so only include it in each list if it's not already there. 
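+   * (Re-adding is done from executorLost, which rebuilds the pending lists for a lost executor.)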
+   */
+  private def addPendingTask(index: Int, readding: Boolean = false) {
+    // Utility method that adds `index` to a list only if readding=false or it's not already there
+    def addTo(list: ArrayBuffer[Int]) {
+      if (!readding || !list.contains(index)) {
+        list += index
+      }
+    }
+
+    var hadAliveLocations = false
+    for (loc <- tasks(index).preferredLocations) {
+      for (execId <- loc.executorId) {
+        if (sched.isExecutorAlive(execId)) {
+          addTo(pendingTasksForExecutor.getOrElseUpdate(execId, new ArrayBuffer))
+          hadAliveLocations = true
+        }
+      }
+      if (sched.hasExecutorsAliveOnHost(loc.host)) {
+        addTo(pendingTasksForHost.getOrElseUpdate(loc.host, new ArrayBuffer))
+        for (rack <- sched.getRackForHost(loc.host)) {
+          addTo(pendingTasksForRack.getOrElseUpdate(rack, new ArrayBuffer))
+        }
+        hadAliveLocations = true
+      }
+    }
+
+    if (!hadAliveLocations) {
+      // Even though the task might've had preferred locations, all of those hosts or executors
+      // are dead; put it in the no-prefs list so we can schedule it elsewhere right away.
+      addTo(pendingTasksWithNoPrefs)
+    }
+
+    if (!readding) {
+      allPendingTasks += index  // No point scanning this whole list to find the old task there
+    }
+  }
+
+  /**
+   * Return the pending tasks list for a given executor ID, or an empty list if
+   * there is no map entry for that executor
+   */
+  private def getPendingTasksForExecutor(executorId: String): ArrayBuffer[Int] = {
+    pendingTasksForExecutor.getOrElse(executorId, ArrayBuffer())
+  }
+
+  /**
+   * Return the pending tasks list for a given host, or an empty list if
+   * there is no map entry for that host
+   */
+  private def getPendingTasksForHost(host: String): ArrayBuffer[Int] = {
+    pendingTasksForHost.getOrElse(host, ArrayBuffer())
+  }
+
+  /**
+   * Return the pending rack-local task list for a given rack, or an empty list if
+   * there is no map entry for that rack
+   */
+  private def getPendingTasksForRack(rack: String): ArrayBuffer[Int] = {
+    pendingTasksForRack.getOrElse(rack, ArrayBuffer())
+  }
+
+  /**
+   * Dequeue a pending task from the given list and return its index.
+   * Return None if the list is empty.
+   * This method also cleans up any tasks in the list that have already
+   * been launched, since we want that to happen lazily.
+   */
+  private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = {
+    while (!list.isEmpty) {
+      val index = list.last
+      list.trimEnd(1)
+      if (copiesRunning(index) == 0 && !finished(index)) {
+        return Some(index)
+      }
+    }
+    return None
+  }
+
+  /** Check whether a task is currently running an attempt on a given host */
+  private def hasAttemptOnHost(taskIndex: Int, host: String): Boolean = {
+    taskAttempts(taskIndex).exists(_.host == host)
+  }
+
+  /**
+   * Return a speculative task for a given executor if any are available. The task should not have
+   * an attempt running on this host, in case the host is slow. In addition, the task should meet
+   * the given locality constraint.
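+   * Returns the chosen task's index together with the locality level of the match, if any.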
+   */
+  private def findSpeculativeTask(execId: String, host: String, locality: TaskLocality.Value)
+    : Option[(Int, TaskLocality.Value)] =
+  {
+    speculatableTasks.retain(index => !finished(index)) // Remove finished tasks from set
+
+    if (!speculatableTasks.isEmpty) {
+      // Check for process-local or preference-less tasks; note that tasks can be process-local
+      // on multiple nodes when we replicate cached blocks, as in Spark Streaming
+      for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+        val prefs = tasks(index).preferredLocations
+        val executors = prefs.flatMap(_.executorId)
+        if (prefs.size == 0 || executors.contains(execId)) {
+          speculatableTasks -= index
+          return Some((index, TaskLocality.PROCESS_LOCAL))
+        }
+      }
+
+      // Check for node-local tasks
+      if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) {
+        for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+          val locations = tasks(index).preferredLocations.map(_.host)
+          if (locations.contains(host)) {
+            speculatableTasks -= index
+            return Some((index, TaskLocality.NODE_LOCAL))
+          }
+        }
+      }
+
+      // Check for rack-local tasks (flatMap unwraps the Options so racks is a list of rack
+      // names; a plain map would yield Options and the contains check could never match)
+      if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
+        for (rack <- sched.getRackForHost(host)) {
+          for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+            val racks = tasks(index).preferredLocations.map(_.host).flatMap(sched.getRackForHost)
+            if (racks.contains(rack)) {
+              speculatableTasks -= index
+              return Some((index, TaskLocality.RACK_LOCAL))
+            }
+          }
+        }
+      }
+
+      // Check for non-local tasks
+      if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) {
+        for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+          speculatableTasks -= index
+          return Some((index, TaskLocality.ANY))
+        }
+      }
+    }
+
+    return None
+  }
+
+  /**
+   * Dequeue a pending task for a given node and return its index and locality level.
+   * Only search for tasks matching the given locality constraint.
+   */
+  private def findTask(execId: String, host: String, locality: TaskLocality.Value)
+    : Option[(Int, TaskLocality.Value)] =
+  {
+    for (index <- findTaskFromList(getPendingTasksForExecutor(execId))) {
+      return Some((index, TaskLocality.PROCESS_LOCAL))
+    }
+
+    if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) {
+      for (index <- findTaskFromList(getPendingTasksForHost(host))) {
+        return Some((index, TaskLocality.NODE_LOCAL))
+      }
+    }
+
+    if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
+      for {
+        rack <- sched.getRackForHost(host)
+        index <- findTaskFromList(getPendingTasksForRack(rack))
+      } {
+        return Some((index, TaskLocality.RACK_LOCAL))
+      }
+    }
+
+    // Look for no-pref tasks after rack-local tasks since they can run anywhere.
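+    // No-pref tasks are allowed under every locality constraint, and are reported as
+    // PROCESS_LOCAL, which resets the delay-scheduling timer to the most local level.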
+ for (index <- findTaskFromList(pendingTasksWithNoPrefs)) { + return Some((index, TaskLocality.PROCESS_LOCAL)) + } + + if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { + for (index <- findTaskFromList(allPendingTasks)) { + return Some((index, TaskLocality.ANY)) + } + } + + // Finally, if all else has failed, find a speculative task + return findSpeculativeTask(execId, host, locality) + } + + /** + * Respond to an offer of a single slave from the scheduler by finding a task + */ + override def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) + : Option[TaskDescription] = + { + if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) { + val curTime = clock.getTime() + + var allowedLocality = getAllowedLocalityLevel(curTime) + if (allowedLocality > maxLocality) { + allowedLocality = maxLocality // We're not allowed to search for farther-away tasks + } + + findTask(execId, host, allowedLocality) match { + case Some((index, taskLocality)) => { + // Found a task; do some bookkeeping and return a task description + val task = tasks(index) + val taskId = sched.newTaskId() + // Figure out whether this should count as a preferred launch + logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format( + taskSet.id, index, taskId, execId, host, taskLocality)) + // Do various bookkeeping + copiesRunning(index) += 1 + val info = new TaskInfo(taskId, index, curTime, execId, host, taskLocality) + taskInfos(taskId) = info + taskAttempts(index) = info :: taskAttempts(index) + // Update our locality level for delay scheduling + currentLocalityIndex = getLocalityIndex(taskLocality) + lastLaunchTime = curTime + // Serialize and return the task + val startTime = clock.getTime() + // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here + // we assume the task can be serialized without exceptions. + val serializedTask = Task.serializeWithDependencies( + task, sched.sc.addedFiles, sched.sc.addedJars, ser) + val timeTaken = clock.getTime() - startTime + increaseRunningTasks(1) + logInfo("Serialized task %s:%d as %d bytes in %d ms".format( + taskSet.id, index, serializedTask.limit, timeTaken)) + val taskName = "task %s:%d".format(taskSet.id, index) + if (taskAttempts(index).size == 1) + taskStarted(task,info) + return Some(new TaskDescription(taskId, execId, taskName, index, serializedTask)) + } + case _ => + } + } + return None + } + + /** + * Get the level we can launch tasks according to delay scheduling, based on current wait time. + */ + private def getAllowedLocalityLevel(curTime: Long): TaskLocality.TaskLocality = { + while (curTime - lastLaunchTime >= localityWaits(currentLocalityIndex) && + currentLocalityIndex < myLocalityLevels.length - 1) + { + // Jump to the next locality level, and remove our waiting time for the current one since + // we don't want to count it again on the next one + lastLaunchTime += localityWaits(currentLocalityIndex) + currentLocalityIndex += 1 + } + myLocalityLevels(currentLocalityIndex) + } + + /** + * Find the index in myLocalityLevels for a given locality. This is also designed to work with + * localities that are not in myLocalityLevels (in case we somehow get those) by returning the + * next-biggest level we have. Uses the fact that the last value in myLocalityLevels is ANY. 
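+   * For example, if myLocalityLevels is [PROCESS_LOCAL, NODE_LOCAL, ANY], then asking for
+   * RACK_LOCAL returns the index of ANY.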
+ */ + def getLocalityIndex(locality: TaskLocality.TaskLocality): Int = { + var index = 0 + while (locality > myLocalityLevels(index)) { + index += 1 + } + index + } + + /** Called by cluster scheduler when one of our tasks changes state */ + override def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { + SparkEnv.set(env) + state match { + case TaskState.FINISHED => + taskFinished(tid, state, serializedData) + case TaskState.LOST => + taskLost(tid, state, serializedData) + case TaskState.FAILED => + taskLost(tid, state, serializedData) + case TaskState.KILLED => + taskLost(tid, state, serializedData) + case _ => + } + } + + def taskStarted(task: Task[_], info: TaskInfo) { + sched.listener.taskStarted(task, info) + } + + def taskFinished(tid: Long, state: TaskState, serializedData: ByteBuffer) { + val info = taskInfos(tid) + if (info.failed) { + // We might get two task-lost messages for the same task in coarse-grained Mesos mode, + // or even from Mesos itself when acks get delayed. + return + } + val index = info.index + info.markSuccessful() + decreaseRunningTasks(1) + if (!finished(index)) { + tasksFinished += 1 + logInfo("Finished TID %s in %d ms on %s (progress: %d/%d)".format( + tid, info.duration, info.host, tasksFinished, numTasks)) + // Deserialize task result and pass it to the scheduler + try { + val result = ser.deserialize[TaskResult[_]](serializedData) + result.metrics.resultSize = serializedData.limit() + sched.listener.taskEnded( + tasks(index), Success, result.value, result.accumUpdates, info, result.metrics) + } catch { + case cnf: ClassNotFoundException => + val loader = Thread.currentThread().getContextClassLoader + throw new SparkException("ClassNotFound with classloader: " + loader, cnf) + case ex => throw ex + } + // Mark finished and stop if we've finished all the tasks + finished(index) = true + if (tasksFinished == numTasks) { + sched.taskSetFinished(this) + } + } else { + logInfo("Ignoring task-finished event for TID " + tid + + " because task " + index + " is already finished") + } + } + + def taskLost(tid: Long, state: TaskState, serializedData: ByteBuffer) { + val info = taskInfos(tid) + if (info.failed) { + // We might get two task-lost messages for the same task in coarse-grained Mesos mode, + // or even from Mesos itself when acks get delayed. + return + } + val index = info.index + info.markFailed() + decreaseRunningTasks(1) + if (!finished(index)) { + logInfo("Lost TID %s (task %s:%d)".format(tid, taskSet.id, index)) + copiesRunning(index) -= 1 + // Check if the problem is a map output fetch failure. In that case, this + // task will never succeed on any node, so tell the scheduler about it. 
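+      // (The DAGScheduler responds to a FetchFailed by resubmitting the stage that produced
+      // the missing map output.)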
+      if (serializedData != null && serializedData.limit() > 0) {
+        val reason = ser.deserialize[TaskEndReason](serializedData, getClass.getClassLoader)
+        reason match {
+          case fetchFailed: FetchFailed =>
+            logInfo("Loss was due to fetch failure from " + fetchFailed.bmAddress)
+            sched.listener.taskEnded(tasks(index), fetchFailed, null, null, info, null)
+            finished(index) = true
+            tasksFinished += 1
+            sched.taskSetFinished(this)
+            decreaseRunningTasks(runningTasks)
+            return
+
+          case taskResultTooBig: TaskResultTooBigFailure =>
+            logInfo("Loss was due to task %s result exceeding Akka frame size; aborting job".format(
+              tid))
+            abort("Task %s result exceeded Akka frame size".format(tid))
+            return
+
+          case ef: ExceptionFailure =>
+            sched.listener.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null))
+            val key = ef.description
+            val now = clock.getTime()
+            val (printFull, dupCount) = {
+              if (recentExceptions.contains(key)) {
+                val (dupCount, printTime) = recentExceptions(key)
+                if (now - printTime > EXCEPTION_PRINT_INTERVAL) {
+                  recentExceptions(key) = (0, now)
+                  (true, 0)
+                } else {
+                  recentExceptions(key) = (dupCount + 1, printTime)
+                  (false, dupCount + 1)
+                }
+              } else {
+                recentExceptions(key) = (0, now)
+                (true, 0)
+              }
+            }
+            if (printFull) {
+              val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString))
+              logInfo("Loss was due to %s\n%s\n%s".format(
+                ef.className, ef.description, locs.mkString("\n")))
+            } else {
+              logInfo("Loss was due to %s [duplicate %d]".format(ef.description, dupCount))
+            }
+
+          case _ => {}
+        }
+      }
+      // On non-fetch failures, re-enqueue the task as pending for a max number of retries
+      addPendingTask(index)
+      // Count failed attempts only on FAILED and LOST state (not on KILLED)
+      if (state == TaskState.FAILED || state == TaskState.LOST) {
+        numFailures(index) += 1
+        if (numFailures(index) > MAX_TASK_FAILURES) {
+          logError("Task %s:%d failed more than %d times; aborting job".format(
+            taskSet.id, index, MAX_TASK_FAILURES))
+          abort("Task %s:%d failed more than %d times".format(taskSet.id, index, MAX_TASK_FAILURES))
+        }
+      }
+    } else {
+      logInfo("Ignoring task-lost event for TID " + tid +
+        " because task " + index + " is already finished")
+    }
+  }
+
+  override def error(message: String) {
+    // Save the error message
+    abort("Error: " + message)
+  }
+
+  def abort(message: String) {
+    failed = true
+    causeOfFailure = message
+    // TODO: Kill running tasks if we were not terminated due to a Mesos error
+    sched.listener.taskSetFailed(taskSet, message)
+    decreaseRunningTasks(runningTasks)
+    sched.taskSetFinished(this)
+  }
+
+  override def increaseRunningTasks(taskNum: Int) {
+    runningTasks += taskNum
+    if (parent != null) {
+      parent.increaseRunningTasks(taskNum)
+    }
+  }
+
+  override def decreaseRunningTasks(taskNum: Int) {
+    runningTasks -= taskNum
+    if (parent != null) {
+      parent.decreaseRunningTasks(taskNum)
+    }
+  }
+
+  override def getSchedulableByName(name: String): Schedulable = {
+    return null
+  }
+
+  override def addSchedulable(schedulable: Schedulable) {}
+
+  override def removeSchedulable(schedulable: Schedulable) {}
+
+  override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = {
+    // A leaf TaskSetManager is its own one-element queue
+    val sortedTaskSetQueue = ArrayBuffer[TaskSetManager](this)
+    return sortedTaskSetQueue
+  }
+
+  /** Called by cluster scheduler when an executor is lost so we can re-enqueue our tasks */
+  override def executorLost(execId: String, host: String) {
+    logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id)
+
+    //
Re-enqueue pending tasks for this host based on the status of the cluster -- for example, a + // task that used to have locations on only this host might now go to the no-prefs list. Note + // that it's okay if we add a task to the same queue twice (if it had multiple preferred + // locations), because findTaskFromList will skip already-running tasks. + for (index <- getPendingTasksForExecutor(execId)) { + addPendingTask(index, readding=true) + } + for (index <- getPendingTasksForHost(host)) { + addPendingTask(index, readding=true) + } + + // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage + if (tasks(0).isInstanceOf[ShuffleMapTask]) { + for ((tid, info) <- taskInfos if info.executorId == execId) { + val index = taskInfos(tid).index + if (finished(index)) { + finished(index) = false + copiesRunning(index) -= 1 + tasksFinished -= 1 + addPendingTask(index) + // Tell the DAGScheduler that this task was resubmitted so that it doesn't think our + // stage finishes when a total of tasks.size tasks finish. + sched.listener.taskEnded(tasks(index), Resubmitted, null, null, info, null) + } + } + } + // Also re-enqueue any tasks that were running on the node + for ((tid, info) <- taskInfos if info.running && info.executorId == execId) { + taskLost(tid, TaskState.KILLED, null) + } + } + + /** + * Check for tasks to be speculated and return true if there are any. This is called periodically + * by the ClusterScheduler. + * + * TODO: To make this scale to large jobs, we need to maintain a list of running tasks, so that + * we don't scan the whole task set. It might also help to make this sorted by launch time. + */ + override def checkSpeculatableTasks(): Boolean = { + // Can't speculate if we only have one task, or if all tasks have finished. + if (numTasks == 1 || tasksFinished == numTasks) { + return false + } + var foundTasks = false + val minFinishedForSpeculation = (SPECULATION_QUANTILE * numTasks).floor.toInt + logDebug("Checking for speculative tasks: minFinished = " + minFinishedForSpeculation) + if (tasksFinished >= minFinishedForSpeculation) { + val time = clock.getTime() + val durations = taskInfos.values.filter(_.successful).map(_.duration).toArray + Arrays.sort(durations) + val medianDuration = durations(min((0.5 * numTasks).round.toInt, durations.size - 1)) + val threshold = max(SPECULATION_MULTIPLIER * medianDuration, 100) + // TODO: Threshold should also look at standard deviation of task durations and have a lower + // bound based on that. 
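+      // Illustration: with SPECULATION_QUANTILE = 0.75 and 100 tasks, speculation checks begin
+      // once 75 tasks have finished; if the median successful-task duration is 2000 ms and
+      // SPECULATION_MULTIPLIER = 1.5, any lone running copy older than max(3000, 100) = 3000 ms
+      // becomes a speculation candidate.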
+ logDebug("Task length threshold for speculation: " + threshold) + for ((tid, info) <- taskInfos) { + val index = info.index + if (!finished(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold && + !speculatableTasks.contains(index)) { + logInfo( + "Marking task %s:%d (on %s) as speculatable because it ran more than %.0f ms".format( + taskSet.id, index, info.host, threshold)) + speculatableTasks += index + foundTasks = true + } + } + } + return foundTasks + } + + override def hasPendingTasks(): Boolean = { + numTasks > 0 && tasksFinished < numTasks + } + + private def getLocalityWait(level: TaskLocality.TaskLocality): Long = { + val defaultWait = System.getProperty("spark.locality.wait", "3000") + level match { + case TaskLocality.PROCESS_LOCAL => + System.getProperty("spark.locality.wait.process", defaultWait).toLong + case TaskLocality.NODE_LOCAL => + System.getProperty("spark.locality.wait.node", defaultWait).toLong + case TaskLocality.RACK_LOCAL => + System.getProperty("spark.locality.wait.rack", defaultWait).toLong + case TaskLocality.ANY => + 0L + } + } + + /** + * Compute the locality levels used in this TaskSet. Assumes that all tasks have already been + * added to queues using addPendingTask. + */ + private def computeValidLocalityLevels(): Array[TaskLocality.TaskLocality] = { + import TaskLocality.{PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY} + val levels = new ArrayBuffer[TaskLocality.TaskLocality] + if (!pendingTasksForExecutor.isEmpty && getLocalityWait(PROCESS_LOCAL) != 0) { + levels += PROCESS_LOCAL + } + if (!pendingTasksForHost.isEmpty && getLocalityWait(NODE_LOCAL) != 0) { + levels += NODE_LOCAL + } + if (!pendingTasksForRack.isEmpty && getLocalityWait(RACK_LOCAL) != 0) { + levels += RACK_LOCAL + } + levels += ANY + logDebug("Valid locality levels for " + taskSet + ": " + levels.mkString(", ")) + levels.toArray + } +} diff --git a/core/src/main/scala/spark/scheduler/cluster/ExecutorLossReason.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala index 8825f2dd24..5077b2b48b 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ExecutorLossReason.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import spark.executor.ExecutorExitCode +import org.apache.spark.executor.ExecutorExitCode /** * Represents an explanation for a executor or whole slave failing or exiting. diff --git a/core/src/main/scala/spark/scheduler/cluster/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/Pool.scala index 83708f07e1..35b32600da 100644 --- a/core/src/main/scala/spark/scheduler/cluster/Pool.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/Pool.scala @@ -15,13 +15,13 @@ * limitations under the License. 
*/ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap -import spark.Logging -import spark.scheduler.cluster.SchedulingMode.SchedulingMode +import org.apache.spark.Logging +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode /** * An Schedulable entity that represent collection of Pools or TaskSetManagers diff --git a/core/src/main/scala/spark/scheduler/cluster/Schedulable.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/Schedulable.scala index f557b142c4..f4726450ec 100644 --- a/core/src/main/scala/spark/scheduler/cluster/Schedulable.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/Schedulable.scala @@ -15,16 +15,20 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import scala.collection.mutable.ArrayBuffer +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode +import scala.collection.mutable.ArrayBuffer /** * An interface for schedulable entities. * there are two type of Schedulable entities(Pools and TaskSetManagers) */ private[spark] trait Schedulable { var parent: Schedulable + // child queues + def schedulableQueue: ArrayBuffer[Schedulable] + def schedulingMode: SchedulingMode def weight: Int def minShare: Int def runningTasks: Int diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulableBuilder.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulableBuilder.scala index 95554023c0..d04eeb6b98 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SchedulableBuilder.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulableBuilder.scala @@ -15,21 +15,16 @@ * limitations under the License. 
*/ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import java.io.{File, FileInputStream, FileOutputStream} +import java.io.{File, FileInputStream, FileOutputStream, FileNotFoundException} +import java.util.Properties -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet -import scala.util.control.Breaks._ -import scala.xml._ +import scala.xml.XML -import spark.Logging -import spark.scheduler.cluster.SchedulingMode.SchedulingMode +import org.apache.spark.Logging +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode -import java.util.Properties /** * An interface to build Schedulable tree @@ -41,10 +36,11 @@ private[spark] trait SchedulableBuilder { def addTaskSetManager(manager: Schedulable, properties: Properties) } -private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) extends SchedulableBuilder with Logging { +private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) + extends SchedulableBuilder with Logging { override def buildPools() { - //nothing + // nothing } override def addTaskSetManager(manager: Schedulable, properties: Properties) { @@ -52,9 +48,10 @@ private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) extends Schedula } } -private[spark] class FairSchedulableBuilder(val rootPool: Pool) extends SchedulableBuilder with Logging { +private[spark] class FairSchedulableBuilder(val rootPool: Pool) + extends SchedulableBuilder with Logging { - val schedulerAllocFile = System.getProperty("spark.fairscheduler.allocation.file","unspecified") + val schedulerAllocFile = System.getProperty("spark.fairscheduler.allocation.file") val FAIR_SCHEDULER_PROPERTIES = "spark.scheduler.cluster.fair.pool" val DEFAULT_POOL_NAME = "default" val MINIMUM_SHARES_PROPERTY = "minShare" @@ -67,47 +64,53 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool) extends Schedula val DEFAULT_WEIGHT = 1 override def buildPools() { + if (schedulerAllocFile != null) { val file = new File(schedulerAllocFile) - if (file.exists()) { - val xml = XML.loadFile(file) - for (poolNode <- (xml \\ POOLS_PROPERTY)) { - - val poolName = (poolNode \ POOL_NAME_PROPERTY).text - var schedulingMode = DEFAULT_SCHEDULING_MODE - var minShare = DEFAULT_MINIMUM_SHARE - var weight = DEFAULT_WEIGHT - - val xmlSchedulingMode = (poolNode \ SCHEDULING_MODE_PROPERTY).text - if (xmlSchedulingMode != "") { - try { - schedulingMode = SchedulingMode.withName(xmlSchedulingMode) - } catch { - case e: Exception => logInfo("Error xml schedulingMode, using default schedulingMode") + if (file.exists()) { + val xml = XML.loadFile(file) + for (poolNode <- (xml \\ POOLS_PROPERTY)) { + + val poolName = (poolNode \ POOL_NAME_PROPERTY).text + var schedulingMode = DEFAULT_SCHEDULING_MODE + var minShare = DEFAULT_MINIMUM_SHARE + var weight = DEFAULT_WEIGHT + + val xmlSchedulingMode = (poolNode \ SCHEDULING_MODE_PROPERTY).text + if (xmlSchedulingMode != "") { + try { + schedulingMode = SchedulingMode.withName(xmlSchedulingMode) + } catch { + case e: Exception => logInfo("Error xml schedulingMode, using default schedulingMode") + } } - } - val xmlMinShare = (poolNode \ MINIMUM_SHARES_PROPERTY).text - if (xmlMinShare != "") { - minShare = xmlMinShare.toInt - } + val xmlMinShare = (poolNode \ MINIMUM_SHARES_PROPERTY).text + if (xmlMinShare != "") { + minShare = xmlMinShare.toInt + } - val xmlWeight = (poolNode \ WEIGHT_PROPERTY).text - if (xmlWeight != "") { - 
weight = xmlWeight.toInt - } + val xmlWeight = (poolNode \ WEIGHT_PROPERTY).text + if (xmlWeight != "") { + weight = xmlWeight.toInt + } - val pool = new Pool(poolName, schedulingMode, minShare, weight) - rootPool.addSchedulable(pool) - logInfo("Create new pool with name:%s,schedulingMode:%s,minShare:%d,weight:%d".format( - poolName, schedulingMode, minShare, weight)) + val pool = new Pool(poolName, schedulingMode, minShare, weight) + rootPool.addSchedulable(pool) + logInfo("Created pool %s, schedulingMode: %s, minShare: %d, weight: %d".format( + poolName, schedulingMode, minShare, weight)) + } + } else { + throw new java.io.FileNotFoundException( + "Fair scheduler allocation file not found: " + schedulerAllocFile) } } - //finally create "default" pool + // finally create "default" pool if (rootPool.getSchedulableByName(DEFAULT_POOL_NAME) == null) { - val pool = new Pool(DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) + val pool = new Pool(DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, + DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) rootPool.addSchedulable(pool) - logInfo("Create default pool with name:%s,schedulingMode:%s,minShare:%d,weight:%d".format( + logInfo("Created default pool %s, schedulingMode: %s, minShare: %d, weight: %d".format( DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)) } } @@ -119,10 +122,12 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool) extends Schedula poolName = properties.getProperty(FAIR_SCHEDULER_PROPERTIES, DEFAULT_POOL_NAME) parentPool = rootPool.getSchedulableByName(poolName) if (parentPool == null) { - //we will create a new pool that user has configured in app instead of being defined in xml file - parentPool = new Pool(poolName,DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) + // we will create a new pool that user has configured in app + // instead of being defined in xml file + parentPool = new Pool(poolName, DEFAULT_SCHEDULING_MODE, + DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) rootPool.addSchedulable(parentPool) - logInfo("Create pool with name:%s,schedulingMode:%s,minShare:%d,weight:%d".format( + logInfo("Created pool %s, schedulingMode: %s, minShare: %d, weight: %d".format( poolName, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)) } } diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala index 4431744ec3..d57eb3276f 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import spark.{SparkContext, Utils} +import org.apache.spark.{SparkContext} /** * A backend interface for cluster scheduling systems that allows plugging in different ones under diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulingAlgorithm.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingAlgorithm.scala index 69e0ac2a6b..cbeed4731a 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SchedulingAlgorithm.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingAlgorithm.scala @@ -15,7 +15,7 @@ * limitations under the License. 
 */
 
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
 
 /**
  * An interface for sort algorithm
diff --git a/core/src/main/scala/spark/SoftReferenceCache.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingMode.scala
index f41a379582..34811389a0 100644
--- a/core/src/main/scala/spark/SoftReferenceCache.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingMode.scala
@@ -15,21 +15,15 @@
  * limitations under the License.
  */
 
-package spark
-
-import com.google.common.collect.MapMaker
+package org.apache.spark.scheduler.cluster
 
 /**
- * An implementation of Cache that uses soft references.
+ * "FAIR" and "FIFO" determine which policy is used
+ * to order tasks amongst a Schedulable's sub-queues.
+ * "NONE" is used when a Schedulable has no sub-queues.
  */
-private[spark] class SoftReferenceCache extends Cache {
-  val map = new MapMaker().softValues().makeMap[Any, Any]()
-
-  override def get(datasetId: Any, partition: Int): Any =
-    map.get((datasetId, partition))
+object SchedulingMode extends Enumeration("FAIR", "FIFO", "NONE") {
 
-  override def put(datasetId: Any, partition: Int, value: Any): CachePutResponse = {
-    map.put((datasetId, partition), value)
-    return CachePutSuccess(0)
-  }
+  type SchedulingMode = Value
+  val FAIR, FIFO, NONE = Value
 }
diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
index 55d6c0a47e..d003bf1bba 100644
--- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@ -15,12 +15,13 @@
  * limitations under the License.
  */
 
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
 
-import spark.{Utils, Logging, SparkContext}
-import spark.deploy.client.{Client, ClientListener}
-import spark.deploy.{Command, ApplicationDescription}
+import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.deploy.client.{Client, ClientListener}
+import org.apache.spark.deploy.{Command, ApplicationDescription}
 import scala.collection.mutable.HashMap
+import org.apache.spark.util.Utils
 
 private[spark] class SparkDeploySchedulerBackend(
     scheduler: ClusterScheduler,
@@ -45,9 +46,9 @@ private[spark] class SparkDeploySchedulerBackend(
       System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"),
       StandaloneSchedulerBackend.ACTOR_NAME)
     val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}")
-    val command = Command("spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs)
-    val sparkHome = sc.getSparkHome().getOrElse(
-      throw new IllegalArgumentException("must supply spark home for spark standalone"))
+    val command = Command(
+      "org.apache.spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs)
+    val sparkHome = sc.getSparkHome().getOrElse(null)
     val appDesc = new ApplicationDescription(appName, maxCores, executorMemory, command, sparkHome,
       sc.ui.appUIAddress)
@@ -77,7 +78,7 @@ private[spark] class SparkDeploySchedulerBackend(
   override def executorAdded(executorId: String, workerId: String, hostPort: String, cores: Int,
     memory: Int) {
     logInfo("Granted executor ID %s on hostPort %s with %d cores, %s RAM".format(
-      executorId, hostPort, cores, Utils.memoryMegabytesToString(memory)))
+      executorId, hostPort, cores, Utils.megabytesToString(memory)))
   }
 
   override def executorRemoved(executorId: String,
message: String, exitStatus: Option[Int]) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala new file mode 100644 index 0000000000..9c36d221f6 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster + +import java.nio.ByteBuffer + +import org.apache.spark.TaskState.TaskState +import org.apache.spark.util.{Utils, SerializableBuffer} + + +private[spark] sealed trait StandaloneClusterMessage extends Serializable + +private[spark] object StandaloneClusterMessages { + + // Driver to executors + case class LaunchTask(task: TaskDescription) extends StandaloneClusterMessage + + case class RegisteredExecutor(sparkProperties: Seq[(String, String)]) + extends StandaloneClusterMessage + + case class RegisterExecutorFailed(message: String) extends StandaloneClusterMessage + + // Executors to driver + case class RegisterExecutor(executorId: String, hostPort: String, cores: Int) + extends StandaloneClusterMessage { + Utils.checkHostPort(hostPort, "Expected host port") + } + + case class StatusUpdate(executorId: String, taskId: Long, state: TaskState, + data: SerializableBuffer) extends StandaloneClusterMessage + + object StatusUpdate { + /** Alternate factory method that takes a ByteBuffer directly for the data field */ + def apply(executorId: String, taskId: Long, state: TaskState, data: ByteBuffer) + : StatusUpdate = { + StatusUpdate(executorId, taskId, state, new SerializableBuffer(data)) + } + } + + // Internal messages in driver + case object ReviveOffers extends StandaloneClusterMessage + + case object StopDriver extends StandaloneClusterMessage + + case class RemoveExecutor(executorId: String, reason: String) extends StandaloneClusterMessage + +} diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 03a64e0192..b4ea0be415 100644 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -15,19 +15,22 @@ * limitations under the License. 
*/ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster + +import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import akka.actor._ -import akka.util.duration._ +import akka.dispatch.Await import akka.pattern.ask +import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent} import akka.util.Duration +import akka.util.duration._ -import spark.{Utils, SparkException, Logging, TaskState} -import akka.dispatch.Await -import java.util.concurrent.atomic.AtomicInteger -import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent} +import org.apache.spark.{SparkException, Logging, TaskState} +import org.apache.spark.scheduler.cluster.StandaloneClusterMessages._ +import org.apache.spark.util.Utils /** * A standalone scheduler backend, which waits for standalone executors to connect to it through @@ -36,15 +39,15 @@ import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClient */ private[spark] class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: ActorSystem) - extends SchedulerBackend with Logging { - + extends SchedulerBackend with Logging +{ // Use an atomic variable to track total number of cores in the cluster for simplicity and speed var totalCoreCount = new AtomicInteger(0) class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor { private val executorActor = new HashMap[String, ActorRef] private val executorAddress = new HashMap[String, Address] - private val executorHostPort = new HashMap[String, String] + private val executorHost = new HashMap[String, String] private val freeCores = new HashMap[String, Int] private val actorToExecutorId = new HashMap[ActorRef, String] private val addressToExecutorId = new HashMap[Address, String] @@ -52,6 +55,10 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor override def preStart() { // Listen for remote client disconnection events, since they don't go through Akka's watch() context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) + + // Periodically revive offers to allow delay scheduling to work + val reviveInterval = System.getProperty("spark.scheduler.revive.interval", "1000").toLong + context.system.scheduler.schedule(0.millis, reviveInterval.millis, self, ReviveOffers) } def receive = { @@ -64,7 +71,7 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor sender ! 
RegisteredExecutor(sparkProperties) context.watch(sender) executorActor(executorId) = sender - executorHostPort(executorId) = hostPort + executorHost(executorId) = Utils.parseHostPort(hostPort)._1 freeCores(executorId) = cores executorAddress(executorId) = sender.path.address actorToExecutorId(sender) = executorId @@ -104,13 +111,13 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor // Make fake resource offers on all executors def makeOffers() { launchTasks(scheduler.resourceOffers( - executorHostPort.toArray.map {case (id, hostPort) => new WorkerOffer(id, hostPort, freeCores(id))})) + executorHost.toArray.map {case (id, host) => new WorkerOffer(id, host, freeCores(id))})) } // Make fake resource offers on just one executor def makeOffers(executorId: String) { launchTasks(scheduler.resourceOffers( - Seq(new WorkerOffer(executorId, executorHostPort(executorId), freeCores(executorId))))) + Seq(new WorkerOffer(executorId, executorHost(executorId), freeCores(executorId))))) } // Launch tasks returned by a set of resource offers @@ -129,9 +136,8 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor actorToExecutorId -= executorActor(executorId) addressToExecutorId -= executorAddress(executorId) executorActor -= executorId - executorHostPort -= executorId + executorHost -= executorId freeCores -= executorId - executorHostPort -= executorId totalCoreCount.addAndGet(-numCores) scheduler.executorLost(executorId, SlaveLost(reason)) } diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskDescription.scala index 761fdf6919..309ac2f6c9 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskDescription.scala @@ -15,15 +15,16 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster import java.nio.ByteBuffer -import spark.util.SerializableBuffer +import org.apache.spark.util.SerializableBuffer private[spark] class TaskDescription( val taskId: Long, val executorId: String, val name: String, + val index: Int, // Index within this task's TaskSet _serializedTask: ByteBuffer) extends Serializable { @@ -31,4 +32,6 @@ private[spark] class TaskDescription( private val buffer = new SerializableBuffer(_serializedTask) def serializedTask: ByteBuffer = buffer.value + + override def toString: String = "TaskDescription(TID=%d, index=%d)".format(taskId, index) } diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskInfo.scala index a1ebd48b01..9685fb1a67 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskInfo.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import spark.Utils +import org.apache.spark.util.Utils /** * Information about a running task attempt inside a TaskSet. 
@@ -28,11 +28,9 @@ class TaskInfo( val index: Int, val launchTime: Long, val executorId: String, - val hostPort: String, + val host: String, val taskLocality: TaskLocality.TaskLocality) { - Utils.checkHostPort(hostPort, "Expected hostport") - var finishTime: Long = 0 var failed = false @@ -51,6 +49,17 @@ class TaskInfo( def running: Boolean = !finished + def status: String = { + if (running) + "RUNNING" + else if (failed) + "FAILED" + else if (successful) + "SUCCESS" + else + "UNKNOWN" + } + def duration: Long = { if (!finished) { throw new UnsupportedOperationException("duration() called on unfinished tasks") diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala new file mode 100644 index 0000000000..5d4130e14a --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster + + +private[spark] object TaskLocality + extends Enumeration("PROCESS_LOCAL", "NODE_LOCAL", "RACK_LOCAL", "ANY") +{ + // process local is expected to be used ONLY within tasksetmanager for now. + val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value + + type TaskLocality = Value + + def isAllowed(constraint: TaskLocality, condition: TaskLocality): Boolean = { + condition <= constraint + } +} diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala index 07c3ddcc7e..648a3ef922 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala @@ -15,20 +15,37 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import scala.collection.mutable.ArrayBuffer -import spark.scheduler._ -import spark.TaskState.TaskState import java.nio.ByteBuffer +import org.apache.spark.TaskState.TaskState +import org.apache.spark.scheduler.TaskSet + +/** + * Tracks and schedules the tasks within a single TaskSet. This class keeps track of the status of + * each task and is responsible for retries on failure and locality. The main interfaces to it + * are resourceOffer, which asks the TaskSet whether it wants to run a task on one node, and + * statusUpdate, which tells it that one of its tasks changed state (e.g. finished). + * + * THREADING: This class is designed to only be called from code with a lock on the TaskScheduler + * (e.g. its event handlers). It should not be called from other threads. 
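+ * The two implementations are ClusterTaskSetManager, used by ClusterScheduler, and
+ * LocalTaskSetManager, used by LocalScheduler.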
+ */ private[spark] trait TaskSetManager extends Schedulable { + def schedulableQueue = null + + def schedulingMode = SchedulingMode.NONE + def taskSet: TaskSet - def slaveOffer(execId: String, hostPort: String, availableCpus: Double, - overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] - def numPendingTasksForHostPort(hostPort: String): Int - def numRackLocalPendingTasksForHost(hostPort :String): Int - def numPendingTasksForHost(hostPort: String): Int + + def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) + : Option[TaskDescription] + def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) + def error(message: String) } diff --git a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala index 06d1203f70..938f62883a 100644 --- a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala @@ -15,11 +15,10 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster /** * Represents free resources available on an executor. */ private[spark] -class WorkerOffer(val executorId: String, val hostPort: String, val cores: Int) { -} +class WorkerOffer(val executorId: String, val host: String, val cores: Int) diff --git a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala index 1f73cb99a7..e8fa5e2f17 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala @@ -15,21 +15,26 @@ * limitations under the License. */ -package spark.scheduler.local +package org.apache.spark.scheduler.local import java.io.File +import java.lang.management.ManagementFactory import java.util.concurrent.atomic.AtomicInteger import java.nio.ByteBuffer + +import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet -import spark._ -import spark.TaskState.TaskState -import spark.executor.ExecutorURLClassLoader -import spark.scheduler._ -import spark.scheduler.cluster._ +import org.apache.spark._ +import org.apache.spark.TaskState.TaskState +import org.apache.spark.executor.ExecutorURLClassLoader +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster._ +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode import akka.actor._ +import org.apache.spark.util.Utils /** * A FIFO or Fair TaskScheduler implementation that runs tasks locally in a thread pool. Optionally @@ -37,10 +42,15 @@ import akka.actor._ * testing fault recovery. 
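resourceOffer replaces slaveOffer and the per-hostPort pending-task counters: a scheduler now simply keeps offering cores to a manager until it declines. A toy sketch of that polling loop against a simplified manager interface (names and the String task handle are stand-ins):

    trait OfferableSketch {
      def resourceOffer(execId: String, host: String, availableCpus: Int): Option[String]
    }

    // Offer cores to one manager until it returns None or cores run out,
    // mirroring the do/while loop LocalScheduler uses further down.
    def drainOffers(manager: OfferableSketch, execId: String, host: String, cores: Int): Seq[String] = {
      val launched = new scala.collection.mutable.ArrayBuffer[String]
      var free = cores
      var keepGoing = true
      while (free > 0 && keepGoing) {
        manager.resourceOffer(execId, host, free) match {
          case Some(task) =>
            launched += task
            free -= 1          // each accepted offer consumes one core in this sketch
          case None =>
            keepGoing = false  // the manager declined; stop offering
        }
      }
      launched
    }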
*/ -private[spark] case class LocalReviveOffers() -private[spark] case class LocalStatusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer) +private[spark] +case class LocalReviveOffers() + +private[spark] +case class LocalStatusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer) + +private[spark] +class LocalActor(localScheduler: LocalScheduler, var freeCores: Int) extends Actor with Logging { -private[spark] class LocalActor(localScheduler: LocalScheduler, var freeCores: Int) extends Actor with Logging { def receive = { case LocalReviveOffers => launchTask(localScheduler.resourceOffer(freeCores)) @@ -55,7 +65,7 @@ private[spark] class LocalActor(localScheduler: LocalScheduler, var freeCores: I freeCores -= 1 localScheduler.threadPool.submit(new Runnable { def run() { - localScheduler.runTask(task.taskId,task.serializedTask) + localScheduler.runTask(task.taskId, task.serializedTask) } }) } @@ -80,6 +90,8 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: var schedulableBuilder: SchedulableBuilder = null var rootPool: Pool = null + val schedulingMode: SchedulingMode = SchedulingMode.withName( + System.getProperty("spark.cluster.schedulingmode", "FIFO")) val activeTaskSets = new HashMap[String, TaskSetManager] val taskIdToTaskSetId = new HashMap[Long, String] val taskSetTaskIds = new HashMap[String, HashSet[Long]] @@ -87,15 +99,13 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: var localActor: ActorRef = null override def start() { - //default scheduler is FIFO - val schedulingMode = System.getProperty("spark.cluster.schedulingmode", "FIFO") - //temporarily set rootPool name to empty - rootPool = new Pool("", SchedulingMode.withName(schedulingMode), 0, 0) + // temporarily set rootPool name to empty + rootPool = new Pool("", schedulingMode, 0, 0) schedulableBuilder = { schedulingMode match { - case "FIFO" => + case SchedulingMode.FIFO => new FIFOSchedulableBuilder(rootPool) - case "FAIR" => + case SchedulingMode.FAIR => new FairSchedulableBuilder(rootPool) } } @@ -110,7 +120,7 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: override def submitTasks(taskSet: TaskSet) { synchronized { - var manager = new LocalTaskSetManager(this, taskSet) + val manager = new LocalTaskSetManager(this, taskSet) schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties) activeTaskSets(taskSet.id) = manager taskSetTaskIds(taskSet.id) = new HashSet[Long]() @@ -124,14 +134,15 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: val tasks = new ArrayBuffer[TaskDescription](freeCores) val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue() for (manager <- sortedTaskSetQueue) { - logDebug("parentName:%s,name:%s,runningTasks:%s".format(manager.parent.name, manager.name, manager.runningTasks)) + logDebug("parentName:%s,name:%s,runningTasks:%s".format( + manager.parent.name, manager.name, manager.runningTasks)) } var launchTask = false for (manager <- sortedTaskSetQueue) { do { launchTask = false - manager.slaveOffer(null,null,freeCpuCores) match { + manager.resourceOffer(null, null, freeCpuCores, null) match { case Some(task) => tasks += task taskIdToTaskSetId(task.taskId) = manager.taskSet.id @@ -139,7 +150,7 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: freeCpuCores -= 1 launchTask = true case None => {} - } + } } while(launchTask) } return tasks @@ -162,9 +173,13 @@ private[spark] class 
LocalScheduler(threads: Int, val maxFailures: Int, val sc: // Set the Spark execution environment for the worker thread SparkEnv.set(env) val ser = SparkEnv.get.closureSerializer.newInstance() - var attemptedTask: Option[Task[_]] = None + val objectSer = SparkEnv.get.serializer.newInstance() + var attemptedTask: Option[Task[_]] = None val start = System.currentTimeMillis() var taskStart: Long = 0 + def getTotalGCTime = ManagementFactory.getGarbageCollectorMXBeans.map(g => g.getCollectionTime).sum + val startGCTime = getTotalGCTime + try { Accumulators.clear() Thread.currentThread().setContextClassLoader(classLoader) @@ -186,14 +201,15 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: // executor does. This is useful to catch serialization errors early // on in development (so when users move their local Spark programs // to the cluster, they don't get surprised by serialization errors). - val serResult = ser.serialize(result) + val serResult = objectSer.serialize(result) deserializedTask.metrics.get.resultSize = serResult.limit() - val resultToReturn = ser.deserialize[Any](serResult) + val resultToReturn = objectSer.deserialize[Any](serResult) val accumUpdates = ser.deserialize[collection.mutable.Map[Long, Any]]( ser.serialize(Accumulators.values)) val serviceTime = System.currentTimeMillis() - taskStart logInfo("Finished " + taskId) deserializedTask.metrics.get.executorRunTime = serviceTime.toInt + deserializedTask.metrics.get.jvmGCTime = getTotalGCTime - startGCTime deserializedTask.metrics.get.executorDeserializeTime = deserTime.toInt val taskResult = new TaskResult(result, accumUpdates, deserializedTask.metrics.getOrElse(null)) val serializedResult = ser.serialize(taskResult) @@ -202,7 +218,10 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: case t: Throwable => { val serviceTime = System.currentTimeMillis() - taskStart val metrics = attemptedTask.flatMap(t => t.metrics) - metrics.foreach{m => m.executorRunTime = serviceTime.toInt} + for (m <- metrics) { + m.executorRunTime = serviceTime.toInt + m.jvmGCTime = getTotalGCTime - startGCTime + } val failure = new ExceptionFailure(t.getClass.getName, t.toString, t.getStackTrace, metrics) localActor ! LocalStatusUpdate(taskId, TaskState.FAILED, ser.serialize(failure)) } diff --git a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala index e662ad6709..e52cb998bd 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala @@ -15,83 +15,79 @@ * limitations under the License. 
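The new jvmGCTime metric is computed by snapshotting the cumulative collection time across all garbage-collector MXBeans before the task body runs and taking the delta afterwards, exactly as getTotalGCTime does above. A self-contained sketch of that measurement (the allocation loop is a stand-in workload):

    import java.lang.management.ManagementFactory
    import scala.collection.JavaConversions._

    object GcTimeSketch {
      // Sum of collection time (ms) across all GC beans, as in the hunk above.
      def totalGcTime: Long =
        ManagementFactory.getGarbageCollectorMXBeans.map(_.getCollectionTime).sum

      def main(args: Array[String]) {
        val startGCTime = totalGcTime
        // Stand-in workload that churns enough garbage to likely trigger a collection.
        var data: Seq[Array[Byte]] = Nil
        for (i <- 1 to 1000) { data = Seq(new Array[Byte](1024 * 1024)) }
        val jvmGCTime = totalGcTime - startGCTime
        println("GC time attributable to this task: " + jvmGCTime + " ms")
      }
    }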
*/ -package spark.scheduler.local +package org.apache.spark.scheduler.local -import java.io.File -import java.util.concurrent.atomic.AtomicInteger import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet -import spark._ -import spark.TaskState.TaskState -import spark.scheduler._ -import spark.scheduler.cluster._ +import org.apache.spark.{ExceptionFailure, Logging, SparkEnv, Success, TaskState} +import org.apache.spark.TaskState.TaskState +import org.apache.spark.scheduler.{Task, TaskResult, TaskSet} +import org.apache.spark.scheduler.cluster.{Schedulable, TaskDescription, TaskInfo, TaskLocality, TaskSetManager} + + +private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: TaskSet) + extends TaskSetManager with Logging { -private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: TaskSet) extends TaskSetManager with Logging { var parent: Schedulable = null var weight: Int = 1 var minShare: Int = 0 var runningTasks: Int = 0 var priority: Int = taskSet.priority var stageId: Int = taskSet.stageId - var name: String = "TaskSet_"+taskSet.stageId.toString - + var name: String = "TaskSet_" + taskSet.stageId.toString var failCount = new Array[Int](taskSet.tasks.size) val taskInfos = new HashMap[Long, TaskInfo] val numTasks = taskSet.tasks.size var numFinished = 0 - val ser = SparkEnv.get.closureSerializer.newInstance() + val env = SparkEnv.get + val ser = env.closureSerializer.newInstance() val copiesRunning = new Array[Int](numTasks) val finished = new Array[Boolean](numTasks) val numFailures = new Array[Int](numTasks) val MAX_TASK_FAILURES = sched.maxFailures - def increaseRunningTasks(taskNum: Int): Unit = { - runningTasks += taskNum - if (parent != null) { - parent.increaseRunningTasks(taskNum) - } + override def increaseRunningTasks(taskNum: Int): Unit = { + runningTasks += taskNum + if (parent != null) { + parent.increaseRunningTasks(taskNum) + } } - def decreaseRunningTasks(taskNum: Int): Unit = { + override def decreaseRunningTasks(taskNum: Int): Unit = { runningTasks -= taskNum if (parent != null) { parent.decreaseRunningTasks(taskNum) } } - def addSchedulable(schedulable: Schedulable): Unit = { - //nothing + override def addSchedulable(schedulable: Schedulable): Unit = { + // nothing } - def removeSchedulable(schedulable: Schedulable): Unit = { - //nothing + override def removeSchedulable(schedulable: Schedulable): Unit = { + // nothing } - def getSchedulableByName(name: String): Schedulable = { + override def getSchedulableByName(name: String): Schedulable = { return null } - def executorLost(executorId: String, host: String): Unit = { - //nothing + override def executorLost(executorId: String, host: String): Unit = { + // nothing } - def checkSpeculatableTasks(): Boolean = { - return true - } + override def checkSpeculatableTasks() = true - def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { + override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager] sortedTaskSetQueue += this return sortedTaskSetQueue } - def hasPendingTasks(): Boolean = { - return true - } + override def hasPendingTasks() = true def findTask(): Option[Int] = { for (i <- 0 to numTasks-1) { @@ -102,41 +98,42 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas return None } - def slaveOffer(execId: String, hostPort: String, availableCpus: Double, overrideLocality: 
TaskLocality.TaskLocality = null): Option[TaskDescription] = { + override def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) + : Option[TaskDescription] = + { SparkEnv.set(sched.env) - logDebug("availableCpus:%d,numFinished:%d,numTasks:%d".format(availableCpus.toInt, numFinished, numTasks)) + logDebug("availableCpus:%d, numFinished:%d, numTasks:%d".format( + availableCpus.toInt, numFinished, numTasks)) if (availableCpus > 0 && numFinished < numTasks) { findTask() match { case Some(index) => val taskId = sched.attemptId.getAndIncrement() val task = taskSet.tasks(index) - val info = new TaskInfo(taskId, index, System.currentTimeMillis(), "local", "local:1", TaskLocality.NODE_LOCAL) + val info = new TaskInfo(taskId, index, System.currentTimeMillis(), "local", "local:1", + TaskLocality.NODE_LOCAL) taskInfos(taskId) = info - val bytes = Task.serializeWithDependencies(task, sched.sc.addedFiles, sched.sc.addedJars, ser) + // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here + // we assume the task can be serialized without exceptions. + val bytes = Task.serializeWithDependencies( + task, sched.sc.addedFiles, sched.sc.addedJars, ser) logInfo("Size of task " + taskId + " is " + bytes.limit + " bytes") val taskName = "task %s:%d".format(taskSet.id, index) copiesRunning(index) += 1 increaseRunningTasks(1) - return Some(new TaskDescription(taskId, null, taskName, bytes)) + taskStarted(task, info) + return Some(new TaskDescription(taskId, null, taskName, index, bytes)) case None => {} } } return None } - def numPendingTasksForHostPort(hostPort: String): Int = { - return 0 - } - - def numRackLocalPendingTasksForHost(hostPort :String): Int = { - return 0 - } - - def numPendingTasksForHost(hostPort: String): Int = { - return 0 - } - - def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { + override def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { + SparkEnv.set(env) state match { case TaskState.FINISHED => taskEnded(tid, state, serializedData) @@ -146,6 +143,10 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas } } + def taskStarted(task: Task[_], info: TaskInfo) { + sched.listener.taskStarted(task, info) + } + def taskEnded(tid: Long, state: TaskState, serializedData: ByteBuffer) { val info = taskInfos(tid) val index = info.index @@ -168,15 +169,18 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas val task = taskSet.tasks(index) info.markFailed() decreaseRunningTasks(1) - val reason: ExceptionFailure = ser.deserialize[ExceptionFailure](serializedData, getClass.getClassLoader) + val reason: ExceptionFailure = ser.deserialize[ExceptionFailure]( + serializedData, getClass.getClassLoader) sched.listener.taskEnded(task, reason, null, null, info, reason.metrics.getOrElse(null)) if (!finished(index)) { copiesRunning(index) -= 1 numFailures(index) += 1 val locs = reason.stackTrace.map(loc => "\tat %s".format(loc.toString)) - logInfo("Loss was due to %s\n%s\n%s".format(reason.className, reason.description, locs.mkString("\n"))) + logInfo("Loss was due to %s\n%s\n%s".format( + reason.className, reason.description, locs.mkString("\n"))) if (numFailures(index) > MAX_TASK_FAILURES) { - val errorMessage = "Task %s:%d failed more than %d times; aborting job %s".format(taskSet.id, index, 4, reason.description) + val errorMessage = "Task %s:%d failed more than %d times; aborting job %s".format( + taskSet.id, 
index, 4, reason.description) decreaseRunningTasks(runningTasks) sched.listener.taskSetFailed(taskSet, errorMessage) // need to delete failed Taskset from schedule queue @@ -185,6 +189,6 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas } } - def error(message: String) { + override def error(message: String) { } } diff --git a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala index 7bc6040544..3dbe61d706 100644 --- a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler.mesos +package org.apache.spark.scheduler.mesos import com.google.protobuf.ByteString @@ -23,14 +23,14 @@ import org.apache.mesos.{Scheduler => MScheduler} import org.apache.mesos._ import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _} -import spark.{SparkException, Utils, Logging, SparkContext} +import org.apache.spark.{SparkException, Logging, SparkContext} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.collection.JavaConversions._ import java.io.File -import spark.scheduler.cluster._ +import org.apache.spark.scheduler.cluster._ import java.util.{ArrayList => JArrayList, List => JList} import java.util.Collections -import spark.TaskState +import org.apache.spark.TaskState /** * A SchedulerBackend that runs tasks on Mesos, but uses "coarse-grained" tasks, where it holds @@ -110,12 +110,6 @@ private[spark] class CoarseMesosSchedulerBackend( } def createCommand(offer: Offer, numCores: Int): CommandInfo = { - val runScript = new File(sparkHome, "run").getCanonicalPath - val driverUrl = "akka://spark@%s:%s/user/%s".format( - System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"), - StandaloneSchedulerBackend.ACTOR_NAME) - val command = "\"%s\" spark.executor.StandaloneExecutorBackend %s %s %s %d".format( - runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores) val environment = Environment.newBuilder() sc.executorEnvs.foreach { case (key, value) => environment.addVariables(Environment.Variable.newBuilder() @@ -123,7 +117,28 @@ private[spark] class CoarseMesosSchedulerBackend( .setValue(value) .build()) } - return CommandInfo.newBuilder().setValue(command).setEnvironment(environment).build() + val command = CommandInfo.newBuilder() + .setEnvironment(environment) + val driverUrl = "akka://spark@%s:%s/user/%s".format( + System.getProperty("spark.driver.host"), + System.getProperty("spark.driver.port"), + StandaloneSchedulerBackend.ACTOR_NAME) + val uri = System.getProperty("spark.executor.uri") + if (uri == null) { + val runScript = new File(sparkHome, "spark-class").getCanonicalPath + command.setValue( + "\"%s\" org.apache.spark.executor.StandaloneExecutorBackend %s %s %s %d".format( + runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) + } else { + // Grab everything to the first '.'. We'll use that and '*' to + // glob the directory "correctly". 
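The spark.executor.uri branch ships a distribution archive to the slave, so the launch command has to cd into whatever directory the archive unpacks to. The code approximates that by taking the file name up to its first '.' and globbing with '*'. A quick sketch of the string manipulation, with an example URI:

    object UriBasenameSketch {
      def main(args: Array[String]) {
        val uri = "hdfs://namenode/dist/spark-0.8.0-incubating.tar.gz"  // example value
        // Last path segment, then everything before the first '.':
        val basename = uri.split('/').last.split('.').head
        println(basename)                                      // spark-0
        println("cd %s*; ./spark-class ...".format(basename))  // the glob used in the command
      }
    }

Once Mesos fetches and unpacks the archive, the glob "spark-0*" matches the unpacked spark-0.8.0-incubating/ directory, which is why the comment puts "correctly" in quotes.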
+ val basename = uri.split('/').last.split('.').head + command.setValue( + "cd %s*; ./spark-class org.apache.spark.executor.StandaloneExecutorBackend %s %s %s %d".format( + basename, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) + command.addUris(CommandInfo.URI.newBuilder().setValue(uri)) + } + return command.build() } override def offerRescinded(d: SchedulerDriver, o: OfferID) {} diff --git a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackend.scala index 75b8268b55..541f86e338 100644 --- a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackend.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler.mesos +package org.apache.spark.scheduler.mesos import com.google.protobuf.ByteString @@ -23,14 +23,15 @@ import org.apache.mesos.{Scheduler => MScheduler} import org.apache.mesos._ import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _} -import spark.{SparkException, Utils, Logging, SparkContext} +import org.apache.spark.{SparkException, Logging, SparkContext} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.collection.JavaConversions._ import java.io.File -import spark.scheduler.cluster._ +import org.apache.spark.scheduler.cluster._ import java.util.{ArrayList => JArrayList, List => JList} import java.util.Collections -import spark.TaskState +import org.apache.spark.TaskState +import org.apache.spark.util.Utils /** * A SchedulerBackend for running fine-grained tasks on Mesos. Each Spark task is mapped to a @@ -89,7 +90,6 @@ private[spark] class MesosSchedulerBackend( val sparkHome = sc.getSparkHome().getOrElse(throw new SparkException( "Spark home is not set; set it through the spark.home system " + "property, the SPARK_HOME environment variable or the SparkContext constructor")) - val execScript = new File(sparkHome, "spark-executor").getCanonicalPath val environment = Environment.newBuilder() sc.executorEnvs.foreach { case (key, value) => environment.addVariables(Environment.Variable.newBuilder() @@ -97,15 +97,23 @@ private[spark] class MesosSchedulerBackend( .setValue(value) .build()) } + val command = CommandInfo.newBuilder() + .setEnvironment(environment) + val uri = System.getProperty("spark.executor.uri") + if (uri == null) { + command.setValue(new File(sparkHome, "spark-executor").getCanonicalPath) + } else { + // Grab everything to the first '.'. We'll use that and '*' to + // glob the directory "correctly". 
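Both Mesos backends now assemble the executor command the same way: build the environment first, then either point at a local script or emit the cd-and-glob form and attach the URI for Mesos to fetch. A condensed sketch of that builder flow, using only the protobuf calls visible in these hunks (the environment contents are elided):

    import org.apache.mesos.Protos._

    // Returns a CommandInfo that either runs a local executor script or
    // fetches `uri` and runs the executor from the unpacked directory.
    def buildExecutorCommand(localScript: String, uri: Option[String]): CommandInfo = {
      val command = CommandInfo.newBuilder()
        .setEnvironment(Environment.newBuilder().build())  // env vars elided in this sketch
      uri match {
        case None =>
          command.setValue(localScript)
        case Some(u) =>
          val basename = u.split('/').last.split('.').head
          command.setValue("cd %s*; ./spark-executor".format(basename))
          command.addUris(CommandInfo.URI.newBuilder().setValue(u))
      }
      command.build()
    }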
+ val basename = uri.split('/').last.split('.').head + command.setValue("cd %s*; ./spark-executor".format(basename)) + command.addUris(CommandInfo.URI.newBuilder().setValue(uri)) + } val memory = Resource.newBuilder() .setName("mem") .setType(Value.Type.SCALAR) .setScalar(Value.Scalar.newBuilder().setValue(executorMemory).build()) .build() - val command = CommandInfo.newBuilder() - .setValue(execScript) - .setEnvironment(environment) - .build() ExecutorInfo.newBuilder() .setExecutorId(ExecutorID.newBuilder().setValue(execId).build()) .setCommand(command) diff --git a/core/src/main/scala/spark/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala index 04c5f44e6b..4de81617b1 100644 --- a/core/src/main/scala/spark/JavaSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala @@ -15,13 +15,12 @@ * limitations under the License. */ -package spark +package org.apache.spark.serializer import java.io._ import java.nio.ByteBuffer -import serializer.{Serializer, SerializerInstance, DeserializationStream, SerializationStream} -import spark.util.ByteBufferInputStream +import org.apache.spark.util.ByteBufferInputStream private[spark] class JavaSerializationStream(out: OutputStream) extends SerializationStream { val objOut = new ObjectOutputStream(out) diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala new file mode 100644 index 0000000000..24ef204aa1 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.serializer + +import java.nio.ByteBuffer +import java.io.{EOFException, InputStream, OutputStream} + +import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializer} +import com.esotericsoftware.kryo.{KryoException, Kryo} +import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput} +import com.twitter.chill.ScalaKryoInstantiator + +import org.apache.spark.{SerializableWritable, Logging} +import org.apache.spark.storage.{GetBlock, GotBlock, PutBlock, StorageLevel} + +import org.apache.spark.broadcast.HttpBroadcast + +/** + * A Spark serializer that uses the [[http://code.google.com/p/kryo/wiki/V1Documentation Kryo 1.x library]]. 
+ */ +class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging { + private val bufferSize = System.getProperty("spark.kryoserializer.buffer.mb", "2").toInt * 1024 * 1024 + + def newKryoOutput() = new KryoOutput(bufferSize) + + def newKryoInput() = new KryoInput(bufferSize) + + def newKryo(): Kryo = { + val instantiator = new ScalaKryoInstantiator + val kryo = instantiator.newKryo() + val classLoader = Thread.currentThread.getContextClassLoader + + // Register some commonly used classes + val toRegister: Seq[AnyRef] = Seq( + ByteBuffer.allocate(1), + StorageLevel.MEMORY_ONLY, + PutBlock("1", ByteBuffer.allocate(1), StorageLevel.MEMORY_ONLY), + GotBlock("1", ByteBuffer.allocate(1)), + GetBlock("1") + ) + + for (obj <- toRegister) kryo.register(obj.getClass) + + // Allow sending SerializableWritable + kryo.register(classOf[SerializableWritable[_]], new KryoJavaSerializer()) + kryo.register(classOf[HttpBroadcast[_]], new KryoJavaSerializer()) + + // Allow the user to register their own classes by setting spark.kryo.registrator + try { + Option(System.getProperty("spark.kryo.registrator")).foreach { regCls => + logDebug("Running user registrator: " + regCls) + val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator] + reg.registerClasses(kryo) + } + } catch { + case _: Exception => println("Failed to register spark.kryo.registrator") + } + + kryo.setClassLoader(classLoader) + + // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops + kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean) + + kryo + } + + def newInstance(): SerializerInstance = { + new KryoSerializerInstance(this) + } +} + +private[spark] +class KryoSerializationStream(kryo: Kryo, outStream: OutputStream) extends SerializationStream { + val output = new KryoOutput(outStream) + + def writeObject[T](t: T): SerializationStream = { + kryo.writeClassAndObject(output, t) + this + } + + def flush() { output.flush() } + def close() { output.close() } +} + +private[spark] +class KryoDeserializationStream(kryo: Kryo, inStream: InputStream) extends DeserializationStream { + val input = new KryoInput(inStream) + + def readObject[T](): T = { + try { + kryo.readClassAndObject(input).asInstanceOf[T] + } catch { + // DeserializationStream uses the EOF exception to indicate stopping condition. + case _: KryoException => throw new EOFException + } + } + + def close() { + // Kryo's Input automatically closes the input stream it is using. 
+ input.close() + } +} + +private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends SerializerInstance { + val kryo = ks.newKryo() + val output = ks.newKryoOutput() + val input = ks.newKryoInput() + + def serialize[T](t: T): ByteBuffer = { + output.clear() + kryo.writeClassAndObject(output, t) + ByteBuffer.wrap(output.toBytes) + } + + def deserialize[T](bytes: ByteBuffer): T = { + input.setBuffer(bytes.array) + kryo.readClassAndObject(input).asInstanceOf[T] + } + + def deserialize[T](bytes: ByteBuffer, loader: ClassLoader): T = { + val oldClassLoader = kryo.getClassLoader + kryo.setClassLoader(loader) + input.setBuffer(bytes.array) + val obj = kryo.readClassAndObject(input).asInstanceOf[T] + kryo.setClassLoader(oldClassLoader) + obj + } + + def serializeStream(s: OutputStream): SerializationStream = { + new KryoSerializationStream(kryo, s) + } + + def deserializeStream(s: InputStream): DeserializationStream = { + new KryoDeserializationStream(kryo, s) + } +} + +/** + * Interface implemented by clients to register their classes with Kryo when using Kryo + * serialization. + */ +trait KryoRegistrator { + def registerClasses(kryo: Kryo) +} diff --git a/core/src/main/scala/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala index dc94d42bb6..160cca4d6c 100644 --- a/core/src/main/scala/spark/serializer/Serializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala @@ -15,19 +15,19 @@ * limitations under the License. */ -package spark.serializer +package org.apache.spark.serializer import java.io.{EOFException, InputStream, OutputStream} import java.nio.ByteBuffer import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream -import spark.util.ByteBufferInputStream +import org.apache.spark.util.{NextIterator, ByteBufferInputStream} /** * A serializer. Because some serialization libraries are not thread safe, this class is used to - * create [[spark.serializer.SerializerInstance]] objects that do the actual serialization and are + * create [[org.apache.spark.serializer.SerializerInstance]] objects that do the actual serialization and are * guaranteed to only be called from one thread at a time. */ trait Serializer { @@ -95,7 +95,7 @@ trait DeserializationStream { * Read the elements of this stream through an iterator. This can only be called once, as * reading each element will consume data from the input source. */ - def asIterator: Iterator[Any] = new spark.util.NextIterator[Any] { + def asIterator: Iterator[Any] = new NextIterator[Any] { override protected def getNext() = { try { readObject[Any]() diff --git a/core/src/main/scala/spark/serializer/SerializerManager.scala b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala index b7b24705a2..2955986fec 100644 --- a/core/src/main/scala/spark/serializer/SerializerManager.scala +++ b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.serializer +package org.apache.spark.serializer import java.util.concurrent.ConcurrentHashMap diff --git a/core/src/main/scala/spark/storage/BlockException.scala b/core/src/main/scala/org/apache/spark/storage/BlockException.scala index 8ebfaf3cbf..290dbce4f5 100644 --- a/core/src/main/scala/spark/storage/BlockException.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockException.scala @@ -15,7 +15,7 @@ * limitations under the License. 
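KryoRegistrator is the user-facing hook in this new file: implement it, register the application's classes, and point spark.kryo.registrator at the implementation so newKryo() can load it reflectively. A minimal sketch of a user registrator (the domain class is an example; selecting the serializer via the spark.serializer property follows this era's convention):

    import com.esotericsoftware.kryo.Kryo
    import org.apache.spark.serializer.KryoRegistrator

    // Registering classes up front lets Kryo write compact IDs
    // instead of full class names for every object.
    case class Point(x: Double, y: Double)

    class MyRegistrator extends KryoRegistrator {
      override def registerClasses(kryo: Kryo) {
        kryo.register(classOf[Point])
      }
    }

    // Wired up before the SparkContext is created:
    //   System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    //   System.setProperty("spark.kryo.registrator", "MyRegistrator")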
*/ -package spark.storage +package org.apache.spark.storage private[spark] case class BlockException(blockId: String, message: String) extends Exception(message) diff --git a/core/src/main/scala/spark/storage/BlockFetchTracker.scala b/core/src/main/scala/org/apache/spark/storage/BlockFetchTracker.scala index 265e554ad8..2e0b0e6eda 100644 --- a/core/src/main/scala/spark/storage/BlockFetchTracker.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockFetchTracker.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage private[spark] trait BlockFetchTracker { def totalBlocks : Int diff --git a/core/src/main/scala/spark/storage/BlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala index 1965c5bc19..3aeda3879d 100644 --- a/core/src/main/scala/spark/storage/BlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer import java.util.concurrent.LinkedBlockingQueue @@ -26,13 +26,13 @@ import scala.collection.mutable.Queue import io.netty.buffer.ByteBuf -import spark.Logging -import spark.Utils -import spark.SparkException -import spark.network.BufferMessage -import spark.network.ConnectionManagerId -import spark.network.netty.ShuffleCopier -import spark.serializer.Serializer +import org.apache.spark.Logging +import org.apache.spark.SparkException +import org.apache.spark.network.BufferMessage +import org.apache.spark.network.ConnectionManagerId +import org.apache.spark.network.netty.ShuffleCopier +import org.apache.spark.serializer.Serializer +import org.apache.spark.util.Utils /** @@ -111,7 +111,7 @@ object BlockFetcherIterator { protected def sendRequest(req: FetchRequest) { logDebug("Sending request for %d blocks (%s) from %s".format( - req.blocks.size, Utils.memoryBytesToString(req.size), req.address.hostPort)) + req.blocks.size, Utils.bytesToString(req.size), req.address.hostPort)) val cmId = new ConnectionManagerId(req.address.host, req.address.port) val blockMessageArray = new BlockMessageArray(req.blocks.map { case (blockId, size) => BlockMessage.fromGetBlock(GetBlock(blockId)) @@ -132,9 +132,10 @@ object BlockFetcherIterator { "Unexpected message " + blockMessage.getType + " received from " + cmId) } val blockId = blockMessage.getId + val networkSize = blockMessage.getData.limit() results.put(new FetchResult(blockId, sizeMap(blockId), () => dataDeserialize(blockId, blockMessage.getData, serializer))) - _remoteBytesRead += req.size + _remoteBytesRead += networkSize logDebug("Got remote block " + blockId + " after " + Utils.getUsedTimeMs(startTime)) } } @@ -309,7 +310,7 @@ object BlockFetcherIterator { } logDebug("Sending request for %d blocks (%s) from %s".format( - req.blocks.size, Utils.memoryBytesToString(req.size), req.address.host)) + req.blocks.size, Utils.bytesToString(req.size), req.address.host)) val cmId = new ConnectionManagerId(req.address.host, req.address.nettyPort) val cpier = new ShuffleCopier cpier.getBlocks(cmId, req.blocks, putResult) diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index e4ffa57ad2..60fdc5f2ee 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -15,7 +15,7 @@ * limitations under the License. 
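Several hunks in this region replace Utils.memoryBytesToString with the renamed Utils.bytesToString when logging sizes. For reference, a hedged, illustrative reimplementation of what such a helper does (this is not the actual Utils code):

    // Illustrative only: render a byte count with a binary-unit suffix,
    // matching the shape of the log lines above (e.g. "12.3 MB").
    def bytesToString(size: Long): String = {
      val KB = 1L << 10
      val MB = 1L << 20
      val GB = 1L << 30
      val (value, unit) =
        if (size >= GB) (size.toDouble / GB, "GB")
        else if (size >= MB) (size.toDouble / MB, "MB")
        else if (size >= KB) (size.toDouble / KB, "KB")
        else (size.toDouble, "B")
      "%.1f %s".format(value, unit)
    }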
*/ -package spark.storage +package org.apache.spark.storage import java.io.{InputStream, OutputStream} import java.nio.{ByteBuffer, MappedByteBuffer} @@ -27,14 +27,13 @@ import akka.dispatch.{Await, Future} import akka.util.Duration import akka.util.duration._ -import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} - import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream -import spark.{Logging, SparkEnv, SparkException, Utils} -import spark.network._ -import spark.serializer.Serializer -import spark.util.{ByteBufferInputStream, IdGenerator, MetadataCleaner, TimeStampedHashMap} +import org.apache.spark.{Logging, SparkEnv, SparkException} +import org.apache.spark.io.CompressionCodec +import org.apache.spark.network._ +import org.apache.spark.serializer.Serializer +import org.apache.spark.util._ import sun.nio.ch.DirectBuffer @@ -158,6 +157,13 @@ private[spark] class BlockManager( val metadataCleaner = new MetadataCleaner("BlockManager", this.dropOldBlocks) initialize() + // The compression codec to use. Note that the "lazy" val is necessary because we want to delay + // the initialization of the compression codec until it is first used. The reason is that a Spark + // program could be using a user-defined codec in a third party jar, which is loaded in + // Executor.updateDependencies. When the BlockManager is initialized, user level jars hasn't been + // loaded yet. + private lazy val compressionCodec: CompressionCodec = CompressionCodec.createCodec() + /** * Construct a BlockManager with a memory limit set based on system properties. */ @@ -919,18 +925,14 @@ private[spark] class BlockManager( * Wrap an output stream for compression if block compression is enabled for its block type */ def wrapForCompression(blockId: String, s: OutputStream): OutputStream = { - if (shouldCompress(blockId)) { - (new LZFOutputStream(s)).setFinishBlockOnFlush(true) - } else { - s - } + if (shouldCompress(blockId)) compressionCodec.compressedOutputStream(s) else s } /** * Wrap an input stream for compression if block compression is enabled for its block type */ def wrapForCompression(blockId: String, s: InputStream): InputStream = { - if (shouldCompress(blockId)) new LZFInputStream(s) else s + if (shouldCompress(blockId)) compressionCodec.compressedInputStream(s) else s } def dataSerialize( @@ -1002,43 +1004,43 @@ private[spark] object BlockManager extends Logging { } } - def blockIdsToExecutorLocations(blockIds: Array[String], env: SparkEnv, blockManagerMaster: BlockManagerMaster = null): HashMap[String, List[String]] = { + def blockIdsToBlockManagers( + blockIds: Array[String], + env: SparkEnv, + blockManagerMaster: BlockManagerMaster = null) + : Map[String, Seq[BlockManagerId]] = + { // env == null and blockManagerMaster != null is used in tests assert (env != null || blockManagerMaster != null) - val locationBlockIds: Seq[Seq[BlockManagerId]] = - if (env != null) { - env.blockManager.getLocationBlockIds(blockIds) - } else { - blockManagerMaster.getLocations(blockIds) - } + val blockLocations: Seq[Seq[BlockManagerId]] = if (env != null) { + env.blockManager.getLocationBlockIds(blockIds) + } else { + blockManagerMaster.getLocations(blockIds) + } - // Convert from block master locations to executor locations (we need that for task scheduling) - val executorLocations = new HashMap[String, List[String]]() + val blockManagers = new HashMap[String, Seq[BlockManagerId]] for (i <- 0 until blockIds.length) { - val blockId = blockIds(i) - val blockLocations = locationBlockIds(i) - - val executors = new 
HashSet[String]() - - if (env != null) { - for (bkLocation <- blockLocations) { - val executorHostPort = env.resolveExecutorIdToHostPort(bkLocation.executorId, bkLocation.host) - executors += executorHostPort - // logInfo("bkLocation = " + bkLocation + ", executorHostPort = " + executorHostPort) - } - } else { - // Typically while testing, etc - revert to simply using host. - for (bkLocation <- blockLocations) { - executors += bkLocation.host - // logInfo("bkLocation = " + bkLocation + ", executorHostPort = " + executorHostPort) - } - } - - executorLocations.put(blockId, executors.toSeq.toList) + blockManagers(blockIds(i)) = blockLocations(i) } + blockManagers.toMap + } - executorLocations + def blockIdsToExecutorIds( + blockIds: Array[String], + env: SparkEnv, + blockManagerMaster: BlockManagerMaster = null) + : Map[String, Seq[String]] = + { + blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.executorId)) } + def blockIdsToHosts( + blockIds: Array[String], + env: SparkEnv, + blockManagerMaster: BlockManagerMaster = null) + : Map[String, Seq[String]] = + { + blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.host)) + } } diff --git a/core/src/main/scala/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala index b36a6176c0..74207f59af 100644 --- a/core/src/main/scala/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} import java.util.concurrent.ConcurrentHashMap -import spark.Utils +import org.apache.spark.util.Utils /** * This class represent an unique identifier for a BlockManager. @@ -92,13 +92,13 @@ private[spark] class BlockManagerId private ( private[spark] object BlockManagerId { /** - * Returns a [[spark.storage.BlockManagerId]] for the given configuraiton. + * Returns a [[org.apache.spark.storage.BlockManagerId]] for the given configuraiton. * * @param execId ID of the executor. * @param host Host name of the block manager. * @param port Port of the block manager. * @param nettyPort Optional port for the Netty-based shuffle sender. - * @return A new [[spark.storage.BlockManagerId]]. + * @return A new [[org.apache.spark.storage.BlockManagerId]]. */ def apply(execId: String, host: String, port: Int, nettyPort: Int) = getCachedBlockManagerId(new BlockManagerId(execId, host, port, nettyPort)) diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index 3186f7c85b..cf463d6ffc 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -15,14 +15,15 @@ * limitations under the License. 
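The rewritten helpers layer cleanly: blockIdsToBlockManagers does the one real lookup, and the executor-ID and host views are just mapValues projections over its result, replacing the old hand-rolled executor-hostPort resolution. A toy sketch of that layering with made-up locations:

    case class BmId(executorId: String, host: String)  // stand-in for BlockManagerId

    def blockIdsToBlockManagers(blockIds: Array[String]): Map[String, Seq[BmId]] =
      Map("rdd_0_0" -> Seq(BmId("exec-1", "host-a"), BmId("exec-2", "host-b")))  // made-up data

    def blockIdsToExecutorIds(blockIds: Array[String]): Map[String, Seq[String]] =
      blockIdsToBlockManagers(blockIds).mapValues(_.map(_.executorId))

    def blockIdsToHosts(blockIds: Array[String]): Map[String, Seq[String]] =
      blockIdsToBlockManagers(blockIds).mapValues(_.map(_.host))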
*/ -package spark.storage +package org.apache.spark.storage import akka.actor.ActorRef import akka.dispatch.{Await, Future} import akka.pattern.ask import akka.util.Duration -import spark.{Logging, SparkException} +import org.apache.spark.{Logging, SparkException} +import org.apache.spark.storage.BlockManagerMessages._ private[spark] class BlockManagerMaster(var driverActor: ActorRef) extends Logging { diff --git a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala index 244000d952..c7b23ab094 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.util.{HashMap => JHashMap} @@ -28,7 +28,10 @@ import akka.pattern.ask import akka.util.Duration import akka.util.duration._ -import spark.{Logging, Utils, SparkException} +import org.apache.spark.{Logging, SparkException} +import org.apache.spark.storage.BlockManagerMessages._ +import org.apache.spark.util.Utils + /** * BlockManagerMasterActor is an actor on the master node to track statuses of @@ -330,7 +333,7 @@ object BlockManagerMasterActor { private val _blocks = new JHashMap[String, BlockStatus] logInfo("Registering block manager %s with %s RAM".format( - blockManagerId.hostPort, Utils.memoryBytesToString(maxMem))) + blockManagerId.hostPort, Utils.bytesToString(maxMem))) def updateLastSeenMs() { _lastSeenMs = System.currentTimeMillis() @@ -356,12 +359,12 @@ object BlockManagerMasterActor { if (storageLevel.useMemory) { _remainingMem -= memSize logInfo("Added %s in memory on %s (size: %s, free: %s)".format( - blockId, blockManagerId.hostPort, Utils.memoryBytesToString(memSize), - Utils.memoryBytesToString(_remainingMem))) + blockId, blockManagerId.hostPort, Utils.bytesToString(memSize), + Utils.bytesToString(_remainingMem))) } if (storageLevel.useDisk) { logInfo("Added %s on disk on %s (size: %s)".format( - blockId, blockManagerId.hostPort, Utils.memoryBytesToString(diskSize))) + blockId, blockManagerId.hostPort, Utils.bytesToString(diskSize))) } } else if (_blocks.containsKey(blockId)) { // If isValid is not true, drop the block. @@ -370,12 +373,12 @@ object BlockManagerMasterActor { if (blockStatus.storageLevel.useMemory) { _remainingMem += blockStatus.memSize logInfo("Removed %s on %s in memory (size: %s, free: %s)".format( - blockId, blockManagerId.hostPort, Utils.memoryBytesToString(memSize), - Utils.memoryBytesToString(_remainingMem))) + blockId, blockManagerId.hostPort, Utils.bytesToString(blockStatus.memSize), + Utils.bytesToString(_remainingMem))) } if (blockStatus.storageLevel.useDisk) { logInfo("Removed %s on %s on disk (size: %s)".format( - blockId, blockManagerId.hostPort, Utils.memoryBytesToString(diskSize))) + blockId, blockManagerId.hostPort, Utils.bytesToString(blockStatus.diskSize))) } } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala new file mode 100644 index 0000000000..24333a179c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.storage + +import java.io.{Externalizable, ObjectInput, ObjectOutput} + +import akka.actor.ActorRef + + +private[storage] object BlockManagerMessages { + ////////////////////////////////////////////////////////////////////////////////// + // Messages from the master to slaves. + ////////////////////////////////////////////////////////////////////////////////// + sealed trait ToBlockManagerSlave + + // Remove a block from the slaves that have it. This can only be used to remove + // blocks that the master knows about. + case class RemoveBlock(blockId: String) extends ToBlockManagerSlave + + // Remove all blocks belonging to a specific RDD. + case class RemoveRdd(rddId: Int) extends ToBlockManagerSlave + + + ////////////////////////////////////////////////////////////////////////////////// + // Messages from slaves to the master. + ////////////////////////////////////////////////////////////////////////////////// + sealed trait ToBlockManagerMaster + + case class RegisterBlockManager( + blockManagerId: BlockManagerId, + maxMemSize: Long, + sender: ActorRef) + extends ToBlockManagerMaster + + case class HeartBeat(blockManagerId: BlockManagerId) extends ToBlockManagerMaster + + class UpdateBlockInfo( + var blockManagerId: BlockManagerId, + var blockId: String, + var storageLevel: StorageLevel, + var memSize: Long, + var diskSize: Long) + extends ToBlockManagerMaster + with Externalizable { + + def this() = this(null, null, null, 0, 0) // For deserialization only + + override def writeExternal(out: ObjectOutput) { + blockManagerId.writeExternal(out) + out.writeUTF(blockId) + storageLevel.writeExternal(out) + out.writeLong(memSize) + out.writeLong(diskSize) + } + + override def readExternal(in: ObjectInput) { + blockManagerId = BlockManagerId(in) + blockId = in.readUTF() + storageLevel = StorageLevel(in) + memSize = in.readLong() + diskSize = in.readLong() + } + } + + object UpdateBlockInfo { + def apply(blockManagerId: BlockManagerId, + blockId: String, + storageLevel: StorageLevel, + memSize: Long, + diskSize: Long): UpdateBlockInfo = { + new UpdateBlockInfo(blockManagerId, blockId, storageLevel, memSize, diskSize) + } + + // For pattern-matching + def unapply(h: UpdateBlockInfo): Option[(BlockManagerId, String, StorageLevel, Long, Long)] = { + Some((h.blockManagerId, h.blockId, h.storageLevel, h.memSize, h.diskSize)) + } + } + + case class GetLocations(blockId: String) extends ToBlockManagerMaster + + case class GetLocationsMultipleBlockIds(blockIds: Array[String]) extends ToBlockManagerMaster + + case class GetPeers(blockManagerId: BlockManagerId, size: Int) extends ToBlockManagerMaster + + case class RemoveExecutor(execId: String) extends ToBlockManagerMaster + + case object StopBlockManagerMaster extends ToBlockManagerMaster + + case object GetMemoryStatus extends 
ToBlockManagerMaster + + case object ExpireDeadHosts extends ToBlockManagerMaster + + case object GetStorageStatus extends ToBlockManagerMaster +} diff --git a/core/src/main/scala/spark/storage/BlockManagerSlaveActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveActor.scala index 45cffad810..951503019f 100644 --- a/core/src/main/scala/spark/storage/BlockManagerSlaveActor.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveActor.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import akka.actor.Actor -import spark.{Logging, SparkException, Utils} +import org.apache.spark.storage.BlockManagerMessages._ /** diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala new file mode 100644 index 0000000000..24190cdd67 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala @@ -0,0 +1,48 @@ +package org.apache.spark.storage + +import com.codahale.metrics.{Gauge,MetricRegistry} + +import org.apache.spark.metrics.source.Source + + +private[spark] class BlockManagerSource(val blockManager: BlockManager) extends Source { + val metricRegistry = new MetricRegistry() + val sourceName = "BlockManager" + + metricRegistry.register(MetricRegistry.name("memory", "maxMem", "MBytes"), new Gauge[Long] { + override def getValue: Long = { + val storageStatusList = blockManager.master.getStorageStatus + val maxMem = storageStatusList.map(_.maxMem).reduce(_ + _) + maxMem / 1024 / 1024 + } + }) + + metricRegistry.register(MetricRegistry.name("memory", "remainingMem", "MBytes"), new Gauge[Long] { + override def getValue: Long = { + val storageStatusList = blockManager.master.getStorageStatus + val remainingMem = storageStatusList.map(_.memRemaining).reduce(_ + _) + remainingMem / 1024 / 1024 + } + }) + + metricRegistry.register(MetricRegistry.name("memory", "memUsed", "MBytes"), new Gauge[Long] { + override def getValue: Long = { + val storageStatusList = blockManager.master.getStorageStatus + val maxMem = storageStatusList.map(_.maxMem).reduce(_ + _) + val remainingMem = storageStatusList.map(_.memRemaining).reduce(_ + _) + (maxMem - remainingMem) / 1024 / 1024 + } + }) + + metricRegistry.register(MetricRegistry.name("disk", "diskSpaceUsed", "MBytes"), new Gauge[Long] { + override def getValue: Long = { + val storageStatusList = blockManager.master.getStorageStatus + val diskSpaceUsed = storageStatusList + .flatMap(_.blocks.values.map(_.diskSize)) + .reduceOption(_ + _) + .getOrElse(0L) + + diskSpaceUsed / 1024 / 1024 + } + }) +} diff --git a/core/src/main/scala/spark/storage/BlockManagerWorker.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala index 39064bce92..678c38203c 100644 --- a/core/src/main/scala/spark/storage/BlockManagerWorker.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala @@ -15,12 +15,13 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer -import spark.{Logging, Utils} -import spark.network._ +import org.apache.spark.{Logging} +import org.apache.spark.network._ +import org.apache.spark.util.Utils /** * A network interface for BlockManager. 
Each slave should have one diff --git a/core/src/main/scala/spark/storage/BlockMessage.scala b/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala index ab72dbb62b..d8fa6a91d1 100644 --- a/core/src/main/scala/spark/storage/BlockMessage.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala @@ -15,15 +15,14 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer import scala.collection.mutable.StringBuilder import scala.collection.mutable.ArrayBuffer -import spark._ -import spark.network._ +import org.apache.spark.network._ private[spark] case class GetBlock(id: String) private[spark] case class GotBlock(id: String, data: ByteBuffer) diff --git a/core/src/main/scala/spark/storage/BlockMessageArray.scala b/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala index b0229d6124..0aaf846b5b 100644 --- a/core/src/main/scala/spark/storage/BlockMessageArray.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala @@ -15,15 +15,14 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer -import scala.collection.mutable.StringBuilder import scala.collection.mutable.ArrayBuffer -import spark._ -import spark.network._ +import org.apache.spark._ +import org.apache.spark.network._ private[spark] class BlockMessageArray(var blockMessages: Seq[BlockMessage]) extends Seq[BlockMessage] with Logging { @@ -113,7 +112,7 @@ private[spark] object BlockMessageArray { def main(args: Array[String]) { val blockMessages = - (0 until 10).map(i => { + (0 until 10).map { i => if (i % 2 == 0) { val buffer = ByteBuffer.allocate(100) buffer.clear @@ -121,7 +120,7 @@ private[spark] object BlockMessageArray { } else { BlockMessage.fromGetBlock(GetBlock(i.toString)) } - }) + } val blockMessageArray = new BlockMessageArray(blockMessages) println("Block message array created") diff --git a/core/src/main/scala/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala index 01ed6e8c1f..39f103297f 100644 --- a/core/src/main/scala/spark/storage/BlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala @@ -15,9 +15,7 @@ * limitations under the License. */ -package spark.storage - -import java.nio.ByteBuffer +package org.apache.spark.storage /** diff --git a/core/src/main/scala/spark/storage/BlockStore.scala b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala index c8db0022b0..fa834371f4 100644 --- a/core/src/main/scala/spark/storage/BlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer -import spark.Logging +import org.apache.spark.Logging /** * Abstract class to store blocks diff --git a/core/src/main/scala/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index 3495d653bd..fc25ef0fae 100644 --- a/core/src/main/scala/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -15,7 +15,7 @@ * limitations under the License. 
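The BlockManagerSource added a few hunks above feeds the metrics system by registering Codahale Gauges whose getValue recomputes from the master's storage status on every poll rather than caching. A reduced sketch of that pattern with a single gauge, using made-up totals in place of getStorageStatus:

    import com.codahale.metrics.{Gauge, MetricRegistry}

    object GaugeSketch {
      def main(args: Array[String]) {
        val registry = new MetricRegistry()
        registry.register(MetricRegistry.name("memory", "memUsed", "MBytes"), new Gauge[Long] {
          override def getValue: Long = {
            val maxMem = 4L * 1024 * 1024 * 1024        // made-up cluster totals standing in
            val remainingMem = 1L * 1024 * 1024 * 1024  // for blockManager.master.getStorageStatus
            (maxMem - remainingMem) / 1024 / 1024       // recomputed on every metrics poll
          }
        })
        println(registry.getGauges.size)  // 1
      }
    }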
*/ -package spark.storage +package org.apache.spark.storage import java.io.{File, FileOutputStream, OutputStream, RandomAccessFile} import java.nio.ByteBuffer @@ -28,12 +28,12 @@ import scala.collection.mutable.ArrayBuffer import it.unimi.dsi.fastutil.io.FastBufferedOutputStream -import spark.Utils -import spark.executor.ExecutorExitCode -import spark.serializer.{Serializer, SerializationStream} -import spark.Logging -import spark.network.netty.ShuffleSender -import spark.network.netty.PathResolver +import org.apache.spark.executor.ExecutorExitCode +import org.apache.spark.serializer.{Serializer, SerializationStream} +import org.apache.spark.Logging +import org.apache.spark.network.netty.ShuffleSender +import org.apache.spark.network.netty.PathResolver +import org.apache.spark.util.Utils /** @@ -66,7 +66,6 @@ private class DiskStore(blockManager: BlockManager, rootDirs: String) override def close() { if (initialized) { objOut.close() - bs.close() channel = null bs = null objOut = null @@ -148,7 +147,7 @@ private class DiskStore(blockManager: BlockManager, rootDirs: String) channel.close() val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file on disk in %d ms".format( - blockId, Utils.memoryBytesToString(bytes.limit), (finishTime - startTime))) + blockId, Utils.bytesToString(bytes.limit), (finishTime - startTime))) } private def getFileBytes(file: File): ByteBuffer = { @@ -182,7 +181,7 @@ private class DiskStore(blockManager: BlockManager, rootDirs: String) val timeTaken = System.currentTimeMillis - startTime logDebug("Block %s stored as %s file on disk in %d ms".format( - blockId, Utils.memoryBytesToString(length), timeTaken)) + blockId, Utils.bytesToString(length), timeTaken)) if (returnValues) { // Return a byte buffer for the contents of the file diff --git a/core/src/main/scala/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala index b5a86b85a7..3b3b2342fa 100644 --- a/core/src/main/scala/spark/storage/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.util.LinkedHashMap import java.util.concurrent.ArrayBlockingQueue -import spark.{SizeEstimator, Utils} import java.nio.ByteBuffer import collection.mutable.ArrayBuffer +import org.apache.spark.util.{SizeEstimator, Utils} /** * Stores blocks in memory, either as ArrayBuffers of deserialized Java objects or as @@ -38,7 +38,7 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long) // blocks from the memory store. 
private val putLock = new Object() - logInfo("MemoryStore started with capacity %s.".format(Utils.memoryBytesToString(maxMemory))) + logInfo("MemoryStore started with capacity %s.".format(Utils.bytesToString(maxMemory))) def freeMemory: Long = maxMemory - currentMemory @@ -164,10 +164,10 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long) currentMemory += size if (deserialized) { logInfo("Block %s stored as values to memory (estimated size %s, free %s)".format( - blockId, Utils.memoryBytesToString(size), Utils.memoryBytesToString(freeMemory))) + blockId, Utils.bytesToString(size), Utils.bytesToString(freeMemory))) } else { logInfo("Block %s stored as bytes to memory (size %s, free %s)".format( - blockId, Utils.memoryBytesToString(size), Utils.memoryBytesToString(freeMemory))) + blockId, Utils.bytesToString(size), Utils.bytesToString(freeMemory))) } true } else { diff --git a/core/src/main/scala/spark/storage/PutResult.scala b/core/src/main/scala/org/apache/spark/storage/PutResult.scala index 3a0974fe15..2eba2f06b5 100644 --- a/core/src/main/scala/spark/storage/PutResult.scala +++ b/core/src/main/scala/org/apache/spark/storage/PutResult.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer diff --git a/core/src/main/scala/spark/storage/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala index 8a7a6f9ed3..9da11efb57 100644 --- a/core/src/main/scala/spark/storage/ShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage -import spark.serializer.Serializer +import org.apache.spark.serializer.Serializer private[spark] diff --git a/core/src/main/scala/spark/storage/StorageLevel.scala b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala index f52650988c..755f1a760e 100644 --- a/core/src/main/scala/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} @@ -23,7 +23,7 @@ import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} * Flags for controlling the storage of an RDD. Each StorageLevel records whether to use memory, * whether to drop the RDD to disk if it falls out of memory, whether to keep the data in memory * in a serialized format, and whether to replicate the RDD partitions on multiple nodes. - * The [[spark.storage.StorageLevel$]] singleton object contains some static constants for + * The [[org.apache.spark.storage.StorageLevel$]] singleton object contains some static constants for * commonly useful storage levels. To create your own storage level object, use the factor method * of the singleton object (`StorageLevel(...)`). */ diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala index 2aeed4ea3c..2bb7715696 100644 --- a/core/src/main/scala/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala @@ -15,10 +15,11 @@ * limitations under the License. 
*/ -package spark.storage +package org.apache.spark.storage -import spark.{Utils, SparkContext} +import org.apache.spark.{SparkContext} import BlockManagerMasterActor.BlockStatus +import org.apache.spark.util.Utils private[spark] case class StorageStatus(blockManagerId: BlockManagerId, maxMem: Long, @@ -42,9 +43,9 @@ case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, numCachedPartitions: Int, numPartitions: Int, memSize: Long, diskSize: Long) extends Ordered[RDDInfo] { override def toString = { - import Utils.memoryBytesToString + import Utils.bytesToString "RDD \"%s\" (%d) Storage: %s; CachedPartitions: %d; TotalPartitions: %d; MemorySize: %s; DiskSize: %s".format(name, id, - storageLevel.toString, numCachedPartitions, numPartitions, memoryBytesToString(memSize), memoryBytesToString(diskSize)) + storageLevel.toString, numCachedPartitions, numPartitions, bytesToString(memSize), bytesToString(diskSize)) } override def compare(that: RDDInfo) = { diff --git a/core/src/main/scala/spark/storage/ThreadingTest.scala b/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala index b3ab1ff4b4..f2ae8dd97d 100644 --- a/core/src/main/scala/spark/storage/ThreadingTest.scala +++ b/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import akka.actor._ -import spark.KryoSerializer import java.util.concurrent.ArrayBlockingQueue import util.Random +import org.apache.spark.serializer.KryoSerializer /** * This class tests the BlockManager and MemoryStore for thread safety and diff --git a/core/src/main/scala/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index ca6088ad93..7211dbc7c6 100644 --- a/core/src/main/scala/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -15,22 +15,22 @@ * limitations under the License. 
*/ -package spark.ui - -import annotation.tailrec +package org.apache.spark.ui import javax.servlet.http.{HttpServletResponse, HttpServletRequest} +import scala.annotation.tailrec +import scala.util.{Try, Success, Failure} +import scala.xml.Node + import net.liftweb.json.{JValue, pretty, render} import org.eclipse.jetty.server.{Server, Request, Handler} import org.eclipse.jetty.server.handler.{ResourceHandler, HandlerList, ContextHandler, AbstractHandler} import org.eclipse.jetty.util.thread.QueuedThreadPool -import scala.util.{Try, Success, Failure} -import scala.xml.Node +import org.apache.spark.Logging -import spark.Logging /** Utilities for launching a web server using Jetty's HTTP Server class */ private[spark] object JettyUtils extends Logging { @@ -48,7 +48,7 @@ private[spark] object JettyUtils extends Logging { implicit def textResponderToHandler(responder: Responder[String]): Handler = createHandler(responder, "text/plain") - private def createHandler[T <% AnyRef](responder: Responder[T], contentType: String, + def createHandler[T <% AnyRef](responder: Responder[T], contentType: String, extractFn: T => String = (in: Any) => in.toString): Handler = { new AbstractHandler { def handle(target: String, @@ -117,7 +117,6 @@ private[spark] object JettyUtils extends Logging { Try { server.start() } match { case s: Success[_] => - sys.addShutdownHook(server.stop()) // Be kind, un-bind (server, server.getConnectors.head.getLocalPort) case f: Failure[_] => server.stop() diff --git a/core/src/main/scala/spark/ui/Page.scala b/core/src/main/scala/org/apache/spark/ui/Page.scala index a31e750d06..b2a069a375 100644 --- a/core/src/main/scala/spark/ui/Page.scala +++ b/core/src/main/scala/org/apache/spark/ui/Page.scala @@ -15,6 +15,8 @@ * limitations under the License. */ -package spark.ui +package org.apache.spark.ui -private[spark] object Page extends Enumeration { val Storage, Jobs, Environment = Value } +private[spark] object Page extends Enumeration { + val Stages, Storage, Environment, Executors = Value +} diff --git a/core/src/main/scala/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index 9396f22063..ad456ea565 100644 --- a/core/src/main/scala/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -15,21 +15,23 @@ * limitations under the License. 
*/ -package spark.ui +package org.apache.spark.ui import javax.servlet.http.HttpServletRequest import org.eclipse.jetty.server.{Handler, Server} -import spark.{Logging, SparkContext, Utils} -import spark.ui.env.EnvironmentUI -import spark.ui.storage.BlockManagerUI -import spark.ui.jobs.JobProgressUI -import spark.ui.JettyUtils._ +import org.apache.spark.{Logging, SparkContext, SparkEnv} +import org.apache.spark.ui.env.EnvironmentUI +import org.apache.spark.ui.exec.ExecutorsUI +import org.apache.spark.ui.storage.BlockManagerUI +import org.apache.spark.ui.jobs.JobProgressUI +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.util.Utils /** Top level user interface for Spark */ private[spark] class SparkUI(sc: SparkContext) extends Logging { - val host = Utils.localHostName() + val host = Option(System.getenv("SPARK_PUBLIC_DNS")).getOrElse(Utils.localHostName()) val port = Option(System.getProperty("spark.ui.port")).getOrElse(SparkUI.DEFAULT_PORT).toInt var boundPort: Option[Int] = None var server: Option[Server] = None @@ -41,7 +43,13 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { val storage = new BlockManagerUI(sc) val jobs = new JobProgressUI(sc) val env = new EnvironmentUI(sc) - val allHandlers = storage.getHandlers ++ jobs.getHandlers ++ env.getHandlers ++ handlers + val exec = new ExecutorsUI(sc) + + // Add MetricsServlet handlers by default + val metricsServletHandlers = SparkEnv.get.metricsSystem.getServletHandlers + + val allHandlers = storage.getHandlers ++ jobs.getHandlers ++ env.getHandlers ++ + exec.getHandlers ++ metricsServletHandlers ++ handlers /** Bind the HTTP server which backs this web interface */ def bind() { @@ -51,9 +59,9 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { server = Some(srv) boundPort = Some(usedPort) } catch { - case e: Exception => - logError("Failed to create Spark JettyUtils", e) - System.exit(1) + case e: Exception => + logError("Failed to create Spark JettyUtils", e) + System.exit(1) } } @@ -64,6 +72,7 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { // This server must register all handlers, including JobProgressUI, before binding // JobProgressUI registers a listener with SparkContext, which requires sc to initialize jobs.start() + exec.start() } def stop() { @@ -74,6 +83,6 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { } private[spark] object SparkUI { - val DEFAULT_PORT = "33000" - val STATIC_RESOURCE_DIR = "spark/ui/static" + val DEFAULT_PORT = "3030" + val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static" } diff --git a/core/src/main/scala/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index b1d11954dd..ce1acf564c 100644 --- a/core/src/main/scala/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.ui +package org.apache.spark.ui import scala.xml.Node -import spark.SparkContext +import org.apache.spark.SparkContext /** Utility functions for generating XML pages with spark content. 
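 * Pages are composed with headerSparkPage, and tabular data is rendered with listingTable;
 * for example, the environment page builds its tables via
 * listingTable(Seq("Name", "Value"), propertyRow, props, fixedWidth = true).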
*/ private[spark] object UIUtils { @@ -28,64 +28,53 @@ private[spark] object UIUtils { /** Returns a spark page with correctly formatted headers */ def headerSparkPage(content: => Seq[Node], sc: SparkContext, title: String, page: Page.Value) : Seq[Node] = { + val jobs = page match { + case Stages => <li class="active"><a href="/stages">Stages</a></li> + case _ => <li><a href="/stages">Stages</a></li> + } val storage = page match { case Storage => <li class="active"><a href="/storage">Storage</a></li> case _ => <li><a href="/storage">Storage</a></li> } - val jobs = page match { - case Jobs => <li class="active"><a href="/stages">Jobs</a></li> - case _ => <li><a href="/stages">Jobs</a></li> - } val environment = page match { case Environment => <li class="active"><a href="/environment">Environment</a></li> case _ => <li><a href="/environment">Environment</a></li> } + val executors = page match { + case Executors => <li class="active"><a href="/executors">Executors</a></li> + case _ => <li><a href="/executors">Executors</a></li> + } <html> <head> <meta http-equiv="Content-type" content="text/html; charset=utf-8" /> <link rel="stylesheet" href="/static/bootstrap.min.css" type="text/css" /> <link rel="stylesheet" href="/static/webui.css" type="text/css" /> - <link rel="stylesheet" href="/static/bootstrap-responsive.min.css" type="text/css" /> <script src="/static/sorttable.js"></script> <title>{sc.appName} - {title}</title> - <style type="text/css"> - table.sortable thead {{ cursor: pointer; }} - </style> </head> <body> - <div class="container"> - - <div class="row"> - <div class="span12"> - <div class="navbar"> - <div class="navbar-inner"> - <div class="container"> - <div class="brand"><img src="/static/spark-logo-77x50px-hd.png" /></div> - <ul class="nav"> - {storage} - {jobs} - {environment} - </ul> - <ul id="infolist"> - <li>Application: <strong>{sc.appName}</strong></li> - <li>Master: <strong>{sc.master}</strong></li> - <li>Executors: <strong>{sc.getExecutorStorageStatus.size}</strong></li> - </ul> - </div> - </div> - </div> - </div> + <div class="navbar navbar-static-top"> + <div class="navbar-inner"> + <a href="/" class="brand"><img src="/static/spark-logo-77x50px-hd.png" /></a> + <ul class="nav"> + {jobs} + {storage} + {environment} + {executors} + </ul> + <p class="navbar-text pull-right"><strong>{sc.appName}</strong> application UI</p> </div> + </div> - <div class="row" style="padding-top: 5px;"> + <div class="container-fluid"> + <div class="row-fluid"> <div class="span12"> - <h1 style="vertical-align: bottom; display: inline-block;"> + <h3 style="vertical-align: bottom; display: inline-block;"> {title} - </h1> + </h3> </div> </div> - <hr/> {content} </div> </body> @@ -98,23 +87,18 @@ private[spark] object UIUtils { <head> <meta http-equiv="Content-type" content="text/html; charset=utf-8" /> <link rel="stylesheet" href="/static/bootstrap.min.css" type="text/css" /> - <link rel="stylesheet" href="/static/bootstrap-responsive.min.css" type="text/css" /> + <link rel="stylesheet" href="/static/webui.css" type="text/css" /> <script src="/static/sorttable.js"></script> <title>{title}</title> - <style type="text/css"> - table.sortable thead {{ cursor: pointer; }} - </style> </head> <body> - <div class="container"> - <div class="row"> - <div class="span2"> - <img src="/static/spark_logo.png" /> - </div> - <div class="span10"> - <h1 style="vertical-align: bottom; margin-top: 40px; display: inline-block;"> + <div class="container-fluid"> + <div class="row-fluid"> + <div class="span12"> + <h3 
style="vertical-align: middle; display: inline-block;"> + <img src="/static/spark-logo-77x50px-hd.png" style="margin-right: 15px;" /> {title} - </h1> + </h3> </div> </div> {content} @@ -124,9 +108,21 @@ private[spark] object UIUtils { } /** Returns an HTML table constructed by generating a row for each object in a sequence. */ - def listingTable[T](headers: Seq[String], makeRow: T => Seq[Node], rows: Seq[T]): Seq[Node] = { - <table class="table table-bordered table-striped table-condensed sortable"> - <thead>{headers.map(h => <th>{h}</th>)}</thead> + def listingTable[T]( + headers: Seq[String], + makeRow: T => Seq[Node], + rows: Seq[T], + fixedWidth: Boolean = false): Seq[Node] = { + + val colWidth = 100.toDouble / headers.size + val colWidthAttr = if (fixedWidth) colWidth + "%" else "" + var tableClass = "table table-bordered table-striped table-condensed sortable" + if (fixedWidth) { + tableClass += " table-fixed" + } + + <table class={tableClass}> + <thead>{headers.map(h => <th width={colWidthAttr}>{h}</th>)}</thead> <tbody> {rows.map(r => makeRow(r))} </tbody> diff --git a/core/src/main/scala/spark/ui/UIWorkloadGenerator.scala b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala index a80e2d7002..0ecb22d2f9 100644 --- a/core/src/main/scala/spark/ui/UIWorkloadGenerator.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala @@ -15,12 +15,14 @@ * limitations under the License. */ -package spark.ui +package org.apache.spark.ui import scala.util.Random -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.scheduler.cluster.SchedulingMode + /** * Continuously generates jobs that expose various features of the WebUI (internal testing tool). @@ -29,18 +31,29 @@ import spark.SparkContext._ */ private[spark] object UIWorkloadGenerator { val NUM_PARTITIONS = 100 - val INTER_JOB_WAIT_MS = 500 + val INTER_JOB_WAIT_MS = 5000 def main(args: Array[String]) { + if (args.length < 2) { + println("usage: ./spark-class spark.ui.UIWorkloadGenerator [master] [FIFO|FAIR]") + System.exit(1) + } val master = args(0) + val schedulingMode = SchedulingMode.withName(args(1)) val appName = "Spark UI Tester" + + if (schedulingMode == SchedulingMode.FAIR) { + System.setProperty("spark.cluster.schedulingmode", "FAIR") + } val sc = new SparkContext(master, appName) - // NOTE: Right now there is no easy way for us to show spark.job.annotation for a given phase, - // but we pass it here anyways since it will be useful once we do. 
- def setName(s: String) = { - sc.addLocalProperties("spark.job.annotation", s) + def setProperties(s: String) = { + if(schedulingMode == SchedulingMode.FAIR) { + sc.setLocalProperty("spark.scheduler.cluster.fair.pool", s) + } + sc.setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, s) } + val baseData = sc.makeRDD(1 to NUM_PARTITIONS * 10, NUM_PARTITIONS) def nextFloat() = (new Random()).nextFloat() @@ -73,14 +86,18 @@ private[spark] object UIWorkloadGenerator { while (true) { for ((desc, job) <- jobs) { - try { - setName(desc) - job() - println("Job funished: " + desc) - } catch { - case e: Exception => - println("Job Failed: " + desc) - } + new Thread { + override def run() { + try { + setProperties(desc) + job() + println("Job funished: " + desc) + } catch { + case e: Exception => + println("Job Failed: " + desc) + } + } + }.start Thread.sleep(INTER_JOB_WAIT_MS) } } diff --git a/core/src/main/scala/spark/ui/env/EnvironmentUI.scala b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala index 5ae7935ed4..c5bf2acc9e 100644 --- a/core/src/main/scala/spark/ui/env/EnvironmentUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala @@ -15,22 +15,21 @@ * limitations under the License. */ -package spark.ui.env +package org.apache.spark.ui.env import javax.servlet.http.HttpServletRequest -import org.eclipse.jetty.server.Handler - import scala.collection.JavaConversions._ import scala.util.Properties +import scala.xml.Node -import spark.ui.JettyUtils._ -import spark.ui.UIUtils.headerSparkPage -import spark.ui.Page.Environment -import spark.SparkContext -import spark.ui.UIUtils +import org.eclipse.jetty.server.Handler + +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.ui.UIUtils +import org.apache.spark.ui.Page.Environment +import org.apache.spark.SparkContext -import scala.xml.Node private[spark] class EnvironmentUI(sc: SparkContext) { @@ -44,22 +43,24 @@ private[spark] class EnvironmentUI(sc: SparkContext) { ("Java Home", Properties.javaHome), ("Scala Version", Properties.versionString), ("Scala Home", Properties.scalaHome) - ) + ).sorted def jvmRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr> - def jvmTable = UIUtils.listingTable(Seq("Name", "Value"), jvmRow, jvmInformation) + def jvmTable = + UIUtils.listingTable(Seq("Name", "Value"), jvmRow, jvmInformation, fixedWidth = true) val properties = System.getProperties.iterator.toSeq - val classPathProperty = properties - .filter{case (k, v) => k.contains("java.class.path")} - .headOption - .getOrElse("", "") - val sparkProperties = properties.filter(_._1.startsWith("spark")) - val otherProperties = properties.diff(sparkProperties :+ classPathProperty) + val classPathProperty = properties.find { case (k, v) => + k.contains("java.class.path") + }.getOrElse(("", "")) + val sparkProperties = properties.filter(_._1.startsWith("spark")).sorted + val otherProperties = properties.diff(sparkProperties :+ classPathProperty).sorted val propertyHeaders = Seq("Name", "Value") def propertyRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr> - val sparkPropertyTable = UIUtils.listingTable(propertyHeaders, propertyRow, sparkProperties) - val otherPropertyTable = UIUtils.listingTable(propertyHeaders, propertyRow, otherProperties) + val sparkPropertyTable = + UIUtils.listingTable(propertyHeaders, propertyRow, sparkProperties, fixedWidth = true) + val otherPropertyTable = + UIUtils.listingTable(propertyHeaders, propertyRow, otherProperties, fixedWidth = true) val 
classPathEntries = classPathProperty._2 .split(System.getProperty("path.separator", ":")) @@ -67,20 +68,24 @@ private[spark] class EnvironmentUI(sc: SparkContext) { .map(e => (e, "System Classpath")) val addedJars = sc.addedJars.iterator.toSeq.map{case (path, time) => (path, "Added By User")} val addedFiles = sc.addedFiles.iterator.toSeq.map{case (path, time) => (path, "Added By User")} - val classPath = addedJars ++ addedFiles ++ classPathEntries + val classPath = (addedJars ++ addedFiles ++ classPathEntries).sorted val classPathHeaders = Seq("Resource", "Source") def classPathRow(data: (String, String)) = <tr><td>{data._1}</td><td>{data._2}</td></tr> - val classPathTable = UIUtils.listingTable(classPathHeaders, classPathRow, classPath) + val classPathTable = + UIUtils.listingTable(classPathHeaders, classPathRow, classPath, fixedWidth = true) val content = <span> - <h2>Runtime Information</h2> {jvmTable} - <h2>Spark Properties</h2> {sparkPropertyTable} - <h2>System Properties</h2> {otherPropertyTable} - <h2>Classpath Entries</h2> {classPathTable} + <h4>Runtime Information</h4> {jvmTable} + <h4>Spark Properties</h4> + {sparkPropertyTable} + <h4>System Properties</h4> + {otherPropertyTable} + <h4>Classpath Entries</h4> + {classPathTable} </span> - headerSparkPage(content, sc, "Environment", Environment) + UIUtils.headerSparkPage(content, sc, "Environment", Environment) } } diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala new file mode 100644 index 0000000000..6e56c22d04 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala @@ -0,0 +1,137 @@ +package org.apache.spark.ui.exec + +import javax.servlet.http.HttpServletRequest + +import scala.collection.mutable.{HashMap, HashSet} +import scala.xml.Node + +import org.eclipse.jetty.server.Handler + +import org.apache.spark.{ExceptionFailure, Logging, SparkContext} +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.scheduler.{SparkListenerTaskStart, SparkListenerTaskEnd, SparkListener} +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.ui.Page.Executors +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils + + +private[spark] class ExecutorsUI(val sc: SparkContext) { + + private var _listener: Option[ExecutorsListener] = None + def listener = _listener.get + + def start() { + _listener = Some(new ExecutorsListener) + sc.addSparkListener(listener) + } + + def getHandlers = Seq[(String, Handler)]( + ("/executors", (request: HttpServletRequest) => render(request)) + ) + + def render(request: HttpServletRequest): Seq[Node] = { + val storageStatusList = sc.getExecutorStorageStatus + + val maxMem = storageStatusList.map(_.maxMem).fold(0L)(_+_) + val memUsed = storageStatusList.map(_.memUsed()).fold(0L)(_+_) + val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)).fold(0L)(_+_) + + val execHead = Seq("Executor ID", "Address", "RDD blocks", "Memory used", "Disk used", + "Active tasks", "Failed tasks", "Complete tasks", "Total tasks") + + def execRow(kv: Seq[String]) = { + <tr> + <td>{kv(0)}</td> + <td>{kv(1)}</td> + <td>{kv(2)}</td> + <td sorttable_customkey={kv(3)}> + {Utils.bytesToString(kv(3).toLong)} / {Utils.bytesToString(kv(4).toLong)} + </td> + <td sorttable_customkey={kv(5)}> + {Utils.bytesToString(kv(5).toLong)} + </td> + <td>{kv(6)}</td> + <td>{kv(7)}</td> + <td>{kv(8)}</td> + <td>{kv(9)}</td> + </tr> 
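+      // The ten fields returned by getExecInfo map onto nine cells: kv(3) and kv(4)
+      // (memory used / maximum memory) are rendered together in the memory column.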
+ } + + val execInfo = for (b <- 0 until storageStatusList.size) yield getExecInfo(b) + val execTable = UIUtils.listingTable(execHead, execRow, execInfo) + + val content = + <div class="row-fluid"> + <div class="span12"> + <ul class="unstyled"> + <li><strong>Memory:</strong> + {Utils.bytesToString(memUsed)} Used + ({Utils.bytesToString(maxMem)} Total) </li> + <li><strong>Disk:</strong> {Utils.bytesToString(diskSpaceUsed)} Used </li> + </ul> + </div> + </div> + <div class = "row"> + <div class="span12"> + {execTable} + </div> + </div>; + + UIUtils.headerSparkPage(content, sc, "Executors (" + execInfo.size + ")", Executors) + } + + def getExecInfo(a: Int): Seq[String] = { + val execId = sc.getExecutorStorageStatus(a).blockManagerId.executorId + val hostPort = sc.getExecutorStorageStatus(a).blockManagerId.hostPort + val rddBlocks = sc.getExecutorStorageStatus(a).blocks.size.toString + val memUsed = sc.getExecutorStorageStatus(a).memUsed().toString + val maxMem = sc.getExecutorStorageStatus(a).maxMem.toString + val diskUsed = sc.getExecutorStorageStatus(a).diskUsed().toString + val activeTasks = listener.executorToTasksActive.get(a.toString).map(l => l.size).getOrElse(0) + val failedTasks = listener.executorToTasksFailed.getOrElse(a.toString, 0) + val completedTasks = listener.executorToTasksComplete.getOrElse(a.toString, 0) + val totalTasks = activeTasks + failedTasks + completedTasks + + Seq( + execId, + hostPort, + rddBlocks, + memUsed, + maxMem, + diskUsed, + activeTasks.toString, + failedTasks.toString, + completedTasks.toString, + totalTasks.toString + ) + } + + private[spark] class ExecutorsListener extends SparkListener with Logging { + val executorToTasksActive = HashMap[String, HashSet[TaskInfo]]() + val executorToTasksComplete = HashMap[String, Int]() + val executorToTasksFailed = HashMap[String, Int]() + + override def onTaskStart(taskStart: SparkListenerTaskStart) { + val eid = taskStart.taskInfo.executorId + val activeTasks = executorToTasksActive.getOrElseUpdate(eid, new HashSet[TaskInfo]()) + activeTasks += taskStart.taskInfo + } + + override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + val eid = taskEnd.taskInfo.executorId + val activeTasks = executorToTasksActive.getOrElseUpdate(eid, new HashSet[TaskInfo]()) + activeTasks -= taskEnd.taskInfo + val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) = + taskEnd.reason match { + case e: ExceptionFailure => + executorToTasksFailed(eid) = executorToTasksFailed.getOrElse(eid, 0) + 1 + (Some(e), e.metrics) + case _ => + executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1 + (None, Option(taskEnd.taskMetrics)) + } + } + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala new file mode 100644 index 0000000000..3b428effaf --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui.jobs + +import javax.servlet.http.HttpServletRequest + +import scala.xml.{NodeSeq, Node} + +import org.apache.spark.scheduler.cluster.SchedulingMode +import org.apache.spark.ui.Page._ +import org.apache.spark.ui.UIUtils._ + + +/** Page showing list of all ongoing and recently finished stages and pools*/ +private[spark] class IndexPage(parent: JobProgressUI) { + def listener = parent.listener + + def render(request: HttpServletRequest): Seq[Node] = { + listener.synchronized { + val activeStages = listener.activeStages.toSeq + val completedStages = listener.completedStages.reverse.toSeq + val failedStages = listener.failedStages.reverse.toSeq + val now = System.currentTimeMillis() + + var activeTime = 0L + for (tasks <- listener.stageToTasksActive.values; t <- tasks) { + activeTime += t.timeRunning(now) + } + + val activeStagesTable = new StageTable(activeStages.sortBy(_.submissionTime).reverse, parent) + val completedStagesTable = new StageTable(completedStages.sortBy(_.submissionTime).reverse, parent) + val failedStagesTable = new StageTable(failedStages.sortBy(_.submissionTime).reverse, parent) + + val pools = listener.sc.getAllPools + val poolTable = new PoolTable(pools, listener) + val summary: NodeSeq = + <div> + <ul class="unstyled"> + <li> + <strong>Total Duration: </strong> + {parent.formatDuration(now - listener.sc.startTime)} + </li> + <li><strong>Scheduling Mode:</strong> {parent.sc.getSchedulingMode}</li> + <li> + <a href="#active"><strong>Active Stages:</strong></a> + {activeStages.size} + </li> + <li> + <a href="#completed"><strong>Completed Stages:</strong></a> + {completedStages.size} + </li> + <li> + <a href="#failed"><strong>Failed Stages:</strong></a> + {failedStages.size} + </li> + </ul> + </div> + + val content = summary ++ + {if (listener.sc.getSchedulingMode == SchedulingMode.FAIR) { + <h4>{pools.size} Fair Scheduler Pools</h4> ++ poolTable.toNodeSeq + } else { + Seq() + }} ++ + <h4 id="active">Active Stages ({activeStages.size})</h4> ++ + activeStagesTable.toNodeSeq++ + <h4 id="completed">Completed Stages ({completedStages.size})</h4> ++ + completedStagesTable.toNodeSeq++ + <h4 id ="failed">Failed Stages ({failedStages.size})</h4> ++ + failedStagesTable.toNodeSeq + + headerSparkPage(content, parent.sc, "Spark Stages", Stages) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala new file mode 100644 index 0000000000..86e0af0399 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala @@ -0,0 +1,156 @@ +package org.apache.spark.ui.jobs + +import scala.Seq +import scala.collection.mutable.{ListBuffer, HashMap, HashSet} + +import org.apache.spark.{ExceptionFailure, SparkContext, Success} +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.executor.TaskMetrics +import collection.mutable + +/** + * Tracks task-level information to be displayed in the UI. 
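+ * The completed and failed stage lists are bounded by spark.ui.retained_stages
+ * (default 1000); trimIfNecessary evicts a tenth of the retained stages, along with
+ * their per-stage maps, once that cap is exceeded.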
+ * + * All access to the data structures in this class must be synchronized on the + * class, since the UI thread and the DAGScheduler event loop may otherwise + * be reading/updating the internal data structures concurrently. + */ +private[spark] class JobProgressListener(val sc: SparkContext) extends SparkListener { + // How many stages to remember + val RETAINED_STAGES = System.getProperty("spark.ui.retained_stages", "1000").toInt + val DEFAULT_POOL_NAME = "default" + + val stageToPool = new HashMap[Stage, String]() + val stageToDescription = new HashMap[Stage, String]() + val poolToActiveStages = new HashMap[String, HashSet[Stage]]() + + val activeStages = HashSet[Stage]() + val completedStages = ListBuffer[Stage]() + val failedStages = ListBuffer[Stage]() + + // Total metrics reflect metrics only for completed tasks + var totalTime = 0L + var totalShuffleRead = 0L + var totalShuffleWrite = 0L + + val stageToTime = HashMap[Int, Long]() + val stageToShuffleRead = HashMap[Int, Long]() + val stageToShuffleWrite = HashMap[Int, Long]() + val stageToTasksActive = HashMap[Int, HashSet[TaskInfo]]() + val stageToTasksComplete = HashMap[Int, Int]() + val stageToTasksFailed = HashMap[Int, Int]() + val stageToTaskInfos = + HashMap[Int, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]]() + + override def onJobStart(jobStart: SparkListenerJobStart) {} + + override def onStageCompleted(stageCompleted: StageCompleted) = synchronized { + val stage = stageCompleted.stageInfo.stage + poolToActiveStages(stageToPool(stage)) -= stage + activeStages -= stage + completedStages += stage + trimIfNecessary(completedStages) + } + + /** If stages is too large, remove and garbage collect old stages */ + def trimIfNecessary(stages: ListBuffer[Stage]) = synchronized { + if (stages.size > RETAINED_STAGES) { + val toRemove = RETAINED_STAGES / 10 + stages.takeRight(toRemove).foreach( s => { + stageToTaskInfos.remove(s.id) + stageToTime.remove(s.id) + stageToShuffleRead.remove(s.id) + stageToShuffleWrite.remove(s.id) + stageToTasksActive.remove(s.id) + stageToTasksComplete.remove(s.id) + stageToTasksFailed.remove(s.id) + stageToPool.remove(s) + if (stageToDescription.contains(s)) {stageToDescription.remove(s)} + }) + stages.trimEnd(toRemove) + } + } + + /** For FIFO, all stages are contained by "default" pool but "default" pool here is meaningless */ + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) = synchronized { + val stage = stageSubmitted.stage + activeStages += stage + + val poolName = Option(stageSubmitted.properties).map { + p => p.getProperty("spark.scheduler.cluster.fair.pool", DEFAULT_POOL_NAME) + }.getOrElse(DEFAULT_POOL_NAME) + stageToPool(stage) = poolName + + val description = Option(stageSubmitted.properties).flatMap { + p => Option(p.getProperty(SparkContext.SPARK_JOB_DESCRIPTION)) + } + description.map(d => stageToDescription(stage) = d) + + val stages = poolToActiveStages.getOrElseUpdate(poolName, new HashSet[Stage]()) + stages += stage + } + + override def onTaskStart(taskStart: SparkListenerTaskStart) = synchronized { + val sid = taskStart.task.stageId + val tasksActive = stageToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]()) + tasksActive += taskStart.taskInfo + val taskList = stageToTaskInfos.getOrElse( + sid, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]()) + taskList += ((taskStart.taskInfo, None, None)) + stageToTaskInfos(sid) = taskList + } + + override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = synchronized { + 
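+    // Move the finished task out of the active set, classify it as failed or
+    // complete, then fold its run time and shuffle read/write bytes into the
+    // per-stage and global totals.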
val sid = taskEnd.task.stageId + val tasksActive = stageToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]()) + tasksActive -= taskEnd.taskInfo + val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) = + taskEnd.reason match { + case e: ExceptionFailure => + stageToTasksFailed(sid) = stageToTasksFailed.getOrElse(sid, 0) + 1 + (Some(e), e.metrics) + case _ => + stageToTasksComplete(sid) = stageToTasksComplete.getOrElse(sid, 0) + 1 + (None, Option(taskEnd.taskMetrics)) + } + + stageToTime.getOrElseUpdate(sid, 0L) + val time = metrics.map(m => m.executorRunTime).getOrElse(0) + stageToTime(sid) += time + totalTime += time + + stageToShuffleRead.getOrElseUpdate(sid, 0L) + val shuffleRead = metrics.flatMap(m => m.shuffleReadMetrics).map(s => + s.remoteBytesRead).getOrElse(0L) + stageToShuffleRead(sid) += shuffleRead + totalShuffleRead += shuffleRead + + stageToShuffleWrite.getOrElseUpdate(sid, 0L) + val shuffleWrite = metrics.flatMap(m => m.shuffleWriteMetrics).map(s => + s.shuffleBytesWritten).getOrElse(0L) + stageToShuffleWrite(sid) += shuffleWrite + totalShuffleWrite += shuffleWrite + + val taskList = stageToTaskInfos.getOrElse( + sid, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]()) + taskList -= ((taskEnd.taskInfo, None, None)) + taskList += ((taskEnd.taskInfo, metrics, failureInfo)) + stageToTaskInfos(sid) = taskList + } + + override def onJobEnd(jobEnd: SparkListenerJobEnd) = synchronized { + jobEnd match { + case end: SparkListenerJobEnd => + end.jobResult match { + case JobFailed(ex, Some(stage)) => + activeStages -= stage + poolToActiveStages(stageToPool(stage)) -= stage + failedStages += stage + trimIfNecessary(failedStages) + case _ => + } + case _ => + } + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala new file mode 100644 index 0000000000..6aecef5120 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ui.jobs + +import akka.util.Duration + +import java.text.SimpleDateFormat + +import javax.servlet.http.HttpServletRequest + +import org.eclipse.jetty.server.Handler + +import scala.Seq +import scala.collection.mutable.{HashSet, ListBuffer, HashMap, ArrayBuffer} + +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.{ExceptionFailure, SparkContext, Success} +import org.apache.spark.scheduler._ +import collection.mutable +import org.apache.spark.scheduler.cluster.SchedulingMode +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode +import org.apache.spark.util.Utils + +/** Web UI showing progress status of all jobs in the given SparkContext. */ +private[spark] class JobProgressUI(val sc: SparkContext) { + private var _listener: Option[JobProgressListener] = None + def listener = _listener.get + val dateFmt = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") + + private val indexPage = new IndexPage(this) + private val stagePage = new StagePage(this) + private val poolPage = new PoolPage(this) + + def start() { + _listener = Some(new JobProgressListener(sc)) + sc.addSparkListener(listener) + } + + def formatDuration(ms: Long) = Utils.msDurationToString(ms) + + def getHandlers = Seq[(String, Handler)]( + ("/stages/stage", (request: HttpServletRequest) => stagePage.render(request)), + ("/stages/pool", (request: HttpServletRequest) => poolPage.render(request)), + ("/stages", (request: HttpServletRequest) => indexPage.render(request)) + ) +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala new file mode 100644 index 0000000000..ce92b6932b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala @@ -0,0 +1,32 @@ +package org.apache.spark.ui.jobs + +import javax.servlet.http.HttpServletRequest + +import scala.xml.{NodeSeq, Node} +import scala.collection.mutable.HashSet + +import org.apache.spark.scheduler.Stage +import org.apache.spark.ui.UIUtils._ +import org.apache.spark.ui.Page._ + +/** Page showing specific pool details */ +private[spark] class PoolPage(parent: JobProgressUI) { + def listener = parent.listener + + def render(request: HttpServletRequest): Seq[Node] = { + listener.synchronized { + val poolName = request.getParameter("poolname") + val poolToActiveStages = listener.poolToActiveStages + val activeStages = poolToActiveStages.get(poolName).toSeq.flatten + val activeStagesTable = new StageTable(activeStages.sortBy(_.submissionTime).reverse, parent) + + val pool = listener.sc.getPoolForName(poolName).get + val poolTable = new PoolTable(Seq(pool), listener) + + val content = <h4>Summary </h4> ++ poolTable.toNodeSeq() ++ + <h4>{activeStages.size} Active Stages</h4> ++ activeStagesTable.toNodeSeq() + + headerSparkPage(content, parent.sc, "Fair Scheduler Pool: " + poolName, Stages) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala new file mode 100644 index 0000000000..f31465e59d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala @@ -0,0 +1,55 @@ +package org.apache.spark.ui.jobs + +import scala.collection.mutable.HashMap +import scala.collection.mutable.HashSet +import scala.xml.Node + +import org.apache.spark.scheduler.Stage +import org.apache.spark.scheduler.cluster.Schedulable + +/** Table showing list of pools */ +private[spark] class PoolTable(pools: Seq[Schedulable], listener: 
JobProgressListener) { + + var poolToActiveStages: HashMap[String, HashSet[Stage]] = listener.poolToActiveStages + + def toNodeSeq(): Seq[Node] = { + listener.synchronized { + poolTable(poolRow, pools) + } + } + + private def poolTable(makeRow: (Schedulable, HashMap[String, HashSet[Stage]]) => Seq[Node], + rows: Seq[Schedulable] + ): Seq[Node] = { + <table class="table table-bordered table-striped table-condensed sortable table-fixed"> + <thead> + <th>Pool Name</th> + <th>Minimum Share</th> + <th>Pool Weight</th> + <th>Active Stages</th> + <th>Running Tasks</th> + <th>SchedulingMode</th> + </thead> + <tbody> + {rows.map(r => makeRow(r, poolToActiveStages))} + </tbody> + </table> + } + + private def poolRow(p: Schedulable, poolToActiveStages: HashMap[String, HashSet[Stage]]) + : Seq[Node] = { + val activeStages = poolToActiveStages.get(p.name) match { + case Some(stages) => stages.size + case None => 0 + } + <tr> + <td><a href={"/stages/pool?poolname=%s".format(p.name)}>{p.name}</a></td> + <td>{p.minShare}</td> + <td>{p.weight}</td> + <td>{activeStages}</td> + <td>{p.runningTasks}</td> + <td>{p.schedulingMode}</td> + </tr> + } +} + diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala new file mode 100644 index 0000000000..a9969ab1c0 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ui.jobs + +import java.util.Date + +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node + +import org.apache.spark.ui.UIUtils._ +import org.apache.spark.ui.Page._ +import org.apache.spark.util.{Utils, Distribution} +import org.apache.spark.{ExceptionFailure} +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.executor.TaskMetrics + +/** Page showing statistics and task list for a given stage */ +private[spark] class StagePage(parent: JobProgressUI) { + def listener = parent.listener + val dateFmt = parent.dateFmt + + def render(request: HttpServletRequest): Seq[Node] = { + listener.synchronized { + val stageId = request.getParameter("id").toInt + val now = System.currentTimeMillis() + + if (!listener.stageToTaskInfos.contains(stageId)) { + val content = + <div> + <h4>Summary Metrics</h4> No tasks have started yet + <h4>Tasks</h4> No tasks have started yet + </div> + return headerSparkPage(content, parent.sc, "Details for Stage %s".format(stageId), Stages) + } + + val tasks = listener.stageToTaskInfos(stageId).toSeq.sortBy(_._1.launchTime) + + val numCompleted = tasks.count(_._1.finished) + val shuffleReadBytes = listener.stageToShuffleRead.getOrElse(stageId, 0L) + val hasShuffleRead = shuffleReadBytes > 0 + val shuffleWriteBytes = listener.stageToShuffleWrite.getOrElse(stageId, 0L) + val hasShuffleWrite = shuffleWriteBytes > 0 + + var activeTime = 0L + listener.stageToTasksActive(stageId).foreach(activeTime += _.timeRunning(now)) + + val summary = + <div> + <ul class="unstyled"> + <li> + <strong>CPU time: </strong> + {parent.formatDuration(listener.stageToTime.getOrElse(stageId, 0L) + activeTime)} + </li> + {if (hasShuffleRead) + <li> + <strong>Shuffle read: </strong> + {Utils.bytesToString(shuffleReadBytes)} + </li> + } + {if (hasShuffleWrite) + <li> + <strong>Shuffle write: </strong> + {Utils.bytesToString(shuffleWriteBytes)} + </li> + } + </ul> + </div> + + val taskHeaders: Seq[String] = + Seq("Task ID", "Status", "Locality Level", "Executor", "Launch Time", "Duration") ++ + Seq("GC Time") ++ + {if (hasShuffleRead) Seq("Shuffle Read") else Nil} ++ + {if (hasShuffleWrite) Seq("Shuffle Write") else Nil} ++ + Seq("Errors") + + val taskTable = listingTable(taskHeaders, taskRow(hasShuffleRead, hasShuffleWrite), tasks) + + // Excludes tasks which failed and have incomplete metrics + val validTasks = tasks.filter(t => t._1.status == "SUCCESS" && (t._2.isDefined)) + + val summaryTable: Option[Seq[Node]] = + if (validTasks.size == 0) { + None + } + else { + val serviceTimes = validTasks.map{case (info, metrics, exception) => + metrics.get.executorRunTime.toDouble} + val serviceQuantiles = "Duration" +: Distribution(serviceTimes).get.getQuantiles().map( + ms => parent.formatDuration(ms.toLong)) + + def getQuantileCols(data: Seq[Double]) = + Distribution(data).get.getQuantiles().map(d => Utils.bytesToString(d.toLong)) + + val shuffleReadSizes = validTasks.map { + case(info, metrics, exception) => + metrics.get.shuffleReadMetrics.map(_.remoteBytesRead).getOrElse(0L).toDouble + } + val shuffleReadQuantiles = "Shuffle Read (Remote)" +: getQuantileCols(shuffleReadSizes) + + val shuffleWriteSizes = validTasks.map { + case(info, metrics, exception) => + metrics.get.shuffleWriteMetrics.map(_.shuffleBytesWritten).getOrElse(0L).toDouble + } + val shuffleWriteQuantiles = "Shuffle Write" +: getQuantileCols(shuffleWriteSizes) + + val listings: Seq[Seq[String]] = Seq(serviceQuantiles, + if (hasShuffleRead) shuffleReadQuantiles else 
Nil, + if (hasShuffleWrite) shuffleWriteQuantiles else Nil) + + val quantileHeaders = Seq("Metric", "Min", "25th percentile", + "Median", "75th percentile", "Max") + def quantileRow(data: Seq[String]): Seq[Node] = <tr> {data.map(d => <td>{d}</td>)} </tr> + Some(listingTable(quantileHeaders, quantileRow, listings, fixedWidth = true)) + } + + val content = + summary ++ + <h4>Summary Metrics for {numCompleted} Completed Tasks</h4> ++ + <div>{summaryTable.getOrElse("No tasks have reported metrics yet.")}</div> ++ + <h4>Tasks</h4> ++ taskTable; + + headerSparkPage(content, parent.sc, "Details for Stage %d".format(stageId), Stages) + } + } + + + def taskRow(shuffleRead: Boolean, shuffleWrite: Boolean) + (taskData: (TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])): Seq[Node] = { + def fmtStackTrace(trace: Seq[StackTraceElement]): Seq[Node] = + trace.map(e => <span style="display:block;">{e.toString}</span>) + val (info, metrics, exception) = taskData + + val duration = if (info.status == "RUNNING") info.timeRunning(System.currentTimeMillis()) + else metrics.map(m => m.executorRunTime).getOrElse(1) + val formatDuration = if (info.status == "RUNNING") parent.formatDuration(duration) + else metrics.map(m => parent.formatDuration(m.executorRunTime)).getOrElse("") + val gcTime = metrics.map(m => m.jvmGCTime).getOrElse(0L) + + <tr> + <td>{info.taskId}</td> + <td>{info.status}</td> + <td>{info.taskLocality}</td> + <td>{info.host}</td> + <td>{dateFmt.format(new Date(info.launchTime))}</td> + <td sorttable_customkey={duration.toString}> + {formatDuration} + </td> + <td sorttable_customkey={gcTime.toString}> + {if (gcTime > 0) parent.formatDuration(gcTime) else ""} + </td> + {if (shuffleRead) { + <td>{metrics.flatMap{m => m.shuffleReadMetrics}.map{s => + Utils.bytesToString(s.remoteBytesRead)}.getOrElse("")}</td> + }} + {if (shuffleWrite) { + <td>{metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => + Utils.bytesToString(s.shuffleBytesWritten)}.getOrElse("")}</td> + }} + <td>{exception.map(e => + <span> + {e.className} ({e.description})<br/> + {fmtStackTrace(e.stackTrace)} + </span>).getOrElse("")} + </td> + </tr> + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala new file mode 100644 index 0000000000..71e58a977e --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala @@ -0,0 +1,107 @@ +package org.apache.spark.ui.jobs + +import java.util.Date + +import scala.xml.Node +import scala.collection.mutable.HashSet + +import org.apache.spark.scheduler.cluster.{SchedulingMode, TaskInfo} +import org.apache.spark.scheduler.Stage +import org.apache.spark.util.Utils + + +/** Page showing list of all ongoing and recently finished stages */ +private[spark] class StageTable(val stages: Seq[Stage], val parent: JobProgressUI) { + + val listener = parent.listener + val dateFmt = parent.dateFmt + val isFairScheduler = listener.sc.getSchedulingMode == SchedulingMode.FAIR + + def toNodeSeq(): Seq[Node] = { + listener.synchronized { + stageTable(stageRow, stages) + } + } + + /** Special table which merges two header cells. 
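+ * The Pool Name column is emitted only when the fair scheduler is active
+ * (isFairScheduler), so FIFO applications see one fewer header cell.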
*/ + private def stageTable[T](makeRow: T => Seq[Node], rows: Seq[T]): Seq[Node] = { + <table class="table table-bordered table-striped table-condensed sortable"> + <thead> + <th>Stage Id</th> + {if (isFairScheduler) {<th>Pool Name</th>} else {}} + <th>Description</th> + <th>Submitted</th> + <th>Duration</th> + <th>Tasks: Succeeded/Total</th> + <th>Shuffle Read</th> + <th>Shuffle Write</th> + </thead> + <tbody> + {rows.map(r => makeRow(r))} + </tbody> + </table> + } + + private def makeProgressBar(started: Int, completed: Int, failed: String, total: Int): Seq[Node] = { + val completeWidth = "width: %s%%".format((completed.toDouble/total)*100) + val startWidth = "width: %s%%".format((started.toDouble/total)*100) + + <div class="progress"> + <span style="text-align:center; position:absolute; width:100%;"> + {completed}/{total} {failed} + </span> + <div class="bar bar-completed" style={completeWidth}></div> + <div class="bar bar-running" style={startWidth}></div> + </div> + } + + + private def stageRow(s: Stage): Seq[Node] = { + val submissionTime = s.submissionTime match { + case Some(t) => dateFmt.format(new Date(t)) + case None => "Unknown" + } + + val shuffleRead = listener.stageToShuffleRead.getOrElse(s.id, 0L) match { + case 0 => "" + case b => Utils.bytesToString(b) + } + val shuffleWrite = listener.stageToShuffleWrite.getOrElse(s.id, 0L) match { + case 0 => "" + case b => Utils.bytesToString(b) + } + + val startedTasks = listener.stageToTasksActive.getOrElse(s.id, HashSet[TaskInfo]()).size + val completedTasks = listener.stageToTasksComplete.getOrElse(s.id, 0) + val failedTasks = listener.stageToTasksFailed.getOrElse(s.id, 0) match { + case f if f > 0 => "(%s failed)".format(f) + case _ => "" + } + val totalTasks = s.numPartitions + + val poolName = listener.stageToPool.get(s) + + val nameLink = <a href={"/stages/stage?id=%s".format(s.id)}>{s.name}</a> + val description = listener.stageToDescription.get(s) + .map(d => <div><em>{d}</em></div><div>{nameLink}</div>).getOrElse(nameLink) + val finishTime = s.completionTime.getOrElse(System.currentTimeMillis()) + val duration = s.submissionTime.map(t => finishTime - t) + + <tr> + <td>{s.id}</td> + {if (isFairScheduler) { + <td><a href={"/stages/pool?poolname=%s".format(poolName.get)}>{poolName.get}</a></td>} + } + <td>{description}</td> + <td valign="middle">{submissionTime}</td> + <td sorttable_customkey={duration.getOrElse(-1).toString}> + {duration.map(d => parent.formatDuration(d)).getOrElse("Unknown")} + </td> + <td class="progress-cell"> + {makeProgressBar(startedTasks, completedTasks, failedTasks, totalTasks)} + </td> + <td>{shuffleRead}</td> + <td>{shuffleWrite}</td> + </tr> + } +} diff --git a/core/src/main/scala/spark/ui/storage/BlockManagerUI.scala b/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala index 49ed069c75..1d633d374a 100644 --- a/core/src/main/scala/spark/ui/storage/BlockManagerUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.ui.storage +package org.apache.spark.ui.storage import akka.util.Duration @@ -23,8 +23,8 @@ import javax.servlet.http.HttpServletRequest import org.eclipse.jetty.server.Handler -import spark.{Logging, SparkContext} -import spark.ui.JettyUtils._ +import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.ui.JettyUtils._ /** Web UI showing storage status of all RDD's in the given SparkContext. 
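 * This backs the Storage tab of the UI: the index page lists the cached RDDs, and
 * RDDPage shows per-block and per-executor detail for a single RDD.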
*/ private[spark] class BlockManagerUI(val sc: SparkContext) extends Logging { diff --git a/core/src/main/scala/spark/ui/storage/IndexPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/IndexPage.scala index 4e0360d19a..c3ec907370 100644 --- a/core/src/main/scala/spark/ui/storage/IndexPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/IndexPage.scala @@ -15,16 +15,16 @@ * limitations under the License. */ -package spark.ui.storage +package org.apache.spark.ui.storage import javax.servlet.http.HttpServletRequest import scala.xml.Node -import spark.storage.{RDDInfo, StorageUtils} -import spark.Utils -import spark.ui.UIUtils._ -import spark.ui.Page._ +import org.apache.spark.storage.{RDDInfo, StorageUtils} +import org.apache.spark.ui.UIUtils._ +import org.apache.spark.ui.Page._ +import org.apache.spark.util.Utils /** Page showing list of RDD's currently stored in the cluster */ private[spark] class IndexPage(parent: BlockManagerUI) { @@ -33,34 +33,18 @@ private[spark] class IndexPage(parent: BlockManagerUI) { def render(request: HttpServletRequest): Seq[Node] = { val storageStatusList = sc.getExecutorStorageStatus // Calculate macro-level statistics - val maxMem = storageStatusList.map(_.maxMem).reduce(_+_) - val remainingMem = storageStatusList.map(_.memRemaining).reduce(_+_) - val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)) - .reduceOption(_+_).getOrElse(0L) val rddHeaders = Seq( "RDD Name", "Storage Level", "Cached Partitions", - "Fraction Partitions Cached", + "Fraction Cached", "Size in Memory", "Size on Disk") val rdds = StorageUtils.rddInfoFromStorageStatus(storageStatusList, sc) - val rddTable = listingTable(rddHeaders, rddRow, rdds) + val content = listingTable(rddHeaders, rddRow, rdds) - val content = - <div class="row"> - <div class="span12"> - <ul class="unstyled"> - <li><strong>Memory:</strong> - {Utils.memoryBytesToString(maxMem - remainingMem)} Used - ({Utils.memoryBytesToString(remainingMem)} Available) </li> - <li><strong>Disk:</strong> {Utils.memoryBytesToString(diskSpaceUsed)} Used </li> - </ul> - </div> - </div> ++ {rddTable}; - - headerSparkPage(content, parent.sc, "Spark Storage ", Storage) + headerSparkPage(content, parent.sc, "Storage ", Storage) } def rddRow(rdd: RDDInfo): Seq[Node] = { @@ -73,9 +57,9 @@ private[spark] class IndexPage(parent: BlockManagerUI) { <td>{rdd.storageLevel.description} </td> <td>{rdd.numCachedPartitions}</td> - <td>{rdd.numCachedPartitions / rdd.numPartitions.toDouble}</td> - <td>{Utils.memoryBytesToString(rdd.memSize)}</td> - <td>{Utils.memoryBytesToString(rdd.diskSize)}</td> + <td>{"%.0f%%".format(rdd.numCachedPartitions * 100.0 / rdd.numPartitions)}</td> + <td>{Utils.bytesToString(rdd.memSize)}</td> + <td>{Utils.bytesToString(rdd.diskSize)}</td> </tr> } } diff --git a/core/src/main/scala/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala index 003be54ad8..43c1257677 100644 --- a/core/src/main/scala/spark/ui/storage/RDDPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala @@ -15,17 +15,18 @@ * limitations under the License. 
*/ -package spark.ui.storage +package org.apache.spark.ui.storage import javax.servlet.http.HttpServletRequest import scala.xml.Node -import spark.storage.{StorageStatus, StorageUtils} -import spark.ui.UIUtils._ -import spark.Utils -import spark.storage.BlockManagerMasterActor.BlockStatus -import spark.ui.Page._ +import org.apache.spark.storage.{StorageStatus, StorageUtils} +import org.apache.spark.storage.BlockManagerMasterActor.BlockStatus +import org.apache.spark.ui.UIUtils._ +import org.apache.spark.ui.Page._ +import org.apache.spark.util.Utils + /** Page showing storage details for a given RDD */ private[spark] class RDDPage(parent: BlockManagerUI) { @@ -44,7 +45,7 @@ private[spark] class RDDPage(parent: BlockManagerUI) { val workerTable = listingTable(workerHeaders, workerRow, workers) val blockHeaders = Seq("Block Name", "Storage Level", "Size in Memory", "Size on Disk", - "Locations") + "Executors") val blockStatuses = filteredStorageStatusList.flatMap(_.blocks).toArray.sortWith(_._1 < _._1) val blockLocations = StorageUtils.blockLocationsFromStorageStatus(filteredStorageStatusList) @@ -54,7 +55,7 @@ private[spark] class RDDPage(parent: BlockManagerUI) { val blockTable = listingTable(blockHeaders, blockRow, blocks) val content = - <div class="row"> + <div class="row-fluid"> <div class="span12"> <ul class="unstyled"> <li> @@ -71,30 +72,31 @@ private[spark] class RDDPage(parent: BlockManagerUI) { </li> <li> <strong>Memory Size:</strong> - {Utils.memoryBytesToString(rddInfo.memSize)} + {Utils.bytesToString(rddInfo.memSize)} </li> <li> <strong>Disk Size:</strong> - {Utils.memoryBytesToString(rddInfo.diskSize)} + {Utils.bytesToString(rddInfo.diskSize)} </li> </ul> </div> </div> - <hr/> - <div class="row"> + + <div class="row-fluid"> <div class="span12"> + <h4> Data Distribution on {workers.size} Executors </h4> {workerTable} </div> </div> - <hr/> - <div class="row"> + + <div class="row-fluid"> <div class="span12"> - <h3> RDD Summary </h3> + <h4> {blocks.size} Partitions </h4> {blockTable} </div> </div>; - headerSparkPage(content, parent.sc, "RDD Info: " + rddInfo.name, Jobs) + headerSparkPage(content, parent.sc, "RDD Storage Info for " + rddInfo.name, Storage) } def blockRow(row: (String, BlockStatus, Seq[String])): Seq[Node] = { @@ -105,10 +107,10 @@ private[spark] class RDDPage(parent: BlockManagerUI) { {block.storageLevel.description} </td> <td sorttable_customkey={block.memSize.toString}> - {Utils.memoryBytesToString(block.memSize)} + {Utils.bytesToString(block.memSize)} </td> <td sorttable_customkey={block.diskSize.toString}> - {Utils.memoryBytesToString(block.diskSize)} + {Utils.bytesToString(block.diskSize)} </td> <td> {locations.map(l => <span>{l}<br/></span>)} @@ -121,10 +123,10 @@ private[spark] class RDDPage(parent: BlockManagerUI) { <tr> <td>{status.blockManagerId.host + ":" + status.blockManagerId.port}</td> <td> - {Utils.memoryBytesToString(status.memUsed(prefix))} - ({Utils.memoryBytesToString(status.memRemaining)} Total Available) + {Utils.bytesToString(status.memUsed(prefix))} + ({Utils.bytesToString(status.memRemaining)} Remaining) </td> - <td>{Utils.memoryBytesToString(status.diskUsed(prefix))}</td> + <td>{Utils.bytesToString(status.diskUsed(prefix))}</td> </tr> } } diff --git a/core/src/main/scala/spark/util/AkkaUtils.scala b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala index 9233277bdb..d4c5065c3f 100644 --- a/core/src/main/scala/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala @@ -15,7 +15,7 @@ * limitations 
under the License. */ -package spark.util +package org.apache.spark.util import akka.actor.{ActorSystem, ExtendedActorSystem} import com.typesafe.config.ConfigFactory diff --git a/core/src/main/scala/spark/util/BoundedPriorityQueue.scala b/core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala index 0575497f5d..0b51c23f7b 100644 --- a/core/src/main/scala/spark/util/BoundedPriorityQueue.scala +++ b/core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.io.Serializable import java.util.{PriorityQueue => JPriorityQueue} diff --git a/core/src/main/scala/spark/util/ByteBufferInputStream.scala b/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala index 47a28e2f76..e214d2a519 100644 --- a/core/src/main/scala/spark/util/ByteBufferInputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.io.InputStream import java.nio.ByteBuffer -import spark.storage.BlockManager +import org.apache.spark.storage.BlockManager /** * Reads data from a ByteBuffer, and optionally cleans it up using BlockManager.dispose() diff --git a/core/src/main/scala/org/apache/spark/util/Clock.scala b/core/src/main/scala/org/apache/spark/util/Clock.scala new file mode 100644 index 0000000000..97c2b45aab --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/Clock.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +/** + * An interface to represent clocks, so that they can be mocked out in unit tests. + */ +private[spark] trait Clock { + def getTime(): Long +} + +private[spark] object SystemClock extends Clock { + def getTime(): Long = System.currentTimeMillis() +} diff --git a/core/src/main/scala/spark/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index 8b39241095..7108595e3e 100644 --- a/core/src/main/scala/spark/ClosureCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -15,7 +15,7 @@ * limitations under the License. 
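
Since the `Clock` trait added above exists precisely so time can be mocked, a short sketch of how a test might swap in a manual implementation. The `ManualClock` class is an assumption for illustration; the commit itself only adds `Clock` and `SystemClock`:

```scala
// The trait as introduced above, minus private[spark] so the sketch
// compiles standalone.
trait Clock {
  def getTime(): Long
}

// A hand-advanced clock for deterministic tests (illustrative only).
class ManualClock(private var now: Long = 0L) extends Clock {
  def getTime(): Long = now
  def advance(millis: Long) { now += millis }
}

object ClockSketch extends App {
  val clock = new ManualClock()
  val start = clock.getTime()
  clock.advance(5000L)
  // Deterministic, unlike System.currentTimeMillis().
  assert(clock.getTime() - start == 5000L)
}
```
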
*/ -package spark +package org.apache.spark.util import java.lang.reflect.Field @@ -25,6 +25,7 @@ import scala.collection.mutable.Set import org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor, Type} import org.objectweb.asm.Opcodes._ import java.io.{InputStream, IOException, ByteArrayOutputStream, ByteArrayInputStream, BufferedInputStream} +import org.apache.spark.Logging private[spark] object ClosureCleaner extends Logging { // Get an ASM class reader for a given class from the JAR that loaded it diff --git a/core/src/main/scala/spark/util/CompletionIterator.scala b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala index 210450892b..dc15a38b29 100644 --- a/core/src/main/scala/spark/util/CompletionIterator.scala +++ b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util /** * Wrapper around an iterator which calls a completion method after it successfully iterates through all the elements diff --git a/core/src/main/scala/spark/util/Distribution.scala b/core/src/main/scala/org/apache/spark/util/Distribution.scala index 5d4d7a6c50..33bf3562fe 100644 --- a/core/src/main/scala/spark/util/Distribution.scala +++ b/core/src/main/scala/org/apache/spark/util/Distribution.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.io.PrintStream diff --git a/core/src/main/scala/spark/util/IdGenerator.scala b/core/src/main/scala/org/apache/spark/util/IdGenerator.scala index 3422280559..17e55f7996 100644 --- a/core/src/main/scala/spark/util/IdGenerator.scala +++ b/core/src/main/scala/org/apache/spark/util/IdGenerator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.util.concurrent.atomic.AtomicInteger diff --git a/core/src/main/scala/spark/util/IntParam.scala b/core/src/main/scala/org/apache/spark/util/IntParam.scala index daf0d58fa2..626bb49eea 100644 --- a/core/src/main/scala/spark/util/IntParam.scala +++ b/core/src/main/scala/org/apache/spark/util/IntParam.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util /** * An extractor object for parsing strings into integers. diff --git a/core/src/main/scala/spark/util/MemoryParam.scala b/core/src/main/scala/org/apache/spark/util/MemoryParam.scala index 298562323a..4869c9897a 100644 --- a/core/src/main/scala/spark/util/MemoryParam.scala +++ b/core/src/main/scala/org/apache/spark/util/MemoryParam.scala @@ -15,9 +15,7 @@ * limitations under the License. */ -package spark.util - -import spark.Utils +package org.apache.spark.util /** * An extractor object for parsing JVM memory strings, such as "10g", into an Int representing diff --git a/core/src/main/scala/spark/util/MetadataCleaner.scala b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala index 92909e0959..a430a75451 100644 --- a/core/src/main/scala/spark/util/MetadataCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala @@ -15,11 +15,11 @@ * limitations under the License. 
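
`IntParam` and `MemoryParam`, moved above into `org.apache.spark.util`, are extractor objects: their `unapply` lets a string be matched and parsed in a single `case`. A sketch of the pattern under illustrative names:

```scala
// Extractor: returns Some(parsed value) on success, None otherwise.
object IntArg {
  def unapply(str: String): Option[Int] =
    try {
      Some(str.toInt)
    } catch {
      case _: NumberFormatException => None
    }
}

object ExtractorSketch extends App {
  def describe(arg: String): String = arg match {
    case IntArg(n) => "port number " + n
    case other     => "not a number: " + other
  }
  println(describe("8080")) // port number 8080
  println(describe("abc"))  // not a number: abc
}
```
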
*/ -package spark.util +package org.apache.spark.util import java.util.concurrent.{TimeUnit, ScheduledFuture, Executors} import java.util.{TimerTask, Timer} -import spark.Logging +import org.apache.spark.Logging /** diff --git a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/util/MutablePair.scala index 1a7cdf4788..34f1f6606f 100644 --- a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ b/core/src/main/scala/org/apache/spark/util/MutablePair.scala @@ -15,16 +15,22 @@ * limitations under the License. */ -package org.apache.hadoop.mapreduce +package org.apache.spark.util -import org.apache.hadoop.conf.Configuration -import task.{TaskAttemptContextImpl, JobContextImpl} -trait HadoopMapReduceUtil { - def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) - - def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) +/** + * A tuple of 2 elements. This can be used as an alternative to Scala's Tuple2 when we want to + * minimize object allocation. + * + * @param _1 Element 1 of this MutablePair + * @param _2 Element 2 of this MutablePair + */ +case class MutablePair[@specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T1, + @specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T2] + (var _1: T1, var _2: T2) + extends Product2[T1, T2] +{ + override def toString = "(" + _1 + "," + _2 + ")" - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = - new TaskAttemptID(jtIdentifier, jobId, if (isMap) TaskType.MAP else TaskType.REDUCE, taskId, attemptId) + override def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[_,_]] } diff --git a/core/src/main/scala/spark/util/NextIterator.scala b/core/src/main/scala/org/apache/spark/util/NextIterator.scala index 22163ece8d..8266e5e495 100644 --- a/core/src/main/scala/spark/util/NextIterator.scala +++ b/core/src/main/scala/org/apache/spark/util/NextIterator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util /** Provides a basic/boilerplate Iterator implementation. */ private[spark] abstract class NextIterator[U] extends Iterator[U] { diff --git a/core/src/main/scala/spark/util/RateLimitedOutputStream.scala b/core/src/main/scala/org/apache/spark/util/RateLimitedOutputStream.scala index 00f782bbe7..47e1b45004 100644 --- a/core/src/main/scala/spark/util/RateLimitedOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/RateLimitedOutputStream.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import scala.annotation.tailrec diff --git a/core/src/main/scala/spark/util/SerializableBuffer.scala b/core/src/main/scala/org/apache/spark/util/SerializableBuffer.scala index 7e6842628a..f2b1ad7d0e 100644 --- a/core/src/main/scala/spark/util/SerializableBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/SerializableBuffer.scala @@ -15,7 +15,7 @@ * limitations under the License. 
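
The point of the new `MutablePair` above is allocation avoidance: one instance is overwritten per record instead of building a fresh `Tuple2` each time. A sketch of that usage, with the `@specialized` annotations dropped for brevity:

```scala
// Same shape as the class added above, without specialization.
case class MutablePair[T1, T2](var _1: T1, var _2: T2) extends Product2[T1, T2] {
  override def toString = "(" + _1 + "," + _2 + ")"
  override def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[_, _]]
}

object MutablePairSketch extends App {
  val pair = new MutablePair[String, Int]("", 0)
  for (word <- Seq("a", "bb", "ccc")) {
    pair._1 = word        // overwrite in place: no new pair per record
    pair._2 = word.length
    println(pair)         // (a,1), (bb,2), (ccc,3)
  }
}
```
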
*/ -package spark.util +package org.apache.spark.util import java.nio.ByteBuffer import java.io.{IOException, ObjectOutputStream, EOFException, ObjectInputStream} diff --git a/core/src/main/scala/spark/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index 6cc57566d7..a25b37a2a9 100644 --- a/core/src/main/scala/spark/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark.util import java.lang.reflect.Field import java.lang.reflect.Modifier @@ -30,6 +30,7 @@ import java.lang.management.ManagementFactory import scala.collection.mutable.ArrayBuffer import it.unimi.dsi.fastutil.ints.IntOpenHashSet +import org.apache.spark.Logging /** * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in diff --git a/core/src/main/scala/spark/util/StatCounter.scala b/core/src/main/scala/org/apache/spark/util/StatCounter.scala index 76358d4151..020d5edba9 100644 --- a/core/src/main/scala/spark/util/StatCounter.scala +++ b/core/src/main/scala/org/apache/spark/util/StatCounter.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util /** * A class for tracking the statistics of a set of numbers (count, mean and variance) in a diff --git a/core/src/main/scala/spark/util/TimeStampedHashMap.scala b/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala index cc7909194a..277de2f8a6 100644 --- a/core/src/main/scala/spark/util/TimeStampedHashMap.scala +++ b/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala @@ -15,12 +15,14 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConversions import scala.collection.mutable.Map -import spark.scheduler.MapStatus +import scala.collection.immutable +import org.apache.spark.scheduler.MapStatus +import org.apache.spark.Logging /** * This is a custom implementation of scala.collection.mutable.Map which stores the insertion @@ -28,7 +30,7 @@ import spark.scheduler.MapStatus * threshold time can them be removed using the clearOldValues method. This is intended to be a drop-in * replacement of scala.collection.mutable.HashMap. */ -class TimeStampedHashMap[A, B] extends Map[A, B]() with spark.Logging { +class TimeStampedHashMap[A, B] extends Map[A, B]() with Logging { val internalMap = new ConcurrentHashMap[A, (B, Long)]() def get(key: A): Option[B] = { @@ -99,6 +101,8 @@ class TimeStampedHashMap[A, B] extends Map[A, B]() with spark.Logging { } } + def toMap: immutable.Map[A, B] = iterator.toMap + /** * Removes old key-value pairs that have timestamp earlier than `threshTime` */ diff --git a/core/src/main/scala/spark/util/TimeStampedHashSet.scala b/core/src/main/scala/org/apache/spark/util/TimeStampedHashSet.scala index 41e3fd8cba..26983138ff 100644 --- a/core/src/main/scala/spark/util/TimeStampedHashSet.scala +++ b/core/src/main/scala/org/apache/spark/util/TimeStampedHashSet.scala @@ -15,7 +15,7 @@ * limitations under the License. 
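
A sketch of the idea behind `TimeStampedHashMap`, touched above: each insert records a wall-clock timestamp so `clearOldValues` can drop stale entries. `TinyTimestampedMap` is a stripped-down illustration, not the real class:

```scala
import java.util.concurrent.ConcurrentHashMap
import scala.collection.JavaConversions._

class TinyTimestampedMap[A, B] {
  private val internal = new ConcurrentHashMap[A, (B, Long)]()

  def update(key: A, value: B) {
    internal.put(key, (value, System.currentTimeMillis()))
  }

  def get(key: A): Option[B] = Option(internal.get(key)).map(_._1)

  // Drop entries inserted strictly before threshTime. Removing while
  // iterating is safe: ConcurrentHashMap iterators are weakly consistent.
  def clearOldValues(threshTime: Long) {
    for ((key, (_, timestamp)) <- internal if timestamp < threshTime) {
      internal.remove(key)
    }
  }
}
```
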
*/ -package spark.util +package org.apache.spark.util import scala.collection.mutable.Set import scala.collection.JavaConversions diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index e6a96a5ec1..bb47fc0a2c 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark.util import java.io._ import java.net.{InetAddress, URL, URI, NetworkInterface, Inet4Address, ServerSocket} @@ -33,14 +33,16 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder import org.apache.hadoop.fs.{Path, FileSystem, FileUtil} -import spark.serializer.SerializerInstance -import spark.deploy.SparkHadoopUtil +import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance} +import org.apache.spark.deploy.SparkHadoopUtil +import java.nio.ByteBuffer +import org.apache.spark.{SparkEnv, SparkException, Logging} /** * Various utility methods used by Spark. */ -private object Utils extends Logging { +private[spark] object Utils extends Logging { /** Serialize an object using Java serialization */ def serialize[T](o: T): Array[Byte] = { @@ -68,6 +70,47 @@ private object Utils extends Logging { return ois.readObject.asInstanceOf[T] } + /** Serialize via nested stream using specific serializer */ + def serializeViaNestedStream(os: OutputStream, ser: SerializerInstance)(f: SerializationStream => Unit) = { + val osWrapper = ser.serializeStream(new OutputStream { + def write(b: Int) = os.write(b) + + override def write(b: Array[Byte], off: Int, len: Int) = os.write(b, off, len) + }) + try { + f(osWrapper) + } finally { + osWrapper.close() + } + } + + /** Deserialize via nested stream using specific serializer */ + def deserializeViaNestedStream(is: InputStream, ser: SerializerInstance)(f: DeserializationStream => Unit) = { + val isWrapper = ser.deserializeStream(new InputStream { + def read(): Int = is.read() + + override def read(b: Array[Byte], off: Int, len: Int): Int = is.read(b, off, len) + }) + try { + f(isWrapper) + } finally { + isWrapper.close() + } + } + + /** + * Primitive often used when writing {@link java.nio.ByteBuffer} to {@link java.io.DataOutput}. + */ + def writeByteBuffer(bb: ByteBuffer, out: ObjectOutput) = { + if (bb.hasArray) { + out.write(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining()) + } else { + val bbval = new Array[Byte](bb.remaining()) + bb.get(bbval) + out.write(bbval) + } + } + def isAlpha(c: Char): Boolean = { (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') } @@ -224,8 +267,9 @@ private object Utils extends Logging { } case _ => // Use the Hadoop filesystem library, which supports file://, hdfs://, s3://, and others + val env = SparkEnv.get val uri = new URI(url) - val conf = SparkHadoopUtil.newConfiguration() + val conf = env.hadoop.newConfiguration() val fs = FileSystem.get(uri, conf) val in = fs.open(new Path(uri)) val out = new FileOutputStream(tempFile) @@ -351,48 +395,17 @@ private object Utils extends Logging { retval } -/* - // Used by DEBUG code : remove when all testing done - private val ipPattern = Pattern.compile("^[0-9]+(\\.[0-9]+)*$") def checkHost(host: String, message: String = "") { - // Currently catches only ipv4 pattern, this is just a debugging tool - not rigourous ! 
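
The new `Utils.writeByteBuffer` above needs two code paths because only heap buffers expose a backing array; direct (off-heap) buffers must be copied out first. A standalone sketch of the same logic:

```scala
import java.io.{ByteArrayOutputStream, ObjectOutput, ObjectOutputStream}
import java.nio.ByteBuffer

object WriteByteBufferSketch extends App {
  def writeByteBuffer(bb: ByteBuffer, out: ObjectOutput) {
    if (bb.hasArray) {
      // Heap buffer: write straight from the backing array, no copy.
      out.write(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())
    } else {
      // Direct buffer: no array, so copy the remaining bytes out first.
      val tmp = new Array[Byte](bb.remaining())
      bb.get(tmp)
      out.write(tmp)
    }
  }

  val out = new ObjectOutputStream(new ByteArrayOutputStream())
  writeByteBuffer(ByteBuffer.wrap("heap".getBytes), out) // array path

  val direct = ByteBuffer.allocateDirect(4)
  direct.put("off!".getBytes)
  direct.flip()
  writeByteBuffer(direct, out)                           // copy path
  out.close()
}
```
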
- // if (host.matches("^[0-9]+(\\.[0-9]+)*$")) { - if (ipPattern.matcher(host).matches()) { - Utils.logErrorWithStack("Unexpected to have host " + host + " which matches IP pattern. Message " + message) - } - if (Utils.parseHostPort(host)._2 != 0){ - Utils.logErrorWithStack("Unexpected to have host " + host + " which has port in it. Message " + message) - } + assert(host.indexOf(':') == -1, message) } - // Used by DEBUG code : remove when all testing done def checkHostPort(hostPort: String, message: String = "") { - val (host, port) = Utils.parseHostPort(hostPort) - checkHost(host) - if (port <= 0){ - Utils.logErrorWithStack("Unexpected to have port " + port + " which is not valid in " + hostPort + ". Message " + message) - } + assert(hostPort.indexOf(':') != -1, message) } // Used by DEBUG code : remove when all testing done def logErrorWithStack(msg: String) { try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } } - // temp code for debug - System.exit(-1) - } -*/ - - // Once testing is complete in various modes, replace with this ? - def checkHost(host: String, message: String = "") {} - def checkHostPort(hostPort: String, message: String = "") {} - - // Used by DEBUG code : remove when all testing done - def logErrorWithStack(msg: String) { - try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } } - } - - def getUserNameFromEnvironment(): String = { - SparkHadoopUtil.getUserNameFromEnvironment } // Typically, this will be of order of number of nodes in cluster @@ -479,9 +492,9 @@ private object Utils extends Logging { } /** - * Convert a memory quantity in bytes to a human-readable string such as "4.0 MB". + * Convert a quantity in bytes to a human-readable string such as "4.0 MB". */ - def memoryBytesToString(size: Long): String = { + def bytesToString(size: Long): String = { val TB = 1L << 40 val GB = 1L << 30 val MB = 1L << 20 @@ -524,10 +537,10 @@ private object Utils extends Logging { } /** - * Convert a memory quantity in megabytes to a human-readable string such as "4.0 MB". + * Convert a quantity in megabytes to a human-readable string such as "4.0 MB". */ - def memoryMegabytesToString(megabytes: Long): String = { - memoryBytesToString(megabytes * 1024L * 1024L) + def megabytesToString(megabytes: Long): String = { + bytesToString(megabytes * 1024L * 1024L) } /** @@ -596,7 +609,7 @@ private object Utils extends Logging { output.toString } - /** + /** * A regular expression to match classes of the "core" Spark API that we want to skip when * finding the call site of a method. */ @@ -756,4 +769,13 @@ private object Utils extends Logging { } return buf } + + /* Calculates 'x' modulo 'mod', taking into consideration the sign of x: + * if 'x' is negative, then 'x' % 'mod' is negative too, + * so the function returns (x % mod) + mod in that case. + */ + def nonNegativeMod(x: Int, mod: Int): Int = { + val rawMod = x % mod + rawMod + (if (rawMod < 0) mod else 0) + } } diff --git a/core/src/main/scala/spark/util/Vector.scala b/core/src/main/scala/org/apache/spark/util/Vector.scala index ed49386f18..fe710c58ac 100644 --- a/core/src/main/scala/spark/util/Vector.scala +++ b/core/src/main/scala/org/apache/spark/util/Vector.scala @@ -15,7 +15,7 @@ * limitations under the License.
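
The `nonNegativeMod` helper added above addresses a classic pitfall: Scala's `%`, like Java's, can return a negative remainder, which is unusable as a partition index. A quick demonstration (the rename of `memoryBytesToString` to `bytesToString` in the same hunk reflects that the helper also formats non-memory quantities such as sizes on disk):

```scala
object NonNegativeModSketch extends App {
  def nonNegativeMod(x: Int, mod: Int): Int = {
    val rawMod = x % mod
    rawMod + (if (rawMod < 0) mod else 0)
  }

  // A negative hashCode modulo numPartitions stays negative with %:
  println(-7 % 3)                // -1
  println(nonNegativeMod(-7, 3)) //  2, always in [0, mod)
}
```
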
*/ -package spark.util +package org.apache.spark.util class Vector(val elements: Array[Double]) extends Serializable { def length = elements.length @@ -73,7 +73,6 @@ class Vector(val elements: Array[Double]) extends Serializable { def += (other: Vector): Vector = { if (length != other.length) throw new IllegalArgumentException("Vectors of different length") - var ans = 0.0 var i = 0 while (i < length) { elements(i) += other(i) @@ -117,9 +116,7 @@ object Vector { def apply(elements: Double*) = new Vector(elements.toArray) def apply(length: Int, initializer: Int => Double): Vector = { - val elements = new Array[Double](length) - for (i <- 0 until length) - elements(i) = initializer(i) + val elements: Array[Double] = Array.tabulate(length)(initializer) return new Vector(elements) } @@ -133,7 +130,7 @@ object Vector { implicit def doubleToMultiplier(num: Double) = new Multiplier(num) - implicit object VectorAccumParam extends spark.AccumulatorParam[Vector] { + implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] { def addInPlace(t1: Vector, t2: Vector) = t1 + t2 def zero(initialValue: Vector) = Vector.zeros(initialValue.length) diff --git a/core/src/main/scala/spark/Cache.scala b/core/src/main/scala/spark/Cache.scala deleted file mode 100644 index b0c83ce59d..0000000000 --- a/core/src/main/scala/spark/Cache.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark - -import java.util.concurrent.atomic.AtomicInteger - -private[spark] sealed trait CachePutResponse -private[spark] case class CachePutSuccess(size: Long) extends CachePutResponse -private[spark] case class CachePutFailure() extends CachePutResponse - -/** - * An interface for caches in Spark, to allow for multiple implementations. Caches are used to store - * both partitions of cached RDDs and broadcast variables on Spark executors. Caches are also aware - * of which entries are part of the same dataset (for example, partitions in the same RDD). The key - * for each value in a cache is a (datasetID, partition) pair. - * - * A single Cache instance gets created on each machine and is shared by all caches (i.e. both the - * RDD split cache and the broadcast variable cache), to enable global replacement policies. - * However, because these several independent modules all perform caching, it is important to give - * them separate key namespaces, so that an RDD and a broadcast variable (for example) do not use - * the same key. For this purpose, Cache has the notion of KeySpaces. Each client module must first - * ask for a KeySpace, and then call get() and put() on that space using its own keys. 
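
The `Vector.apply` cleanup above replaces a manual allocate-and-fill loop with `Array.tabulate`, which produces the same array in one expression:

```scala
object TabulateSketch extends App {
  val initializer: Int => Double = i => i * 2.0

  // Before: allocate, then fill index by index.
  val manual = new Array[Double](4)
  for (i <- 0 until 4) manual(i) = initializer(i)

  // After: one call with identical results.
  val tabulated = Array.tabulate(4)(initializer)

  assert(manual.sameElements(tabulated)) // both are Array(0.0, 2.0, 4.0, 6.0)
}
```
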
- * - * This abstract class handles the creation of key spaces, so that subclasses need only deal with - * keys that are unique across modules. - */ -private[spark] abstract class Cache { - private val nextKeySpaceId = new AtomicInteger(0) - private def newKeySpaceId() = nextKeySpaceId.getAndIncrement() - - def newKeySpace() = new KeySpace(this, newKeySpaceId()) - - /** - * Get the value for a given (datasetId, partition), or null if it is not - * found. - */ - def get(datasetId: Any, partition: Int): Any - - /** - * Attempt to put a value in the cache; returns CachePutFailure if this was - * not successful (e.g. because the cache replacement policy forbids it), and - * CachePutSuccess if successful. If size estimation is available, the cache - * implementation should set the size field in CachePutSuccess. - */ - def put(datasetId: Any, partition: Int, value: Any): CachePutResponse - - /** - * Report the capacity of the cache partition. By default this just reports - * zero. Specific implementations can choose to provide the capacity number. - */ - def getCapacity: Long = 0L -} - -/** - * A key namespace in a Cache. - */ -private[spark] class KeySpace(cache: Cache, val keySpaceId: Int) { - def get(datasetId: Any, partition: Int): Any = - cache.get((keySpaceId, datasetId), partition) - - def put(datasetId: Any, partition: Int, value: Any): CachePutResponse = - cache.put((keySpaceId, datasetId), partition, value) - - def getCapacity: Long = cache.getCapacity -} diff --git a/core/src/main/scala/spark/KryoSerializer.scala b/core/src/main/scala/spark/KryoSerializer.scala deleted file mode 100644 index ee37da7948..0000000000 --- a/core/src/main/scala/spark/KryoSerializer.scala +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package spark - -import java.io._ -import java.nio.ByteBuffer -import java.nio.channels.Channels - -import scala.collection.immutable -import scala.collection.mutable - -import com.esotericsoftware.kryo._ -import com.esotericsoftware.kryo.{Serializer => KSerializer} -import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput} -import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializer} -import de.javakaffee.kryoserializers.KryoReflectionFactorySupport - -import serializer.{SerializerInstance, DeserializationStream, SerializationStream} -import spark.broadcast._ -import spark.storage._ - -private[spark] -class KryoSerializationStream(kryo: Kryo, outStream: OutputStream) extends SerializationStream { - - val output = new KryoOutput(outStream) - - def writeObject[T](t: T): SerializationStream = { - kryo.writeClassAndObject(output, t) - this - } - - def flush() { output.flush() } - def close() { output.close() } -} - -private[spark] -class KryoDeserializationStream(kryo: Kryo, inStream: InputStream) extends DeserializationStream { - - val input = new KryoInput(inStream) - - def readObject[T](): T = { - try { - kryo.readClassAndObject(input).asInstanceOf[T] - } catch { - // DeserializationStream uses the EOF exception to indicate stopping condition. - case e: com.esotericsoftware.kryo.KryoException => throw new java.io.EOFException - } - } - - def close() { - // Kryo's Input automatically closes the input stream it is using. - input.close() - } -} - -private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends SerializerInstance { - - val kryo = ks.kryo.get() - val output = ks.output.get() - val input = ks.input.get() - - def serialize[T](t: T): ByteBuffer = { - output.clear() - kryo.writeClassAndObject(output, t) - ByteBuffer.wrap(output.toBytes) - } - - def deserialize[T](bytes: ByteBuffer): T = { - input.setBuffer(bytes.array) - kryo.readClassAndObject(input).asInstanceOf[T] - } - - def deserialize[T](bytes: ByteBuffer, loader: ClassLoader): T = { - val oldClassLoader = kryo.getClassLoader - kryo.setClassLoader(loader) - input.setBuffer(bytes.array) - val obj = kryo.readClassAndObject(input).asInstanceOf[T] - kryo.setClassLoader(oldClassLoader) - obj - } - - def serializeStream(s: OutputStream): SerializationStream = { - new KryoSerializationStream(kryo, s) - } - - def deserializeStream(s: InputStream): DeserializationStream = { - new KryoDeserializationStream(kryo, s) - } -} - -/** - * Interface implemented by clients to register their classes with Kryo when using Kryo - * serialization. - */ -trait KryoRegistrator { - def registerClasses(kryo: Kryo): Unit -} - -/** - * A Spark serializer that uses the [[http://code.google.com/p/kryo/wiki/V1Documentation Kryo 1.x library]]. 
- */ -class KryoSerializer extends spark.serializer.Serializer with Logging { - - val bufferSize = System.getProperty("spark.kryoserializer.buffer.mb", "2").toInt * 1024 * 1024 - - val kryo = new ThreadLocal[Kryo] { - override def initialValue = createKryo() - } - - val output = new ThreadLocal[KryoOutput] { - override def initialValue = new KryoOutput(bufferSize) - } - - val input = new ThreadLocal[KryoInput] { - override def initialValue = new KryoInput(bufferSize) - } - - def createKryo(): Kryo = { - val kryo = new KryoReflectionFactorySupport() - - // Register some commonly used classes - val toRegister: Seq[AnyRef] = Seq( - // Arrays - Array(1), Array(1.0), Array(1.0f), Array(1L), Array(""), Array(("", "")), - Array(new java.lang.Object), Array(1.toByte), Array(true), Array('c'), - // Specialized Tuple2s - ("", ""), ("", 1), (1, 1), (1.0, 1.0), (1L, 1L), - (1, 1.0), (1.0, 1), (1L, 1.0), (1.0, 1L), (1, 1L), (1L, 1), - // Scala collections - List(1), mutable.ArrayBuffer(1), - // Options and Either - Some(1), Left(1), Right(1), - // Higher-dimensional tuples - (1, 1, 1), (1, 1, 1, 1), (1, 1, 1, 1, 1), - None, - ByteBuffer.allocate(1), - StorageLevel.MEMORY_ONLY, - PutBlock("1", ByteBuffer.allocate(1), StorageLevel.MEMORY_ONLY), - GotBlock("1", ByteBuffer.allocate(1)), - GetBlock("1") - ) - for (obj <- toRegister) { - kryo.register(obj.getClass) - } - - // Allow sending SerializableWritable - kryo.register(classOf[SerializableWritable[_]], new KryoJavaSerializer()) - kryo.register(classOf[HttpBroadcast[_]], new KryoJavaSerializer()) - - // Register some commonly used Scala singleton objects. Because these - // are singletons, we must return the exact same local object when we - // deserialize rather than returning a clone as FieldSerializer would. 
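
The deleted serializer's user-facing hook deserves a sketch: applications register their own classes through a `KryoRegistrator`, named by the `spark.kryo.registrator` system property that `createKryo` reads near the end of this file. The trait is copied from the file above; `MyRecord` and `MyRegistrator` are illustrative names:

```scala
import com.esotericsoftware.kryo.Kryo

// As defined in the deleted file above.
trait KryoRegistrator {
  def registerClasses(kryo: Kryo): Unit
}

case class MyRecord(id: Int, name: String)

class MyRegistrator extends KryoRegistrator {
  def registerClasses(kryo: Kryo) {
    kryo.register(classOf[MyRecord])
  }
}

object KryoSetupSketch extends App {
  // Must be set before the SparkContext is created.
  System.setProperty("spark.kryo.registrator", classOf[MyRegistrator].getName)
}
```
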
- class SingletonSerializer[T](obj: T) extends KSerializer[T] { - override def write(kryo: Kryo, output: KryoOutput, obj: T) {} - override def read(kryo: Kryo, input: KryoInput, cls: java.lang.Class[T]): T = obj - } - kryo.register(None.getClass, new SingletonSerializer[AnyRef](None)) - kryo.register(Nil.getClass, new SingletonSerializer[AnyRef](Nil)) - - // Register maps with a special serializer since they have complex internal structure - class ScalaMapSerializer(buildMap: Array[(Any, Any)] => scala.collection.Map[Any, Any]) - extends KSerializer[Array[(Any, Any)] => scala.collection.Map[Any, Any]] { - - //hack, look at https://groups.google.com/forum/#!msg/kryo-users/Eu5V4bxCfws/k-8UQ22y59AJ - private final val FAKE_REFERENCE = new Object() - override def write( - kryo: Kryo, - output: KryoOutput, - obj: Array[(Any, Any)] => scala.collection.Map[Any, Any]) { - val map = obj.asInstanceOf[scala.collection.Map[Any, Any]] - output.writeInt(map.size) - for ((k, v) <- map) { - kryo.writeClassAndObject(output, k) - kryo.writeClassAndObject(output, v) - } - } - override def read ( - kryo: Kryo, - input: KryoInput, - cls: Class[Array[(Any, Any)] => scala.collection.Map[Any, Any]]) - : Array[(Any, Any)] => scala.collection.Map[Any, Any] = { - kryo.reference(FAKE_REFERENCE) - val size = input.readInt() - val elems = new Array[(Any, Any)](size) - for (i <- 0 until size) { - val k = kryo.readClassAndObject(input) - val v = kryo.readClassAndObject(input) - elems(i)=(k,v) - } - buildMap(elems).asInstanceOf[Array[(Any, Any)] => scala.collection.Map[Any, Any]] - } - } - kryo.register(mutable.HashMap().getClass, new ScalaMapSerializer(mutable.HashMap() ++ _)) - // TODO: add support for immutable maps too; this is more annoying because there are many - // subclasses of immutable.Map for small maps (with <= 4 entries) - val map1 = Map[Any, Any](1 -> 1) - val map2 = Map[Any, Any](1 -> 1, 2 -> 2) - val map3 = Map[Any, Any](1 -> 1, 2 -> 2, 3 -> 3) - val map4 = Map[Any, Any](1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4) - val map5 = Map[Any, Any](1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4, 5 -> 5) - kryo.register(map1.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - kryo.register(map2.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - kryo.register(map3.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - kryo.register(map4.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - kryo.register(map5.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - - // Allow the user to register their own classes by setting spark.kryo.registrator - val regCls = System.getProperty("spark.kryo.registrator") - if (regCls != null) { - logInfo("Running user registrator: " + regCls) - val classLoader = Thread.currentThread.getContextClassLoader - val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator] - reg.registerClasses(kryo) - } - - // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops - kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean) - - kryo - } - - def newInstance(): SerializerInstance = { - this.kryo.get().setClassLoader(Thread.currentThread().getContextClassLoader) - new KryoSerializerInstance(this) - } -} diff --git a/core/src/main/scala/spark/deploy/DeployMessage.scala b/core/src/main/scala/spark/deploy/DeployMessage.scala deleted file mode 100644 index e1f8aff6f5..0000000000 --- a/core/src/main/scala/spark/deploy/DeployMessage.scala +++ /dev/null @@ -1,125 
+0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.deploy - -import spark.deploy.ExecutorState.ExecutorState -import spark.deploy.master.{WorkerInfo, ApplicationInfo} -import spark.deploy.worker.ExecutorRunner -import scala.collection.immutable.List -import spark.Utils - - -private[spark] sealed trait DeployMessage extends Serializable - -// Worker to Master - -private[spark] -case class RegisterWorker( - id: String, - host: String, - port: Int, - cores: Int, - memory: Int, - webUiPort: Int, - publicAddress: String) - extends DeployMessage { - Utils.checkHost(host, "Required hostname") - assert (port > 0) -} - -private[spark] -case class ExecutorStateChanged( - appId: String, - execId: Int, - state: ExecutorState, - message: Option[String], - exitStatus: Option[Int]) - extends DeployMessage - -private[spark] case class Heartbeat(workerId: String) extends DeployMessage - -// Master to Worker - -private[spark] case class RegisteredWorker(masterWebUiUrl: String) extends DeployMessage -private[spark] case class RegisterWorkerFailed(message: String) extends DeployMessage -private[spark] case class KillExecutor(appId: String, execId: Int) extends DeployMessage - -private[spark] case class LaunchExecutor( - appId: String, - execId: Int, - appDesc: ApplicationDescription, - cores: Int, - memory: Int, - sparkHome: String) - extends DeployMessage - -// Client to Master - -private[spark] case class RegisterApplication(appDescription: ApplicationDescription) - extends DeployMessage - -// Master to Client - -private[spark] -case class RegisteredApplication(appId: String) extends DeployMessage - -private[spark] -case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) { - Utils.checkHostPort(hostPort, "Required hostport") -} - -private[spark] -case class ExecutorUpdated(id: Int, state: ExecutorState, message: Option[String], - exitStatus: Option[Int]) - -private[spark] -case class ApplicationRemoved(message: String) - -// Internal message in Client - -private[spark] case object StopClient - -// MasterWebUI To Master - -private[spark] case object RequestMasterState - -// Master to MasterWebUI - -private[spark] -case class MasterState(host: String, port: Int, workers: Array[WorkerInfo], - activeApps: Array[ApplicationInfo], completedApps: Array[ApplicationInfo]) { - - Utils.checkHost(host, "Required hostname") - assert (port > 0) - - def uri = "spark://" + host + ":" + port -} - -// WorkerWebUI to Worker -private[spark] case object RequestWorkerState - -// Worker to WorkerWebUI - -private[spark] -case class WorkerState(host: String, port: Int, workerId: String, executors: List[ExecutorRunner], - finishedExecutors: List[ExecutorRunner], masterUrl: String, cores: Int, memory: Int, - coresUsed: Int, 
memoryUsed: Int, masterWebUiUrl: String) { - - Utils.checkHost(host, "Required hostname") - assert (port > 0) -} diff --git a/core/src/main/scala/spark/package.scala b/core/src/main/scala/spark/package.scala deleted file mode 100644 index b244bfbf06..0000000000 --- a/core/src/main/scala/spark/package.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Core Spark functionality. [[spark.SparkContext]] serves as the main entry point to Spark, while - * [[spark.RDD]] is the data type representing a distributed collection, and provides most - * parallel operations. - * - * In addition, [[spark.PairRDDFunctions]] contains operations available only on RDDs of key-value - * pairs, such as `groupByKey` and `join`; [[spark.DoubleRDDFunctions]] contains operations - * available only on RDDs of Doubles; and [[spark.SequenceFileRDDFunctions]] contains operations - * available on RDDs that can be saved as SequenceFiles. These operations are automatically - * available on any RDD of the right type (e.g. RDD[(Int, Int)] through implicit conversions when - * you `import spark.SparkContext._`. - */ -package object spark { - // For package docs only -} diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala deleted file mode 100644 index 2b5bf18541..0000000000 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package spark.rdd - -import spark.{Dependency, OneToOneDependency, NarrowDependency, RDD, Partition, TaskContext} -import java.io.{ObjectOutputStream, IOException} - -private[spark] case class CoalescedRDDPartition( - index: Int, - @transient rdd: RDD[_], - parentsIndices: Array[Int] - ) extends Partition { - var parents: Seq[Partition] = parentsIndices.map(rdd.partitions(_)) - - @throws(classOf[IOException]) - private def writeObject(oos: ObjectOutputStream) { - // Update the reference to parent split at the time of task serialization - parents = parentsIndices.map(rdd.partitions(_)) - oos.defaultWriteObject() - } -} - -/** - * Coalesce the partitions of a parent RDD (`prev`) into fewer partitions, so that each partition of - * this RDD computes one or more of the parent ones. Will produce exactly `maxPartitions` if the - * parent had more than this many partitions, or fewer if the parent had fewer. - * - * This transformation is useful when an RDD with many partitions gets filtered into a smaller one, - * or to avoid having a large number of small tasks when processing a directory with many files. - */ -class CoalescedRDD[T: ClassManifest]( - @transient var prev: RDD[T], - maxPartitions: Int) - extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies - - override def getPartitions: Array[Partition] = { - val prevSplits = prev.partitions - if (prevSplits.length < maxPartitions) { - prevSplits.map(_.index).map{idx => new CoalescedRDDPartition(idx, prev, Array(idx)) } - } else { - (0 until maxPartitions).map { i => - val rangeStart = ((i.toLong * prevSplits.length) / maxPartitions).toInt - val rangeEnd = (((i.toLong + 1) * prevSplits.length) / maxPartitions).toInt - new CoalescedRDDPartition(i, prev, (rangeStart until rangeEnd).toArray) - }.toArray - } - } - - override def compute(split: Partition, context: TaskContext): Iterator[T] = { - split.asInstanceOf[CoalescedRDDPartition].parents.iterator.flatMap { parentSplit => - firstParent[T].iterator(parentSplit, context) - } - } - - override def getDependencies: Seq[Dependency[_]] = { - Seq(new NarrowDependency(prev) { - def getParents(id: Int): Seq[Int] = - partitions(id).asInstanceOf[CoalescedRDDPartition].parentsIndices - }) - } - - override def clearDependencies() { - super.clearDependencies() - prev = null - } -} diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala deleted file mode 100644 index 7c10074dc7..0000000000 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala +++ /dev/null @@ -1,631 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
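
The heart of the deleted `CoalescedRDD.getPartitions` is the integer range arithmetic that assigns each output partition a contiguous block of parent partitions, spreading any remainder across the groups. A sketch of just that computation:

```scala
object CoalesceRangesSketch extends App {
  val parentPartitions = 10
  val maxPartitions = 3

  val groups = (0 until maxPartitions).map { i =>
    // Long arithmetic avoids overflow for very large partition counts.
    val rangeStart = ((i.toLong * parentPartitions) / maxPartitions).toInt
    val rangeEnd = (((i.toLong + 1) * parentPartitions) / maxPartitions).toInt
    (rangeStart until rangeEnd).toList
  }

  println(groups) // Vector(List(0, 1, 2), List(3, 4, 5), List(6, 7, 8, 9))
}
```
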
- */ - -package spark.scheduler.cluster - -import java.lang.{Boolean => JBoolean} - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet - -import spark._ -import spark.TaskState.TaskState -import spark.scheduler._ -import java.nio.ByteBuffer -import java.util.concurrent.atomic.AtomicLong -import java.util.{TimerTask, Timer} - -/** - * The main TaskScheduler implementation, for running tasks on a cluster. Clients should first call - * start(), then submit task sets through the runTasks method. - */ -private[spark] class ClusterScheduler(val sc: SparkContext) - extends TaskScheduler - with Logging { - - // How often to check for speculative tasks - val SPECULATION_INTERVAL = System.getProperty("spark.speculation.interval", "100").toLong - // Threshold above which we warn user initial TaskSet may be starved - val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong - // How often to revive offers in case there are pending tasks - that is how often to try to get - // tasks scheduled in case there are nodes available : default 0 is to disable it - to preserve existing behavior - // Note that this is required due to delayed scheduling due to data locality waits, etc. - // TODO: rename property ? - val TASK_REVIVAL_INTERVAL = System.getProperty("spark.tasks.revive.interval", "0").toLong - - /* - This property controls how aggressive we should be to modulate waiting for node local task scheduling. - To elaborate, currently there is a time limit (3 sec def) to ensure that spark attempts to wait for node locality of tasks before - scheduling on other nodes. We have modified this in yarn branch such that offers to task set happen in prioritized order : - node-local, rack-local and then others - But once all available node local (and no pref) tasks are scheduled, instead of waiting for 3 sec before - scheduling to other nodes (which degrades performance for time sensitive tasks and on larger clusters), we can - modulate that : to also allow rack local nodes or any node. The default is still set to HOST - so that previous behavior is - maintained. This is to allow tuning the tension between pulling rdd data off node and scheduling computation asap. - - TODO: rename property ? The value is one of - - NODE_LOCAL (default, no change w.r.t current behavior), - - RACK_LOCAL and - - ANY - - Note that this property makes more sense when used in conjugation with spark.tasks.revive.interval > 0 : else it is not very effective. - - Additional Note: For non trivial clusters, there is a 4x - 5x reduction in running time (in some of our experiments) based on whether - it is left at default NODE_LOCAL, RACK_LOCAL (if cluster is configured to be rack aware) or ANY. - If cluster is rack aware, then setting it to RACK_LOCAL gives best tradeoff and a 3x - 4x performance improvement while minimizing IO impact. - Also, it brings down the variance in running time drastically. 
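
The long comment above boils down to two system properties (named in the surrounding code). A sketch of the rack-aware configuration it recommends; the interval value here is an illustrative choice, not a documented default:

```scala
object LocalityTuningSketch extends App {
  // Expand locality scope once node-local offers are exhausted:
  // NODE_LOCAL (default), RACK_LOCAL, or ANY.
  System.setProperty("spark.tasks.schedule.aggression", "RACK_LOCAL")

  // Revive offers periodically (ms); 0 disables it, preserving the old
  // behavior. The comment notes aggression has little effect without this.
  System.setProperty("spark.tasks.revive.interval", "1000")
}
```
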
- */ - val TASK_SCHEDULING_AGGRESSION = TaskLocality.parse(System.getProperty("spark.tasks.schedule.aggression", "NODE_LOCAL")) - - val activeTaskSets = new HashMap[String, TaskSetManager] - - val taskIdToTaskSetId = new HashMap[Long, String] - val taskIdToExecutorId = new HashMap[Long, String] - val taskSetTaskIds = new HashMap[String, HashSet[Long]] - - @volatile private var hasReceivedTask = false - @volatile private var hasLaunchedTask = false - private val starvationTimer = new Timer(true) - - // Incrementing Mesos task IDs - val nextTaskId = new AtomicLong(0) - - // Which executor IDs we have executors on - val activeExecutorIds = new HashSet[String] - - // TODO: We might want to remove this and merge it with execId datastructures - but later. - // Which hosts in the cluster are alive (contains hostPort's) - used for process local and node local task locality. - private val hostPortsAlive = new HashSet[String] - private val hostToAliveHostPorts = new HashMap[String, HashSet[String]] - - // The set of executors we have on each host; this is used to compute hostsAlive, which - // in turn is used to decide when we can attain data locality on a given host - private val executorsByHostPort = new HashMap[String, HashSet[String]] - - private val executorIdToHostPort = new HashMap[String, String] - - // JAR server, if any JARs were added by the user to the SparkContext - var jarServer: HttpServer = null - - // URIs of JARs to pass to executor - var jarUris: String = "" - - // Listener object to pass upcalls into - var listener: TaskSchedulerListener = null - - var backend: SchedulerBackend = null - - val mapOutputTracker = SparkEnv.get.mapOutputTracker - - var schedulableBuilder: SchedulableBuilder = null - var rootPool: Pool = null - - override def setListener(listener: TaskSchedulerListener) { - this.listener = listener - } - - def initialize(context: SchedulerBackend) { - backend = context - //default scheduler is FIFO - val schedulingMode = System.getProperty("spark.cluster.schedulingmode", "FIFO") - //temporarily set rootPool name to empty - rootPool = new Pool("", SchedulingMode.withName(schedulingMode), 0, 0) - schedulableBuilder = { - schedulingMode match { - case "FIFO" => - new FIFOSchedulableBuilder(rootPool) - case "FAIR" => - new FairSchedulableBuilder(rootPool) - } - } - schedulableBuilder.buildPools() - // resolve executorId to hostPort mapping. - def executorToHostPort(executorId: String, defaultHostPort: String): String = { - executorIdToHostPort.getOrElse(executorId, defaultHostPort) - } - - // Unfortunately, this means that SparkEnv is indirectly referencing ClusterScheduler - // Will that be a design violation ? - SparkEnv.get.executorIdToHostPort = Some(executorToHostPort) - } - - - def newTaskId(): Long = nextTaskId.getAndIncrement() - - override def start() { - backend.start() - - if (JBoolean.getBoolean("spark.speculation")) { - new Thread("ClusterScheduler speculation check") { - setDaemon(true) - - override def run() { - logInfo("Starting speculative execution thread") - while (true) { - try { - Thread.sleep(SPECULATION_INTERVAL) - } catch { - case e: InterruptedException => {} - } - checkSpeculatableTasks() - } - } - }.start() - } - - - // Change to always run with some default if TASK_REVIVAL_INTERVAL <= 0 ? 
- if (TASK_REVIVAL_INTERVAL > 0) { - new Thread("ClusterScheduler task offer revival check") { - setDaemon(true) - - override def run() { - logInfo("Starting speculative task offer revival thread") - while (true) { - try { - Thread.sleep(TASK_REVIVAL_INTERVAL) - } catch { - case e: InterruptedException => {} - } - - if (hasPendingTasks()) backend.reviveOffers() - } - } - }.start() - } - } - - override def submitTasks(taskSet: TaskSet) { - val tasks = taskSet.tasks - logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks") - this.synchronized { - val manager = new ClusterTaskSetManager(this, taskSet) - activeTaskSets(taskSet.id) = manager - schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties) - taskSetTaskIds(taskSet.id) = new HashSet[Long]() - - if (hasReceivedTask == false) { - starvationTimer.scheduleAtFixedRate(new TimerTask() { - override def run() { - if (!hasLaunchedTask) { - logWarning("Initial job has not accepted any resources; " + - "check your cluster UI to ensure that workers are registered") - } else { - this.cancel() - } - } - }, STARVATION_TIMEOUT, STARVATION_TIMEOUT) - } - hasReceivedTask = true; - } - backend.reviveOffers() - } - - def taskSetFinished(manager: TaskSetManager) { - this.synchronized { - activeTaskSets -= manager.taskSet.id - manager.parent.removeSchedulable(manager) - logInfo("Remove TaskSet %s from pool %s".format(manager.taskSet.id, manager.parent.name)) - taskIdToTaskSetId --= taskSetTaskIds(manager.taskSet.id) - taskIdToExecutorId --= taskSetTaskIds(manager.taskSet.id) - taskSetTaskIds.remove(manager.taskSet.id) - } - } - - /** - * Called by cluster manager to offer resources on slaves. We respond by asking our active task - * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so - * that tasks are balanced across the cluster. - */ - def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = { - synchronized { - SparkEnv.set(sc.env) - // Mark each slave as alive and remember its hostname - for (o <- offers) { - // DEBUG Code - Utils.checkHostPort(o.hostPort) - - executorIdToHostPort(o.executorId) = o.hostPort - if (! executorsByHostPort.contains(o.hostPort)) { - executorsByHostPort(o.hostPort) = new HashSet[String]() - } - - hostPortsAlive += o.hostPort - hostToAliveHostPorts.getOrElseUpdate(Utils.parseHostPort(o.hostPort)._1, new HashSet[String]).add(o.hostPort) - executorGained(o.executorId, o.hostPort) - } - // Build a list of tasks to assign to each slave - val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores)) - // merge availableCpus into nodeToAvailableCpus block ? - val availableCpus = offers.map(o => o.cores).toArray - val nodeToAvailableCpus = { - val map = new HashMap[String, Int]() - for (offer <- offers) { - val hostPort = offer.hostPort - val cores = offer.cores - // DEBUG code - Utils.checkHostPort(hostPort) - - val host = Utils.parseHostPort(hostPort)._1 - - map.put(host, map.getOrElse(host, 0) + cores) - } - - map - } - var launchedTask = false - val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue() - for (manager <- sortedTaskSetQueue) - { - logInfo("parentName:%s,name:%s,runningTasks:%s".format(manager.parent.name, manager.name, manager.runningTasks)) - } - for (manager <- sortedTaskSetQueue) { - - // Split offers based on node local, rack local and off-rack tasks. 
- val processLocalOffers = new HashMap[String, ArrayBuffer[Int]]() - val nodeLocalOffers = new HashMap[String, ArrayBuffer[Int]]() - val rackLocalOffers = new HashMap[String, ArrayBuffer[Int]]() - val otherOffers = new HashMap[String, ArrayBuffer[Int]]() - - for (i <- 0 until offers.size) { - val hostPort = offers(i).hostPort - // DEBUG code - Utils.checkHostPort(hostPort) - - val numProcessLocalTasks = math.max(0, math.min(manager.numPendingTasksForHostPort(hostPort), availableCpus(i))) - if (numProcessLocalTasks > 0){ - val list = processLocalOffers.getOrElseUpdate(hostPort, new ArrayBuffer[Int]) - for (j <- 0 until numProcessLocalTasks) list += i - } - - val host = Utils.parseHostPort(hostPort)._1 - val numNodeLocalTasks = math.max(0, - // Remove process local tasks (which are also host local btw !) from this - math.min(manager.numPendingTasksForHost(hostPort) - numProcessLocalTasks, nodeToAvailableCpus(host))) - if (numNodeLocalTasks > 0){ - val list = nodeLocalOffers.getOrElseUpdate(host, new ArrayBuffer[Int]) - for (j <- 0 until numNodeLocalTasks) list += i - } - - val numRackLocalTasks = math.max(0, - // Remove node local tasks (which are also rack local btw !) from this - math.min(manager.numRackLocalPendingTasksForHost(hostPort) - numProcessLocalTasks - numNodeLocalTasks, nodeToAvailableCpus(host))) - if (numRackLocalTasks > 0){ - val list = rackLocalOffers.getOrElseUpdate(host, new ArrayBuffer[Int]) - for (j <- 0 until numRackLocalTasks) list += i - } - if (numNodeLocalTasks <= 0 && numRackLocalTasks <= 0){ - // add to others list - spread even this across cluster. - val list = otherOffers.getOrElseUpdate(host, new ArrayBuffer[Int]) - list += i - } - } - - val offersPriorityList = new ArrayBuffer[Int]( - processLocalOffers.size + nodeLocalOffers.size + rackLocalOffers.size + otherOffers.size) - - // First process local, then host local, then rack, then others - - // numNodeLocalOffers contains count of both process local and host offers. - val numNodeLocalOffers = { - val processLocalPriorityList = ClusterScheduler.prioritizeContainers(processLocalOffers) - offersPriorityList ++= processLocalPriorityList - - val nodeLocalPriorityList = ClusterScheduler.prioritizeContainers(nodeLocalOffers) - offersPriorityList ++= nodeLocalPriorityList - - processLocalPriorityList.size + nodeLocalPriorityList.size - } - val numRackLocalOffers = { - val rackLocalPriorityList = ClusterScheduler.prioritizeContainers(rackLocalOffers) - offersPriorityList ++= rackLocalPriorityList - rackLocalPriorityList.size - } - offersPriorityList ++= ClusterScheduler.prioritizeContainers(otherOffers) - - var lastLoop = false - val lastLoopIndex = TASK_SCHEDULING_AGGRESSION match { - case TaskLocality.NODE_LOCAL => numNodeLocalOffers - case TaskLocality.RACK_LOCAL => numRackLocalOffers + numNodeLocalOffers - case TaskLocality.ANY => offersPriorityList.size - } - - do { - launchedTask = false - var loopCount = 0 - for (i <- offersPriorityList) { - val execId = offers(i).executorId - val hostPort = offers(i).hostPort - - // If last loop and within the lastLoopIndex, expand scope - else use null (which will use default/existing) - val overrideLocality = if (lastLoop && loopCount < lastLoopIndex) TASK_SCHEDULING_AGGRESSION else null - - // If last loop, override waiting for host locality - we scheduled all local tasks already and there might be more available ... 
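
`resourceOffers` above builds four host-to-offer-index buckets (process-local, node-local, rack-local, other) with the same `getOrElseUpdate` idiom each time. A sketch of that idiom in isolation:

```scala
import scala.collection.mutable.{ArrayBuffer, HashMap}

object BucketSketch extends App {
  val offersByHost = new HashMap[String, ArrayBuffer[Int]]()
  val offerHosts = Seq("host1", "host2", "host1", "host3")

  for ((host, i) <- offerHosts.zipWithIndex) {
    // Lazily create one buffer of offer indices per host.
    offersByHost.getOrElseUpdate(host, new ArrayBuffer[Int]) += i
  }

  println(offersByHost("host1")) // ArrayBuffer(0, 2)
}
```
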
- loopCount += 1 - - manager.slaveOffer(execId, hostPort, availableCpus(i), overrideLocality) match { - case Some(task) => - tasks(i) += task - val tid = task.taskId - taskIdToTaskSetId(tid) = manager.taskSet.id - taskSetTaskIds(manager.taskSet.id) += tid - taskIdToExecutorId(tid) = execId - activeExecutorIds += execId - executorsByHostPort(hostPort) += execId - availableCpus(i) -= 1 - launchedTask = true - - case None => {} - } - } - // Loop once more - when lastLoop = true, then we try to schedule task on all nodes irrespective of - // data locality (we still go in order of priority : but that would not change anything since - // if data local tasks had been available, we would have scheduled them already) - if (lastLoop) { - // prevent more looping - launchedTask = false - } else if (!lastLoop && !launchedTask) { - // Do this only if TASK_SCHEDULING_AGGRESSION != NODE_LOCAL - if (TASK_SCHEDULING_AGGRESSION != TaskLocality.NODE_LOCAL) { - // fudge launchedTask to ensure we loop once more - launchedTask = true - // dont loop anymore - lastLoop = true - } - } - } while (launchedTask) - } - - if (tasks.size > 0) { - hasLaunchedTask = true - } - return tasks - } - } - - def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { - var taskSetToUpdate: Option[TaskSetManager] = None - var failedExecutor: Option[String] = None - var taskFailed = false - synchronized { - try { - if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) { - // We lost this entire executor, so remember that it's gone - val execId = taskIdToExecutorId(tid) - if (activeExecutorIds.contains(execId)) { - removeExecutor(execId) - failedExecutor = Some(execId) - } - } - taskIdToTaskSetId.get(tid) match { - case Some(taskSetId) => - if (activeTaskSets.contains(taskSetId)) { - taskSetToUpdate = Some(activeTaskSets(taskSetId)) - } - if (TaskState.isFinished(state)) { - taskIdToTaskSetId.remove(tid) - if (taskSetTaskIds.contains(taskSetId)) { - taskSetTaskIds(taskSetId) -= tid - } - taskIdToExecutorId.remove(tid) - } - if (state == TaskState.FAILED) { - taskFailed = true - } - case None => - logInfo("Ignoring update from TID " + tid + " because its task set is gone") - } - } catch { - case e: Exception => logError("Exception in statusUpdate", e) - } - } - // Update the task set and DAGScheduler without holding a lock on this, since that can deadlock - if (taskSetToUpdate != None) { - taskSetToUpdate.get.statusUpdate(tid, state, serializedData) - } - if (failedExecutor != None) { - listener.executorLost(failedExecutor.get) - backend.reviveOffers() - } - if (taskFailed) { - - // Also revive offers if a task had failed for some reason other than host lost - backend.reviveOffers() - } - } - - def error(message: String) { - synchronized { - if (activeTaskSets.size > 0) { - // Have each task set throw a SparkException with the error - for ((taskSetId, manager) <- activeTaskSets) { - try { - manager.error(message) - } catch { - case e: Exception => logError("Exception in error callback", e) - } - } - } else { - // No task sets are active but we still got an error. Just exit since this - // must mean the error is during registration. - // It might be good to do something smarter here in the future. - logError("Exiting due to error from cluster scheduler: " + message) - System.exit(1) - } - } - } - - override def stop() { - if (backend != null) { - backend.stop() - } - if (jarServer != null) { - jarServer.stop() - } - - // sleeping for an arbitrary 5 seconds : to ensure that messages are sent out. 
-    // TODO: Do something better!
-    Thread.sleep(5000L)
-  }
-
-  override def defaultParallelism() = backend.defaultParallelism()
-
-
-  // Check for speculatable tasks in all our active jobs.
-  def checkSpeculatableTasks() {
-    var shouldRevive = false
-    synchronized {
-      shouldRevive = rootPool.checkSpeculatableTasks()
-    }
-    if (shouldRevive) {
-      backend.reviveOffers()
-    }
-  }
-
-  // Check for pending tasks in all our active jobs.
-  def hasPendingTasks(): Boolean = {
-    synchronized {
-      rootPool.hasPendingTasks()
-    }
-  }
-
-  def executorLost(executorId: String, reason: ExecutorLossReason) {
-    var failedExecutor: Option[String] = None
-
-    synchronized {
-      if (activeExecutorIds.contains(executorId)) {
-        val hostPort = executorIdToHostPort(executorId)
-        logError("Lost executor %s on %s: %s".format(executorId, hostPort, reason))
-        removeExecutor(executorId)
-        failedExecutor = Some(executorId)
-      } else {
-        // We may get multiple executorLost() calls with different loss reasons. For example, one
-        // may be triggered by a dropped connection from the slave while another may be a report
-        // of executor termination from Mesos. We produce log messages for both so we eventually
-        // report the termination reason.
-        logError("Lost an executor " + executorId + " (already removed): " + reason)
-      }
-    }
-    // Call listener.executorLost without holding the lock on this to prevent deadlock
-    if (failedExecutor != None) {
-      listener.executorLost(failedExecutor.get)
-      backend.reviveOffers()
-    }
-  }
-
-  /** Remove an executor from all our data structures and mark it as lost */
-  private def removeExecutor(executorId: String) {
-    activeExecutorIds -= executorId
-    val hostPort = executorIdToHostPort(executorId)
-    if (hostPortsAlive.contains(hostPort)) {
-      // DEBUG Code
-      Utils.checkHostPort(hostPort)
-
-      hostPortsAlive -= hostPort
-      hostToAliveHostPorts.getOrElseUpdate(Utils.parseHostPort(hostPort)._1, new HashSet[String]).remove(hostPort)
-    }
-
-    val execs = executorsByHostPort.getOrElse(hostPort, new HashSet)
-    execs -= executorId
-    if (execs.isEmpty) {
-      executorsByHostPort -= hostPort
-    }
-    executorIdToHostPort -= executorId
-    rootPool.executorLost(executorId, hostPort)
-  }
-
-  def executorGained(execId: String, hostPort: String) {
-    listener.executorGained(execId, hostPort)
-  }
-
-  def getExecutorsAliveOnHost(host: String): Option[Set[String]] = {
-    Utils.checkHost(host)
-
-    val retval = hostToAliveHostPorts.get(host)
-    if (retval.isDefined) {
-      return Some(retval.get.toSet)
-    }
-
-    None
-  }
-
-  def isExecutorAliveOnHostPort(hostPort: String): Boolean = {
-    // Even if hostPort is a host, it does not matter - it is just a specific check.
-    // But we do have to ensure that only host:port pairs get into hostPortsAlive!
-    // So no check against Utils.checkHostPort
-    hostPortsAlive.contains(hostPort)
-  }
-
-  // By default, rack is unknown
-  def getRackForHost(value: String): Option[String] = None
-
-  // By default, (cached) hosts for rack is unknown
-  def getCachedHostsForRack(rack: String): Option[Set[String]] = None
-}
-
-object ClusterScheduler {
-
-  // Used to 'spray' available containers across the available set, so that we do not use up too
-  // many containers on the same host. Used in YARN mode and in task scheduling (when there are
-  // multiple containers available to execute a task).
-  // For example: YARN can return more containers than we requested under ANY; this method
-  // prioritizes how to use the allocated containers, as sketched below.
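The interleaving this comment describes can be sketched independently of the scheduler types. The helper below is illustrative only (it is not part of the original file); it assumes the per-host lists are already sorted by descending container count, which is what `prioritizeContainers` arranges before flattening:

    // Hypothetical sketch: take the first container from every host, then the
    // second from every host, and so on, spreading entries across hosts.
    def interleave[T](lists: Seq[Seq[T]]): Seq[T] = {
      val maxLen = if (lists.isEmpty) 0 else lists.map(_.length).max
      (0 until maxLen).flatMap(i => lists.flatMap(_.lift(i)))
    }
    // interleave(Seq(Seq("h1c1", "h1c2", "h1c3"), Seq("h2c1", "h2c2"), Seq("h3c1")))
    // => Seq("h1c1", "h2c1", "h3c1", "h1c2", "h2c2", "h1c3")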
-  // Flatten the map such that the array buffer entries are spread out across the returned value.
-  // Given <host, list[container]> == <h1, [c1 .. c5]>, <h2, [c1 .. c3]>, <h3, [c1, c2]>, <h4, c1>, <h5, c1>,
-  // the return value would be something like: h1c1, h2c1, h3c1, h4c1, h5c1, h1c2, h2c2, h3c2, h1c3, h2c3, h1c4, h1c5.
-  // We then 'use' the containers in this order (consuming only the top K from this list, where
-  // K = number to be used). This is to ensure that if we have multiple eligible allocations,
-  // they don't end up allocating all containers on a small number of hosts, increasing the probability
-  // of multiple container failures when a host goes down.
-  // Note: by design, keys whose values have more entries are biased to be picked first.
-  // Also note that each invocation of this method is expected to see containers of the same 'type'
-  // (host-local, rack-local, off-rack) and not a mix across types, so the reordering is simply a
-  // better use of the available list, everything else being the same.
-  // That is, we first consume data-local, then rack-local, and finally off-rack nodes. So the
-  // prioritization from this method applies within each category.
-  def prioritizeContainers[K, T] (map: HashMap[K, ArrayBuffer[T]]): List[T] = {
-    val _keyList = new ArrayBuffer[K](map.size)
-    _keyList ++= map.keys
-
-    // order keyList based on population of value in map
-    val keyList = _keyList.sortWith(
-      (left, right) => map.get(left).getOrElse(Set()).size > map.get(right).getOrElse(Set()).size
-    )
-
-    val retval = new ArrayBuffer[T](keyList.size * 2)
-    var index = 0
-    var found = true
-
-    while (found) {
-      found = false
-      for (key <- keyList) {
-        val containerList: ArrayBuffer[T] = map.get(key).getOrElse(null)
-        assert(containerList != null)
-        // Get the index'th entry for this host - if present
-        if (index < containerList.size) {
-          retval += containerList.apply(index)
-          found = true
-        }
-      }
-      index += 1
-    }
-
-    retval.toList
-  }
-}
diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala
deleted file mode 100644
index 3d06520675..0000000000
--- a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala
+++ /dev/null
@@ -1,765 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package spark.scheduler.cluster - -import java.util.{HashMap => JHashMap, NoSuchElementException, Arrays} - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet -import scala.math.max -import scala.math.min - -import spark._ -import spark.scheduler._ -import spark.TaskState.TaskState -import java.nio.ByteBuffer - -private[spark] object TaskLocality extends Enumeration("PROCESS_LOCAL", "NODE_LOCAL", "RACK_LOCAL", "ANY") with Logging { - - // process local is expected to be used ONLY within tasksetmanager for now. - val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value - - type TaskLocality = Value - - def isAllowed(constraint: TaskLocality, condition: TaskLocality): Boolean = { - - // Must not be the constraint. - assert (constraint != TaskLocality.PROCESS_LOCAL) - - constraint match { - case TaskLocality.NODE_LOCAL => condition == TaskLocality.NODE_LOCAL - case TaskLocality.RACK_LOCAL => condition == TaskLocality.NODE_LOCAL || condition == TaskLocality.RACK_LOCAL - // For anything else, allow - case _ => true - } - } - - def parse(str: String): TaskLocality = { - // better way to do this ? - try { - val retval = TaskLocality.withName(str) - // Must not specify PROCESS_LOCAL ! - assert (retval != TaskLocality.PROCESS_LOCAL) - - retval - } catch { - case nEx: NoSuchElementException => { - logWarning("Invalid task locality specified '" + str + "', defaulting to NODE_LOCAL"); - // default to preserve earlier behavior - NODE_LOCAL - } - } - } -} - -/** - * Schedules the tasks within a single TaskSet in the ClusterScheduler. - */ -private[spark] class ClusterTaskSetManager( - sched: ClusterScheduler, - val taskSet: TaskSet) - extends TaskSetManager - with Logging { - - // Maximum time to wait to run a task in a preferred location (in ms) - val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "3000").toLong - - // CPUs to request per task - val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toDouble - - // Maximum times a task is allowed to fail before failing the job - val MAX_TASK_FAILURES = 4 - - // Quantile of tasks at which to start speculation - val SPECULATION_QUANTILE = System.getProperty("spark.speculation.quantile", "0.75").toDouble - val SPECULATION_MULTIPLIER = System.getProperty("spark.speculation.multiplier", "1.5").toDouble - - // Serializer for closures and tasks. - val ser = SparkEnv.get.closureSerializer.newInstance() - - val tasks = taskSet.tasks - val numTasks = tasks.length - val copiesRunning = new Array[Int](numTasks) - val finished = new Array[Boolean](numTasks) - val numFailures = new Array[Int](numTasks) - val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil) - var tasksFinished = 0 - - var weight = 1 - var minShare = 0 - var runningTasks = 0 - var priority = taskSet.priority - var stageId = taskSet.stageId - var name = "TaskSet_"+taskSet.stageId.toString - var parent:Schedulable = null - - // Last time when we launched a preferred task (for delay scheduling) - var lastPreferredLaunchTime = System.currentTimeMillis - - // List of pending tasks for each node (process local to container). These collections are actually - // treated as stacks, in which new tasks are added to the end of the - // ArrayBuffer and removed from the end. This makes it faster to detect - // tasks that repeatedly fail because whenever a task failed, it is put - // back at the head of the stack. 
They are also only cleaned up lazily;
-  // when a task is launched, it remains in all the pending lists except
-  // the one that it was launched from, but gets removed from them later.
-  private val pendingTasksForHostPort = new HashMap[String, ArrayBuffer[Int]]
-
-  // List of pending tasks for each node.
-  // Essentially, similar to pendingTasksForHostPort, except at host level
-  private val pendingTasksForHost = new HashMap[String, ArrayBuffer[Int]]
-
-  // List of pending tasks for each node based on rack locality.
-  // Essentially, similar to pendingTasksForHost, except at rack level
-  private val pendingRackLocalTasksForHost = new HashMap[String, ArrayBuffer[Int]]
-
-  // List containing pending tasks with no locality preferences
-  val pendingTasksWithNoPrefs = new ArrayBuffer[Int]
-
-  // List containing all pending tasks (also used as a stack, as above)
-  val allPendingTasks = new ArrayBuffer[Int]
-
-  // Tasks that can be speculated. Since these will be a small fraction of total
-  // tasks, we'll just hold them in a HashSet.
-  val speculatableTasks = new HashSet[Int]
-
-  // Task index, start and finish time for each task attempt (indexed by task ID)
-  val taskInfos = new HashMap[Long, TaskInfo]
-
-  // Did the job fail?
-  var failed = false
-  var causeOfFailure = ""
-
-  // How frequently to reprint duplicate exceptions in full, in milliseconds
-  val EXCEPTION_PRINT_INTERVAL =
-    System.getProperty("spark.logging.exceptionPrintInterval", "10000").toLong
-  // Map of recent exceptions (identified by string representation and
-  // top stack frame) to duplicate count (how many times the same
-  // exception has appeared) and time the full exception was
-  // printed. This should ideally be an LRU map that can drop old
-  // exceptions automatically.
-  val recentExceptions = HashMap[String, (Int, Long)]()
-
-  // Figure out the current map output tracker generation and set it on all tasks
-  val generation = sched.mapOutputTracker.getGeneration
-  logDebug("Generation for " + taskSet.id + ": " + generation)
-  for (t <- tasks) {
-    t.generation = generation
-  }
-
-  // Add all our tasks to the pending lists. We do this in reverse order
-  // of task index so that tasks with low indices get launched first.
-  for (i <- (0 until numTasks).reverse) {
-    addPendingTask(i)
-  }
-
-  // Note that this follows the locality hierarchy:
-  // if we search for NODE_LOCAL, the output will include PROCESS_LOCAL, and
-  // if we search for RACK_LOCAL, it will include PROCESS_LOCAL & NODE_LOCAL.
-  private def findPreferredLocations(_taskPreferredLocations: Seq[String], scheduler: ClusterScheduler,
-      taskLocality: TaskLocality.TaskLocality): HashSet[String] = {
-
-    if (TaskLocality.PROCESS_LOCAL == taskLocality) {
-      // Straightforward comparison; special-case it.
-      val retval = new HashSet[String]()
-      scheduler.synchronized {
-        for (location <- _taskPreferredLocations) {
-          if (scheduler.isExecutorAliveOnHostPort(location)) {
-            retval += location
-          }
-        }
-      }
-
-      return retval
-    }
-
-    val taskPreferredLocations =
-      if (TaskLocality.NODE_LOCAL == taskLocality) {
-        _taskPreferredLocations
-      } else {
-        assert (TaskLocality.RACK_LOCAL == taskLocality)
-        // Expand the set to include all 'seen' rack-local hosts. This works since container
-        // allocation/management happens within the master, so any rack locality information is
-        // updated in the master.
-        // Best-effort, and maybe sort of a kludge for now ... rework it later?
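To make the rack expansion concrete with hypothetical values: if a task prefers host "h1", `getRackForHost("h1")` returns `Some("r1")`, and `getCachedHostsForRack("r1")` returns `Some(Set("h1", "h2", "h3"))`, then the candidate set grows to {h1, h2, h3} before being filtered down, as below, to hosts with live executors.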
- val hosts = new HashSet[String] - _taskPreferredLocations.foreach(h => { - val rackOpt = scheduler.getRackForHost(h) - if (rackOpt.isDefined) { - val hostsOpt = scheduler.getCachedHostsForRack(rackOpt.get) - if (hostsOpt.isDefined) { - hosts ++= hostsOpt.get - } - } - - // Ensure that irrespective of what scheduler says, host is always added ! - hosts += h - }) - - hosts - } - - val retval = new HashSet[String] - scheduler.synchronized { - for (prefLocation <- taskPreferredLocations) { - val aliveLocationsOpt = scheduler.getExecutorsAliveOnHost(Utils.parseHostPort(prefLocation)._1) - if (aliveLocationsOpt.isDefined) { - retval ++= aliveLocationsOpt.get - } - } - } - - retval - } - - // Add a task to all the pending-task lists that it should be on. - private def addPendingTask(index: Int) { - // We can infer hostLocalLocations from rackLocalLocations by joining it against tasks(index).preferredLocations (with appropriate - // hostPort <-> host conversion). But not doing it for simplicity sake. If this becomes a performance issue, modify it. - val processLocalLocations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.PROCESS_LOCAL) - val hostLocalLocations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL) - val rackLocalLocations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.RACK_LOCAL) - - if (rackLocalLocations.size == 0) { - // Current impl ensures this. - assert (processLocalLocations.size == 0) - assert (hostLocalLocations.size == 0) - pendingTasksWithNoPrefs += index - } else { - - // process local locality - for (hostPort <- processLocalLocations) { - // DEBUG Code - Utils.checkHostPort(hostPort) - - val hostPortList = pendingTasksForHostPort.getOrElseUpdate(hostPort, ArrayBuffer()) - hostPortList += index - } - - // host locality (includes process local) - for (hostPort <- hostLocalLocations) { - // DEBUG Code - Utils.checkHostPort(hostPort) - - val host = Utils.parseHostPort(hostPort)._1 - val hostList = pendingTasksForHost.getOrElseUpdate(host, ArrayBuffer()) - hostList += index - } - - // rack locality (includes process local and host local) - for (rackLocalHostPort <- rackLocalLocations) { - // DEBUG Code - Utils.checkHostPort(rackLocalHostPort) - - val rackLocalHost = Utils.parseHostPort(rackLocalHostPort)._1 - val list = pendingRackLocalTasksForHost.getOrElseUpdate(rackLocalHost, ArrayBuffer()) - list += index - } - } - - allPendingTasks += index - } - - // Return the pending tasks list for a given host port (process local), or an empty list if - // there is no map entry for that host - private def getPendingTasksForHostPort(hostPort: String): ArrayBuffer[Int] = { - // DEBUG Code - Utils.checkHostPort(hostPort) - pendingTasksForHostPort.getOrElse(hostPort, ArrayBuffer()) - } - - // Return the pending tasks list for a given host, or an empty list if - // there is no map entry for that host - private def getPendingTasksForHost(hostPort: String): ArrayBuffer[Int] = { - val host = Utils.parseHostPort(hostPort)._1 - pendingTasksForHost.getOrElse(host, ArrayBuffer()) - } - - // Return the pending tasks (rack level) list for a given host, or an empty list if - // there is no map entry for that host - private def getRackLocalPendingTasksForHost(hostPort: String): ArrayBuffer[Int] = { - val host = Utils.parseHostPort(hostPort)._1 - pendingRackLocalTasksForHost.getOrElse(host, ArrayBuffer()) - } - - // Number of pending tasks for a given host Port (which would be process local) - def 
numPendingTasksForHostPort(hostPort: String): Int = { - getPendingTasksForHostPort(hostPort).count( index => copiesRunning(index) == 0 && !finished(index) ) - } - - // Number of pending tasks for a given host (which would be data local) - def numPendingTasksForHost(hostPort: String): Int = { - getPendingTasksForHost(hostPort).count( index => copiesRunning(index) == 0 && !finished(index) ) - } - - // Number of pending rack local tasks for a given host - def numRackLocalPendingTasksForHost(hostPort: String): Int = { - getRackLocalPendingTasksForHost(hostPort).count( index => copiesRunning(index) == 0 && !finished(index) ) - } - - - // Dequeue a pending task from the given list and return its index. - // Return None if the list is empty. - // This method also cleans up any tasks in the list that have already - // been launched, since we want that to happen lazily. - private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = { - while (!list.isEmpty) { - val index = list.last - list.trimEnd(1) - if (copiesRunning(index) == 0 && !finished(index)) { - return Some(index) - } - } - return None - } - - // Return a speculative task for a given host if any are available. The task should not have an - // attempt running on this host, in case the host is slow. In addition, if locality is set, the - // task must have a preference for this host/rack/no preferred locations at all. - private def findSpeculativeTask(hostPort: String, locality: TaskLocality.TaskLocality): Option[Int] = { - - assert (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) - speculatableTasks.retain(index => !finished(index)) // Remove finished tasks from set - - if (speculatableTasks.size > 0) { - val localTask = speculatableTasks.find { - index => - val locations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL) - val attemptLocs = taskAttempts(index).map(_.hostPort) - (locations.size == 0 || locations.contains(hostPort)) && !attemptLocs.contains(hostPort) - } - - if (localTask != None) { - speculatableTasks -= localTask.get - return localTask - } - - // check for rack locality - if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { - val rackTask = speculatableTasks.find { - index => - val locations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.RACK_LOCAL) - val attemptLocs = taskAttempts(index).map(_.hostPort) - locations.contains(hostPort) && !attemptLocs.contains(hostPort) - } - - if (rackTask != None) { - speculatableTasks -= rackTask.get - return rackTask - } - } - - // Any task ... - if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { - // Check for attemptLocs also ? - val nonLocalTask = speculatableTasks.find(i => !taskAttempts(i).map(_.hostPort).contains(hostPort)) - if (nonLocalTask != None) { - speculatableTasks -= nonLocalTask.get - return nonLocalTask - } - } - } - return None - } - - // Dequeue a pending task for a given node and return its index. - // If localOnly is set to false, allow non-local tasks as well. 
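Note that the comment above still refers to an older `localOnly` flag; the actual parameter is the `locality` constraint used in the method that follows. A condensed, illustrative restatement of the cascade that `findTask` implements (not part of the original file) is:

    // Sketch only: process-local first, then node-local, then rack-local if the
    // constraint allows, then tasks with no preference, then any task if the
    // constraint allows, and finally speculative candidates.
    private def findTaskSketch(hostPort: String, locality: TaskLocality.TaskLocality): Option[Int] = {
      findTaskFromList(getPendingTasksForHostPort(hostPort))
        .orElse(findTaskFromList(getPendingTasksForHost(hostPort)))
        .orElse(
          if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
            findTaskFromList(getRackLocalPendingTasksForHost(hostPort))
          } else None)
        .orElse(findTaskFromList(pendingTasksWithNoPrefs))
        .orElse(
          if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) {
            findTaskFromList(allPendingTasks)
          } else None)
        .orElse(findSpeculativeTask(hostPort, locality))
    }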
- private def findTask(hostPort: String, locality: TaskLocality.TaskLocality): Option[Int] = { - val processLocalTask = findTaskFromList(getPendingTasksForHostPort(hostPort)) - if (processLocalTask != None) { - return processLocalTask - } - - val localTask = findTaskFromList(getPendingTasksForHost(hostPort)) - if (localTask != None) { - return localTask - } - - if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { - val rackLocalTask = findTaskFromList(getRackLocalPendingTasksForHost(hostPort)) - if (rackLocalTask != None) { - return rackLocalTask - } - } - - // Look for no pref tasks AFTER rack local tasks - this has side effect that we will get to failed tasks later rather than sooner. - // TODO: That code path needs to be revisited (adding to no prefs list when host:port goes down). - val noPrefTask = findTaskFromList(pendingTasksWithNoPrefs) - if (noPrefTask != None) { - return noPrefTask - } - - if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { - val nonLocalTask = findTaskFromList(allPendingTasks) - if (nonLocalTask != None) { - return nonLocalTask - } - } - - // Finally, if all else has failed, find a speculative task - return findSpeculativeTask(hostPort, locality) - } - - private def isProcessLocalLocation(task: Task[_], hostPort: String): Boolean = { - Utils.checkHostPort(hostPort) - - val locs = task.preferredLocations - - locs.contains(hostPort) - } - - private def isHostLocalLocation(task: Task[_], hostPort: String): Boolean = { - val locs = task.preferredLocations - - // If no preference, consider it as host local - if (locs.isEmpty) return true - - val host = Utils.parseHostPort(hostPort)._1 - locs.find(h => Utils.parseHostPort(h)._1 == host).isDefined - } - - // Does a host count as a rack local preferred location for a task? (assumes host is NOT preferred location). - // This is true if either the task has preferred locations and this host is one, or it has - // no preferred locations (in which we still count the launch as preferred). - private def isRackLocalLocation(task: Task[_], hostPort: String): Boolean = { - - val locs = task.preferredLocations - - val preferredRacks = new HashSet[String]() - for (preferredHost <- locs) { - val rack = sched.getRackForHost(preferredHost) - if (None != rack) preferredRacks += rack.get - } - - if (preferredRacks.isEmpty) return false - - val hostRack = sched.getRackForHost(hostPort) - - return None != hostRack && preferredRacks.contains(hostRack.get) - } - - // Respond to an offer of a single slave from the scheduler by finding a task - def slaveOffer(execId: String, hostPort: String, availableCpus: Double, overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] = { - - if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) { - // If explicitly specified, use that - val locality = if (overrideLocality != null) overrideLocality else { - // expand only if we have waited for more than LOCALITY_WAIT for a host local task ... 
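Concretely, with the default `spark.locality.wait` of 3000 ms declared earlier in this file: an offer that arrives 2 seconds after the last preferred launch is still restricted to `NODE_LOCAL` tasks, while one that arrives 4 seconds after it may be handed `ANY` task, as the check below shows.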
- val time = System.currentTimeMillis - if (time - lastPreferredLaunchTime < LOCALITY_WAIT) TaskLocality.NODE_LOCAL else TaskLocality.ANY - } - - findTask(hostPort, locality) match { - case Some(index) => { - // Found a task; do some bookkeeping and return a Mesos task for it - val task = tasks(index) - val taskId = sched.newTaskId() - // Figure out whether this should count as a preferred launch - val taskLocality = - if (isProcessLocalLocation(task, hostPort)) TaskLocality.PROCESS_LOCAL - else if (isHostLocalLocation(task, hostPort)) TaskLocality.NODE_LOCAL - else if (isRackLocalLocation(task, hostPort)) TaskLocality.RACK_LOCAL - else TaskLocality.ANY - val prefStr = taskLocality.toString - logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format( - taskSet.id, index, taskId, execId, hostPort, prefStr)) - // Do various bookkeeping - copiesRunning(index) += 1 - val time = System.currentTimeMillis - val info = new TaskInfo(taskId, index, time, execId, hostPort, taskLocality) - taskInfos(taskId) = info - taskAttempts(index) = info :: taskAttempts(index) - if (taskLocality == TaskLocality.PROCESS_LOCAL || taskLocality == TaskLocality.NODE_LOCAL) { - lastPreferredLaunchTime = time - } - // Serialize and return the task - val startTime = System.currentTimeMillis - val serializedTask = Task.serializeWithDependencies( - task, sched.sc.addedFiles, sched.sc.addedJars, ser) - val timeTaken = System.currentTimeMillis - startTime - increaseRunningTasks(1) - logInfo("Serialized task %s:%d as %d bytes in %d ms".format( - taskSet.id, index, serializedTask.limit, timeTaken)) - val taskName = "task %s:%d".format(taskSet.id, index) - return Some(new TaskDescription(taskId, execId, taskName, serializedTask)) - } - case _ => - } - } - return None - } - - def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { - state match { - case TaskState.FINISHED => - taskFinished(tid, state, serializedData) - case TaskState.LOST => - taskLost(tid, state, serializedData) - case TaskState.FAILED => - taskLost(tid, state, serializedData) - case TaskState.KILLED => - taskLost(tid, state, serializedData) - case _ => - } - } - - def taskFinished(tid: Long, state: TaskState, serializedData: ByteBuffer) { - val info = taskInfos(tid) - if (info.failed) { - // We might get two task-lost messages for the same task in coarse-grained Mesos mode, - // or even from Mesos itself when acks get delayed. 
- return - } - val index = info.index - info.markSuccessful() - decreaseRunningTasks(1) - if (!finished(index)) { - tasksFinished += 1 - logInfo("Finished TID %s in %d ms (progress: %d/%d)".format( - tid, info.duration, tasksFinished, numTasks)) - // Deserialize task result and pass it to the scheduler - try { - val result = ser.deserialize[TaskResult[_]](serializedData) - result.metrics.resultSize = serializedData.limit() - sched.listener.taskEnded(tasks(index), Success, result.value, result.accumUpdates, info, result.metrics) - } catch { - case cnf: ClassNotFoundException => - val loader = Thread.currentThread().getContextClassLoader - throw new SparkException("ClassNotFound with classloader: " + loader, cnf) - case ex => throw ex - } - // Mark finished and stop if we've finished all the tasks - finished(index) = true - if (tasksFinished == numTasks) { - sched.taskSetFinished(this) - } - } else { - logInfo("Ignoring task-finished event for TID " + tid + - " because task " + index + " is already finished") - } - } - - def taskLost(tid: Long, state: TaskState, serializedData: ByteBuffer) { - val info = taskInfos(tid) - if (info.failed) { - // We might get two task-lost messages for the same task in coarse-grained Mesos mode, - // or even from Mesos itself when acks get delayed. - return - } - val index = info.index - info.markFailed() - decreaseRunningTasks(1) - if (!finished(index)) { - logInfo("Lost TID %s (task %s:%d)".format(tid, taskSet.id, index)) - copiesRunning(index) -= 1 - // Check if the problem is a map output fetch failure. In that case, this - // task will never succeed on any node, so tell the scheduler about it. - if (serializedData != null && serializedData.limit() > 0) { - val reason = ser.deserialize[TaskEndReason](serializedData, getClass.getClassLoader) - reason match { - case fetchFailed: FetchFailed => - logInfo("Loss was due to fetch failure from " + fetchFailed.bmAddress) - sched.listener.taskEnded(tasks(index), fetchFailed, null, null, info, null) - finished(index) = true - tasksFinished += 1 - sched.taskSetFinished(this) - decreaseRunningTasks(runningTasks) - return - - case taskResultTooBig: TaskResultTooBigFailure => - logInfo("Loss was due to task %s result exceeding Akka frame size; " + - "aborting job".format(tid)) - abort("Task %s result exceeded Akka frame size".format(tid)) - return - - case ef: ExceptionFailure => - sched.listener.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null)) - val key = ef.description - val now = System.currentTimeMillis - val (printFull, dupCount) = { - if (recentExceptions.contains(key)) { - val (dupCount, printTime) = recentExceptions(key) - if (now - printTime > EXCEPTION_PRINT_INTERVAL) { - recentExceptions(key) = (0, now) - (true, 0) - } else { - recentExceptions(key) = (dupCount + 1, printTime) - (false, dupCount + 1) - } - } else { - recentExceptions(key) = (0, now) - (true, 0) - } - } - if (printFull) { - val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString)) - logInfo("Loss was due to %s\n%s\n%s".format( - ef.className, ef.description, locs.mkString("\n"))) - } else { - logInfo("Loss was due to %s [duplicate %d]".format(ef.description, dupCount)) - } - - case _ => {} - } - } - // On non-fetch failures, re-enqueue the task as pending for a max number of retries - addPendingTask(index) - // Count failed attempts only on FAILED and LOST state (not on KILLED) - if (state == TaskState.FAILED || state == TaskState.LOST) { - numFailures(index) += 1 - if (numFailures(index) > MAX_TASK_FAILURES) 
{
-          logError("Task %s:%d failed more than %d times; aborting job".format(
-            taskSet.id, index, MAX_TASK_FAILURES))
-          abort("Task %s:%d failed more than %d times".format(taskSet.id, index, MAX_TASK_FAILURES))
-        }
-      }
-    } else {
-      logInfo("Ignoring task-lost event for TID " + tid +
-        " because task " + index + " is already finished")
-    }
-  }
-
-  def error(message: String) {
-    // Save the error message
-    abort("Error: " + message)
-  }
-
-  def abort(message: String) {
-    failed = true
-    causeOfFailure = message
-    // TODO: Kill running tasks if we were not terminated due to a Mesos error
-    sched.listener.taskSetFailed(taskSet, message)
-    decreaseRunningTasks(runningTasks)
-    sched.taskSetFinished(this)
-  }
-
-  override def increaseRunningTasks(taskNum: Int) {
-    runningTasks += taskNum
-    if (parent != null) {
-      parent.increaseRunningTasks(taskNum)
-    }
-  }
-
-  override def decreaseRunningTasks(taskNum: Int) {
-    runningTasks -= taskNum
-    if (parent != null) {
-      parent.decreaseRunningTasks(taskNum)
-    }
-  }
-
-  // TODO: for now we just find a Pool, not a TaskSetManager; we can extend this function in the future if needed
-  override def getSchedulableByName(name: String): Schedulable = {
-    return null
-  }
-
-  override def addSchedulable(schedulable: Schedulable) {
-    // nothing
-  }
-
-  override def removeSchedulable(schedulable: Schedulable) {
-    // nothing
-  }
-
-  override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = {
-    var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager]
-    sortedTaskSetQueue += this
-    return sortedTaskSetQueue
-  }
-
-  override def executorLost(execId: String, hostPort: String) {
-    logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id)
-
-    // If some task has preferred locations only on the lost host, and there are no more executors
-    // there, put it in the no-prefs list to avoid the wait from delay scheduling.
-
-    // Host-local tasks - should we push these to the rack-local or the no-pref list? For now,
-    // preserving behavior and moving them to the no-prefs list. Note, this was done due to
-    // implications related to 'waiting' for data-local tasks, etc.
-    // Note: NOT checking the process-local list, since the host-local list is a superset of it. We
-    // need to add to no-prefs only if there is no host-local node for the task (not if there is no
-    // process-local node for the task).
-    for (index <- getPendingTasksForHost(Utils.parseHostPort(hostPort)._1)) {
-      // val newLocs = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.RACK_LOCAL)
-      val newLocs = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL)
-      if (newLocs.isEmpty) {
-        pendingTasksWithNoPrefs += index
-      }
-    }
-
-    // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage
-    if (tasks(0).isInstanceOf[ShuffleMapTask]) {
-      for ((tid, info) <- taskInfos if info.executorId == execId) {
-        val index = taskInfos(tid).index
-        if (finished(index)) {
-          finished(index) = false
-          copiesRunning(index) -= 1
-          tasksFinished -= 1
-          addPendingTask(index)
-          // Tell the DAGScheduler that this task was resubmitted so that it doesn't think our
-          // stage finishes when a total of tasks.size tasks finish.
-          sched.listener.taskEnded(tasks(index), Resubmitted, null, null, info, null)
-        }
-      }
-    }
-    // Also re-enqueue any tasks that were running on the node
-    for ((tid, info) <- taskInfos if info.running && info.executorId == execId) {
-      taskLost(tid, TaskState.KILLED, null)
-    }
-  }
-
-  /**
-   * Check for tasks to be speculated and return true if there are any.
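(Worked example of the policy below, using the defaults declared earlier in this file: with `spark.speculation.quantile` = 0.75 and `spark.speculation.multiplier` = 1.5, a 100-task set is checked for speculation only after 75 tasks have finished, and a still-running task becomes speculatable once its elapsed time exceeds max(1.5 * median successful task duration, 100 ms).)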
This is called periodically - * by the ClusterScheduler. - * - * TODO: To make this scale to large jobs, we need to maintain a list of running tasks, so that - * we don't scan the whole task set. It might also help to make this sorted by launch time. - */ - override def checkSpeculatableTasks(): Boolean = { - // Can't speculate if we only have one task, or if all tasks have finished. - if (numTasks == 1 || tasksFinished == numTasks) { - return false - } - var foundTasks = false - val minFinishedForSpeculation = (SPECULATION_QUANTILE * numTasks).floor.toInt - logDebug("Checking for speculative tasks: minFinished = " + minFinishedForSpeculation) - if (tasksFinished >= minFinishedForSpeculation) { - val time = System.currentTimeMillis() - val durations = taskInfos.values.filter(_.successful).map(_.duration).toArray - Arrays.sort(durations) - val medianDuration = durations(min((0.5 * numTasks).round.toInt, durations.size - 1)) - val threshold = max(SPECULATION_MULTIPLIER * medianDuration, 100) - // TODO: Threshold should also look at standard deviation of task durations and have a lower - // bound based on that. - logDebug("Task length threshold for speculation: " + threshold) - for ((tid, info) <- taskInfos) { - val index = info.index - if (!finished(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold && - !speculatableTasks.contains(index)) { - logInfo( - "Marking task %s:%d (on %s) as speculatable because it ran more than %.0f ms".format( - taskSet.id, index, info.hostPort, threshold)) - speculatableTasks += index - foundTasks = true - } - } - } - return foundTasks - } - - override def hasPendingTasks(): Boolean = { - numTasks > 0 && tasksFinished < numTasks - } -} diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala deleted file mode 100644 index ac9e5ef94d..0000000000 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package spark.scheduler.cluster - -import spark.TaskState.TaskState -import java.nio.ByteBuffer -import spark.util.SerializableBuffer -import spark.Utils - -private[spark] sealed trait StandaloneClusterMessage extends Serializable - -// Driver to executors -private[spark] -case class LaunchTask(task: TaskDescription) extends StandaloneClusterMessage - -private[spark] -case class RegisteredExecutor(sparkProperties: Seq[(String, String)]) - extends StandaloneClusterMessage - -private[spark] -case class RegisterExecutorFailed(message: String) extends StandaloneClusterMessage - -// Executors to driver -private[spark] -case class RegisterExecutor(executorId: String, hostPort: String, cores: Int) - extends StandaloneClusterMessage { - Utils.checkHostPort(hostPort, "Expected host port") -} - -private[spark] -case class StatusUpdate(executorId: String, taskId: Long, state: TaskState, data: SerializableBuffer) - extends StandaloneClusterMessage - -private[spark] -object StatusUpdate { - /** Alternate factory method that takes a ByteBuffer directly for the data field */ - def apply(executorId: String, taskId: Long, state: TaskState, data: ByteBuffer): StatusUpdate = { - StatusUpdate(executorId, taskId, state, new SerializableBuffer(data)) - } -} - -// Internal messages in driver -private[spark] case object ReviveOffers extends StandaloneClusterMessage -private[spark] case object StopDriver extends StandaloneClusterMessage - -private[spark] case class RemoveExecutor(executorId: String, reason: String) - extends StandaloneClusterMessage diff --git a/core/src/main/scala/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/spark/storage/BlockManagerMessages.scala deleted file mode 100644 index 01de4ccb8f..0000000000 --- a/core/src/main/scala/spark/storage/BlockManagerMessages.scala +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.storage - -import java.io.{Externalizable, ObjectInput, ObjectOutput} - -import akka.actor.ActorRef - - -////////////////////////////////////////////////////////////////////////////////// -// Messages from the master to slaves. -////////////////////////////////////////////////////////////////////////////////// -private[spark] -sealed trait ToBlockManagerSlave - -// Remove a block from the slaves that have it. This can only be used to remove -// blocks that the master knows about. -private[spark] -case class RemoveBlock(blockId: String) extends ToBlockManagerSlave - -// Remove all blocks belonging to a specific RDD. -private[spark] case class RemoveRdd(rddId: Int) extends ToBlockManagerSlave - - -////////////////////////////////////////////////////////////////////////////////// -// Messages from slaves to the master. 
-////////////////////////////////////////////////////////////////////////////////// -private[spark] -sealed trait ToBlockManagerMaster - -private[spark] -case class RegisterBlockManager( - blockManagerId: BlockManagerId, - maxMemSize: Long, - sender: ActorRef) - extends ToBlockManagerMaster - -private[spark] -case class HeartBeat(blockManagerId: BlockManagerId) extends ToBlockManagerMaster - -private[spark] -class UpdateBlockInfo( - var blockManagerId: BlockManagerId, - var blockId: String, - var storageLevel: StorageLevel, - var memSize: Long, - var diskSize: Long) - extends ToBlockManagerMaster - with Externalizable { - - def this() = this(null, null, null, 0, 0) // For deserialization only - - override def writeExternal(out: ObjectOutput) { - blockManagerId.writeExternal(out) - out.writeUTF(blockId) - storageLevel.writeExternal(out) - out.writeLong(memSize) - out.writeLong(diskSize) - } - - override def readExternal(in: ObjectInput) { - blockManagerId = BlockManagerId(in) - blockId = in.readUTF() - storageLevel = StorageLevel(in) - memSize = in.readLong() - diskSize = in.readLong() - } -} - -private[spark] -object UpdateBlockInfo { - def apply(blockManagerId: BlockManagerId, - blockId: String, - storageLevel: StorageLevel, - memSize: Long, - diskSize: Long): UpdateBlockInfo = { - new UpdateBlockInfo(blockManagerId, blockId, storageLevel, memSize, diskSize) - } - - // For pattern-matching - def unapply(h: UpdateBlockInfo): Option[(BlockManagerId, String, StorageLevel, Long, Long)] = { - Some((h.blockManagerId, h.blockId, h.storageLevel, h.memSize, h.diskSize)) - } -} - -private[spark] -case class GetLocations(blockId: String) extends ToBlockManagerMaster - -private[spark] -case class GetLocationsMultipleBlockIds(blockIds: Array[String]) extends ToBlockManagerMaster - -private[spark] -case class GetPeers(blockManagerId: BlockManagerId, size: Int) extends ToBlockManagerMaster - -private[spark] -case class RemoveExecutor(execId: String) extends ToBlockManagerMaster - -private[spark] -case object StopBlockManagerMaster extends ToBlockManagerMaster - -private[spark] -case object GetMemoryStatus extends ToBlockManagerMaster - -private[spark] -case object ExpireDeadHosts extends ToBlockManagerMaster - -private[spark] -case object GetStorageStatus extends ToBlockManagerMaster diff --git a/core/src/main/scala/spark/ui/jobs/IndexPage.scala b/core/src/main/scala/spark/ui/jobs/IndexPage.scala deleted file mode 100644 index f31af3cda6..0000000000 --- a/core/src/main/scala/spark/ui/jobs/IndexPage.scala +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package spark.ui.jobs - -import java.util.Date - -import javax.servlet.http.HttpServletRequest - -import scala.Some -import scala.xml.{NodeSeq, Node} - -import spark.scheduler.Stage -import spark.ui.UIUtils._ -import spark.ui.Page._ -import spark.storage.StorageLevel - -/** Page showing list of all ongoing and recently finished stages */ -private[spark] class IndexPage(parent: JobProgressUI) { - def listener = parent.listener - val dateFmt = parent.dateFmt - - def render(request: HttpServletRequest): Seq[Node] = { - val activeStages = listener.activeStages.toSeq - val completedStages = listener.completedStages.reverse.toSeq - val failedStages = listener.failedStages.reverse.toSeq - - /** Special table which merges two header cells. */ - def stageTable[T](makeRow: T => Seq[Node], rows: Seq[T]): Seq[Node] = { - <table class="table table-bordered table-striped table-condensed sortable"> - <thead> - <th>Stage Id</th> - <th>Origin</th> - <th>Submitted</th> - <th>Duration</th> - <th colspan="2">Tasks: Complete/Total</th> - <th>Shuffle Activity</th> - <th>Stored RDD</th> - </thead> - <tbody> - {rows.map(r => makeRow(r))} - </tbody> - </table> - } - - val activeStageTable: NodeSeq = stageTable(stageRow, activeStages) - val completedStageTable = stageTable(stageRow, completedStages) - val failedStageTable: NodeSeq = stageTable(stageRow, failedStages) - - val content = <h2>Active Stages</h2> ++ activeStageTable ++ - <h2>Completed Stages</h2> ++ completedStageTable ++ - <h2>Failed Stages</h2> ++ failedStageTable - - headerSparkPage(content, parent.sc, "Spark Stages", Jobs) - } - - def getElapsedTime(submitted: Option[Long], completed: Long): String = { - submitted match { - case Some(t) => parent.formatDuration(completed - t) - case _ => "Unknown" - } - } - - def makeProgressBar(completed: Int, total: Int): Seq[Node] = { - val width=130 - val height=15 - val completeWidth = (completed.toDouble / total) * width - - <svg width={width.toString} height={height.toString}> - <rect width={width.toString} height={height.toString} - fill="white" stroke="rgb(51,51,51)" stroke-width="1" /> - <rect width={completeWidth.toString} height={height.toString} - fill="rgb(0,136,204)" stroke="black" stroke-width="1" /> - </svg> - } - - - def stageRow(s: Stage): Seq[Node] = { - val submissionTime = s.submissionTime match { - case Some(t) => dateFmt.format(new Date(t)) - case None => "Unknown" - } - val (read, write) = (listener.hasShuffleRead(s.id), listener.hasShuffleWrite(s.id)) - val shuffleInfo = (read, write) match { - case (true, true) => "Read/Write" - case (true, false) => "Read" - case (false, true) => "Write" - case _ => "" - } - val completedTasks = listener.stageToTasksComplete.getOrElse(s.id, 0) - val totalTasks = s.numPartitions - - <tr> - <td>{s.id}</td> - <td><a href={"/stages/stage?id=%s".format(s.id)}>{s.name}</a></td> - <td>{submissionTime}</td> - <td>{getElapsedTime(s.submissionTime, - s.completionTime.getOrElse(System.currentTimeMillis()))}</td> - <td class="progress-cell">{makeProgressBar(completedTasks, totalTasks)}</td> - <td style="border-left: 0; text-align: center;">{completedTasks} / {totalTasks} - {listener.stageToTasksFailed.getOrElse(s.id, 0) match { - case f if f > 0 => "(%s failed)".format(f) - case _ => - }} - </td> - <td>{shuffleInfo}</td> - <td>{if (s.rdd.getStorageLevel != StorageLevel.NONE) { - <a href={"/storage/rdd?id=%s".format(s.rdd.id)}> - {Option(s.rdd.name).getOrElse(s.rdd.id)} - </a> - }} - </td> - </tr> - } -} diff --git 
a/core/src/main/scala/spark/ui/jobs/JobProgressUI.scala b/core/src/main/scala/spark/ui/jobs/JobProgressUI.scala deleted file mode 100644 index 44dcf82d11..0000000000 --- a/core/src/main/scala/spark/ui/jobs/JobProgressUI.scala +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.ui.jobs - -import akka.util.Duration - -import java.text.SimpleDateFormat - -import javax.servlet.http.HttpServletRequest - -import org.eclipse.jetty.server.Handler - -import scala.Seq -import scala.collection.mutable.{HashSet, ListBuffer, HashMap, ArrayBuffer} - -import spark.ui.JettyUtils._ -import spark.{ExceptionFailure, SparkContext, Success, Utils} -import spark.scheduler._ -import spark.scheduler.cluster.TaskInfo -import spark.executor.TaskMetrics -import collection.mutable - -/** Web UI showing progress status of all jobs in the given SparkContext. */ -private[spark] class JobProgressUI(val sc: SparkContext) { - private var _listener: Option[JobProgressListener] = None - def listener = _listener.get - val dateFmt = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") - - private val indexPage = new IndexPage(this) - private val stagePage = new StagePage(this) - - def start() { - _listener = Some(new JobProgressListener) - sc.addSparkListener(listener) - } - - def formatDuration(ms: Long) = Utils.msDurationToString(ms) - - def getHandlers = Seq[(String, Handler)]( - ("/stages/stage", (request: HttpServletRequest) => stagePage.render(request)), - ("/stages", (request: HttpServletRequest) => indexPage.render(request)) - ) -} - -private[spark] class JobProgressListener extends SparkListener { - // How many stages to remember - val RETAINED_STAGES = System.getProperty("spark.ui.retained_stages", "1000").toInt - - val activeStages = HashSet[Stage]() - val completedStages = ListBuffer[Stage]() - val failedStages = ListBuffer[Stage]() - - val stageToTasksComplete = HashMap[Int, Int]() - val stageToTasksFailed = HashMap[Int, Int]() - val stageToTaskInfos = - HashMap[Int, ArrayBuffer[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]]() - - override def onJobStart(jobStart: SparkListenerJobStart) {} - - override def onStageCompleted(stageCompleted: StageCompleted) = { - val stage = stageCompleted.stageInfo.stage - activeStages -= stage - completedStages += stage - trimIfNecessary(completedStages) - } - - /** If stages is too large, remove and garbage collect old stages */ - def trimIfNecessary(stages: ListBuffer[Stage]) { - if (stages.size > RETAINED_STAGES) { - val toRemove = RETAINED_STAGES / 10 - stages.takeRight(toRemove).foreach( s => { - stageToTaskInfos.remove(s.id) - }) - stages.trimEnd(toRemove) - } - } - - override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) = - activeStages += 
stageSubmitted.stage - - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { - val sid = taskEnd.task.stageId - val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) = - taskEnd.reason match { - case e: ExceptionFailure => - stageToTasksFailed(sid) = stageToTasksFailed.getOrElse(sid, 0) + 1 - (Some(e), e.metrics) - case _ => - stageToTasksComplete(sid) = stageToTasksComplete.getOrElse(sid, 0) + 1 - (None, Some(taskEnd.taskMetrics)) - } - val taskList = stageToTaskInfos.getOrElse( - sid, ArrayBuffer[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]()) - taskList += ((taskEnd.taskInfo, metrics, failureInfo)) - stageToTaskInfos(sid) = taskList - } - - override def onJobEnd(jobEnd: SparkListenerJobEnd) { - jobEnd match { - case end: SparkListenerJobEnd => - end.jobResult match { - case JobFailed(ex, Some(stage)) => - activeStages -= stage - failedStages += stage - trimIfNecessary(failedStages) - case _ => - } - case _ => - } - } - - /** Is this stage's input from a shuffle read. */ - def hasShuffleRead(stageID: Int): Boolean = { - // This is written in a slightly complicated way to avoid having to scan all tasks - for (s <- stageToTaskInfos.get(stageID).getOrElse(Seq())) { - if (s._2 != null) return s._2.flatMap(m => m.shuffleReadMetrics).isDefined - } - return false // No tasks have finished for this stage - } - - /** Is this stage's output to a shuffle write. */ - def hasShuffleWrite(stageID: Int): Boolean = { - // This is written in a slightly complicated way to avoid having to scan all tasks - for (s <- stageToTaskInfos.get(stageID).getOrElse(Seq())) { - if (s._2 != null) return s._2.flatMap(m => m.shuffleWriteMetrics).isDefined - } - return false // No tasks have finished for this stage - } -} diff --git a/core/src/main/scala/spark/ui/jobs/StagePage.scala b/core/src/main/scala/spark/ui/jobs/StagePage.scala deleted file mode 100644 index 292966f23a..0000000000 --- a/core/src/main/scala/spark/ui/jobs/StagePage.scala +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package spark.ui.jobs - -import java.util.Date - -import javax.servlet.http.HttpServletRequest - -import scala.xml.Node - -import spark.ui.UIUtils._ -import spark.ui.Page._ -import spark.util.Distribution -import spark.{ExceptionFailure, Utils} -import spark.scheduler.cluster.TaskInfo -import spark.executor.TaskMetrics - -/** Page showing statistics and task list for a given stage */ -private[spark] class StagePage(parent: JobProgressUI) { - def listener = parent.listener - val dateFmt = parent.dateFmt - - def render(request: HttpServletRequest): Seq[Node] = { - val stageId = request.getParameter("id").toInt - - if (!listener.stageToTaskInfos.contains(stageId)) { - val content = - <div> - <h2>Summary Metrics</h2> No tasks have finished yet - <h2>Tasks</h2> No tasks have finished yet - </div> - return headerSparkPage(content, parent.sc, "Stage Details: %s".format(stageId), Jobs) - } - - val tasks = listener.stageToTaskInfos(stageId) - - val shuffleRead = listener.hasShuffleRead(stageId) - val shuffleWrite = listener.hasShuffleWrite(stageId) - - val taskHeaders: Seq[String] = - Seq("Task ID", "Duration", "Locality Level", "Worker", "Launch Time") ++ - {if (shuffleRead) Seq("Shuffle Read") else Nil} ++ - {if (shuffleWrite) Seq("Shuffle Write") else Nil} ++ - Seq("Details") - - val taskTable = listingTable(taskHeaders, taskRow, tasks) - - // Excludes tasks which failed and have incomplete metrics - val validTasks = tasks.filter(t => Option(t._2).isDefined) - - val summaryTable: Option[Seq[Node]] = - if (validTasks.size == 0) { - None - } - else { - val serviceTimes = validTasks.map{case (info, metrics, exception) => - metrics.get.executorRunTime.toDouble} - val serviceQuantiles = "Duration" +: Distribution(serviceTimes).get.getQuantiles().map( - ms => parent.formatDuration(ms.toLong)) - - def getQuantileCols(data: Seq[Double]) = - Distribution(data).get.getQuantiles().map(d => Utils.memoryBytesToString(d.toLong)) - - val shuffleReadSizes = validTasks.map { - case(info, metrics, exception) => - metrics.get.shuffleReadMetrics.map(_.remoteBytesRead).getOrElse(0L).toDouble - } - val shuffleReadQuantiles = "Shuffle Read (Remote)" +: getQuantileCols(shuffleReadSizes) - - val shuffleWriteSizes = validTasks.map { - case(info, metrics, exception) => - metrics.get.shuffleWriteMetrics.map(_.shuffleBytesWritten).getOrElse(0L).toDouble - } - val shuffleWriteQuantiles = "Shuffle Write" +: getQuantileCols(shuffleWriteSizes) - - val listings: Seq[Seq[String]] = Seq(serviceQuantiles, - if (shuffleRead) shuffleReadQuantiles else Nil, - if (shuffleWrite) shuffleWriteQuantiles else Nil) - - val quantileHeaders = Seq("Metric", "Min", "25%", "50%", "75%", "Max") - def quantileRow(data: Seq[String]): Seq[Node] = <tr> {data.map(d => <td>{d}</td>)} </tr> - Some(listingTable(quantileHeaders, quantileRow, listings)) - } - - val content = - <h2>Summary Metrics</h2> ++ summaryTable.getOrElse(Nil) ++ <h2>Tasks</h2> ++ taskTable; - - headerSparkPage(content, parent.sc, "Stage Details: %s".format(stageId), Jobs) - } - - - def taskRow(taskData: (TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])): Seq[Node] = { - def fmtStackTrace(trace: Seq[StackTraceElement]): Seq[Node] = - trace.map(e => <span style="display:block;">{e.toString}</span>) - val (info, metrics, exception) = taskData - <tr> - <td>{info.taskId}</td> - <td sorttable_customkey={metrics.map{m => m.executorRunTime.toString}.getOrElse("1")}> - {metrics.map{m => parent.formatDuration(m.executorRunTime)}.getOrElse("")} - </td> - 
<td>{info.taskLocality}</td> - <td>{info.hostPort}</td> - <td>{dateFmt.format(new Date(info.launchTime))}</td> - {metrics.flatMap{m => m.shuffleReadMetrics}.map{s => - <td>{Utils.memoryBytesToString(s.remoteBytesRead)}</td>}.getOrElse("")} - {metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => - <td>{Utils.memoryBytesToString(s.shuffleBytesWritten)}</td>}.getOrElse("")} - <td>{exception.map(e => - <span> - {e.className} ({e.description})<br/> - {fmtStackTrace(e.stackTrace)} - </span>).getOrElse("")} - </td> - </tr> - } -} diff --git a/core/src/test/resources/test_metrics_config.properties b/core/src/test/resources/test_metrics_config.properties new file mode 100644 index 0000000000..056a158456 --- /dev/null +++ b/core/src/test/resources/test_metrics_config.properties @@ -0,0 +1,6 @@ +*.sink.console.period = 10 +*.sink.console.unit = seconds +*.source.jvm.class = org.apache.spark.metrics.source.JvmSource +master.sink.console.period = 20 +master.sink.console.unit = minutes + diff --git a/core/src/test/resources/test_metrics_system.properties b/core/src/test/resources/test_metrics_system.properties new file mode 100644 index 0000000000..6f5ecea93a --- /dev/null +++ b/core/src/test/resources/test_metrics_system.properties @@ -0,0 +1,7 @@ +*.sink.console.period = 10 +*.sink.console.unit = seconds +test.sink.console.class = org.apache.spark.metrics.sink.ConsoleSink +test.sink.dummy.class = org.apache.spark.metrics.sink.DummySink +test.source.dummy.class = org.apache.spark.metrics.source.DummySource +test.sink.console.period = 20 +test.sink.console.unit = minutes diff --git a/core/src/test/scala/spark/AccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala index 0af175f316..4434f3b87c 100644 --- a/core/src/test/scala/spark/AccumulatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.FunSuite import org.scalatest.matchers.ShouldMatchers @@ -23,7 +23,7 @@ import collection.mutable import java.util.Random import scala.math.exp import scala.math.signum -import spark.SparkContext._ +import org.apache.spark.SparkContext._ class AccumulatorSuite extends FunSuite with ShouldMatchers with LocalSparkContext { diff --git a/core/src/test/scala/spark/BroadcastSuite.scala b/core/src/test/scala/org/apache/spark/BroadcastSuite.scala index 785721ece8..b3a53d928b 100644 --- a/core/src/test/scala/spark/BroadcastSuite.scala +++ b/core/src/test/scala/org/apache/spark/BroadcastSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.FunSuite diff --git a/core/src/test/scala/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index a84c89e3c9..d9103aebb7 100644 --- a/core/src/test/scala/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -15,13 +15,14 @@ * limitations under the License. 
*/ -package spark +package org.apache.spark import org.scalatest.FunSuite import java.io.File -import spark.rdd._ -import spark.SparkContext._ +import org.apache.spark.rdd._ +import org.apache.spark.SparkContext._ import storage.StorageLevel +import org.apache.spark.util.Utils class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { initLogging() @@ -99,7 +100,7 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { test("ShuffledRDD") { testCheckpointing(rdd => { // Creating ShuffledRDD directly as PairRDDFunctions.combineByKey produces a MapPartitionedRDD - new ShuffledRDD(rdd.map(x => (x % 2, 1)), partitioner) + new ShuffledRDD[Int, Int, (Int, Int)](rdd.map(x => (x % 2, 1)), partitioner) }) } diff --git a/core/src/test/scala/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index e11efe459c..7a856d4081 100644 --- a/core/src/test/scala/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import network.ConnectionManagerId import org.scalatest.FunSuite diff --git a/core/src/test/scala/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala index ed16b9d8ef..01a72d8401 100644 --- a/core/src/test/scala/spark/DriverSuite.scala +++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io.File @@ -26,6 +26,7 @@ import org.scalatest.FunSuite import org.scalatest.concurrent.Timeouts import org.scalatest.prop.TableDrivenPropertyChecks._ import org.scalatest.time.SpanSugar._ +import org.apache.spark.util.Utils class DriverSuite extends FunSuite with Timeouts { test("driver should exit after finishing") { @@ -34,7 +35,7 @@ class DriverSuite extends FunSuite with Timeouts { val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]")) forAll(masters) { (master: String) => failAfter(30 seconds) { - Utils.execute(Seq("./run", "spark.DriverWithoutCleanup", master), + Utils.execute(Seq("./spark-class", "org.apache.spark.DriverWithoutCleanup", master), new File(System.getenv("SPARK_HOME"))) } } diff --git a/core/src/test/scala/spark/FailureSuite.scala b/core/src/test/scala/org/apache/spark/FailureSuite.scala index 6c847b8fef..af448fcb37 100644 --- a/core/src/test/scala/spark/FailureSuite.scala +++ b/core/src/test/scala/org/apache/spark/FailureSuite.scala @@ -15,14 +15,12 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.FunSuite -import org.scalatest.prop.Checkers - -import scala.collection.mutable.ArrayBuffer import SparkContext._ +import org.apache.spark.util.NonSerializable // Common state shared by FailureSuite-launched tasks. We use a global object // for this because any local variables used in the task closures will rightfully @@ -40,7 +38,7 @@ object FailureSuiteState { } class FailureSuite extends FunSuite with LocalSparkContext { - + // Run a 3-task map job in which task 1 deterministically fails once, and check // whether the job completes successfully and we ran 4 tasks in total. 
test("failure in a single-stage job") { @@ -66,7 +64,7 @@ class FailureSuite extends FunSuite with LocalSparkContext { test("failure in a two-stage job") { sc = new SparkContext("local[1,1]", "test") val results = sc.makeRDD(1 to 3).map(x => (x, x)).groupByKey(3).map { - case (k, v) => + case (k, v) => FailureSuiteState.synchronized { FailureSuiteState.tasksRun += 1 if (k == 1 && FailureSuiteState.tasksFailed == 0) { @@ -87,12 +85,40 @@ class FailureSuite extends FunSuite with LocalSparkContext { sc = new SparkContext("local[1,1]", "test") val results = sc.makeRDD(1 to 3).map(x => new NonSerializable) - val thrown = intercept[spark.SparkException] { + val thrown = intercept[SparkException] { results.collect() } - assert(thrown.getClass === classOf[spark.SparkException]) + assert(thrown.getClass === classOf[SparkException]) + assert(thrown.getMessage.contains("NotSerializableException")) + + FailureSuiteState.clear() + } + + test("failure because task closure is not serializable") { + sc = new SparkContext("local[1,1]", "test") + val a = new NonSerializable + + // Non-serializable closure in the final result stage + val thrown = intercept[SparkException] { + sc.parallelize(1 to 10, 2).map(x => a).count() + } + assert(thrown.getClass === classOf[SparkException]) assert(thrown.getMessage.contains("NotSerializableException")) + // Non-serializable closure in an earlier stage + val thrown1 = intercept[SparkException] { + sc.parallelize(1 to 10, 2).map(x => (x, a)).partitionBy(new HashPartitioner(3)).count() + } + assert(thrown1.getClass === classOf[SparkException]) + assert(thrown1.getMessage.contains("NotSerializableException")) + + // Non-serializable closure in foreach function + val thrown2 = intercept[SparkException] { + sc.parallelize(1 to 10, 2).foreach(x => println(a)) + } + assert(thrown2.getClass === classOf[SparkException]) + assert(thrown2.getMessage.contains("NotSerializableException")) + FailureSuiteState.clear() } diff --git a/core/src/test/scala/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala index 242ae971f8..35d1d41af1 100644 --- a/core/src/test/scala/spark/FileServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import com.google.common.io.Files import org.scalatest.FunSuite diff --git a/core/src/test/scala/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index 1e2c257c4b..7b82a4cdd9 100644 --- a/core/src/test/scala/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io.{FileWriter, PrintWriter, File} diff --git a/core/src/test/scala/spark/JavaAPISuite.java b/core/src/test/scala/org/apache/spark/JavaAPISuite.java index aaf03e683b..8a869c9005 100644 --- a/core/src/test/scala/spark/JavaAPISuite.java +++ b/core/src/test/scala/org/apache/spark/JavaAPISuite.java @@ -15,13 +15,14 @@ * limitations under the License. 
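The new FailureSuite cases above pin down a useful contract: when a task closure captures something non-serializable, the job fails promptly with a SparkException whose message mentions NotSerializableException, whichever stage the closure belongs to. A hedged sketch of the same pattern outside the suite (NonSerializableThing is a stand-in for any class that does not extend Serializable; intercept is ScalaTest's, as in the suite):

    class NonSerializableThing
    val captured = new NonSerializableThing

    val thrown = intercept[SparkException] {
      sc.parallelize(1 to 10, 2).map(x => (x, captured)).count()
    }
    assert(thrown.getMessage.contains("NotSerializableException"))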
*/ -package spark; +package org.apache.spark; import java.io.File; import java.io.IOException; import java.io.Serializable; import java.util.*; +import com.google.common.base.Optional; import scala.Tuple2; import com.google.common.base.Charsets; @@ -37,15 +38,15 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import spark.api.java.JavaDoubleRDD; -import spark.api.java.JavaPairRDD; -import spark.api.java.JavaRDD; -import spark.api.java.JavaSparkContext; -import spark.api.java.function.*; -import spark.partial.BoundedDouble; -import spark.partial.PartialResult; -import spark.storage.StorageLevel; -import spark.util.StatCounter; +import org.apache.spark.api.java.JavaDoubleRDD; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.*; +import org.apache.spark.partial.BoundedDouble; +import org.apache.spark.partial.PartialResult; +import org.apache.spark.storage.StorageLevel; +import org.apache.spark.util.StatCounter; // The test suite itself is Serializable so that anonymous Function implementations can be @@ -198,6 +199,35 @@ public class JavaAPISuite implements Serializable { } @Test + public void leftOuterJoin() { + JavaPairRDD<Integer, Integer> rdd1 = sc.parallelizePairs(Arrays.asList( + new Tuple2<Integer, Integer>(1, 1), + new Tuple2<Integer, Integer>(1, 2), + new Tuple2<Integer, Integer>(2, 1), + new Tuple2<Integer, Integer>(3, 1) + )); + JavaPairRDD<Integer, Character> rdd2 = sc.parallelizePairs(Arrays.asList( + new Tuple2<Integer, Character>(1, 'x'), + new Tuple2<Integer, Character>(2, 'y'), + new Tuple2<Integer, Character>(2, 'z'), + new Tuple2<Integer, Character>(4, 'w') + )); + List<Tuple2<Integer,Tuple2<Integer,Optional<Character>>>> joined = + rdd1.leftOuterJoin(rdd2).collect(); + Assert.assertEquals(5, joined.size()); + Tuple2<Integer,Tuple2<Integer,Optional<Character>>> firstUnmatched = + rdd1.leftOuterJoin(rdd2).filter( + new Function<Tuple2<Integer, Tuple2<Integer, Optional<Character>>>, Boolean>() { + @Override + public Boolean call(Tuple2<Integer, Tuple2<Integer, Optional<Character>>> tup) + throws Exception { + return !tup._2()._2().isPresent(); + } + }).first(); + Assert.assertEquals(3, firstUnmatched._1().intValue()); + } + + @Test public void foldReduce() { JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); Function2<Integer, Integer, Integer> add = new Function2<Integer, Integer, Integer>() { @@ -326,7 +356,9 @@ public class JavaAPISuite implements Serializable { Assert.assertEquals(20/6.0, rdd.mean(), 0.01); Assert.assertEquals(20/6.0, rdd.mean(), 0.01); Assert.assertEquals(6.22222, rdd.variance(), 0.01); + Assert.assertEquals(7.46667, rdd.sampleVariance(), 0.01); Assert.assertEquals(2.49444, rdd.stdev(), 0.01); + Assert.assertEquals(2.73252, rdd.sampleStdev(), 0.01); Double first = rdd.first(); List<Double> take = rdd.take(5); @@ -716,7 +748,7 @@ public class JavaAPISuite implements Serializable { } }; - JavaRDD<Integer> sizes = rdd1.zipPartitions(sizesFn, rdd2); + JavaRDD<Integer> sizes = rdd1.zipPartitions(rdd2, sizesFn); Assert.assertEquals("[3, 2, 3, 2]", sizes.collect().toString()); } diff --git a/core/src/test/scala/spark/LocalSparkContext.scala b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala index ddc212d290..6ec124da9c 100644 --- a/core/src/test/scala/spark/LocalSparkContext.scala +++ b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala @@ -15,7 +15,7 @@ 
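Two call sites in this commit (JavaAPISuite above and ZippedPartitionsSuite further down) track the same API change: zipPartitions now takes the other RDDs first and the per-partition function last. On the Scala side the function moves into a curried second parameter list, so it can be written inline; a sketch under the suites' own setup:

    // Before: data1.zipPartitions(procZippedData, data2, data3)
    // After:  the RDDs lead and the function trails.
    val zipped = data1.zipPartitions(data2, data3) { (i1, i2, i3) =>
      Iterator(i1.size, i2.size, i3.size)  // e.g. one size per input iterator
    }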
* limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.Suite import org.scalatest.BeforeAndAfterEach diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index ce6cec0451..6013320eaa 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.FunSuite import akka.actor._ -import spark.scheduler.MapStatus -import spark.storage.BlockManagerId -import spark.util.AkkaUtils +import org.apache.spark.scheduler.MapStatus +import org.apache.spark.storage.BlockManagerId +import org.apache.spark.util.AkkaUtils class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { @@ -112,22 +112,22 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { "akka://spark@localhost:" + boundPort + "/user/MapOutputTracker") masterTracker.registerShuffle(10, 1) - masterTracker.incrementGeneration() - slaveTracker.updateGeneration(masterTracker.getGeneration) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } val compressedSize1000 = MapOutputTracker.compressSize(1000L) val size1000 = MapOutputTracker.decompressSize(compressedSize1000) masterTracker.registerMapOutput(10, 0, new MapStatus( BlockManagerId("a", "hostA", 1000, 0), Array(compressedSize1000))) - masterTracker.incrementGeneration() - slaveTracker.updateGeneration(masterTracker.getGeneration) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) assert(slaveTracker.getServerStatuses(10, 0).toSeq === Seq((BlockManagerId("a", "hostA", 1000, 0), size1000))) masterTracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000, 0)) - masterTracker.incrementGeneration() - slaveTracker.updateGeneration(masterTracker.getGeneration) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } // failure should be cached diff --git a/core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala b/core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala new file mode 100644 index 0000000000..5a18dd13ff --- /dev/null +++ b/core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala @@ -0,0 +1,28 @@ +package org.apache.spark + +import org.scalatest.FunSuite +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.{RDD, PartitionPruningRDD} + + +class PartitionPruningRDDSuite extends FunSuite with SharedSparkContext { + + test("Pruned Partitions inherit locality prefs correctly") { + class TestPartition(i: Int) extends Partition { + def index = i + } + val rdd = new RDD[Int](sc, Nil) { + override protected def getPartitions = { + Array[Partition]( + new TestPartition(1), + new TestPartition(2), + new TestPartition(3)) + } + def compute(split: Partition, context: TaskContext) = {Iterator()} + } + val prunedRDD = PartitionPruningRDD.create(rdd, {x => if (x==2) true else false}) + val p = prunedRDD.partitions(0) + assert(p.index == 2) + assert(prunedRDD.partitions.length == 1) + } +} diff --git a/core/src/test/scala/spark/PartitioningSuite.scala b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala index b1e0b2b4d0..7d938917f2 100644 --- 
a/core/src/test/scala/spark/PartitioningSuite.scala +++ b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala @@ -15,13 +15,16 @@ * limitations under the License. */ -package spark +package org.apache.spark -import org.scalatest.FunSuite -import scala.collection.mutable.ArrayBuffer -import SparkContext._ -import spark.util.StatCounter import scala.math.abs +import scala.collection.mutable.ArrayBuffer + +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext._ +import org.apache.spark.util.StatCounter +import org.apache.spark.rdd.RDD class PartitioningSuite extends FunSuite with SharedSparkContext { diff --git a/core/src/test/scala/spark/PipedRDDSuite.scala b/core/src/test/scala/org/apache/spark/PipedRDDSuite.scala index 35c04710a3..2e851d892d 100644 --- a/core/src/test/scala/spark/PipedRDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/PipedRDDSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.FunSuite import SparkContext._ diff --git a/core/src/test/scala/spark/SharedSparkContext.scala b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala index 70c24515be..97cbca09bf 100644 --- a/core/src/test/scala/spark/SharedSparkContext.scala +++ b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.Suite import org.scalatest.BeforeAndAfterAll diff --git a/core/src/test/scala/spark/ShuffleNettySuite.scala b/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala index 6bad6c1d13..e121b162ad 100644 --- a/core/src/test/scala/spark/ShuffleNettySuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.BeforeAndAfterAll diff --git a/core/src/test/scala/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index 3a56c26bef..db717865db 100644 --- a/core/src/test/scala/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -15,27 +15,22 @@ * limitations under the License. 
*/ -package spark - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashSet +package org.apache.spark import org.scalatest.FunSuite import org.scalatest.matchers.ShouldMatchers -import org.scalatest.prop.Checkers -import org.scalacheck.Arbitrary._ -import org.scalacheck.Gen -import org.scalacheck.Prop._ -import com.google.common.io.Files +import org.apache.spark.SparkContext._ +import org.apache.spark.ShuffleSuite.NonJavaSerializableClass +import org.apache.spark.rdd.{RDD, SubtractedRDD, CoGroupedRDD, OrderedRDDFunctions, ShuffledRDD} +import org.apache.spark.util.MutablePair +import org.apache.spark.serializer.KryoSerializer -import spark.rdd.ShuffledRDD -import spark.SparkContext._ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { - test("groupByKey with compression") { + test("groupByKey without compression") { try { - System.setProperty("spark.shuffle.compress", "true") + System.setProperty("spark.shuffle.compress", "false") sc = new SparkContext("local", "test") val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (2, 1)), 4) val groups = pairs.groupByKey(4).collect() @@ -45,7 +40,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { val valuesFor2 = groups.find(_._1 == 2).get._2 assert(valuesFor2.toList.sorted === List(1)) } finally { - System.setProperty("spark.blockManager.compress", "false") + System.setProperty("spark.shuffle.compress", "true") } } @@ -55,12 +50,12 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { val a = sc.parallelize(1 to 10, 2) val b = a.map { x => - (x, new ShuffleSuite.NonJavaSerializableClass(x * 2)) + (x, new NonJavaSerializableClass(x * 2)) } // If the Kryo serializer is not used correctly, the shuffle would fail because the // default Java serializer cannot handle the non serializable class. - val c = new ShuffledRDD(b, new HashPartitioner(NUM_BLOCKS), - classOf[spark.KryoSerializer].getName) + val c = new ShuffledRDD[Int, NonJavaSerializableClass, (Int, NonJavaSerializableClass)]( + b, new HashPartitioner(NUM_BLOCKS)).setSerializer(classOf[KryoSerializer].getName) val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId assert(c.count === 10) @@ -77,11 +72,12 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { sc = new SparkContext("local-cluster[2,1,512]", "test") val a = sc.parallelize(1 to 10, 2) val b = a.map { x => - (x, new ShuffleSuite.NonJavaSerializableClass(x * 2)) + (x, new NonJavaSerializableClass(x * 2)) } // If the Kryo serializer is not used correctly, the shuffle would fail because the // default Java serializer cannot handle the non serializable class. - val c = new ShuffledRDD(b, new HashPartitioner(3), classOf[spark.KryoSerializer].getName) + val c = new ShuffledRDD[Int, NonJavaSerializableClass, (Int, NonJavaSerializableClass)]( + b, new HashPartitioner(3)).setSerializer(classOf[KryoSerializer].getName) assert(c.count === 10) } @@ -96,7 +92,8 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { // NOTE: The default Java serializer doesn't create zero-sized blocks. 
// So, use Kryo - val c = new ShuffledRDD(b, new HashPartitioner(10), classOf[spark.KryoSerializer].getName) + val c = new ShuffledRDD[Int, Int, (Int, Int)](b, new HashPartitioner(10)) + .setSerializer(classOf[KryoSerializer].getName) val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId assert(c.count === 4) @@ -121,7 +118,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { val b = a.map(x => (x, x*2)) // NOTE: The default Java serializer should create zero-sized blocks - val c = new ShuffledRDD(b, new HashPartitioner(10)) + val c = new ShuffledRDD[Int, Int, (Int, Int)](b, new HashPartitioner(10)) val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId assert(c.count === 4) @@ -135,6 +132,72 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { // We should have at most 4 non-zero sized partitions assert(nonEmptyBlocks.size <= 4) } + + test("shuffle using mutable pairs") { + // Use a local cluster with 2 processes to make sure there are both local and remote blocks + sc = new SparkContext("local-cluster[2,1,512]", "test") + def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2) + val data = Array(p(1, 1), p(1, 2), p(1, 3), p(2, 1)) + val pairs: RDD[MutablePair[Int, Int]] = sc.parallelize(data, 2) + val results = new ShuffledRDD[Int, Int, MutablePair[Int, Int]](pairs, new HashPartitioner(2)) + .collect() + + data.foreach { pair => results should contain (pair) } + } + + test("sorting using mutable pairs") { + // This is not in SortingSuite because of the local cluster setup. + // Use a local cluster with 2 processes to make sure there are both local and remote blocks + sc = new SparkContext("local-cluster[2,1,512]", "test") + def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2) + val data = Array(p(1, 11), p(3, 33), p(100, 100), p(2, 22)) + val pairs: RDD[MutablePair[Int, Int]] = sc.parallelize(data, 2) + val results = new OrderedRDDFunctions[Int, Int, MutablePair[Int, Int]](pairs) + .sortByKey().collect() + results(0) should be (p(1, 11)) + results(1) should be (p(2, 22)) + results(2) should be (p(3, 33)) + results(3) should be (p(100, 100)) + } + + test("cogroup using mutable pairs") { + // Use a local cluster with 2 processes to make sure there are both local and remote blocks + sc = new SparkContext("local-cluster[2,1,512]", "test") + def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2) + val data1 = Seq(p(1, 1), p(1, 2), p(1, 3), p(2, 1)) + val data2 = Seq(p(1, "11"), p(1, "12"), p(2, "22"), p(3, "3")) + val pairs1: RDD[MutablePair[Int, Int]] = sc.parallelize(data1, 2) + val pairs2: RDD[MutablePair[Int, String]] = sc.parallelize(data2, 2) + val results = new CoGroupedRDD[Int](Seq(pairs1, pairs2), new HashPartitioner(2)).collectAsMap() + + assert(results(1)(0).length === 3) + assert(results(1)(0).contains(1)) + assert(results(1)(0).contains(2)) + assert(results(1)(0).contains(3)) + assert(results(1)(1).length === 2) + assert(results(1)(1).contains("11")) + assert(results(1)(1).contains("12")) + assert(results(2)(0).length === 1) + assert(results(2)(0).contains(1)) + assert(results(2)(1).length === 1) + assert(results(2)(1).contains("22")) + assert(results(3)(0).length === 0) + assert(results(3)(1).contains("3")) + } + + test("subtract mutable pairs") { + // Use a local cluster with 2 processes to make sure there are both local and remote blocks + sc = new SparkContext("local-cluster[2,1,512]", "test") + def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2) + val 
data1 = Seq(p(1, 1), p(1, 2), p(1, 3), p(2, 1), p(3, 33)) + val data2 = Seq(p(1, "11"), p(1, "12"), p(2, "22")) + val pairs1: RDD[MutablePair[Int, Int]] = sc.parallelize(data1, 2) + val pairs2: RDD[MutablePair[Int, String]] = sc.parallelize(data2, 2) + val results = new SubtractedRDD(pairs1, pairs2, new HashPartitioner(2)).collect() + results should have length (1) + // substracted rdd return results as Tuple2 + results(0) should be ((3, 33)) + } } object ShuffleSuite { diff --git a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala new file mode 100644 index 0000000000..939fe51801 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.scalatest.FunSuite +import org.apache.spark.SparkContext._ + +class SparkContextInfoSuite extends FunSuite with LocalSparkContext { + test("getPersistentRDDs only returns RDDs that are marked as cached") { + sc = new SparkContext("local", "test") + assert(sc.getPersistentRDDs.isEmpty === true) + + val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2) + assert(sc.getPersistentRDDs.isEmpty === true) + + rdd.cache() + assert(sc.getPersistentRDDs.size === 1) + assert(sc.getPersistentRDDs.values.head === rdd) + } + + test("getPersistentRDDs returns an immutable map") { + sc = new SparkContext("local", "test") + val rdd1 = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() + + val myRdds = sc.getPersistentRDDs + assert(myRdds.size === 1) + assert(myRdds.values.head === rdd1) + + val rdd2 = sc.makeRDD(Array(5, 6, 7, 8), 1).cache() + + // getPersistentRDDs should have 2 RDDs, but myRdds should not change + assert(sc.getPersistentRDDs.size === 2) + assert(myRdds.size === 1) + } + + test("getRDDStorageInfo only reports on RDDs that actually persist data") { + sc = new SparkContext("local", "test") + val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() + + assert(sc.getRDDStorageInfo.size === 0) + + rdd.collect() + assert(sc.getRDDStorageInfo.size === 1) + } +} diff --git a/core/src/test/scala/spark/ThreadingSuite.scala b/core/src/test/scala/org/apache/spark/ThreadingSuite.scala index f2acd0bd3c..69383ddfb8 100644 --- a/core/src/test/scala/spark/ThreadingSuite.scala +++ b/core/src/test/scala/org/apache/spark/ThreadingSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. 
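A compact restatement of the ShuffledRDD shape that ShuffleSuite above migrates to: the key, value, and combined pair types are now explicit type parameters, and a non-default serializer is attached with setSerializer rather than a constructor argument. A sketch under the API shown in the diff, not a complete program:

    val pairs: RDD[(Int, Int)] = sc.parallelize(1 to 10, 2).map(x => (x, x * 2))
    val shuffled = new ShuffledRDD[Int, Int, (Int, Int)](pairs, new HashPartitioner(4))
      .setSerializer(classOf[KryoSerializer].getName)  // omit to use the configured default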
*/ -package spark +package org.apache.spark import java.util.concurrent.Semaphore import java.util.concurrent.atomic.AtomicBoolean diff --git a/core/src/test/scala/spark/UnpersistSuite.scala b/core/src/test/scala/org/apache/spark/UnpersistSuite.scala index 93977d16f4..46a2da1724 100644 --- a/core/src/test/scala/spark/UnpersistSuite.scala +++ b/core/src/test/scala/org/apache/spark/UnpersistSuite.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.FunSuite import org.scalatest.concurrent.Timeouts._ import org.scalatest.time.{Span, Millis} -import spark.SparkContext._ +import org.apache.spark.SparkContext._ class UnpersistSuite extends FunSuite with LocalSparkContext { test("unpersist RDD") { diff --git a/core/src/test/scala/spark/ZippedPartitionsSuite.scala b/core/src/test/scala/org/apache/spark/ZippedPartitionsSuite.scala index 5e6d7b09d8..618b9c113b 100644 --- a/core/src/test/scala/spark/ZippedPartitionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/ZippedPartitionsSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import scala.collection.immutable.NumericRange @@ -40,7 +40,7 @@ class ZippedPartitionsSuite extends FunSuite with SharedSparkContext { val data2 = sc.makeRDD(Array("1", "2", "3", "4", "5", "6"), 2) val data3 = sc.makeRDD(Array(1.0, 2.0), 2) - val zippedRDD = data1.zipPartitions(ZippedPartitionsSuite.procZippedData, data2, data3) + val zippedRDD = data1.zipPartitions(data2, data3)(ZippedPartitionsSuite.procZippedData) val obtainedSizes = zippedRDD.collect() val expectedSizes = Array(2, 3, 1, 2, 3, 1) diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala new file mode 100644 index 0000000000..fd6f69041a --- /dev/null +++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.io + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + +import org.scalatest.FunSuite + + +class CompressionCodecSuite extends FunSuite { + + def testCodec(codec: CompressionCodec) { + // Write 1000 integers to the output stream, compressed. + val outputStream = new ByteArrayOutputStream() + val out = codec.compressedOutputStream(outputStream) + for (i <- 1 until 1000) { + out.write(i % 256) + } + out.close() + + // Read the 1000 integers back. 
+ val inputStream = new ByteArrayInputStream(outputStream.toByteArray) + val in = codec.compressedInputStream(inputStream) + for (i <- 1 until 1000) { + assert(in.read() === i % 256) + } + in.close() + } + + test("default compression codec") { + val codec = CompressionCodec.createCodec() + assert(codec.getClass === classOf[SnappyCompressionCodec]) + testCodec(codec) + } + + test("lzf compression codec") { + val codec = CompressionCodec.createCodec(classOf[LZFCompressionCodec].getName) + assert(codec.getClass === classOf[LZFCompressionCodec]) + testCodec(codec) + } + + test("snappy compression codec") { + val codec = CompressionCodec.createCodec(classOf[SnappyCompressionCodec].getName) + assert(codec.getClass === classOf[SnappyCompressionCodec]) + testCodec(codec) + } +} diff --git a/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala b/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala new file mode 100644 index 0000000000..58c94a162d --- /dev/null +++ b/core/src/test/scala/org/apache/spark/metrics/MetricsConfigSuite.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
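The CompressionCodecSuite above doubles as usage documentation: createCodec() with no argument returns the default codec (Snappy in this commit, and presumably overridable via the spark.io.compression.codec property this factory reads), a fully qualified class name selects another implementation, and both wrap plain java.io streams. A short sketch:

    import java.io.ByteArrayOutputStream
    import org.apache.spark.io.{CompressionCodec, LZFCompressionCodec}

    val codec = CompressionCodec.createCodec(classOf[LZFCompressionCodec].getName)
    val buffer = new ByteArrayOutputStream()
    val out = codec.compressedOutputStream(buffer)  // composes like any OutputStream
    out.write(42)
    out.close()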
+ */ + +package org.apache.spark.metrics + +import org.scalatest.{BeforeAndAfter, FunSuite} + +class MetricsConfigSuite extends FunSuite with BeforeAndAfter { + var filePath: String = _ + + before { + filePath = getClass.getClassLoader.getResource("test_metrics_config.properties").getFile() + } + + test("MetricsConfig with default properties") { + val conf = new MetricsConfig(Option("dummy-file")) + conf.initialize() + + assert(conf.properties.size() === 5) + assert(conf.properties.getProperty("test-for-dummy") === null) + + val property = conf.getInstance("random") + assert(property.size() === 3) + assert(property.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet") + assert(property.getProperty("sink.servlet.uri") === "/metrics/json") + assert(property.getProperty("sink.servlet.sample") === "false") + } + + test("MetricsConfig with properties set") { + val conf = new MetricsConfig(Option(filePath)) + conf.initialize() + + val masterProp = conf.getInstance("master") + assert(masterProp.size() === 6) + assert(masterProp.getProperty("sink.console.period") === "20") + assert(masterProp.getProperty("sink.console.unit") === "minutes") + assert(masterProp.getProperty("source.jvm.class") === "org.apache.spark.metrics.source.JvmSource") + assert(masterProp.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet") + assert(masterProp.getProperty("sink.servlet.uri") === "/metrics/master/json") + assert(masterProp.getProperty("sink.servlet.sample") === "false") + + val workerProp = conf.getInstance("worker") + assert(workerProp.size() === 6) + assert(workerProp.getProperty("sink.console.period") === "10") + assert(workerProp.getProperty("sink.console.unit") === "seconds") + assert(workerProp.getProperty("source.jvm.class") === "org.apache.spark.metrics.source.JvmSource") + assert(workerProp.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet") + assert(workerProp.getProperty("sink.servlet.uri") === "/metrics/json") + assert(workerProp.getProperty("sink.servlet.sample") === "false") + } + + test("MetricsConfig with subProperties") { + val conf = new MetricsConfig(Option(filePath)) + conf.initialize() + + val propCategories = conf.propertyCategories + assert(propCategories.size === 3) + + val masterProp = conf.getInstance("master") + val sourceProps = conf.subProperties(masterProp, MetricsSystem.SOURCE_REGEX) + assert(sourceProps.size === 1) + assert(sourceProps("jvm").getProperty("class") === "org.apache.spark.metrics.source.JvmSource") + + val sinkProps = conf.subProperties(masterProp, MetricsSystem.SINK_REGEX) + assert(sinkProps.size === 2) + assert(sinkProps.contains("console")) + assert(sinkProps.contains("servlet")) + + val consoleProps = sinkProps("console") + assert(consoleProps.size() === 2) + + val servletProps = sinkProps("servlet") + assert(servletProps.size() === 3) + } +} diff --git a/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala b/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala new file mode 100644 index 0000000000..7181333adf --- /dev/null +++ b/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics + +import org.scalatest.{BeforeAndAfter, FunSuite} +import org.apache.spark.deploy.master.MasterSource + +class MetricsSystemSuite extends FunSuite with BeforeAndAfter { + var filePath: String = _ + + before { + filePath = getClass.getClassLoader.getResource("test_metrics_system.properties").getFile() + System.setProperty("spark.metrics.conf", filePath) + } + + test("MetricsSystem with default config") { + val metricsSystem = MetricsSystem.createMetricsSystem("default") + val sources = metricsSystem.sources + val sinks = metricsSystem.sinks + + assert(sources.length === 0) + assert(sinks.length === 0) + assert(!metricsSystem.getServletHandlers.isEmpty) + } + + test("MetricsSystem with sources add") { + val metricsSystem = MetricsSystem.createMetricsSystem("test") + val sources = metricsSystem.sources + val sinks = metricsSystem.sinks + + assert(sources.length === 0) + assert(sinks.length === 1) + assert(!metricsSystem.getServletHandlers.isEmpty) + + val source = new MasterSource(null) + metricsSystem.registerSource(source) + assert(sources.length === 1) + } +} diff --git a/core/src/test/scala/spark/rdd/JdbcRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala index dc8ca941c1..3d39a31252 100644 --- a/core/src/test/scala/spark/rdd/JdbcRDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.scalatest.{ BeforeAndAfter, FunSuite } -import spark.SparkContext._ -import spark.rdd.JdbcRDD +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.JdbcRDD import java.sql._ class JdbcRDDSuite extends FunSuite with BeforeAndAfter with LocalSparkContext { diff --git a/core/src/test/scala/spark/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index b102eaf4e6..31f97fc139 100644 --- a/core/src/test/scala/spark/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -15,21 +15,17 @@ * limitations under the License. 
*/ -package spark +package org.apache.spark.rdd import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashSet import org.scalatest.FunSuite -import org.scalatest.prop.Checkers -import org.scalacheck.Arbitrary._ -import org.scalacheck.Gen -import org.scalacheck.Prop._ import com.google.common.io.Files +import org.apache.spark.SparkContext._ +import org.apache.spark.{Partitioner, SharedSparkContext} -import spark.rdd.ShuffledRDD -import spark.SparkContext._ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { test("groupByKey") { diff --git a/core/src/test/scala/spark/rdd/ParallelCollectionSplitSuite.scala b/core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala index d1276d541f..a80afdee7e 100644 --- a/core/src/test/scala/spark/rdd/ParallelCollectionSplitSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import scala.collection.immutable.NumericRange diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index cbddf4e523..adc971050e 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -15,14 +15,17 @@ * limitations under the License. */ -package spark +package org.apache.spark.rdd import scala.collection.mutable.HashMap import org.scalatest.FunSuite import org.scalatest.concurrent.Timeouts._ import org.scalatest.time.{Span, Millis} -import spark.SparkContext._ -import spark.rdd.{CoalescedRDD, CoGroupedRDD, EmptyRDD, PartitionPruningRDD, ShuffledRDD} +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd._ +import scala.collection.parallel.mutable +import org.apache.spark._ +import org.apache.spark.rdd.CoalescedRDDPartition class RDDSuite extends FunSuite with SharedSparkContext { @@ -170,9 +173,69 @@ class RDDSuite extends FunSuite with SharedSparkContext { // we can optionally shuffle to keep the upstream parallel val coalesced5 = data.coalesce(1, shuffle = true) - assert(coalesced5.dependencies.head.rdd.dependencies.head.rdd.asInstanceOf[ShuffledRDD[_, _]] != + assert(coalesced5.dependencies.head.rdd.dependencies.head.rdd.asInstanceOf[ShuffledRDD[_, _, _]] != null) } + test("cogrouped RDDs with locality") { + val data3 = sc.makeRDD(List((1,List("a","c")), (2,List("a","b","c")), (3,List("b")))) + val coal3 = data3.coalesce(3) + val list3 = coal3.partitions.map(p => p.asInstanceOf[CoalescedRDDPartition].preferredLocation) + assert(list3.sorted === Array("a","b","c"), "Locality preferences are dropped") + + // RDD with locality preferences spread (non-randomly) over 6 machines, m0 through m5 + val data = sc.makeRDD((1 to 9).map(i => (i, (i to (i+2)).map{ j => "m" + (j%6)}))) + val coalesced1 = data.coalesce(3) + assert(coalesced1.collect().toList.sorted === (1 to 9).toList, "Data got *lost* in coalescing") + + val splits = coalesced1.glom().collect().map(_.toList).toList + assert(splits.length === 3, "Supposed to coalesce to 3 but got " + splits.length) + + assert(splits.forall(_.length >= 1) === true, "Some partitions were empty") + + // If we try to coalesce into more partitions than the original RDD, it should just + // keep the original number of partitions. 
+ val coalesced4 = data.coalesce(20) + val listOfLists = coalesced4.glom().collect().map(_.toList).toList + val sortedList = listOfLists.sortWith{ (x, y) => !x.isEmpty && (y.isEmpty || (x(0) < y(0))) } + assert( sortedList === (1 to 9). + map{x => List(x)}.toList, "Tried coalescing 9 partitions to 20 but didn't get 9 back") + } + + test("cogrouped RDDs with locality, large scale (10K partitions)") { + // large scale experiment + import collection.mutable + val rnd = scala.util.Random + val partitions = 10000 + val numMachines = 50 + val machines = mutable.ListBuffer[String]() + (1 to numMachines).foreach(machines += "m"+_) + + val blocks = (1 to partitions).map(i => + { (i, Array.fill(3)(machines(rnd.nextInt(machines.size))).toList) } ) + + val data2 = sc.makeRDD(blocks) + val coalesced2 = data2.coalesce(numMachines*2) + + // test that you get over 90% locality in each group + val minLocality = coalesced2.partitions + .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction) + .foldLeft(1.)((perc, loc) => math.min(perc,loc)) + assert(minLocality >= 0.90, "Expected 90% locality but got " + (minLocality*100.).toInt + "%") + + // test that the groups are load balanced with 100 +/- 20 elements in each + val maxImbalance = coalesced2.partitions + .map(part => part.asInstanceOf[CoalescedRDDPartition].parents.size) + .foldLeft(0)((dev, curr) => math.max(math.abs(100-curr),dev)) + assert(maxImbalance <= 20, "Expected 100 +/- 20 per partition, but got " + maxImbalance) + + val data3 = sc.makeRDD(blocks).map(i => i*2) // derived RDD to test *current* pref locs + val coalesced3 = data3.coalesce(numMachines*2) + val minLocality2 = coalesced3.partitions + .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction) + .foldLeft(1.)((perc, loc) => math.min(perc,loc)) + assert(minLocality2 >= 0.90, "Expected 90% locality for derived RDD but got " + + (minLocality2*100.).toInt + "%") + } test("zipped RDDs") { val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) diff --git a/core/src/test/scala/spark/SortingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala index b933c4aab8..2f7bd370fc 100644 --- a/core/src/test/scala/spark/SortingSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala @@ -15,12 +15,14 @@ * limitations under the License. */ -package spark +package org.apache.spark.rdd import org.scalatest.FunSuite import org.scalatest.BeforeAndAfter import org.scalatest.matchers.ShouldMatchers -import SparkContext._ + +import org.apache.spark.{Logging, SharedSparkContext} +import org.apache.spark.SparkContext._ class SortingSuite extends FunSuite with SharedSparkContext with ShouldMatchers with Logging { diff --git a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index f802b66cf1..94f66c94c6 100644 --- a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -15,29 +15,26 @@ * limitations under the License. 
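The RDDSuite additions above give coalesce() locality awareness: each output partition is a CoalescedRDDPartition recording a preferred location and the fraction of its parent partitions that are local to it. Inspecting that, as the tests do (with `data` as constructed in the suite):

    val coalesced = data.coalesce(3)
    val prefs = coalesced.partitions
      .map(_.asInstanceOf[CoalescedRDDPartition].preferredLocation)
    val localFractions = coalesced.partitions
      .map(_.asInstanceOf[CoalescedRDDPartition].localFraction)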
*/ -package spark.scheduler +package org.apache.spark.scheduler import scala.collection.mutable.{Map, HashMap} import org.scalatest.FunSuite import org.scalatest.BeforeAndAfter -import spark.LocalSparkContext +import org.apache.spark.LocalSparkContext +import org.apache.spark.MapOutputTracker +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext +import org.apache.spark.Partition +import org.apache.spark.TaskContext +import org.apache.spark.{Dependency, ShuffleDependency, OneToOneDependency} +import org.apache.spark.{FetchFailed, Success, TaskEndReason} +import org.apache.spark.storage.{BlockManagerId, BlockManagerMaster} -import spark.storage.BlockManager -import spark.storage.BlockManagerId -import spark.storage.BlockManagerMaster -import spark.{Dependency, ShuffleDependency, OneToOneDependency} -import spark.FetchFailedException -import spark.MapOutputTracker -import spark.RDD -import spark.SparkContext -import spark.SparkException -import spark.Partition -import spark.TaskContext -import spark.TaskEndReason - -import spark.{FetchFailed, Success} +import org.apache.spark.scheduler.cluster.Pool +import org.apache.spark.scheduler.cluster.SchedulingMode +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode /** * Tests for DAGScheduler. These tests directly call the event processing functions in DAGScheduler @@ -56,11 +53,13 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont /** Set of TaskSets the DAGScheduler has requested executed. */ val taskSets = scala.collection.mutable.Buffer[TaskSet]() val taskScheduler = new TaskScheduler() { + override def rootPool: Pool = null + override def schedulingMode: SchedulingMode = SchedulingMode.NONE override def start() = {} override def stop() = {} override def submitTasks(taskSet: TaskSet) = { // normally done by TaskSetManager - taskSet.tasks.foreach(_.generation = mapOutputTracker.getGeneration) + taskSet.tasks.foreach(_.epoch = mapOutputTracker.getEpoch) taskSets += taskSet } override def setListener(listener: TaskSchedulerListener) = {} @@ -300,10 +299,10 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont val reduceRdd = makeRdd(2, List(shuffleDep)) submit(reduceRdd, Array(0, 1)) // pretend we were told hostA went away - val oldGeneration = mapOutputTracker.getGeneration + val oldEpoch = mapOutputTracker.getEpoch runEvent(ExecutorLost("exec-hostA")) - val newGeneration = mapOutputTracker.getGeneration - assert(newGeneration > oldGeneration) + val newEpoch = mapOutputTracker.getEpoch + assert(newEpoch > oldEpoch) val noAccum = Map[Long, Any]() val taskSet = taskSets(0) // should be ignored for being too old @@ -312,8 +311,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont runEvent(CompletionEvent(taskSet.tasks(0), Success, makeMapStatus("hostB", 1), noAccum, null, null)) // should be ignored for being too old runEvent(CompletionEvent(taskSet.tasks(0), Success, makeMapStatus("hostA", 1), noAccum, null, null)) - // should work because it's a new generation - taskSet.tasks(1).generation = newGeneration + // should work because it's a new epoch + taskSet.tasks(1).epoch = newEpoch runEvent(CompletionEvent(taskSet.tasks(1), Success, makeMapStatus("hostA", 1), noAccum, null, null)) assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === Array(makeBlockManagerId("hostB"), makeBlockManagerId("hostA"))) @@ -402,12 +401,14 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont 
assert(results === Map(0 -> 42)) } - /** Assert that the supplied TaskSet has exactly the given preferredLocations. Note, converts taskSet's locations to host only. */ - private def assertLocations(taskSet: TaskSet, locations: Seq[Seq[String]]) { - assert(locations.size === taskSet.tasks.size) - for ((expectLocs, taskLocs) <- - taskSet.tasks.map(_.preferredLocations).zip(locations)) { - assert(expectLocs.map(loc => spark.Utils.parseHostPort(loc)._1) === taskLocs) + /** + * Assert that the supplied TaskSet has exactly the given hosts as its preferred locations. + * Note that this checks only the host and not the executor ID. + */ + private def assertLocations(taskSet: TaskSet, hosts: Seq[Seq[String]]) { + assert(hosts.size === taskSet.tasks.size) + for ((taskLocs, expectedLocs) <- taskSet.tasks.map(_.preferredLocations).zip(hosts)) { + assert(taskLocs.map(_.host) === expectedLocs) } } diff --git a/core/src/test/scala/spark/scheduler/JobLoggerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala index 0f855c38da..cece60dda7 100644 --- a/core/src/test/scala/spark/scheduler/JobLoggerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala @@ -15,15 +15,19 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler import java.util.Properties import java.util.concurrent.LinkedBlockingQueue + +import scala.collection.mutable + import org.scalatest.FunSuite import org.scalatest.matchers.ShouldMatchers -import scala.collection.mutable -import spark._ -import spark.SparkContext._ + +import org.apache.spark._ +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.RDD class JobLoggerSuite extends FunSuite with LocalSparkContext with ShouldMatchers { @@ -57,7 +61,7 @@ class JobLoggerSuite extends FunSuite with LocalSparkContext with ShouldMatchers val shuffleMapStage = new Stage(1, parentRdd, Some(shuffleDep), Nil, jobID, None) val rootStage = new Stage(0, rootRdd, None, List(shuffleMapStage), jobID, None) - joblogger.onStageSubmitted(SparkListenerStageSubmitted(rootStage, 4)) + joblogger.onStageSubmitted(SparkListenerStageSubmitted(rootStage, 4, null)) joblogger.getRddNameTest(parentRdd) should be (parentRdd.getClass.getName) parentRdd.setName("MyRDD") joblogger.getRddNameTest(parentRdd) should be ("MyRDD") diff --git a/core/src/test/scala/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala index 392d67d67b..aac7c207cb 100644 --- a/core/src/test/scala/spark/scheduler/SparkListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler import org.scalatest.FunSuite -import spark.{SparkContext, LocalSparkContext} +import org.apache.spark.{SparkContext, LocalSparkContext} import scala.collection.mutable import org.scalatest.matchers.ShouldMatchers -import spark.SparkContext._ +import org.apache.spark.SparkContext._ /** * diff --git a/core/src/test/scala/spark/scheduler/TaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala index 95a6eee2fc..e31a116a75 100644 --- a/core/src/test/scala/spark/scheduler/TaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala @@ -15,15 +15,15 @@ * limitations under the License. 
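The generation-to-epoch rename running through DAGSchedulerSuite and MapOutputTrackerSuite is mechanical, but the protocol it names is worth spelling out: losing an executor bumps the tracker's epoch, and task completions stamped with an older epoch are discarded as stale. In the suite's own vocabulary:

    val oldEpoch = mapOutputTracker.getEpoch
    runEvent(ExecutorLost("exec-hostA"))  // simulated executor loss, as in the suite
    assert(mapOutputTracker.getEpoch > oldEpoch)

    taskSet.tasks(1).epoch = mapOutputTracker.getEpoch  // fresh epoch => result accepted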
*/ -package spark.scheduler +package org.apache.spark.scheduler import org.scalatest.FunSuite import org.scalatest.BeforeAndAfter -import spark.TaskContext -import spark.RDD -import spark.SparkContext -import spark.Partition -import spark.LocalSparkContext +import org.apache.spark.TaskContext +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext +import org.apache.spark.Partition +import org.apache.spark.LocalSparkContext class TaskContextSuite extends FunSuite with BeforeAndAfter with LocalSparkContext { diff --git a/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala index 8f81d0b6ee..92ad9f09b2 100644 --- a/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala @@ -15,25 +15,25 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler.cluster import org.scalatest.FunSuite import org.scalatest.BeforeAndAfter -import spark._ -import spark.scheduler._ -import spark.scheduler.cluster._ +import org.apache.spark._ +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster._ import scala.collection.mutable.ArrayBuffer import java.util.Properties -class DummyTaskSetManager( +class FakeTaskSetManager( initPriority: Int, initStageId: Int, initNumTasks: Int, clusterScheduler: ClusterScheduler, taskSet: TaskSet) - extends ClusterTaskSetManager(clusterScheduler,taskSet) { + extends ClusterTaskSetManager(clusterScheduler, taskSet) { parent = null weight = 1 @@ -72,10 +72,16 @@ class DummyTaskSetManager( override def executorLost(executorId: String, host: String): Unit = { } - override def slaveOffer(execId: String, host: String, avaiableCpus: Double, overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] = { + override def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) + : Option[TaskDescription] = + { if (tasksFinished + runningTasks < numTasks) { increaseRunningTasks(1) - return Some(new TaskDescription(0, execId, "task 0:0", null)) + return Some(new TaskDescription(0, execId, "task 0:0", 0, null)) } return None } @@ -98,17 +104,10 @@ class DummyTaskSetManager( } } -class DummyTask(stageId: Int) extends Task[Int](stageId) -{ - def run(attemptId: Long): Int = { - return 0 - } -} - class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging { - def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: ClusterScheduler, taskSet: TaskSet): DummyTaskSetManager = { - new DummyTaskSetManager(priority, stage, numTasks, cs , taskSet) + def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: ClusterScheduler, taskSet: TaskSet): FakeTaskSetManager = { + new FakeTaskSetManager(priority, stage, numTasks, cs , taskSet) } def resourceOffer(rootPool: Pool): Int = { @@ -118,7 +117,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging logInfo("parentName:%s, parent running tasks:%d, name:%s,runningTasks:%d".format(manager.parent.name, manager.parent.runningTasks, manager.name, manager.runningTasks)) } for (taskSet <- taskSetQueue) { - taskSet.slaveOffer("execId_1", "hostname_1", 1) match { + taskSet.resourceOffer("execId_1", "hostname_1", 1, TaskLocality.ANY) match { case Some(task) => return taskSet.stageId case None => {} @@ -135,7 +134,7 @@ class ClusterSchedulerSuite 
extends FunSuite with LocalSparkContext with Logging sc = new SparkContext("local", "ClusterSchedulerSuite") val clusterScheduler = new ClusterScheduler(sc) var tasks = ArrayBuffer[Task[_]]() - val task = new DummyTask(0) + val task = new FakeTask(0) tasks += task val taskSet = new TaskSet(tasks.toArray,0,0,0,null) @@ -162,7 +161,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging sc = new SparkContext("local", "ClusterSchedulerSuite") val clusterScheduler = new ClusterScheduler(sc) var tasks = ArrayBuffer[Task[_]]() - val task = new DummyTask(0) + val task = new FakeTask(0) tasks += task val taskSet = new TaskSet(tasks.toArray,0,0,0,null) @@ -219,7 +218,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging sc = new SparkContext("local", "ClusterSchedulerSuite") val clusterScheduler = new ClusterScheduler(sc) var tasks = ArrayBuffer[Task[_]]() - val task = new DummyTask(0) + val task = new FakeTask(0) tasks += task val taskSet = new TaskSet(tasks.toArray,0,0,0,null) diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala new file mode 100644 index 0000000000..ff70a2cdf0 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable + +import org.scalatest.FunSuite + +import org.apache.spark._ +import org.apache.spark.scheduler._ +import org.apache.spark.executor.TaskMetrics +import java.nio.ByteBuffer +import org.apache.spark.util.{Utils, FakeClock} + +/** + * A mock ClusterScheduler implementation that just remembers information about tasks started and + * feedback received from the TaskSetManagers. Note that it's important to initialize this with + * a list of "live" executors and their hostnames for isExecutorAlive and hasExecutorsAliveOnHost + * to work, and these are required for locality in ClusterTaskSetManager. 
+ */ +class FakeClusterScheduler(sc: SparkContext, liveExecutors: (String, String)* /* execId, host */) + extends ClusterScheduler(sc) +{ + val startedTasks = new ArrayBuffer[Long] + val endedTasks = new mutable.HashMap[Long, TaskEndReason] + val finishedManagers = new ArrayBuffer[TaskSetManager] + + val executors = new mutable.HashMap[String, String] ++ liveExecutors + + listener = new TaskSchedulerListener { + def taskStarted(task: Task[_], taskInfo: TaskInfo) { + startedTasks += taskInfo.index + } + + def taskEnded( + task: Task[_], + reason: TaskEndReason, + result: Any, + accumUpdates: mutable.Map[Long, Any], + taskInfo: TaskInfo, + taskMetrics: TaskMetrics) + { + endedTasks(taskInfo.index) = reason + } + + def executorGained(execId: String, host: String) {} + + def executorLost(execId: String) {} + + def taskSetFailed(taskSet: TaskSet, reason: String) {} + } + + def removeExecutor(execId: String): Unit = executors -= execId + + override def taskSetFinished(manager: TaskSetManager): Unit = finishedManagers += manager + + override def isExecutorAlive(execId: String): Boolean = executors.contains(execId) + + override def hasExecutorsAliveOnHost(host: String): Boolean = executors.values.exists(_ == host) +} + +class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging { + import TaskLocality.{ANY, PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL} + + val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "3000").toLong + + test("TaskSet with no preferences") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, ("exec1", "host1")) + val taskSet = createTaskSet(1) + val manager = new ClusterTaskSetManager(sched, taskSet) + + // Offer a host with no CPUs + assert(manager.resourceOffer("exec1", "host1", 0, ANY) === None) + + // Offer a host with process-local as the constraint; this should work because the TaskSet + // above won't have any locality preferences + val taskOption = manager.resourceOffer("exec1", "host1", 2, TaskLocality.PROCESS_LOCAL) + assert(taskOption.isDefined) + val task = taskOption.get + assert(task.executorId === "exec1") + assert(sched.startedTasks.contains(0)) + + // Re-offer the host -- now we should get no more tasks + assert(manager.resourceOffer("exec1", "host1", 2, PROCESS_LOCAL) === None) + + // Tell it the task has finished + manager.statusUpdate(0, TaskState.FINISHED, createTaskResult(0)) + assert(sched.endedTasks(0) === Success) + assert(sched.finishedManagers.contains(manager)) + } + + test("multiple offers with no preferences") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, ("exec1", "host1")) + val taskSet = createTaskSet(3) + val manager = new ClusterTaskSetManager(sched, taskSet) + + // First three offers should all find tasks + for (i <- 0 until 3) { + val taskOption = manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) + assert(taskOption.isDefined) + val task = taskOption.get + assert(task.executorId === "exec1") + } + assert(sched.startedTasks.toSet === Set(0, 1, 2)) + + // Re-offer the host -- now we should get no more tasks + assert(manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) === None) + + // Finish the first two tasks + manager.statusUpdate(0, TaskState.FINISHED, createTaskResult(0)) + manager.statusUpdate(1, TaskState.FINISHED, createTaskResult(1)) + assert(sched.endedTasks(0) === Success) + assert(sched.endedTasks(1) === Success) + assert(!sched.finishedManagers.contains(manager)) + + // Finish the last task + 
manager.statusUpdate(2, TaskState.FINISHED, createTaskResult(2)) + assert(sched.endedTasks(2) === Success) + assert(sched.finishedManagers.contains(manager)) + } + + test("basic delay scheduling") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) + val taskSet = createTaskSet(4, + Seq(TaskLocation("host1", "exec1")), + Seq(TaskLocation("host2", "exec2")), + Seq(TaskLocation("host1"), TaskLocation("host2", "exec2")), + Seq() // Last task has no locality prefs + ) + val clock = new FakeClock + val manager = new ClusterTaskSetManager(sched, taskSet, clock) + + // First offer host1, exec1: first task should be chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) + + // Offer host1, exec1 again: the last task, which has no prefs, should be chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 3) + + // Offer host1, exec1 again, at PROCESS_LOCAL level: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) === None) + + clock.advance(LOCALITY_WAIT) + + // Offer host1, exec1 again, at PROCESS_LOCAL level: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) === None) + + // Offer host1, exec1 again, at NODE_LOCAL level: we should choose task 2 + assert(manager.resourceOffer("exec1", "host1", 1, NODE_LOCAL).get.index == 2) + + // Offer host1, exec1 again, at NODE_LOCAL level: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, NODE_LOCAL) === None) + + // Offer host1, exec1 again, at ANY level: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + + clock.advance(LOCALITY_WAIT) + + // Offer host1, exec1 again, at ANY level: task 1 should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 1) + + // Offer host1, exec1 again, at ANY level: nothing should be chosen as we've launched all tasks + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + } + + test("delay scheduling with fallback") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, + ("exec1", "host1"), ("exec2", "host2"), ("exec3", "host3")) + val taskSet = createTaskSet(5, + Seq(TaskLocation("host1")), + Seq(TaskLocation("host2")), + Seq(TaskLocation("host2")), + Seq(TaskLocation("host3")), + Seq(TaskLocation("host2")) + ) + val clock = new FakeClock + val manager = new ClusterTaskSetManager(sched, taskSet, clock) + + // First offer host1: first task should be chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) + + // Offer host1 again: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + + clock.advance(LOCALITY_WAIT) + + // Offer host1 again: second task (on host2) should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 1) + + // Offer host1 again: third task (on host2) should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 2) + + // Offer host2: fifth task (also on host2) should get chosen + assert(manager.resourceOffer("exec2", "host2", 1, ANY).get.index === 4) + + // Now that we've launched a local task, we should no longer launch the task for host3 + assert(manager.resourceOffer("exec2", "host2", 1, ANY) === None) + + clock.advance(LOCALITY_WAIT) + + // After another delay, we can go ahead and launch that task non-locally + 
assert(manager.resourceOffer("exec2", "host2", 1, ANY).get.index === 3) + } + + test("delay scheduling with failed hosts") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) + val taskSet = createTaskSet(3, + Seq(TaskLocation("host1")), + Seq(TaskLocation("host2")), + Seq(TaskLocation("host3")) + ) + val clock = new FakeClock + val manager = new ClusterTaskSetManager(sched, taskSet, clock) + + // First offer host1: first task should be chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) + + // Offer host1 again: third task should be chosen immediately because host3 is not up + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 2) + + // After this, nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + + // Now mark host2 as dead + sched.removeExecutor("exec2") + manager.executorLost("exec2", "host2") + + // Task 1 should immediately be launched on host1 because its original host is gone + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 1) + + // Now that all tasks have launched, nothing new should be launched anywhere else + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + assert(manager.resourceOffer("exec2", "host2", 1, ANY) === None) + } + + /** + * Utility method to create a TaskSet, potentially setting a particular sequence of preferred + * locations for each task (given as varargs) if this sequence is not empty. + */ + def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { + if (prefLocs.size != 0 && prefLocs.size != numTasks) { + throw new IllegalArgumentException("Wrong number of task locations") + } + val tasks = Array.tabulate[Task[_]](numTasks) { i => + new FakeTask(i, if (prefLocs.size != 0) prefLocs(i) else Nil) + } + new TaskSet(tasks, 0, 0, 0, null) + } + + def createTaskResult(id: Int): ByteBuffer = { + ByteBuffer.wrap(Utils.serialize(new TaskResult[Int](id, mutable.Map.empty, new TaskMetrics))) + } +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala new file mode 100644 index 0000000000..2f12aaed18 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
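Putting the helpers above together, a hedged sketch of the wiring these tests rely on (suite-local names; `sched` is assumed to be a FakeClusterScheduler seeded with ("exec1", "host1")):

{% highlight scala %}
// Task 0 prefers host1, task 1 has no preference; a NODE_LOCAL offer on
// host1 should therefore hand out task 0 first.
val taskSet = createTaskSet(2, Seq(TaskLocation("host1")), Seq())
val manager = new ClusterTaskSetManager(sched, taskSet, new FakeClock)
val chosen = manager.resourceOffer("exec1", "host1", 1, TaskLocality.NODE_LOCAL)
assert(chosen.get.index == 0)
{% endhighlight %}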
+ */ + +package org.apache.spark.scheduler.cluster + +import org.apache.spark.scheduler.{TaskLocation, Task} + +class FakeTask(stageId: Int, prefLocs: Seq[TaskLocation] = Nil) extends Task[Int](stageId) { + override def run(attemptId: Long): Int = 0 + + override def preferredLocations: Seq[TaskLocation] = prefLocs +} diff --git a/core/src/test/scala/spark/scheduler/LocalSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/local/LocalSchedulerSuite.scala index 14bb58731b..111340a65c 100644 --- a/core/src/test/scala/spark/scheduler/LocalSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/local/LocalSchedulerSuite.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler.local import org.scalatest.FunSuite import org.scalatest.BeforeAndAfter -import spark._ -import spark.scheduler._ -import spark.scheduler.cluster._ +import org.apache.spark._ +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster._ import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.{ConcurrentMap, HashMap} import java.util.concurrent.Semaphore @@ -57,23 +57,23 @@ object TaskThreadInfo { * 1. each thread contains one job. * 2. each job contains one stage. * 3. each stage only contains one task. - * 4. each task(launched) must be lanched orderly(using threadToStarted) to make sure - * it will get cpu core resource, and will wait to finished after user manually - * release "Lock" and then cluster will contain another free cpu cores. - * 5. each task(pending) must use "sleep" to make sure it has been added to taskSetManager queue, + * 4. each task (launched) must be launched in order (using threadToStarted) to make sure + * it will get a cpu core, and will wait to finish until the user manually + * releases "Lock", after which the cluster will have another free cpu core. + * 5. each task (pending) must use "sleep" to make sure it has been added to the taskSetManager queue, * thus it will be scheduled later when cluster has free cpu cores. */ class LocalSchedulerSuite extends FunSuite with LocalSparkContext { def createThread(threadIndex: Int, poolName: String, sc: SparkContext, sem: Semaphore) { - + TaskThreadInfo.threadToRunning(threadIndex) = false val nums = sc.parallelize(threadIndex to threadIndex, 1) TaskThreadInfo.threadToLock(threadIndex) = new Lock() TaskThreadInfo.threadToStarted(threadIndex) = new CountDownLatch(1) new Thread { if (poolName != null) { - sc.addLocalProperties("spark.scheduler.cluster.fair.pool",poolName) + sc.setLocalProperty("spark.scheduler.cluster.fair.pool", poolName) } override def run() { val ans = nums.map(number => { @@ -88,7 +88,7 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext { } }.start() } - + test("Local FIFO scheduler end-to-end test") { System.setProperty("spark.cluster.schedulingmode", "FIFO") sc = new SparkContext("local[4]", "test") @@ -103,8 +103,8 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext { createThread(4,null,sc,sem) TaskThreadInfo.threadToStarted(4).await() // thread 5 and 6 (stage pending) must meet following two points - // 1. stages (taskSetManager) of jobs in thread 5 and 6 should be add to taskSetManager - // queue before executing TaskThreadInfo.threadToLock(1).jobFinished() + // 1. stages (taskSetManager) of jobs in thread 5 and 6 should be added to the taskSetManager + // queue before executing TaskThreadInfo.threadToLock(1).jobFinished() // 2.
priority of stage in thread 5 should be prior to priority of stage in thread 6 // So I just use "sleep" 1s here for each thread. // TODO: any better solution? @@ -112,24 +112,24 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext { Thread.sleep(1000) createThread(6,null,sc,sem) Thread.sleep(1000) - + assert(TaskThreadInfo.threadToRunning(1) === true) assert(TaskThreadInfo.threadToRunning(2) === true) assert(TaskThreadInfo.threadToRunning(3) === true) assert(TaskThreadInfo.threadToRunning(4) === true) assert(TaskThreadInfo.threadToRunning(5) === false) assert(TaskThreadInfo.threadToRunning(6) === false) - + TaskThreadInfo.threadToLock(1).jobFinished() TaskThreadInfo.threadToStarted(5).await() - + assert(TaskThreadInfo.threadToRunning(1) === false) assert(TaskThreadInfo.threadToRunning(2) === true) assert(TaskThreadInfo.threadToRunning(3) === true) assert(TaskThreadInfo.threadToRunning(4) === true) assert(TaskThreadInfo.threadToRunning(5) === true) assert(TaskThreadInfo.threadToRunning(6) === false) - + TaskThreadInfo.threadToLock(3).jobFinished() TaskThreadInfo.threadToStarted(6).await() @@ -139,7 +139,7 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext { assert(TaskThreadInfo.threadToRunning(4) === true) assert(TaskThreadInfo.threadToRunning(5) === true) assert(TaskThreadInfo.threadToRunning(6) === true) - + TaskThreadInfo.threadToLock(2).jobFinished() TaskThreadInfo.threadToLock(4).jobFinished() TaskThreadInfo.threadToLock(5).jobFinished() @@ -160,18 +160,18 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext { TaskThreadInfo.threadToStarted(20).await() createThread(30,"3",sc,sem) TaskThreadInfo.threadToStarted(30).await() - + assert(TaskThreadInfo.threadToRunning(10) === true) assert(TaskThreadInfo.threadToRunning(20) === true) assert(TaskThreadInfo.threadToRunning(30) === true) - + createThread(11,"1",sc,sem) TaskThreadInfo.threadToStarted(11).await() createThread(21,"2",sc,sem) TaskThreadInfo.threadToStarted(21).await() createThread(31,"3",sc,sem) TaskThreadInfo.threadToStarted(31).await() - + assert(TaskThreadInfo.threadToRunning(11) === true) assert(TaskThreadInfo.threadToRunning(21) === true) assert(TaskThreadInfo.threadToRunning(31) === true) @@ -185,19 +185,19 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext { assert(TaskThreadInfo.threadToRunning(12) === true) assert(TaskThreadInfo.threadToRunning(22) === true) assert(TaskThreadInfo.threadToRunning(32) === false) - + TaskThreadInfo.threadToLock(10).jobFinished() TaskThreadInfo.threadToStarted(32).await() - + assert(TaskThreadInfo.threadToRunning(32) === true) - //1. Similar with above scenario, sleep 1s for stage of 23 and 33 to be added to taskSetManager + //1. Similar with above scenario, sleep 1s for stage of 23 and 33 to be added to taskSetManager // queue so that cluster will assign free cpu core to stage 23 after stage 11 finished. //2. priority of 23 and 33 will be meaningless as using fair scheduler here. 
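The coordination idiom these tests lean on, distilled into a standalone sketch (hypothetical names, separate from the suite): a CountDownLatch announces that a task is occupying a core, a manually released lock keeps it there, and a Semaphore counts finished jobs.

{% highlight scala %}
import java.util.concurrent.{CountDownLatch, Semaphore}

val started = new CountDownLatch(1) // "task is running and holds a core"
val finished = new Semaphore(0)     // released once per completed job

new Thread {
  override def run() {
    started.countDown()             // let the test observe the running state
    // ... block here until the test releases its Lock ...
    finished.release()
  }
}.start()

started.await()                     // test proceeds only after the task starts
// ... test releases the Lock, then:
finished.acquire()                  // and waits for the job to complete
{% endhighlight %}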
createThread(23,"2",sc,sem) createThread(33,"3",sc,sem) Thread.sleep(1000) - + TaskThreadInfo.threadToLock(11).jobFinished() TaskThreadInfo.threadToStarted(23).await() @@ -206,7 +206,7 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext { TaskThreadInfo.threadToLock(12).jobFinished() TaskThreadInfo.threadToStarted(33).await() - + assert(TaskThreadInfo.threadToRunning(33) === true) TaskThreadInfo.threadToLock(20).jobFinished() @@ -217,7 +217,7 @@ class LocalSchedulerSuite extends FunSuite with LocalSparkContext { TaskThreadInfo.threadToLock(31).jobFinished() TaskThreadInfo.threadToLock(32).jobFinished() TaskThreadInfo.threadToLock(33).jobFinished() - - sem.acquire(11) + + sem.acquire(11) } } diff --git a/core/src/test/scala/spark/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index c3323dcbb3..0164dda0ba 100644 --- a/core/src/test/scala/spark/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -15,17 +15,17 @@ * limitations under the License. */ -package spark +package org.apache.spark.serializer import scala.collection.mutable -import scala.collection.immutable -import org.scalatest.FunSuite -import com.esotericsoftware.kryo._ +import com.esotericsoftware.kryo.Kryo -import SparkContext._ +import org.scalatest.FunSuite +import org.apache.spark.SharedSparkContext +import org.apache.spark.serializer.KryoTest._ -class KryoSerializerSuite extends FunSuite { +class KryoSerializerSuite extends FunSuite with SharedSparkContext { test("basic types") { val ser = (new KryoSerializer).newInstance() def check[T](t: T) { @@ -53,6 +53,7 @@ class KryoSerializerSuite extends FunSuite { check(Array(true, false, true)) check(Array('a', 'b', 'c')) check(Array[Int]()) + check(Array(Array("1", "2"), Array("1", "2", "3", "4"))) } test("pairs") { @@ -103,7 +104,6 @@ class KryoSerializerSuite extends FunSuite { } test("custom registrator") { - import spark.test._ System.setProperty("spark.kryo.registrator", classOf[MyRegistrator].getName) val ser = (new KryoSerializer).newInstance() @@ -123,14 +123,65 @@ class KryoSerializerSuite extends FunSuite { val hashMap = new java.util.HashMap[String, String] hashMap.put("foo", "bar") check(hashMap) - + + System.clearProperty("spark.kryo.registrator") + } + + test("kryo with collect") { + val control = 1 :: 2 :: Nil + val result = sc.parallelize(control, 2).map(new ClassWithoutNoArgConstructor(_)).collect().map(_.x) + assert(control === result.toSeq) + } + + test("kryo with parallelize") { + val control = 1 :: 2 :: Nil + val result = sc.parallelize(control.map(new ClassWithoutNoArgConstructor(_))).map(_.x).collect() + assert (control === result.toSeq) + } + + test("kryo with parallelize for specialized tuples") { + assert (sc.parallelize( Array((1, 11), (2, 22), (3, 33)) ).count === 3) + } + + test("kryo with parallelize for primitive arrays") { + assert (sc.parallelize( Array(1, 2, 3) ).count === 3) + } + + test("kryo with collect for specialized tuples") { + assert (sc.parallelize( Array((1, 11), (2, 22), (3, 33)) ).collect().head === (1, 11)) + } + + test("kryo with reduce") { + val control = 1 :: 2 :: Nil + val result = sc.parallelize(control, 2).map(new ClassWithoutNoArgConstructor(_)) + .reduce((t1, t2) => new ClassWithoutNoArgConstructor(t1.x + t2.x)).x + assert(control.sum === result) + } + + // TODO: this still doesn't work + ignore("kryo with fold") { + val control = 1 :: 2 :: Nil + val result = sc.parallelize(control, 2).map(new 
ClassWithoutNoArgConstructor(_)) + .fold(new ClassWithoutNoArgConstructor(10))((t1, t2) => new ClassWithoutNoArgConstructor(t1.x + t2.x)).x + assert(10 + control.sum === result) + } + + override def beforeAll() { + System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + System.setProperty("spark.kryo.registrator", classOf[MyRegistrator].getName) + super.beforeAll() + } + + override def afterAll() { + super.afterAll() System.clearProperty("spark.kryo.registrator") + System.clearProperty("spark.serializer") } } -package test { +object KryoTest { case class CaseClass(i: Int, s: String) {} - + class ClassWithNoArgConstructor { var x: Int = 0 override def equals(other: Any) = other match { diff --git a/core/src/test/scala/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index b719d65342..038a9acb85 100644 --- a/core/src/test/scala/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer @@ -29,11 +29,8 @@ import org.scalatest.concurrent.Timeouts._ import org.scalatest.matchers.ShouldMatchers._ import org.scalatest.time.SpanSugar._ -import spark.JavaSerializer -import spark.KryoSerializer -import spark.SizeEstimator -import spark.util.AkkaUtils -import spark.util.ByteBufferInputStream +import org.apache.spark.util.{SizeEstimator, Utils, AkkaUtils, ByteBufferInputStream} +import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodTester { @@ -56,7 +53,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT System.setProperty("spark.hostPort", "localhost:" + boundPort) master = new BlockManagerMaster( - actorSystem.actorOf(Props(new spark.storage.BlockManagerMasterActor(true)))) + actorSystem.actorOf(Props(new BlockManagerMasterActor(true)))) // Set the arch to 64-bit and compressedOops to true to get a deterministic test-case oldArch = System.setProperty("os.arch", "amd64") @@ -65,7 +62,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT val initialize = PrivateMethod[Unit]('initialize) SizeEstimator invokePrivate initialize() // Set some value ... 
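Stepping back to the Kryo suite above: the round trip its `check[T]` helper performs looks like this in isolation (a sketch; `KryoSerializer` is the renamed `org.apache.spark.serializer.KryoSerializer`):

{% highlight scala %}
import org.apache.spark.serializer.KryoSerializer

val ser = (new KryoSerializer).newInstance()
// Nested arrays, as exercised by the new "basic types" check above.
val in = Array(Array("1", "2"), Array("1", "2", "3", "4"))
val out = ser.deserialize[Array[Array[String]]](ser.serialize(in))
assert(out.deep == in.deep) // element-wise comparison for nested arrays
{% endhighlight %}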
- System.setProperty("spark.hostPort", spark.Utils.localHostName() + ":" + 1111) + System.setProperty("spark.hostPort", Utils.localHostName() + ":" + 1111) } after { @@ -105,10 +102,10 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT assert(level2 === level1, "level2 is not same as level1") assert(level2.eq(level1), "level2 is not the same object as level1") assert(level3 != level1, "level3 is same as level1") - val bytes1 = spark.Utils.serialize(level1) - val level1_ = spark.Utils.deserialize[StorageLevel](bytes1) - val bytes2 = spark.Utils.serialize(level2) - val level2_ = spark.Utils.deserialize[StorageLevel](bytes2) + val bytes1 = Utils.serialize(level1) + val level1_ = Utils.deserialize[StorageLevel](bytes1) + val bytes2 = Utils.serialize(level2) + val level2_ = Utils.deserialize[StorageLevel](bytes2) assert(level1_ === level1, "Deserialized level1 not same as original level1") assert(level1_.eq(level1), "Deserialized level1 not the same object as original level2") assert(level2_ === level2, "Deserialized level2 not same as original level2") @@ -122,10 +119,10 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT assert(id2 === id1, "id2 is not same as id1") assert(id2.eq(id1), "id2 is not the same object as id1") assert(id3 != id1, "id3 is same as id1") - val bytes1 = spark.Utils.serialize(id1) - val id1_ = spark.Utils.deserialize[BlockManagerId](bytes1) - val bytes2 = spark.Utils.serialize(id2) - val id2_ = spark.Utils.deserialize[BlockManagerId](bytes2) + val bytes1 = Utils.serialize(id1) + val id1_ = Utils.deserialize[BlockManagerId](bytes1) + val bytes2 = Utils.serialize(id2) + val id2_ = Utils.deserialize[BlockManagerId](bytes2) assert(id1_ === id1, "Deserialized id1 is not same as original id1") assert(id1_.eq(id1), "Deserialized id1 is not the same object as original id1") assert(id2_ === id2, "Deserialized id2 is not same as original id2") diff --git a/core/src/test/scala/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 56c1fed6ad..3321fb5eb7 100644 --- a/core/src/test/scala/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.ui +package org.apache.spark.ui import scala.util.{Failure, Success, Try} import java.net.ServerSocket @@ -24,14 +24,15 @@ import org.eclipse.jetty.server.Server class UISuite extends FunSuite { test("jetty port increases under contention") { - val startPort = 33333 + val startPort = 3030 val server = new Server(startPort) server.start() val (jettyServer1, boundPort1) = JettyUtils.startJettyServer("localhost", startPort, Seq()) val (jettyServer2, boundPort2) = JettyUtils.startJettyServer("localhost", startPort, Seq()) - assert(boundPort1 === startPort + 1) - assert(boundPort2 === startPort + 2) + // Allow some wiggle room in case ports on the machine are under contention + assert(boundPort1 > startPort && boundPort1 < startPort + 10) + assert(boundPort2 > boundPort1 && boundPort2 < boundPort1 + 10) } test("jetty binds to port 0 correctly") { diff --git a/core/src/test/scala/spark/ClosureCleanerSuite.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala index 7d2831e19c..0ed366fb70 100644 --- a/core/src/test/scala/spark/ClosureCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala @@ -15,13 +15,14 @@ * limitations under the License. 
*/ -package spark +package org.apache.spark.util import java.io.NotSerializableException import org.scalatest.FunSuite -import spark.LocalSparkContext._ -import SparkContext._ + +import org.apache.spark.SparkContext +import org.apache.spark.LocalSparkContext._ class ClosureCleanerSuite extends FunSuite { test("closures inside an object") { diff --git a/core/src/test/scala/spark/util/DistributionSuite.scala b/core/src/test/scala/org/apache/spark/util/DistributionSuite.scala index 6578b55e82..63642461e4 100644 --- a/core/src/test/scala/spark/util/DistributionSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/DistributionSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import org.scalatest.FunSuite import org.scalatest.matchers.ShouldMatchers diff --git a/core/src/test/scala/org/apache/spark/util/FakeClock.scala b/core/src/test/scala/org/apache/spark/util/FakeClock.scala new file mode 100644 index 0000000000..0a45917b08 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/FakeClock.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +class FakeClock extends Clock { + private var time = 0L + + def advance(millis: Long): Unit = time += millis + + def getTime(): Long = time +} diff --git a/core/src/test/scala/spark/util/NextIteratorSuite.scala b/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala index fdbd43d941..45867463a5 100644 --- a/core/src/test/scala/spark/util/NextIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import org.scalatest.FunSuite import org.scalatest.matchers.ShouldMatchers diff --git a/core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/RateLimitedOutputStreamSuite.scala index 4c0044202f..a9dd0b1a5b 100644 --- a/core/src/test/scala/spark/util/RateLimitedOutputStreamSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/RateLimitedOutputStreamSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import org.scalatest.FunSuite import java.io.ByteArrayOutputStream diff --git a/core/src/test/scala/spark/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala index 1ef812dfbd..4e40dcbdee 100644 --- a/core/src/test/scala/spark/SizeEstimatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. 
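The new FakeClock above is the whole mechanism behind the deterministic delay-scheduling tests; a quick usage sketch:

{% highlight scala %}
import org.apache.spark.util.FakeClock

val clock = new FakeClock
assert(clock.getTime() == 0L)
clock.advance(3000) // simulate spark.locality.wait elapsing, with no real sleep
assert(clock.getTime() == 3000L)
{% endhighlight %}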
*/ -package spark +package org.apache.spark.util import org.scalatest.FunSuite import org.scalatest.BeforeAndAfterAll diff --git a/core/src/test/scala/spark/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 31c3b25c50..e2859caf58 100644 --- a/core/src/test/scala/spark/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark.util import com.google.common.base.Charsets import com.google.common.io.Files @@ -26,14 +26,14 @@ import scala.util.Random class UtilsSuite extends FunSuite { - test("memoryBytesToString") { - assert(Utils.memoryBytesToString(10) === "10.0 B") - assert(Utils.memoryBytesToString(1500) === "1500.0 B") - assert(Utils.memoryBytesToString(2000000) === "1953.1 KB") - assert(Utils.memoryBytesToString(2097152) === "2.0 MB") - assert(Utils.memoryBytesToString(2306867) === "2.2 MB") - assert(Utils.memoryBytesToString(5368709120L) === "5.0 GB") - assert(Utils.memoryBytesToString(5L * 1024L * 1024L * 1024L * 1024L) === "5.0 TB") + test("bytesToString") { + assert(Utils.bytesToString(10) === "10.0 B") + assert(Utils.bytesToString(1500) === "1500.0 B") + assert(Utils.bytesToString(2000000) === "1953.1 KB") + assert(Utils.bytesToString(2097152) === "2.0 MB") + assert(Utils.bytesToString(2306867) === "2.2 MB") + assert(Utils.bytesToString(5368709120L) === "5.0 GB") + assert(Utils.bytesToString(5L * 1024L * 1024L * 1024L * 1024L) === "5.0 TB") } test("copyStream") { diff --git a/docs/README.md b/docs/README.md index c2b3497bb3..dfcf753553 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ Welcome to the Spark documentation! -This readme will walk you through navigating and building the Spark documentation, which is included here with the Spark source code. You can also find documentation specific to release versions of Spark at http://spark-project.org/documentation.html. +This readme will walk you through navigating and building the Spark documentation, which is included here with the Spark source code. You can also find documentation specific to release versions of Spark at http://spark.incubator.apache.org/documentation.html. Read on to learn more about viewing documentation in plain text (i.e., markdown) or building the documentation yourself. Why build it yourself? So that you have the docs that correspond to whichever version of Spark you currently have checked out of revision control.
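For reference, the renamed formatting helper in action (values taken from the assertions above):

{% highlight scala %}
import org.apache.spark.util.Utils

Utils.bytesToString(2306867L)                       // "2.2 MB"
Utils.bytesToString(5368709120L)                    // "5.0 GB"
Utils.bytesToString(5L * 1024 * 1024 * 1024 * 1024) // "5.0 TB"
{% endhighlight %}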
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index f06ab2d5b0..84749fda4e 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -61,19 +61,25 @@ <a href="#" class="dropdown-toggle" data-toggle="dropdown">Programming Guides<b class="caret"></b></a> <ul class="dropdown-menu"> <li><a href="quick-start.html">Quick Start</a></li> - <li><a href="scala-programming-guide.html">Scala</a></li> - <li><a href="java-programming-guide.html">Java</a></li> - <li><a href="python-programming-guide.html">Python</a></li> + <li><a href="scala-programming-guide.html">Spark in Scala</a></li> + <li><a href="java-programming-guide.html">Spark in Java</a></li> + <li><a href="python-programming-guide.html">Spark in Python</a></li> + <li class="divider"></li> <li><a href="streaming-programming-guide.html">Spark Streaming</a></li> + <li><a href="mllib-guide.html">MLlib (Machine Learning)</a></li> + <li><a href="bagel-programming-guide.html">Bagel (Pregel on Spark)</a></li> </ul> </li> <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">API Docs<b class="caret"></b></a> <ul class="dropdown-menu"> - <li><a href="api/core/index.html">Spark Java/Scala (Scaladoc)</a></li> - <li><a href="api/pyspark/index.html">Spark Python (Epydoc)</a></li> - <li><a href="api/streaming/index.html">Spark Streaming Java/Scala (Scaladoc) </a></li> + <li><a href="api/core/index.html">Spark Core for Java/Scala</a></li> + <li><a href="api/pyspark/index.html">Spark Core for Python</a></li> + <li class="divider"></li> + <li><a href="api/streaming/index.html">Spark Streaming</a></li> + <li><a href="api/mllib/index.html">MLlib (Machine Learning)</a></li> + <li><a href="api/bagel/index.html">Bagel (Pregel on Spark)</a></li> </ul> </li> @@ -90,10 +96,10 @@ <li class="dropdown"> <a href="api.html" class="dropdown-toggle" data-toggle="dropdown">More<b class="caret"></b></a> <ul class="dropdown-menu"> - <li><a href="building-with-maven.html">Building Spark with Maven</a></li> <li><a href="configuration.html">Configuration</a></li> <li><a href="tuning.html">Tuning Guide</a></li> - <li><a href="bagel-programming-guide.html">Bagel (Pregel on Spark)</a></li> + <li><a href="hardware-provisioning.html">Hardware Provisioning</a></li> + <li><a href="building-with-maven.html">Building Spark with Maven</a></li> <li><a href="contributing-to-spark.html">Contributing to Spark</a></li> </ul> </li> @@ -135,9 +141,15 @@ <hr>--> - <!--<footer> - <p></p> - </footer>--> + <footer> + <hr> + <p style="text-align: center; vertical-align: middle; color: #999;"> + Apache Spark is an effort undergoing incubation at the Apache Software Foundation. + <a href="http://incubator.apache.org"> + <img style="margin-left: 20px;" src="img/incubator-logo.png" /> + </a> + </p> + </footer> </div> <!-- /container --> diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index 45ef4bba82..c574ea7f5c 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -18,9 +18,9 @@ require 'fileutils' include FileUtils -if ENV['SKIP_API'] != '1' +if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1') # Build Scaladoc for Java/Scala - projects = ["core", "examples", "repl", "bagel", "streaming"] + projects = ["core", "examples", "repl", "bagel", "streaming", "mllib"] puts "Moving to project root and building scaladoc."
curr_dir = pwd diff --git a/docs/bagel-programming-guide.md b/docs/bagel-programming-guide.md index 8a0fa42d94..583684913d 100644 --- a/docs/bagel-programming-guide.md +++ b/docs/bagel-programming-guide.md @@ -9,16 +9,15 @@ In the Pregel programming model, jobs run as a sequence of iterations called _su This guide shows the programming model and features of Bagel by walking through an example implementation of PageRank on Bagel. -## Linking with Bagel +# Linking with Bagel -To write a Bagel application, you will need to add Spark, its dependencies, and Bagel to your CLASSPATH: +To use Bagel in your program, add the following SBT or Maven dependency: -1. Run `sbt/sbt update` to fetch Spark's dependencies, if you haven't already done so. -2. Run `sbt/sbt assembly` to build Spark and its dependencies into one JAR (`core/target/spark-core-assembly-{{site.SPARK_VERSION}}.jar`) -3. Run `sbt/sbt package` build the Bagel JAR (`bagel/target/scala_{{site.SCALA_VERSION}}/spark-bagel_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}.jar`). -4. Add these two JARs to your CLASSPATH. + groupId = org.apache.spark + artifactId = spark-bagel_{{site.SCALA_VERSION}} + version = {{site.SPARK_VERSION}} -## Programming Model +# Programming Model Bagel operates on a graph represented as a [distributed dataset](scala-programming-guide.html) of (K, V) pairs, where keys are vertex IDs and values are vertices plus their associated state. In each superstep, Bagel runs a user-specified compute function on each vertex that takes as input the current vertex state and a list of messages sent to that vertex during the previous superstep, and returns the new vertex state and a list of outgoing messages. @@ -29,8 +28,8 @@ representing the current PageRank of the vertex, and similarly extend the `Message` and `Edge` classes. Note that these need to be marked `@serializable` to allow Spark to transfer them across machines. We also import the Bagel types and implicit conversions. {% highlight scala %} -import spark.bagel._ -import spark.bagel.Bagel._ +import org.apache.spark.bagel._ +import org.apache.spark.bagel.Bagel._ @serializable class PREdge(val targetId: String) extends Edge @@ -89,7 +88,7 @@ Finally, we print the results. println(result.map(v => "%s\t%s\n".format(v.id, v.rank)).collect.mkString) {% endhighlight %} -### Combiners +## Combiners Sending a message to another vertex generally involves expensive communication over the network. For certain algorithms, it's possible to reduce the amount of communication using _combiners_. For example, if the compute function receives integer messages and only uses their sum, it's possible for Bagel to combine multiple messages to the same vertex by summing them. @@ -97,7 +96,7 @@ For combiner support, Bagel can optionally take a set of combiner functions that _Example: PageRank with combiners_ -### Aggregators +## Aggregators Aggregators perform a reduce across all vertices after each superstep, and provide the result to each vertex in the next superstep. @@ -105,11 +104,11 @@ For aggregator support, Bagel can optionally take an aggregator function that re _Example_ -### Operations +## Operations -Here are the actions and types in the Bagel API. See [Bagel.scala](https://github.com/mesos/spark/blob/master/bagel/src/main/scala/spark/bagel/Bagel.scala) for details. +Here are the actions and types in the Bagel API. See [Bagel.scala](https://github.com/apache/incubator-spark/blob/master/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala) for details.
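(For SBT users, the Maven coordinate given under "Linking with Bagel" above corresponds to a one-line dependency; a sketch, with the guide's template placeholders standing in for your Scala and Spark versions:)

{% highlight scala %}
// build.sbt sketch; substitute concrete versions for the placeholders.
libraryDependencies += "org.apache.spark" % "spark-bagel_{{site.SCALA_VERSION}}" % "{{site.SPARK_VERSION}}"
{% endhighlight %}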
-#### Actions +### Actions {% highlight scala %} /*** Full form ***/ @@ -133,7 +132,7 @@ Bagel.run(sc, vertices, messages, numSplits)(compute) // and returns (newVertex: V, outMessages: Array[M]) {% endhighlight %} -#### Types +### Types {% highlight scala %} trait Combiner[M, C] { @@ -156,6 +155,10 @@ trait Message[K] { } {% endhighlight %} -## Where to Go from Here +# Where to Go from Here -Two example jobs, PageRank and shortest path, are included in `bagel/src/main/scala/spark/bagel/examples`. You can run them by passing the class name to the `run` script included in Spark -- for example, `./run spark.bagel.examples.WikipediaPageRank`. Each example program prints usage help when run without any arguments. +Two example jobs, PageRank and shortest path, are included in `examples/src/main/scala/org/apache/spark/examples/bagel`. You can run them by passing the class name to the `run-example` script included in Spark; e.g.: + + ./run-example org.apache.spark.examples.bagel.WikipediaPageRank + +Each example program prints usage help when run without any arguments. diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md index 04cd79d039..7ecb601ddd 100644 --- a/docs/building-with-maven.md +++ b/docs/building-with-maven.md @@ -8,50 +8,60 @@ title: Building Spark with Maven Building Spark using Maven Requires Maven 3 (the build process is tested with Maven 3.0.4) and Java 1.6 or newer. -Building with Maven requires that a Hadoop profile be specified explicitly at the command line, there is no default. There are two profiles to choose from, one for building for Hadoop 1 or Hadoop 2. -for Hadoop 1 (using 0.20.205.0) use: +## Setting up Maven's Memory Usage ## - $ mvn -Phadoop1 clean install +You'll need to configure Maven to use more memory than usual by setting `MAVEN_OPTS`. We recommend the following settings: + export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m" -for Hadoop 2 (using 2.0.0-mr1-cdh4.1.1) use: +If you don't run this, you may see errors like the following: - $ mvn -Phadoop2 clean install + [INFO] Compiling 203 Scala sources and 9 Java sources to /Users/me/Development/spark/core/target/scala-{{site.SCALA_VERSION}}/classes... + [ERROR] PermGen space -> [Help 1] -It uses the scala-maven-plugin which supports incremental and continuous compilation. E.g. + [INFO] Compiling 203 Scala sources and 9 Java sources to /Users/me/Development/spark/core/target/scala-{{site.SCALA_VERSION}}/classes... + [ERROR] Java heap space -> [Help 1] - $ mvn -Phadoop2 scala:cc +You can fix this by setting the `MAVEN_OPTS` variable as discussed before. -…should run continuous compilation (i.e. wait for changes). However, this has not been tested extensively. +## Specifying the Hadoop version ## -## Spark Tests in Maven ## +Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default. -Tests are run by default via the scalatest-maven-plugin. 
With this you can do things like: -Skip test execution (but not compilation): + # Apache Hadoop 1.2.1 + $ mvn -Dhadoop.version=1.2.1 -DskipTests clean package - $ mvn -DskipTests -Phadoop2 clean install + # Cloudera CDH 4.2.0 with MapReduce v1 + $ mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package -To run a specific test suite: +For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, you should also enable the "hadoop2-yarn" profile: - $ mvn -Phadoop2 -Dsuites=spark.repl.ReplSuite test + # Apache Hadoop 2.0.5-alpha + $ mvn -Phadoop2-yarn -Dhadoop.version=2.0.5-alpha -DskipTests clean package + # Cloudera CDH 4.2.0 with MapReduce v2 + $ mvn -Phadoop2-yarn -Dhadoop.version=2.0.0-cdh4.2.0 -DskipTests clean package -## Setting up JVM Memory Usage Via Maven ## -You might run into the following errors if you're using a vanilla installation of Maven: +## Spark Tests in Maven ## - [INFO] Compiling 203 Scala sources and 9 Java sources to /Users/me/Development/spark/core/target/scala-{{site.SCALA_VERSION}}/classes... - [ERROR] PermGen space -> [Help 1] +Tests are run by default via the [ScalaTest Maven plugin](http://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin). Some of them require Spark to be packaged first, so always run `mvn package` with `-DskipTests` the first time. You can then run the tests with `mvn -Dhadoop.version=... test`. - [INFO] Compiling 203 Scala sources and 9 Java sources to /Users/me/Development/spark/core/target/scala-{{site.SCALA_VERSION}}/classes... - [ERROR] Java heap space -> [Help 1] +The ScalaTest plugin also supports running only a specific test suite as follows: + + $ mvn -Dhadoop.version=... -Dsuites=spark.repl.ReplSuite test + + +## Continuous Compilation ## -To fix these, you can do the following: +We use the scala-maven-plugin which supports incremental and continuous compilation. E.g. - export MAVEN_OPTS="-Xmx1024m -XX:MaxPermSize=128M" + $ mvn scala:cc +should run continuous compilation (i.e. wait for changes). However, this has not been tested extensively. ## Using With IntelliJ IDEA ## @@ -59,8 +69,8 @@ This setup works fine in IntelliJ IDEA 11.1.4. After opening the project via the ## Building Spark Debian Packages ## -It includes support for building a Debian package containing a 'fat-jar' which includes the repl, the examples and bagel. This can be created by specifying the deb profile: +The Maven build includes support for building a Debian package containing a 'fat-jar' which includes the repl, the examples and bagel. This can be created by specifying the following profiles: - $ mvn -Phadoop2,deb clean install + $ mvn -Prepl-bin -Pdeb clean package The debian package can then be found under repl/target. We added the short commit hash to the file name so that we can distinguish individual packages built for SNAPSHOT versions. diff --git a/docs/configuration.md b/docs/configuration.md index 5c06897cae..58e9434bdc 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -5,50 +5,14 @@ title: Spark Configuration Spark provides three main locations to configure the system: -* [Environment variables](#environment-variables) for launching Spark workers, which can - be set either in your driver program or in the `conf/spark-env.sh` script.
-* [Java system properties](#system-properties), which control internal configuration parameters and can be set either - programmatically (by calling `System.setProperty` *before* creating a `SparkContext`) or through the - `SPARK_JAVA_OPTS` environment variable in `spark-env.sh`. +* [Java system properties](#system-properties), which control internal configuration parameters and can be set + either programmatically (by calling `System.setProperty` *before* creating a `SparkContext`) or through + JVM arguments. +* [Environment variables](#environment-variables) for configuring per-machine settings such as the IP address, + which can be set in the `conf/spark-env.sh` script. * [Logging configuration](#configuring-logging), which is done through `log4j.properties`. -# Environment Variables - -Spark determines how to initialize the JVM on worker nodes, or even on the local node when you run `spark-shell`, -by running the `conf/spark-env.sh` script in the directory where it is installed. This script does not exist by default -in the Git repository, but but you can create it by copying `conf/spark-env.sh.template`. Make sure that you make -the copy executable. - -Inside `spark-env.sh`, you *must* set at least the following two variables: - -* `SCALA_HOME`, to point to your Scala installation, or `SCALA_LIBRARY_PATH` to point to the directory for Scala - library JARs (if you install Scala as a Debian or RPM package, there is no `SCALA_HOME`, but these libraries - are in a separate path, typically /usr/share/java; look for `scala-library.jar`). -* `MESOS_NATIVE_LIBRARY`, if you are [running on a Mesos cluster](running-on-mesos.html). - -In addition, there are four other variables that control execution. These should be set *in the environment that -launches the job's driver program* instead of `spark-env.sh`, because they will be automatically propagated to -workers. Setting these per-job instead of in `spark-env.sh` ensures that different jobs can have different settings -for these variables. - -* `SPARK_JAVA_OPTS`, to add JVM options. This includes any system properties that you'd like to pass with `-D`. -* `SPARK_CLASSPATH`, to add elements to Spark's classpath. -* `SPARK_LIBRARY_PATH`, to add search directories for native libraries. -* `SPARK_MEM`, to set the amount of memory used per node. This should be in the same format as the - JVM's -Xmx option, e.g. `300m` or `1g`. Note that this option will soon be deprecated in favor of - the `spark.executor.memory` system property, so we recommend using that in new code. - -Beware that if you do set these variables in `spark-env.sh`, they will override the values set by user programs, -which is undesirable; if you prefer, you can choose to have `spark-env.sh` set them only if the user program -hasn't, as follows: - -{% highlight bash %} -if [ -z "$SPARK_JAVA_OPTS" ] ; then - SPARK_JAVA_OPTS="-verbose:gc" -fi -{% endhighlight %} - # System Properties To set a system property for configuring Spark, you need to either pass it with a -D flag to the JVM (for example `java -Dspark.cores.max=5 MyProgram`) or call `System.setProperty` in your code *before* creating your Spark context, as follows: @@ -67,18 +31,18 @@ there are at least five properties that you will commonly want to control: <td>spark.executor.memory</td> <td>512m</td> <td> - Amount of memory to use per executor process, in the same format as JVM memory strings (e.g. `512m`, `2g`). + Amount of memory to use per executor process, in the same format as JVM memory strings (e.g. 
<code>512m</code>, <code>2g</code>). </td> </tr> <tr> <td>spark.serializer</td> - <td>spark.JavaSerializer</td> + <td>org.apache.spark.serializer.<br />JavaSerializer</td> <td> Class to use for serializing objects that will be sent over the network or need to be cached in serialized form. The default of Java serialization works with any Serializable Java object but is - quite slow, so we recommend <a href="tuning.html">using <code>spark.KryoSerializer</code> - and configuring Kryo serialization</a> when speed is necessary. Can be any subclass of - <a href="api/core/index.html#spark.Serializer"><code>spark.Serializer</code></a>). + quite slow, so we recommend <a href="tuning.html">using <code>org.apache.spark.serializer.KryoSerializer</code> + and configuring Kryo serialization</a> when speed is necessary. Can be any subclass of + <a href="api/core/index.html#org.apache.spark.serializer.Serializer"><code>org.apache.spark.serializer.Serializer</code></a>. </td> </tr> <tr> @@ -86,8 +50,8 @@ there are at least five properties that you will commonly want to control: <td>(none)</td> <td> If you use Kryo serialization, set this class to register your custom classes with Kryo. - You need to set it to a class that extends - <a href="api/core/index.html#spark.KryoRegistrator"><code>spark.KryoRegistrator</code></a>). + It should be set to a class that extends + <a href="api/core/index.html#org.apache.spark.serializer.KryoRegistrator"><code>KryoRegistrator</code></a>. See the <a href="tuning.html#data-serialization">tuning guide</a> for more details. </td> </tr> @@ -97,7 +61,7 @@ there are at least five properties that you will commonly want to control: <td> Directory to use for "scratch" space in Spark, including map output files and RDDs that get stored on disk. This should be on a fast, local disk in your system. It can also be a comma-separated - list of multiple directories. + list of multiple directories on different disks. </td> </tr> <tr> @@ -106,7 +70,8 @@ there are at least five properties that you will commonly want to control: <td> When running on a <a href="spark-standalone.html">standalone deploy cluster</a> or a <a href="running-on-mesos.html#mesos-run-modes">Mesos cluster in "coarse-grained" - sharing mode</a>, how many CPU cores to request at most. The default will use all available cores. + sharing mode</a>, how many CPU cores to request at most. The default will use all available cores + offered by the cluster manager. </td> </tr> </table> @@ -146,7 +111,7 @@ Apart from these, the following properties are also available, and may be useful </tr> <tr> <td>spark.ui.port</td> - <td>33000</td> + <td>3030</td> <td> Port for your application's dashboard, which shows memory and workload data </td> @@ -181,6 +146,21 @@ Apart from these, the following properties are also available, and may be useful </td> </tr> <tr> + <td>spark.io.compression.codec</td> + <td>org.apache.spark.io.<br />SnappyCompressionCodec</td> + <td> + The compression codec class to use for various compressions. By default, Spark provides two + codecs: <code>org.apache.spark.io.LZFCompressionCodec</code> and <code>org.apache.spark.io.SnappyCompressionCodec</code>. + </td> +</tr> +<tr> + <td>spark.io.compression.snappy.block.size</td> + <td>32768</td> + <td> + Block size (in bytes) used in Snappy compression, in the case when Snappy compression codec is used.
+ </td> +</tr> +<tr> <td>spark.reducer.maxMbInFlight</td> <td>48</td> <td> @@ -191,7 +171,7 @@ Apart from these, the following properties are also available, and may be useful </tr> <tr> <td>spark.closure.serializer</td> - <td>spark.JavaSerializer</td> + <td>org.apache.spark.serializer.<br />JavaSerializer</td> <td> Serializer class to use for closures. Generally Java is fine unless your distributed functions (e.g. map functions) reference large objects in the driver program. @@ -218,7 +198,7 @@ Apart from these, the following properties are also available, and may be useful </tr> <tr> <td>spark.broadcast.factory</td> - <td>spark.broadcast.HttpBroadcastFactory</td> + <td>org.apache.spark.broadcast.<br />HttpBroadcastFactory</td> <td> Which broadcast implementation to use. </td> @@ -228,8 +208,34 @@ Apart from these, the following properties are also available, and may be useful <td>3000</td> <td> Number of milliseconds to wait to launch a data-local task before giving up and launching it - in a non-data-local location. You should increase this if your tasks are long and you are seeing - poor data locality, but the default generally works well. + on a less-local node. The same wait will be used to step through multiple locality levels + (process-local, node-local, rack-local and then any). It is also possible to customize the + waiting time for each level by setting <code>spark.locality.wait.node</code>, etc. + You should increase this setting if your tasks are long and see poor locality, but the + default usually works well. + </td> +</tr> +<tr> + <td>spark.locality.wait.process</td> + <td>spark.locality.wait</td> + <td> + Customize the locality wait for process locality. This affects tasks that attempt to access + cached data in a particular executor process. + </td> +</tr> +<tr> + <td>spark.locality.wait.node</td> + <td>spark.locality.wait</td> + <td> + Customize the locality wait for node locality. For example, you can set this to 0 to skip + node locality and search immediately for rack locality (if your cluster has rack information). + </td> +</tr> +<tr> + <td>spark.locality.wait.rack</td> + <td>spark.locality.wait</td> + <td> + Customize the locality wait for rack locality. </td> </tr> <tr> @@ -280,7 +286,7 @@ Apart from these, the following properties are also available, and may be useful </tr> <tr> <td>spark.cleaner.ttl</td> - <td>(disable)</td> + <td>(infinite)</td> <td> Duration (seconds) of how long Spark will remember any metadata (stages generated, tasks generated, etc.). Periodic cleanups will ensure that metadata older than this duration will be forgotten. This is @@ -295,9 +301,43 @@ Apart from these, the following properties are also available, and may be useful Duration (milliseconds) of how long to batch new objects coming from network receivers. </td> </tr> +<tr> + <td>spark.task.maxFailures</td> + <td>4</td> + <td> + Number of individual task failures before giving up on the job. + Should be greater than or equal to 1. Number of allowed retries = this value - 1. + </td> +</tr> </table> +# Environment Variables + +Certain Spark settings can also be configured through environment variables, which are read from the `conf/spark-env.sh` +script in the directory where Spark is installed. These variables are meant to be for machine-specific settings, such +as library search paths.
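(A programmatic recap of the system-properties table above; a sketch, with a hypothetical registrator class, that must run before the SparkContext is created:)

{% highlight scala %}
import org.apache.spark.SparkContext

// Property names are from the table above; values are illustrative.
System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
System.setProperty("spark.kryo.registrator", "com.example.MyRegistrator") // hypothetical class
System.setProperty("spark.locality.wait", "3000")   // base locality wait, in milliseconds
System.setProperty("spark.locality.wait.node", "0") // skip node locality, go straight to rack
val sc = new SparkContext("local", "app")
{% endhighlight %}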
While Java system properties can also be set here, for application settings, we recommend setting +these properties within the application instead of in `spark-env.sh` so that different applications can use different +settings. + +Note that `conf/spark-env.sh` does not exist by default when Spark is installed. However, you can copy +`conf/spark-env.sh.template` to create it. Make sure you make the copy executable. + +The following variables can be set in `spark-env.sh`: + +* `SPARK_LOCAL_IP`, to configure which IP address of the machine to bind to. +* `SPARK_LIBRARY_PATH`, to add search directories for native libraries. +* `SPARK_CLASSPATH`, to add elements to Spark's classpath that you want to be present for _all_ applications. + Note that applications can also add dependencies for themselves through `SparkContext.addJar` -- we recommend + doing that when possible. +* `SPARK_JAVA_OPTS`, to add JVM options. This includes Java options like garbage collector settings and any system + properties that you'd like to pass with `-D` (e.g., `-Dspark.local.dir=/disk1,/disk2`). +* Options for the Spark [standalone cluster scripts](spark-standalone.html#cluster-launch-scripts), such as number of cores + to use on each machine and maximum memory. + +Since `spark-env.sh` is a shell script, some of these can be set programmatically -- for example, you might +compute `SPARK_LOCAL_IP` by looking up the IP of a specific network interface. + # Configuring Logging Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a `log4j.properties` diff --git a/docs/css/bootstrap.min.css b/docs/css/bootstrap.min.css index eb48138e08..3444ed79e2 100644 --- a/docs/css/bootstrap.min.css +++ b/docs/css/bootstrap.min.css @@ -6,4 +6,4 @@ * http://www.apache.org/licenses/LICENSE-2.0 * * Designed and built with all the love in the world @twitter by @mdo and @fat. 
+
 # Configuring Logging
 
 Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a `log4j.properties`
diff --git a/docs/css/bootstrap.min.css b/docs/css/bootstrap.min.css
index eb48138e08..3444ed79e2 100644
--- a/docs/css/bootstrap.min.css
+++ b/docs/css/bootstrap.min.css
@@ -6,4 +6,4 @@
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Designed and built with all the love in the world @twitter by @mdo and @fat.
 [bootstrap.min.css hunk: the file's single minified Bootstrap stylesheet line is swapped out, apparently to restyle the generated docs (for example, the dropdown highlight gradient becomes #0098cc to #0087b3 and the navbar gradient #fff to #e2f2e2); the -/+ pair of minified CSS lines is omitted here as unreadable generated content.]
rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1);box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1)}.navbar .nav{position:relative;left:0;display:block;float:left;margin:0 10px 0 0}.navbar .nav.pull-right{float:right}.navbar .nav>li{float:left}.navbar .nav>li>a{float:none;padding:10px 15px 10px;color:#555;text-decoration:none;text-shadow:0 1px 0 #fff}.navbar .nav .dropdown-toggle .caret{margin-top:8px}.navbar .nav>li>a:focus,.navbar .nav>li>a:hover{color:#333;text-decoration:none;background-color:transparent}.navbar .nav>.active>a,.navbar .nav>.active>a:hover,.navbar .nav>.active>a:focus{color:#555;text-decoration:none;background-color:#e5e5e5;-webkit-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);-moz-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);box-shadow:inset 0 3px 8px rgba(0,0,0,0.125)}.navbar .btn-navbar{display:none;float:right;padding:7px 10px;margin-right:5px;margin-left:5px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#ededed;*background-color:#e5e5e5;background-image:-webkit-gradient(linear,0 0,0 100%,from(#f2f2f2),to(#e5e5e5));background-image:-webkit-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:-o-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:linear-gradient(to bottom,#f2f2f2,#e5e5e5);background-image:-moz-linear-gradient(top,#f2f2f2,#e5e5e5);background-repeat:repeat-x;border-color:#e5e5e5 #e5e5e5 #bfbfbf;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fff2f2f2',endColorstr='#ffe5e5e5',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075)}.navbar .btn-navbar:hover,.navbar .btn-navbar:active,.navbar .btn-navbar.active,.navbar .btn-navbar.disabled,.navbar .btn-navbar[disabled]{color:#fff;background-color:#e5e5e5;*background-color:#d9d9d9}.navbar .btn-navbar:active,.navbar .btn-navbar.active{background-color:#ccc \9}.navbar .btn-navbar .icon-bar{display:block;width:18px;height:2px;background-color:#f5f5f5;-webkit-border-radius:1px;-moz-border-radius:1px;border-radius:1px;-webkit-box-shadow:0 1px 0 rgba(0,0,0,0.25);-moz-box-shadow:0 1px 0 rgba(0,0,0,0.25);box-shadow:0 1px 0 rgba(0,0,0,0.25)}.btn-navbar .icon-bar+.icon-bar{margin-top:3px}.navbar .nav>li>.dropdown-menu:before{position:absolute;top:-7px;left:9px;display:inline-block;border-right:7px solid transparent;border-bottom:7px solid #ccc;border-left:7px solid transparent;border-bottom-color:rgba(0,0,0,0.2);content:''}.navbar .nav>li>.dropdown-menu:after{position:absolute;top:-6px;left:10px;display:inline-block;border-right:6px solid transparent;border-bottom:6px solid #fff;border-left:6px solid transparent;content:''}.navbar-fixed-bottom .nav>li>.dropdown-menu:before{top:auto;bottom:-7px;border-top:7px solid #ccc;border-bottom:0;border-top-color:rgba(0,0,0,0.2)}.navbar-fixed-bottom .nav>li>.dropdown-menu:after{top:auto;bottom:-6px;border-top:6px solid #fff;border-bottom:0}.navbar .nav li.dropdown.open>.dropdown-toggle,.navbar .nav li.dropdown.active>.dropdown-toggle,.navbar .nav li.dropdown.open.active>.dropdown-toggle{color:#555;background-color:#e5e5e5}.navbar .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .nav 
li.dropdown.open>.dropdown-toggle .caret,.navbar .nav li.dropdown.active>.dropdown-toggle .caret,.navbar .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .pull-right>li>.dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right{right:0;left:auto}.navbar .pull-right>li>.dropdown-menu:before,.navbar .nav>li>.dropdown-menu.pull-right:before{right:12px;left:auto}.navbar .pull-right>li>.dropdown-menu:after,.navbar .nav>li>.dropdown-menu.pull-right:after{right:13px;left:auto}.navbar .pull-right>li>.dropdown-menu .dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right .dropdown-menu{right:100%;left:auto;margin-right:-1px;margin-left:0;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px}.navbar-inverse{color:#999}.navbar-inverse .navbar-inner{background-color:#1b1b1b;background-image:-moz-linear-gradient(top,#222,#111);background-image:-webkit-gradient(linear,0 0,0 100%,from(#222),to(#111));background-image:-webkit-linear-gradient(top,#222,#111);background-image:-o-linear-gradient(top,#222,#111);background-image:linear-gradient(to bottom,#222,#111);background-repeat:repeat-x;border-color:#252525;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff222222',endColorstr='#ff111111',GradientType=0)}.navbar-inverse .brand,.navbar-inverse .nav>li>a{color:#999;text-shadow:0 -1px 0 rgba(0,0,0,0.25)}.navbar-inverse .brand:hover,.navbar-inverse .nav>li>a:hover{color:#fff}.navbar-inverse .nav>li>a:focus,.navbar-inverse .nav>li>a:hover{color:#fff;background-color:transparent}.navbar-inverse .nav .active>a,.navbar-inverse .nav .active>a:hover,.navbar-inverse .nav .active>a:focus{color:#fff;background-color:#111}.navbar-inverse .navbar-link{color:#999}.navbar-inverse .navbar-link:hover{color:#fff}.navbar-inverse .divider-vertical{border-right-color:#222;border-left-color:#111}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle{color:#fff;background-color:#111}.navbar-inverse .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#999;border-bottom-color:#999}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.navbar-inverse .navbar-search .search-query{color:#fff;background-color:#515151;border-color:#111;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-webkit-transition:none;-moz-transition:none;-o-transition:none;transition:none}.navbar-inverse .navbar-search .search-query:-moz-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:-ms-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query::-webkit-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:focus,.navbar-inverse .navbar-search .search-query.focused{padding:5px 15px;color:#333;text-shadow:0 1px 0 #fff;background-color:#fff;border:0;outline:0;-webkit-box-shadow:0 0 3px rgba(0,0,0,0.15);-moz-box-shadow:0 0 3px rgba(0,0,0,0.15);box-shadow:0 0 3px rgba(0,0,0,0.15)}.navbar-inverse .btn-navbar{color:#fff;text-shadow:0 -1px 0 
rgba(0,0,0,0.25);background-color:#0e0e0e;*background-color:#040404;background-image:-webkit-gradient(linear,0 0,0 100%,from(#151515),to(#040404));background-image:-webkit-linear-gradient(top,#151515,#040404);background-image:-o-linear-gradient(top,#151515,#040404);background-image:linear-gradient(to bottom,#151515,#040404);background-image:-moz-linear-gradient(top,#151515,#040404);background-repeat:repeat-x;border-color:#040404 #040404 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff151515',endColorstr='#ff040404',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.navbar-inverse .btn-navbar:hover,.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active,.navbar-inverse .btn-navbar.disabled,.navbar-inverse .btn-navbar[disabled]{color:#fff;background-color:#040404;*background-color:#000}.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active{background-color:#000 \9}.breadcrumb{padding:8px 15px;margin:0 0 20px;list-style:none;background-color:#f5f5f5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.breadcrumb li{display:inline-block;*display:inline;text-shadow:0 1px 0 #fff;*zoom:1}.breadcrumb .divider{padding:0 5px;color:#ccc}.breadcrumb .active{color:#999}.pagination{height:40px;margin:20px 0}.pagination ul{display:inline-block;*display:inline;margin-bottom:0;margin-left:0;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;*zoom:1;-webkit-box-shadow:0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:0 1px 2px rgba(0,0,0,0.05);box-shadow:0 1px 2px rgba(0,0,0,0.05)}.pagination li{display:inline}.pagination a,.pagination span{float:left;padding:0 14px;line-height:38px;text-decoration:none;background-color:#fff;border:1px solid #ddd;border-left-width:0}.pagination a:hover,.pagination .active a,.pagination .active span{background-color:#f5f5f5}.pagination .active a,.pagination .active span{color:#999;cursor:default}.pagination .disabled span,.pagination .disabled a,.pagination .disabled a:hover{color:#999;cursor:default;background-color:transparent}.pagination li:first-child a,.pagination li:first-child span{border-left-width:1px;-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.pagination li:last-child a,.pagination li:last-child span{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.pagination-centered{text-align:center}.pagination-right{text-align:right}.pager{margin:20px 0;text-align:center;list-style:none;*zoom:1}.pager:before,.pager:after{display:table;line-height:0;content:""}.pager:after{clear:both}.pager li{display:inline}.pager a{display:inline-block;padding:5px 14px;background-color:#fff;border:1px solid #ddd;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.pager a:hover{text-decoration:none;background-color:#f5f5f5}.pager .next a{float:right}.pager .previous a{float:left}.pager .disabled a,.pager .disabled a:hover{color:#999;cursor:default;background-color:#fff}.modal-open .dropdown-menu{z-index:2050}.modal-open .dropdown.open{*z-index:2050}.modal-open .popover{z-index:2060}.modal-open .tooltip{z-index:2080}.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000}.modal-backdrop.fade{opacity:0}.modal-backdrop,.modal-backdrop.fade.in{opacity:.8;filter:alpha(opacity=80)}.modal{position:fixed;top:50%;left:50%;z-index:1050;width:560px;margin:-250px 0 0 
-280px;overflow:auto;background-color:#fff;border:1px solid #999;border:1px solid rgba(0,0,0,0.3);*border:1px solid #999;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 3px 7px rgba(0,0,0,0.3);-moz-box-shadow:0 3px 7px rgba(0,0,0,0.3);box-shadow:0 3px 7px rgba(0,0,0,0.3);-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box}.modal.fade{top:-25%;-webkit-transition:opacity .3s linear,top .3s ease-out;-moz-transition:opacity .3s linear,top .3s ease-out;-o-transition:opacity .3s linear,top .3s ease-out;transition:opacity .3s linear,top .3s ease-out}.modal.fade.in{top:50%}.modal-header{padding:9px 15px;border-bottom:1px solid #eee}.modal-header .close{margin-top:2px}.modal-header h3{margin:0;line-height:30px}.modal-body{max-height:400px;padding:15px;overflow-y:auto}.modal-form{margin-bottom:0}.modal-footer{padding:14px 15px 15px;margin-bottom:0;text-align:right;background-color:#f5f5f5;border-top:1px solid #ddd;-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;*zoom:1;-webkit-box-shadow:inset 0 1px 0 #fff;-moz-box-shadow:inset 0 1px 0 #fff;box-shadow:inset 0 1px 0 #fff}.modal-footer:before,.modal-footer:after{display:table;line-height:0;content:""}.modal-footer:after{clear:both}.modal-footer .btn+.btn{margin-bottom:0;margin-left:5px}.modal-footer .btn-group .btn+.btn{margin-left:-1px}.tooltip{position:absolute;z-index:1030;display:block;padding:5px;font-size:11px;opacity:0;filter:alpha(opacity=0);visibility:visible}.tooltip.in{opacity:.8;filter:alpha(opacity=80)}.tooltip.top{margin-top:-3px}.tooltip.right{margin-left:3px}.tooltip.bottom{margin-top:3px}.tooltip.left{margin-left:-3px}.tooltip-inner{max-width:200px;padding:3px 8px;color:#fff;text-align:center;text-decoration:none;background-color:#000;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.tooltip-arrow{position:absolute;width:0;height:0;border-color:transparent;border-style:solid}.tooltip.top .tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-top-color:#000;border-width:5px 5px 0}.tooltip.right .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-right-color:#000;border-width:5px 5px 5px 0}.tooltip.left .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-left-color:#000;border-width:5px 0 5px 5px}.tooltip.bottom .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-bottom-color:#000;border-width:0 5px 5px}.popover{position:absolute;top:0;left:0;z-index:1010;display:none;width:236px;padding:1px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.popover.top{margin-bottom:10px}.popover.right{margin-left:10px}.popover.bottom{margin-top:10px}.popover.left{margin-right:10px}.popover-title{padding:8px 14px;margin:0;font-size:14px;font-weight:normal;line-height:18px;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;-webkit-border-radius:5px 5px 0 0;-moz-border-radius:5px 5px 0 0;border-radius:5px 5px 0 0}.popover-content{padding:9px 14px}.popover-content p,.popover-content ul,.popover-content ol{margin-bottom:0}.popover .arrow,.popover .arrow:after{position:absolute;display:inline-block;width:0;height:0;border-color:transparent;border-style:solid}.popover 
.arrow:after{z-index:-1;content:""}.popover.top .arrow{bottom:-10px;left:50%;margin-left:-10px;border-top-color:#fff;border-width:10px 10px 0}.popover.top .arrow:after{bottom:-1px;left:-11px;border-top-color:rgba(0,0,0,0.25);border-width:11px 11px 0}.popover.right .arrow{top:50%;left:-10px;margin-top:-10px;border-right-color:#fff;border-width:10px 10px 10px 0}.popover.right .arrow:after{bottom:-11px;left:-1px;border-right-color:rgba(0,0,0,0.25);border-width:11px 11px 11px 0}.popover.bottom .arrow{top:-10px;left:50%;margin-left:-10px;border-bottom-color:#fff;border-width:0 10px 10px}.popover.bottom .arrow:after{top:-1px;left:-11px;border-bottom-color:rgba(0,0,0,0.25);border-width:0 11px 11px}.popover.left .arrow{top:50%;right:-10px;margin-top:-10px;border-left-color:#fff;border-width:10px 0 10px 10px}.popover.left .arrow:after{right:-1px;bottom:-11px;border-left-color:rgba(0,0,0,0.25);border-width:11px 0 11px 11px}.thumbnails{margin-left:-20px;list-style:none;*zoom:1}.thumbnails:before,.thumbnails:after{display:table;line-height:0;content:""}.thumbnails:after{clear:both}.row-fluid .thumbnails{margin-left:0}.thumbnails>li{float:left;margin-bottom:20px;margin-left:20px}.thumbnail{display:block;padding:4px;line-height:20px;border:1px solid #ddd;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.055);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.055);box-shadow:0 1px 3px rgba(0,0,0,0.055);-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;-o-transition:all .2s ease-in-out;transition:all .2s ease-in-out}a.thumbnail:hover{border-color:#08c;-webkit-box-shadow:0 1px 4px rgba(0,105,214,0.25);-moz-box-shadow:0 1px 4px rgba(0,105,214,0.25);box-shadow:0 1px 4px rgba(0,105,214,0.25)}.thumbnail>img{display:block;max-width:100%;margin-right:auto;margin-left:auto}.thumbnail .caption{padding:9px;color:#555}.label,.badge{font-size:11.844px;font-weight:bold;line-height:14px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);white-space:nowrap;vertical-align:baseline;background-color:#999}.label{padding:1px 4px 2px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.badge{padding:1px 9px 2px;-webkit-border-radius:9px;-moz-border-radius:9px;border-radius:9px}a.label:hover,a.badge:hover{color:#fff;text-decoration:none;cursor:pointer}.label-important,.badge-important{background-color:#b94a48}.label-important[href],.badge-important[href]{background-color:#953b39}.label-warning,.badge-warning{background-color:#f89406}.label-warning[href],.badge-warning[href]{background-color:#c67605}.label-success,.badge-success{background-color:#468847}.label-success[href],.badge-success[href]{background-color:#356635}.label-info,.badge-info{background-color:#3a87ad}.label-info[href],.badge-info[href]{background-color:#2d6987}.label-inverse,.badge-inverse{background-color:#333}.label-inverse[href],.badge-inverse[href]{background-color:#1a1a1a}.btn .label,.btn .badge{position:relative;top:-1px}.btn-mini .label,.btn-mini .badge{top:0}@-webkit-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-moz-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-ms-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-o-keyframes progress-bar-stripes{from{background-position:0 0}to{background-position:40px 0}}@keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 
0}}.progress{height:20px;margin-bottom:20px;overflow:hidden;background-color:#f7f7f7;background-image:-moz-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#f5f5f5),to(#f9f9f9));background-image:-webkit-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-o-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:linear-gradient(to bottom,#f5f5f5,#f9f9f9);background-repeat:repeat-x;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fff5f5f5',endColorstr='#fff9f9f9',GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1)}.progress .bar{float:left;width:0;height:100%;font-size:12px;color:#fff;text-align:center;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e90d2;background-image:-moz-linear-gradient(top,#149bdf,#0480be);background-image:-webkit-gradient(linear,0 0,0 100%,from(#149bdf),to(#0480be));background-image:-webkit-linear-gradient(top,#149bdf,#0480be);background-image:-o-linear-gradient(top,#149bdf,#0480be);background-image:linear-gradient(to bottom,#149bdf,#0480be);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff149bdf',endColorstr='#ff0480be',GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;-webkit-transition:width .6s ease;-moz-transition:width .6s ease;-o-transition:width .6s ease;transition:width .6s ease}.progress .bar+.bar{-webkit-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15)}.progress-striped .bar{background-color:#149bdf;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);-webkit-background-size:40px 40px;-moz-background-size:40px 40px;-o-background-size:40px 40px;background-size:40px 40px}.progress.active .bar{-webkit-animation:progress-bar-stripes 2s linear infinite;-moz-animation:progress-bar-stripes 2s linear infinite;-ms-animation:progress-bar-stripes 2s linear infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite}.progress-danger .bar,.progress 
.bar-danger{background-color:#dd514c;background-image:-moz-linear-gradient(top,#ee5f5b,#c43c35);background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#c43c35));background-image:-webkit-linear-gradient(top,#ee5f5b,#c43c35);background-image:-o-linear-gradient(top,#ee5f5b,#c43c35);background-image:linear-gradient(to bottom,#ee5f5b,#c43c35);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffc43c35',GradientType=0)}.progress-danger.progress-striped .bar,.progress-striped .bar-danger{background-color:#ee5f5b;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-success .bar,.progress .bar-success{background-color:#5eb95e;background-image:-moz-linear-gradient(top,#62c462,#57a957);background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#57a957));background-image:-webkit-linear-gradient(top,#62c462,#57a957);background-image:-o-linear-gradient(top,#62c462,#57a957);background-image:linear-gradient(to bottom,#62c462,#57a957);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff57a957',GradientType=0)}.progress-success.progress-striped .bar,.progress-striped .bar-success{background-color:#62c462;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-info .bar,.progress .bar-info{background-color:#4bb1cf;background-image:-moz-linear-gradient(top,#5bc0de,#339bb9);background-image:-webkit-gradient(linear,0 0,0 
100%,from(#5bc0de),to(#339bb9));background-image:-webkit-linear-gradient(top,#5bc0de,#339bb9);background-image:-o-linear-gradient(top,#5bc0de,#339bb9);background-image:linear-gradient(to bottom,#5bc0de,#339bb9);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff339bb9',GradientType=0)}.progress-info.progress-striped .bar,.progress-striped .bar-info{background-color:#5bc0de;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-warning .bar,.progress .bar-warning{background-color:#faa732;background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0)}.progress-warning.progress-striped .bar,.progress-striped .bar-warning{background-color:#fbb450;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.accordion{margin-bottom:20px}.accordion-group{margin-bottom:2px;border:1px solid #e5e5e5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.accordion-heading{border-bottom:0}.accordion-heading .accordion-toggle{display:block;padding:8px 15px}.accordion-toggle{cursor:pointer}.accordion-inner{padding:9px 15px;border-top:1px solid 
#e5e5e5}.carousel{position:relative;margin-bottom:20px;line-height:1}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel .item{position:relative;display:none;-webkit-transition:.6s ease-in-out left;-moz-transition:.6s ease-in-out left;-o-transition:.6s ease-in-out left;transition:.6s ease-in-out left}.carousel .item>img{display:block;line-height:1}.carousel .active,.carousel .next,.carousel .prev{display:block}.carousel .active{left:0}.carousel .next,.carousel .prev{position:absolute;top:0;width:100%}.carousel .next{left:100%}.carousel .prev{left:-100%}.carousel .next.left,.carousel .prev.right{left:0}.carousel .active.left{left:-100%}.carousel .active.right{left:100%}.carousel-control{position:absolute;top:40%;left:15px;width:40px;height:40px;margin-top:-20px;font-size:60px;font-weight:100;line-height:30px;color:#fff;text-align:center;background:#222;border:3px solid #fff;-webkit-border-radius:23px;-moz-border-radius:23px;border-radius:23px;opacity:.5;filter:alpha(opacity=50)}.carousel-control.right{right:15px;left:auto}.carousel-control:hover{color:#fff;text-decoration:none;opacity:.9;filter:alpha(opacity=90)}.carousel-caption{position:absolute;right:0;bottom:0;left:0;padding:15px;background:#333;background:rgba(0,0,0,0.75)}.carousel-caption h4,.carousel-caption p{line-height:20px;color:#fff}.carousel-caption h4{margin:0 0 5px}.carousel-caption p{margin-bottom:0}.hero-unit{padding:60px;margin-bottom:30px;background-color:#eee;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.hero-unit h1{margin-bottom:0;font-size:60px;line-height:1;letter-spacing:-1px;color:inherit}.hero-unit p{font-size:18px;font-weight:200;line-height:30px;color:inherit}.pull-right{float:right}.pull-left{float:left}.hide{display:none}.show{display:block}.invisible{visibility:hidden}.affix{position:fixed}
article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}audio:not([controls]){display:none}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}a:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}a:hover,a:active{outline:0}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sup{top:-0.5em}sub{bottom:-0.25em}img{height:auto;max-width:100%;vertical-align:middle;border:0;-ms-interpolation-mode:bicubic}#map_canvas img{max-width:none}button,input,select,textarea{margin:0;font-size:100%;vertical-align:middle}button,input{*overflow:visible;line-height:normal}button::-moz-focus-inner,input::-moz-focus-inner{padding:0;border:0}button,input[type="button"],input[type="reset"],input[type="submit"]{cursor:pointer;-webkit-appearance:button}input[type="search"]{-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;-webkit-appearance:textfield}input[type="search"]::-webkit-search-decoration,input[type="search"]::-webkit-search-cancel-button{-webkit-appearance:none}textarea{overflow:auto;vertical-align:top}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;line-height:0;content:""}.clearfix:after{clear:both}.hide-text{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.input-block-level{display:block;width:100%;min-height:30px;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}body{margin:0;font-family:"Helvetica 
Neue",Helvetica,Arial,sans-serif;font-size:14px;line-height:20px;color:#333;background-color:#fff}a{color:#08c;text-decoration:none}a:hover{color:#005580;text-decoration:underline}.img-rounded{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.img-polaroid{padding:4px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.1);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.1);box-shadow:0 1px 3px rgba(0,0,0,0.1)}.img-circle{-webkit-border-radius:500px;-moz-border-radius:500px;border-radius:500px}.row{margin-left:-20px;*zoom:1}.row:before,.row:after{display:table;line-height:0;content:""}.row:after{clear:both}[class*="span"]{float:left;margin-left:20px}.container,.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.span12{width:940px}.span11{width:860px}.span10{width:780px}.span9{width:700px}.span8{width:620px}.span7{width:540px}.span6{width:460px}.span5{width:380px}.span4{width:300px}.span3{width:220px}.span2{width:140px}.span1{width:60px}.offset12{margin-left:980px}.offset11{margin-left:900px}.offset10{margin-left:820px}.offset9{margin-left:740px}.offset8{margin-left:660px}.offset7{margin-left:580px}.offset6{margin-left:500px}.offset5{margin-left:420px}.offset4{margin-left:340px}.offset3{margin-left:260px}.offset2{margin-left:180px}.offset1{margin-left:100px}.row-fluid{width:100%;*zoom:1}.row-fluid:before,.row-fluid:after{display:table;line-height:0;content:""}.row-fluid:after{clear:both}.row-fluid [class*="span"]{display:block;float:left;width:100%;min-height:30px;margin-left:2.127659574468085%;*margin-left:2.074468085106383%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.row-fluid [class*="span"]:first-child{margin-left:0}.row-fluid .span12{width:100%;*width:99.94680851063829%}.row-fluid .span11{width:91.48936170212765%;*width:91.43617021276594%}.row-fluid .span10{width:82.97872340425532%;*width:82.92553191489361%}.row-fluid .span9{width:74.46808510638297%;*width:74.41489361702126%}.row-fluid .span8{width:65.95744680851064%;*width:65.90425531914893%}.row-fluid .span7{width:57.44680851063829%;*width:57.39361702127659%}.row-fluid .span6{width:48.93617021276595%;*width:48.88297872340425%}.row-fluid .span5{width:40.42553191489362%;*width:40.37234042553192%}.row-fluid .span4{width:31.914893617021278%;*width:31.861702127659576%}.row-fluid .span3{width:23.404255319148934%;*width:23.351063829787233%}.row-fluid .span2{width:14.893617021276595%;*width:14.840425531914894%}.row-fluid .span1{width:6.382978723404255%;*width:6.329787234042553%}.row-fluid .offset12{margin-left:104.25531914893617%;*margin-left:104.14893617021275%}.row-fluid .offset12:first-child{margin-left:102.12765957446808%;*margin-left:102.02127659574467%}.row-fluid .offset11{margin-left:95.74468085106382%;*margin-left:95.6382978723404%}.row-fluid .offset11:first-child{margin-left:93.61702127659574%;*margin-left:93.51063829787232%}.row-fluid .offset10{margin-left:87.23404255319149%;*margin-left:87.12765957446807%}.row-fluid .offset10:first-child{margin-left:85.1063829787234%;*margin-left:84.99999999999999%}.row-fluid .offset9{margin-left:78.72340425531914%;*margin-left:78.61702127659572%}.row-fluid .offset9:first-child{margin-left:76.59574468085106%;*margin-left:76.48936170212764%}.row-fluid .offset8{margin-left:70.2127659574468%;*margin-left:70.10638297872339%}.row-fluid .offset8:first-child{margin-left:68.08510638297872%;*margin-left:67.9787234042553%}.row-fluid 
.offset7{margin-left:61.70212765957446%;*margin-left:61.59574468085106%}.row-fluid .offset7:first-child{margin-left:59.574468085106375%;*margin-left:59.46808510638297%}.row-fluid .offset6{margin-left:53.191489361702125%;*margin-left:53.085106382978715%}.row-fluid .offset6:first-child{margin-left:51.063829787234035%;*margin-left:50.95744680851063%}.row-fluid .offset5{margin-left:44.68085106382979%;*margin-left:44.57446808510638%}.row-fluid .offset5:first-child{margin-left:42.5531914893617%;*margin-left:42.4468085106383%}.row-fluid .offset4{margin-left:36.170212765957444%;*margin-left:36.06382978723405%}.row-fluid .offset4:first-child{margin-left:34.04255319148936%;*margin-left:33.93617021276596%}.row-fluid .offset3{margin-left:27.659574468085104%;*margin-left:27.5531914893617%}.row-fluid .offset3:first-child{margin-left:25.53191489361702%;*margin-left:25.425531914893618%}.row-fluid .offset2{margin-left:19.148936170212764%;*margin-left:19.04255319148936%}.row-fluid .offset2:first-child{margin-left:17.02127659574468%;*margin-left:16.914893617021278%}.row-fluid .offset1{margin-left:10.638297872340425%;*margin-left:10.53191489361702%}.row-fluid .offset1:first-child{margin-left:8.51063829787234%;*margin-left:8.404255319148938%}[class*="span"].hide,.row-fluid [class*="span"].hide{display:none}[class*="span"].pull-right,.row-fluid [class*="span"].pull-right{float:right}.container{margin-right:auto;margin-left:auto;*zoom:1}.container:before,.container:after{display:table;line-height:0;content:""}.container:after{clear:both}.container-fluid{padding-right:20px;padding-left:20px;*zoom:1}.container-fluid:before,.container-fluid:after{display:table;line-height:0;content:""}.container-fluid:after{clear:both}p{margin:0 0 10px}.lead{margin-bottom:20px;font-size:20px;font-weight:200;line-height:30px}small{font-size:85%}strong{font-weight:bold}em{font-style:italic}cite{font-style:normal}.muted{color:#999}h1,h2,h3,h4,h5,h6{margin:10px 0;font-family:inherit;font-weight:bold;line-height:1;color:inherit;text-rendering:optimizelegibility}h1 small,h2 small,h3 small,h4 small,h5 small,h6 small{font-weight:normal;line-height:1;color:#999}h1{font-size:36px;line-height:40px}h2{font-size:30px;line-height:40px}h3{font-size:24px;line-height:40px}h4{font-size:18px;line-height:20px}h5{font-size:14px;line-height:20px}h6{font-size:12px;line-height:20px}h1 small{font-size:24px}h2 small{font-size:18px}h3 small{font-size:14px}h4 small{font-size:14px}.page-header{padding-bottom:9px;margin:20px 0 30px;border-bottom:1px solid #eee}ul,ol{padding:0;margin:0 0 10px 25px}ul ul,ul ol,ol ol,ol ul{margin-bottom:0}li{line-height:20px}ul.unstyled,ol.unstyled{margin-left:0;list-style:none}dl{margin-bottom:20px}dt,dd{line-height:20px}dt{font-weight:bold}dd{margin-left:10px}.dl-horizontal dt{float:left;width:120px;overflow:hidden;clear:left;text-align:right;text-overflow:ellipsis;white-space:nowrap}.dl-horizontal dd{margin-left:130px}hr{margin:20px 0;border:0;border-top:1px solid #eee;border-bottom:1px solid #fff}abbr[title]{cursor:help;border-bottom:1px dotted #999}abbr.initialism{font-size:90%;text-transform:uppercase}blockquote{padding:0 0 0 15px;margin:0 0 20px;border-left:5px solid #eee}blockquote p{margin-bottom:0;font-size:16px;font-weight:300;line-height:25px}blockquote small{display:block;line-height:20px;color:#999}blockquote small:before{content:'\2014 \00A0'}blockquote.pull-right{float:right;padding-right:15px;padding-left:0;border-right:5px solid #eee;border-left:0}blockquote.pull-right p,blockquote.pull-right 
small{text-align:right}blockquote.pull-right small:before{content:''}blockquote.pull-right small:after{content:'\00A0 \2014'}q:before,q:after,blockquote:before,blockquote:after{content:""}address{display:block;margin-bottom:20px;font-style:normal;line-height:20px}code,pre{padding:0 3px 2px;font-family:Monaco,Menlo,Consolas,"Courier New",monospace;font-size:12px;color:#333;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}code{padding:2px 4px;color:#d14;background-color:#f7f7f9;border:1px solid #e1e1e8}pre{display:block;padding:9.5px;margin:0 0 10px;font-size:13px;line-height:20px;word-break:break-all;word-wrap:break-word;white-space:pre;white-space:pre-wrap;background-color:#f5f5f5;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.15);-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}pre.prettyprint{margin-bottom:20px}pre code{padding:0;color:inherit;background-color:transparent;border:0}.pre-scrollable{max-height:340px;overflow-y:scroll}form{margin:0 0 20px}fieldset{padding:0;margin:0;border:0}legend{display:block;width:100%;padding:0;margin-bottom:20px;font-size:21px;line-height:40px;color:#333;border:0;border-bottom:1px solid #e5e5e5}legend small{font-size:15px;color:#999}label,input,button,select,textarea{font-size:14px;font-weight:normal;line-height:20px}input,button,select,textarea{font-family:"Helvetica Neue",Helvetica,Arial,sans-serif}label{display:block;margin-bottom:5px}select,textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{display:inline-block;height:20px;padding:4px 6px;margin-bottom:9px;font-size:14px;line-height:20px;color:#555;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}input,textarea{width:210px}textarea{height:auto}textarea,input[type="text"],input[type="password"],input[type="datetime"],input[type="datetime-local"],input[type="date"],input[type="month"],input[type="time"],input[type="week"],input[type="number"],input[type="email"],input[type="url"],input[type="search"],input[type="tel"],input[type="color"],.uneditable-input{background-color:#fff;border:1px solid #ccc;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-webkit-transition:border linear .2s,box-shadow linear .2s;-moz-transition:border linear .2s,box-shadow linear .2s;-o-transition:border linear .2s,box-shadow linear .2s;transition:border linear .2s,box-shadow linear .2s}textarea:focus,input[type="text"]:focus,input[type="password"]:focus,input[type="datetime"]:focus,input[type="datetime-local"]:focus,input[type="date"]:focus,input[type="month"]:focus,input[type="time"]:focus,input[type="week"]:focus,input[type="number"]:focus,input[type="email"]:focus,input[type="url"]:focus,input[type="search"]:focus,input[type="tel"]:focus,input[type="color"]:focus,.uneditable-input:focus{border-color:rgba(82,168,236,0.8);outline:0;outline:thin dotted \9;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(82,168,236,0.6)}input[type="radio"],input[type="checkbox"]{margin:4px 0 0;margin-top:1px 
\9;*margin-top:0;line-height:normal;cursor:pointer}input[type="file"],input[type="image"],input[type="submit"],input[type="reset"],input[type="button"],input[type="radio"],input[type="checkbox"]{width:auto}select,input[type="file"]{height:30px;*margin-top:4px;line-height:30px}select{width:220px;background-color:#fff;border:1px solid #bbb}select[multiple],select[size]{height:auto}select:focus,input[type="file"]:focus,input[type="radio"]:focus,input[type="checkbox"]:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.uneditable-input,.uneditable-textarea{color:#999;cursor:not-allowed;background-color:#fcfcfc;border-color:#ccc;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.025);box-shadow:inset 0 1px 2px rgba(0,0,0,0.025)}.uneditable-input{overflow:hidden;white-space:nowrap}.uneditable-textarea{width:auto;height:auto}input:-moz-placeholder,textarea:-moz-placeholder{color:#999}input:-ms-input-placeholder,textarea:-ms-input-placeholder{color:#999}input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#999}.radio,.checkbox{min-height:18px;padding-left:18px}.radio input[type="radio"],.checkbox input[type="checkbox"]{float:left;margin-left:-18px}.controls>.radio:first-child,.controls>.checkbox:first-child{padding-top:5px}.radio.inline,.checkbox.inline{display:inline-block;padding-top:5px;margin-bottom:0;vertical-align:middle}.radio.inline+.radio.inline,.checkbox.inline+.checkbox.inline{margin-left:10px}.input-mini{width:60px}.input-small{width:90px}.input-medium{width:150px}.input-large{width:210px}.input-xlarge{width:270px}.input-xxlarge{width:530px}input[class*="span"],select[class*="span"],textarea[class*="span"],.uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"]{float:none;margin-left:0}.input-append input[class*="span"],.input-append .uneditable-input[class*="span"],.input-prepend input[class*="span"],.input-prepend .uneditable-input[class*="span"],.row-fluid input[class*="span"],.row-fluid select[class*="span"],.row-fluid textarea[class*="span"],.row-fluid .uneditable-input[class*="span"],.row-fluid .input-prepend [class*="span"],.row-fluid .input-append [class*="span"]{display:inline-block}input,textarea,.uneditable-input{margin-left:0}.controls-row [class*="span"]+[class*="span"]{margin-left:20px}input.span12,textarea.span12,.uneditable-input.span12{width:926px}input.span11,textarea.span11,.uneditable-input.span11{width:846px}input.span10,textarea.span10,.uneditable-input.span10{width:766px}input.span9,textarea.span9,.uneditable-input.span9{width:686px}input.span8,textarea.span8,.uneditable-input.span8{width:606px}input.span7,textarea.span7,.uneditable-input.span7{width:526px}input.span6,textarea.span6,.uneditable-input.span6{width:446px}input.span5,textarea.span5,.uneditable-input.span5{width:366px}input.span4,textarea.span4,.uneditable-input.span4{width:286px}input.span3,textarea.span3,.uneditable-input.span3{width:206px}input.span2,textarea.span2,.uneditable-input.span2{width:126px}input.span1,textarea.span1,.uneditable-input.span1{width:46px}.controls-row{*zoom:1}.controls-row:before,.controls-row:after{display:table;line-height:0;content:""}.controls-row:after{clear:both}.controls-row 
[class*="span"]{float:left}input[disabled],select[disabled],textarea[disabled],input[readonly],select[readonly],textarea[readonly]{cursor:not-allowed;background-color:#eee}input[type="radio"][disabled],input[type="checkbox"][disabled],input[type="radio"][readonly],input[type="checkbox"][readonly]{background-color:transparent}.control-group.warning>label,.control-group.warning .help-block,.control-group.warning .help-inline{color:#c09853}.control-group.warning .checkbox,.control-group.warning .radio,.control-group.warning input,.control-group.warning select,.control-group.warning textarea{color:#c09853;border-color:#c09853;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.warning .checkbox:focus,.control-group.warning .radio:focus,.control-group.warning input:focus,.control-group.warning select:focus,.control-group.warning textarea:focus{border-color:#a47e3c;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #dbc59e}.control-group.warning .input-prepend .add-on,.control-group.warning .input-append .add-on{color:#c09853;background-color:#fcf8e3;border-color:#c09853}.control-group.error>label,.control-group.error .help-block,.control-group.error .help-inline{color:#b94a48}.control-group.error .checkbox,.control-group.error .radio,.control-group.error input,.control-group.error select,.control-group.error textarea{color:#b94a48;border-color:#b94a48;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.error .checkbox:focus,.control-group.error .radio:focus,.control-group.error input:focus,.control-group.error select:focus,.control-group.error textarea:focus{border-color:#953b39;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #d59392}.control-group.error .input-prepend .add-on,.control-group.error .input-append .add-on{color:#b94a48;background-color:#f2dede;border-color:#b94a48}.control-group.success>label,.control-group.success .help-block,.control-group.success .help-inline{color:#468847}.control-group.success .checkbox,.control-group.success .radio,.control-group.success input,.control-group.success select,.control-group.success textarea{color:#468847;border-color:#468847;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075)}.control-group.success .checkbox:focus,.control-group.success .radio:focus,.control-group.success input:focus,.control-group.success select:focus,.control-group.success textarea:focus{border-color:#356635;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b;box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 6px #7aba7b}.control-group.success .input-prepend .add-on,.control-group.success .input-append 
.add-on{color:#468847;background-color:#dff0d8;border-color:#468847}input:focus:required:invalid,textarea:focus:required:invalid,select:focus:required:invalid{color:#b94a48;border-color:#ee5f5b}input:focus:required:invalid:focus,textarea:focus:required:invalid:focus,select:focus:required:invalid:focus{border-color:#e9322d;-webkit-box-shadow:0 0 6px #f8b9b7;-moz-box-shadow:0 0 6px #f8b9b7;box-shadow:0 0 6px #f8b9b7}.form-actions{padding:19px 20px 20px;margin-top:20px;margin-bottom:20px;background-color:#f5f5f5;border-top:1px solid #e5e5e5;*zoom:1}.form-actions:before,.form-actions:after{display:table;line-height:0;content:""}.form-actions:after{clear:both}.help-block,.help-inline{color:#595959}.help-block{display:block;margin-bottom:10px}.help-inline{display:inline-block;*display:inline;padding-left:5px;vertical-align:middle;*zoom:1}.input-append,.input-prepend{margin-bottom:5px;font-size:0;white-space:nowrap}.input-append input,.input-prepend input,.input-append select,.input-prepend select,.input-append .uneditable-input,.input-prepend .uneditable-input{position:relative;margin-bottom:0;*margin-left:0;font-size:14px;vertical-align:top;-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.input-append input:focus,.input-prepend input:focus,.input-append select:focus,.input-prepend select:focus,.input-append .uneditable-input:focus,.input-prepend .uneditable-input:focus{z-index:2}.input-append .add-on,.input-prepend .add-on{display:inline-block;width:auto;height:20px;min-width:16px;padding:4px 5px;font-size:14px;font-weight:normal;line-height:20px;text-align:center;text-shadow:0 1px 0 #fff;background-color:#eee;border:1px solid #ccc}.input-append .add-on,.input-prepend .add-on,.input-append .btn,.input-prepend .btn{margin-left:-1px;vertical-align:top;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-append .active,.input-prepend .active{background-color:#a9dba9;border-color:#46a546}.input-prepend .add-on,.input-prepend .btn{margin-right:-1px}.input-prepend .add-on:first-child,.input-prepend .btn:first-child{-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-append input,.input-append select,.input-append .uneditable-input{-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-append .add-on:last-child,.input-append .btn:last-child{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.input-prepend.input-append input,.input-prepend.input-append select,.input-prepend.input-append .uneditable-input{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.input-prepend.input-append .add-on:first-child,.input-prepend.input-append .btn:first-child{margin-right:-1px;-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-prepend.input-append .add-on:last-child,.input-prepend.input-append .btn:last-child{margin-left:-1px;-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}input.search-query{padding-right:14px;padding-right:4px \9;padding-left:14px;padding-left:4px \9;margin-bottom:0;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.form-search .input-append .search-query,.form-search .input-prepend .search-query{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.form-search .input-append .search-query{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 
14px}.form-search .input-append .btn{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .search-query{-webkit-border-radius:0 14px 14px 0;-moz-border-radius:0 14px 14px 0;border-radius:0 14px 14px 0}.form-search .input-prepend .btn{-webkit-border-radius:14px 0 0 14px;-moz-border-radius:14px 0 0 14px;border-radius:14px 0 0 14px}.form-search input,.form-inline input,.form-horizontal input,.form-search textarea,.form-inline textarea,.form-horizontal textarea,.form-search select,.form-inline select,.form-horizontal select,.form-search .help-inline,.form-inline .help-inline,.form-horizontal .help-inline,.form-search .uneditable-input,.form-inline .uneditable-input,.form-horizontal .uneditable-input,.form-search .input-prepend,.form-inline .input-prepend,.form-horizontal .input-prepend,.form-search .input-append,.form-inline .input-append,.form-horizontal .input-append{display:inline-block;*display:inline;margin-bottom:0;vertical-align:middle;*zoom:1}.form-search .hide,.form-inline .hide,.form-horizontal .hide{display:none}.form-search label,.form-inline label,.form-search .btn-group,.form-inline .btn-group{display:inline-block}.form-search .input-append,.form-inline .input-append,.form-search .input-prepend,.form-inline .input-prepend{margin-bottom:0}.form-search .radio,.form-search .checkbox,.form-inline .radio,.form-inline .checkbox{padding-left:0;margin-bottom:0;vertical-align:middle}.form-search .radio input[type="radio"],.form-search .checkbox input[type="checkbox"],.form-inline .radio input[type="radio"],.form-inline .checkbox input[type="checkbox"]{float:left;margin-right:3px;margin-left:0}.control-group{margin-bottom:10px}legend+.control-group{margin-top:20px;-webkit-margin-top-collapse:separate}.form-horizontal .control-group{margin-bottom:20px;*zoom:1}.form-horizontal .control-group:before,.form-horizontal .control-group:after{display:table;line-height:0;content:""}.form-horizontal .control-group:after{clear:both}.form-horizontal .control-label{float:left;width:140px;padding-top:5px;text-align:right}.form-horizontal .controls{*display:inline-block;*padding-left:20px;margin-left:160px;*margin-left:0}.form-horizontal .controls:first-child{*padding-left:160px}.form-horizontal .help-block{margin-top:10px;margin-bottom:0}.form-horizontal .form-actions{padding-left:160px}table{max-width:100%;background-color:transparent;border-collapse:collapse;border-spacing:0}.table{width:100%;margin-bottom:20px}.table th,.table td{padding:8px;line-height:20px;text-align:left;vertical-align:top;border-top:1px solid #ddd}.table th{font-weight:bold}.table thead th{vertical-align:bottom}.table caption+thead tr:first-child th,.table caption+thead tr:first-child td,.table colgroup+thead tr:first-child th,.table colgroup+thead tr:first-child td,.table thead:first-child tr:first-child th,.table thead:first-child tr:first-child td{border-top:0}.table tbody+tbody{border-top:2px solid #ddd}.table-condensed th,.table-condensed td{padding:4px 5px}.table-bordered{border:1px solid #ddd;border-collapse:separate;*border-collapse:collapse;border-left:0;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.table-bordered th,.table-bordered td{border-left:1px solid #ddd}.table-bordered caption+thead tr:first-child th,.table-bordered caption+tbody tr:first-child th,.table-bordered caption+tbody tr:first-child td,.table-bordered colgroup+thead tr:first-child th,.table-bordered colgroup+tbody tr:first-child th,.table-bordered colgroup+tbody 
tr:first-child td,.table-bordered thead:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child th,.table-bordered tbody:first-child tr:first-child td{border-top:0}.table-bordered thead:first-child tr:first-child th:first-child,.table-bordered tbody:first-child tr:first-child td:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered thead:first-child tr:first-child th:last-child,.table-bordered tbody:first-child tr:first-child td:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-radius-topright:4px}.table-bordered thead:last-child tr:last-child th:first-child,.table-bordered tbody:last-child tr:last-child td:first-child,.table-bordered tfoot:last-child tr:last-child td:first-child{-webkit-border-radius:0 0 0 4px;-moz-border-radius:0 0 0 4px;border-radius:0 0 0 4px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomleft:4px}.table-bordered thead:last-child tr:last-child th:last-child,.table-bordered tbody:last-child tr:last-child td:last-child,.table-bordered tfoot:last-child tr:last-child td:last-child{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-bottomright:4px}.table-bordered caption+thead tr:first-child th:first-child,.table-bordered caption+tbody tr:first-child td:first-child,.table-bordered colgroup+thead tr:first-child th:first-child,.table-bordered colgroup+tbody tr:first-child td:first-child{-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topleft:4px}.table-bordered caption+thead tr:first-child th:last-child,.table-bordered caption+tbody tr:first-child td:last-child,.table-bordered colgroup+thead tr:first-child th:last-child,.table-bordered colgroup+tbody tr:first-child td:last-child{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-moz-border-radius-topright:4px}.table-striped tbody tr:nth-child(odd) td,.table-striped tbody tr:nth-child(odd) th{background-color:#f9f9f9}.table-hover tbody tr:hover td,.table-hover tbody tr:hover th{background-color:#f5f5f5}table [class*=span],.row-fluid table [class*=span]{display:table-cell;float:none;margin-left:0}table .span1{float:none;width:44px;margin-left:0}table .span2{float:none;width:124px;margin-left:0}table .span3{float:none;width:204px;margin-left:0}table .span4{float:none;width:284px;margin-left:0}table .span5{float:none;width:364px;margin-left:0}table .span6{float:none;width:444px;margin-left:0}table .span7{float:none;width:524px;margin-left:0}table .span8{float:none;width:604px;margin-left:0}table .span9{float:none;width:684px;margin-left:0}table .span10{float:none;width:764px;margin-left:0}table .span11{float:none;width:844px;margin-left:0}table .span12{float:none;width:924px;margin-left:0}table .span13{float:none;width:1004px;margin-left:0}table .span14{float:none;width:1084px;margin-left:0}table .span15{float:none;width:1164px;margin-left:0}table .span16{float:none;width:1244px;margin-left:0}table .span17{float:none;width:1324px;margin-left:0}table .span18{float:none;width:1404px;margin-left:0}table .span19{float:none;width:1484px;margin-left:0}table .span20{float:none;width:1564px;margin-left:0}table .span21{float:none;width:1644px;margin-left:0}table .span22{float:none;width:1724px;margin-left:0}table .span23{float:none;width:1804px;margin-left:0}table .span24{float:none;width:1884px;margin-left:0}.table tbody tr.success td{background-color:#dff0d8}.table tbody tr.error
td{background-color:#f2dede}.table tbody tr.info td{background-color:#d9edf7}[class^="icon-"],[class*=" icon-"]{display:inline-block;width:14px;height:14px;margin-top:1px;*margin-right:.3em;line-height:14px;vertical-align:text-top;background-image:url("../img/glyphicons-halflings.png");background-position:14px 14px;background-repeat:no-repeat}.icon-white,.nav>.active>a>[class^="icon-"],.nav>.active>a>[class*=" icon-"],.dropdown-menu>li>a:hover>[class^="icon-"],.dropdown-menu>li>a:hover>[class*=" icon-"],.dropdown-menu>.active>a>[class^="icon-"],.dropdown-menu>.active>a>[class*=" icon-"]{background-image:url("../img/glyphicons-halflings-white.png")}.icon-glass{background-position:0 0}.icon-music{background-position:-24px 0}.icon-search{background-position:-48px 0}.icon-envelope{background-position:-72px 0}.icon-heart{background-position:-96px 0}.icon-star{background-position:-120px 0}.icon-star-empty{background-position:-144px 0}.icon-user{background-position:-168px 0}.icon-film{background-position:-192px 0}.icon-th-large{background-position:-216px 0}.icon-th{background-position:-240px 0}.icon-th-list{background-position:-264px 0}.icon-ok{background-position:-288px 0}.icon-remove{background-position:-312px 0}.icon-zoom-in{background-position:-336px 0}.icon-zoom-out{background-position:-360px 0}.icon-off{background-position:-384px 0}.icon-signal{background-position:-408px 0}.icon-cog{background-position:-432px 0}.icon-trash{background-position:-456px 0}.icon-home{background-position:0 -24px}.icon-file{background-position:-24px -24px}.icon-time{background-position:-48px -24px}.icon-road{background-position:-72px -24px}.icon-download-alt{background-position:-96px -24px}.icon-download{background-position:-120px -24px}.icon-upload{background-position:-144px -24px}.icon-inbox{background-position:-168px -24px}.icon-play-circle{background-position:-192px -24px}.icon-repeat{background-position:-216px -24px}.icon-refresh{background-position:-240px -24px}.icon-list-alt{background-position:-264px -24px}.icon-lock{background-position:-287px -24px}.icon-flag{background-position:-312px -24px}.icon-headphones{background-position:-336px -24px}.icon-volume-off{background-position:-360px -24px}.icon-volume-down{background-position:-384px -24px}.icon-volume-up{background-position:-408px -24px}.icon-qrcode{background-position:-432px -24px}.icon-barcode{background-position:-456px -24px}.icon-tag{background-position:0 -48px}.icon-tags{background-position:-25px -48px}.icon-book{background-position:-48px -48px}.icon-bookmark{background-position:-72px -48px}.icon-print{background-position:-96px -48px}.icon-camera{background-position:-120px -48px}.icon-font{background-position:-144px -48px}.icon-bold{background-position:-167px -48px}.icon-italic{background-position:-192px -48px}.icon-text-height{background-position:-216px -48px}.icon-text-width{background-position:-240px -48px}.icon-align-left{background-position:-264px -48px}.icon-align-center{background-position:-288px -48px}.icon-align-right{background-position:-312px -48px}.icon-align-justify{background-position:-336px -48px}.icon-list{background-position:-360px -48px}.icon-indent-left{background-position:-384px -48px}.icon-indent-right{background-position:-408px -48px}.icon-facetime-video{background-position:-432px -48px}.icon-picture{background-position:-456px -48px}.icon-pencil{background-position:0 -72px}.icon-map-marker{background-position:-24px -72px}.icon-adjust{background-position:-48px -72px}.icon-tint{background-position:-72px 
-72px}.icon-edit{background-position:-96px -72px}.icon-share{background-position:-120px -72px}.icon-check{background-position:-144px -72px}.icon-move{background-position:-168px -72px}.icon-step-backward{background-position:-192px -72px}.icon-fast-backward{background-position:-216px -72px}.icon-backward{background-position:-240px -72px}.icon-play{background-position:-264px -72px}.icon-pause{background-position:-288px -72px}.icon-stop{background-position:-312px -72px}.icon-forward{background-position:-336px -72px}.icon-fast-forward{background-position:-360px -72px}.icon-step-forward{background-position:-384px -72px}.icon-eject{background-position:-408px -72px}.icon-chevron-left{background-position:-432px -72px}.icon-chevron-right{background-position:-456px -72px}.icon-plus-sign{background-position:0 -96px}.icon-minus-sign{background-position:-24px -96px}.icon-remove-sign{background-position:-48px -96px}.icon-ok-sign{background-position:-72px -96px}.icon-question-sign{background-position:-96px -96px}.icon-info-sign{background-position:-120px -96px}.icon-screenshot{background-position:-144px -96px}.icon-remove-circle{background-position:-168px -96px}.icon-ok-circle{background-position:-192px -96px}.icon-ban-circle{background-position:-216px -96px}.icon-arrow-left{background-position:-240px -96px}.icon-arrow-right{background-position:-264px -96px}.icon-arrow-up{background-position:-289px -96px}.icon-arrow-down{background-position:-312px -96px}.icon-share-alt{background-position:-336px -96px}.icon-resize-full{background-position:-360px -96px}.icon-resize-small{background-position:-384px -96px}.icon-plus{background-position:-408px -96px}.icon-minus{background-position:-433px -96px}.icon-asterisk{background-position:-456px -96px}.icon-exclamation-sign{background-position:0 -120px}.icon-gift{background-position:-24px -120px}.icon-leaf{background-position:-48px -120px}.icon-fire{background-position:-72px -120px}.icon-eye-open{background-position:-96px -120px}.icon-eye-close{background-position:-120px -120px}.icon-warning-sign{background-position:-144px -120px}.icon-plane{background-position:-168px -120px}.icon-calendar{background-position:-192px -120px}.icon-random{width:16px;background-position:-216px -120px}.icon-comment{background-position:-240px -120px}.icon-magnet{background-position:-264px -120px}.icon-chevron-up{background-position:-288px -120px}.icon-chevron-down{background-position:-313px -119px}.icon-retweet{background-position:-336px -120px}.icon-shopping-cart{background-position:-360px -120px}.icon-folder-close{background-position:-384px -120px}.icon-folder-open{width:16px;background-position:-408px -120px}.icon-resize-vertical{background-position:-432px -119px}.icon-resize-horizontal{background-position:-456px -118px}.icon-hdd{background-position:0 -144px}.icon-bullhorn{background-position:-24px -144px}.icon-bell{background-position:-48px -144px}.icon-certificate{background-position:-72px -144px}.icon-thumbs-up{background-position:-96px -144px}.icon-thumbs-down{background-position:-120px -144px}.icon-hand-right{background-position:-144px -144px}.icon-hand-left{background-position:-168px -144px}.icon-hand-up{background-position:-192px -144px}.icon-hand-down{background-position:-216px -144px}.icon-circle-arrow-right{background-position:-240px -144px}.icon-circle-arrow-left{background-position:-264px -144px}.icon-circle-arrow-up{background-position:-288px -144px}.icon-circle-arrow-down{background-position:-312px -144px}.icon-globe{background-position:-336px 
-144px}.icon-wrench{background-position:-360px -144px}.icon-tasks{background-position:-384px -144px}.icon-filter{background-position:-408px -144px}.icon-briefcase{background-position:-432px -144px}.icon-fullscreen{background-position:-456px -144px}.dropup,.dropdown{position:relative}.dropdown-toggle{*margin-bottom:-3px}.dropdown-toggle:active,.open .dropdown-toggle{outline:0}.caret{display:inline-block;width:0;height:0;vertical-align:top;border-top:4px solid #000;border-right:4px solid transparent;border-left:4px solid transparent;content:""}.dropdown .caret{margin-top:8px;margin-left:2px}.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:160px;padding:5px 0;margin:2px 0 0;list-style:none;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);*border-right-width:2px;*border-bottom-width:2px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.dropdown-menu.pull-right{right:0;left:auto}.dropdown-menu .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.dropdown-menu a{display:block;padding:3px 20px;clear:both;font-weight:normal;line-height:20px;color:#333;white-space:nowrap}.dropdown-menu li>a:hover,.dropdown-menu li>a:focus,.dropdown-submenu:hover>a{color:#fff;text-decoration:none;background-color:#0088cc;background-color:#0081c2;background-image:-moz-linear-gradient(top,#0088cc,#0087b3);background-image:-webkit-gradient(linear,0 0,0 100%,from(#0088cc),to(#0087b3));background-image:-webkit-linear-gradient(top,#0088cc,#0087b3);background-image:-o-linear-gradient(top,#0088cc,#0087b3);background-image:linear-gradient(to bottom,#0088cc,#0087b3);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0087b3',GradientType=0)}.dropdown-menu .active>a,.dropdown-menu .active>a:hover{color:#fff;text-decoration:none;background-color:#0088cc;background-color:#0081c2;background-image:linear-gradient(to bottom,#0088cc,#0087b3);background-image:-moz-linear-gradient(top,#0088cc,#0087b3);background-image:-webkit-gradient(linear,0 0,0 100%,from(#0088cc),to(#0087b3));background-image:-webkit-linear-gradient(top,#0088cc,#0087b3);background-image:-o-linear-gradient(top,#0088cc,#0087b3);background-repeat:repeat-x;outline:0;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0087b3',GradientType=0)}.dropdown-menu .disabled>a,.dropdown-menu .disabled>a:hover{color:#999}.dropdown-menu .disabled>a:hover{text-decoration:none;cursor:default;background-color:transparent}.open{*z-index:1000}.open>.dropdown-menu{display:block}.pull-right>.dropdown-menu{right:0;left:auto}.dropup .caret,.navbar-fixed-bottom .dropdown .caret{border-top:0;border-bottom:4px solid #000;content:"\2191"}.dropup .dropdown-menu,.navbar-fixed-bottom .dropdown .dropdown-menu{top:auto;bottom:100%;margin-bottom:1px}.dropdown-submenu{position:relative}.dropdown-submenu>.dropdown-menu{top:0;left:100%;margin-top:-6px;margin-left:-1px;-webkit-border-radius:0 6px 6px 6px;-moz-border-radius:0 6px 6px 6px;border-radius:0 6px 6px 6px}.dropdown-submenu:hover
.dropdown-menu{display:block}.dropdown-submenu>a:after{display:block;float:right;width:0;height:0;margin-top:5px;margin-right:-10px;border-color:transparent;border-left-color:#ccc;border-style:solid;border-width:5px 0 5px 5px;content:" "}.dropdown-submenu:hover>a:after{border-left-color:#fff}.dropdown .dropdown-menu .nav-header{padding-right:20px;padding-left:20px}.typeahead{margin-top:2px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.well{min-height:20px;padding:19px;margin-bottom:20px;background-color:#f5f5f5;border:1px solid #e3e3e3;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 1px rgba(0,0,0,0.05);box-shadow:inset 0 1px 1px rgba(0,0,0,0.05)}.well blockquote{border-color:#ddd;border-color:rgba(0,0,0,0.15)}.well-large{padding:24px;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.well-small{padding:9px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.fade{opacity:0;-webkit-transition:opacity .15s linear;-moz-transition:opacity .15s linear;-o-transition:opacity .15s linear;transition:opacity .15s linear}.fade.in{opacity:1}.collapse{position:relative;height:0;overflow:hidden;overflow:visible \9;-webkit-transition:height .35s ease;-moz-transition:height .35s ease;-o-transition:height .35s ease;transition:height .35s ease}.collapse.in{height:auto}.close{float:right;font-size:20px;font-weight:bold;line-height:20px;color:#000;text-shadow:0 1px 0 #fff;opacity:.2;filter:alpha(opacity=20)}.close:hover{color:#000;text-decoration:none;cursor:pointer;opacity:.4;filter:alpha(opacity=40)}button.close{padding:0;cursor:pointer;background:transparent;border:0;-webkit-appearance:none}.btn{display:inline-block;*display:inline;padding:4px 14px;margin-bottom:0;*margin-left:.3em;font-size:14px;line-height:20px;*line-height:20px;color:#333;text-align:center;text-shadow:0 1px 1px rgba(255,255,255,0.75);vertical-align:middle;cursor:pointer;background-color:#f5f5f5;*background-color:#e6e6e6;background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#e6e6e6));background-image:-webkit-linear-gradient(top,#fff,#e6e6e6);background-image:-o-linear-gradient(top,#fff,#e6e6e6);background-image:linear-gradient(to bottom,#fff,#e6e6e6);background-image:-moz-linear-gradient(top,#fff,#e6e6e6);background-repeat:repeat-x;border:1px solid #bbb;*border:0;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);border-color:#e6e6e6 #e6e6e6 #bfbfbf;border-bottom-color:#a2a2a2;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffffffff',endColorstr='#ffe6e6e6',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false);*zoom:1;-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05)}.btn:hover,.btn:active,.btn.active,.btn.disabled,.btn[disabled]{color:#333;background-color:#e6e6e6;*background-color:#d9d9d9}.btn:active,.btn.active{background-color:#ccc \9}.btn:first-child{*margin-left:0}.btn:hover{color:#333;text-decoration:none;background-color:#e6e6e6;*background-color:#d9d9d9;background-position:0 -15px;-webkit-transition:background-position .1s linear;-moz-transition:background-position .1s linear;-o-transition:background-position .1s linear;transition:background-position .1s 
linear}.btn:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.btn.active,.btn:active{background-color:#e6e6e6;background-color:#d9d9d9 \9;background-image:none;outline:0;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn.disabled,.btn[disabled]{cursor:default;background-color:#e6e6e6;background-image:none;opacity:.65;filter:alpha(opacity=65);-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-large{padding:9px 14px;font-size:16px;line-height:normal;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px}.btn-large [class^="icon-"]{margin-top:2px}.btn-small{padding:3px 9px;font-size:12px;line-height:18px}.btn-small [class^="icon-"]{margin-top:0}.btn-mini{padding:2px 6px;font-size:11px;line-height:16px}.btn-block{display:block;width:100%;padding-right:0;padding-left:0;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.btn-block+.btn-block{margin-top:5px}.btn-primary.active,.btn-warning.active,.btn-danger.active,.btn-success.active,.btn-info.active,.btn-inverse.active{color:rgba(255,255,255,0.75)}.btn{border-color:#c5c5c5;border-color:rgba(0,0,0,0.15) rgba(0,0,0,0.15) rgba(0,0,0,0.25)}.btn-primary{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#006dcc;*background-color:#04c;background-image:-webkit-gradient(linear,0 0,0 100%,from(#08c),to(#04c));background-image:-webkit-linear-gradient(top,#08c,#04c);background-image:-o-linear-gradient(top,#08c,#04c);background-image:linear-gradient(to bottom,#08c,#04c);background-image:-moz-linear-gradient(top,#08c,#04c);background-repeat:repeat-x;border-color:#04c #04c #002a80;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff0088cc',endColorstr='#ff0044cc',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-primary:hover,.btn-primary:active,.btn-primary.active,.btn-primary.disabled,.btn-primary[disabled]{color:#fff;background-color:#04c;*background-color:#003bb3}.btn-primary:active,.btn-primary.active{background-color:#039 \9}.btn-warning{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#faa732;*background-color:#f89406;background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to bottom,#fbb450,#f89406);background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-repeat:repeat-x;border-color:#f89406 #f89406 #ad6704;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-warning:hover,.btn-warning:active,.btn-warning.active,.btn-warning.disabled,.btn-warning[disabled]{color:#fff;background-color:#f89406;*background-color:#df8505}.btn-warning:active,.btn-warning.active{background-color:#c67605 \9}.btn-danger{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#da4f49;*background-color:#bd362f;background-image:-webkit-gradient(linear,0 0,0 
100%,from(#ee5f5b),to(#bd362f));background-image:-webkit-linear-gradient(top,#ee5f5b,#bd362f);background-image:-o-linear-gradient(top,#ee5f5b,#bd362f);background-image:linear-gradient(to bottom,#ee5f5b,#bd362f);background-image:-moz-linear-gradient(top,#ee5f5b,#bd362f);background-repeat:repeat-x;border-color:#bd362f #bd362f #802420;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffbd362f',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-danger:hover,.btn-danger:active,.btn-danger.active,.btn-danger.disabled,.btn-danger[disabled]{color:#fff;background-color:#bd362f;*background-color:#a9302a}.btn-danger:active,.btn-danger.active{background-color:#942a25 \9}.btn-success{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#5bb75b;*background-color:#51a351;background-image:-webkit-gradient(linear,0 0,0 100%,from(#62c462),to(#51a351));background-image:-webkit-linear-gradient(top,#62c462,#51a351);background-image:-o-linear-gradient(top,#62c462,#51a351);background-image:linear-gradient(to bottom,#62c462,#51a351);background-image:-moz-linear-gradient(top,#62c462,#51a351);background-repeat:repeat-x;border-color:#51a351 #51a351 #387038;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff51a351',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-success:hover,.btn-success:active,.btn-success.active,.btn-success.disabled,.btn-success[disabled]{color:#fff;background-color:#51a351;*background-color:#499249}.btn-success:active,.btn-success.active{background-color:#408140 \9}.btn-info{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#49afcd;*background-color:#2f96b4;background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#2f96b4));background-image:-webkit-linear-gradient(top,#5bc0de,#2f96b4);background-image:-o-linear-gradient(top,#5bc0de,#2f96b4);background-image:linear-gradient(to bottom,#5bc0de,#2f96b4);background-image:-moz-linear-gradient(top,#5bc0de,#2f96b4);background-repeat:repeat-x;border-color:#2f96b4 #2f96b4 #1f6377;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff2f96b4',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-info:hover,.btn-info:active,.btn-info.active,.btn-info.disabled,.btn-info[disabled]{color:#fff;background-color:#2f96b4;*background-color:#2a85a0}.btn-info:active,.btn-info.active{background-color:#24748c \9}.btn-inverse{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#363636;*background-color:#222;background-image:-webkit-gradient(linear,0 0,0 100%,from(#444),to(#222));background-image:-webkit-linear-gradient(top,#444,#222);background-image:-o-linear-gradient(top,#444,#222);background-image:linear-gradient(to bottom,#444,#222);background-image:-moz-linear-gradient(top,#444,#222);background-repeat:repeat-x;border-color:#222 #222 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) 
rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff444444',endColorstr='#ff222222',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.btn-inverse:hover,.btn-inverse:active,.btn-inverse.active,.btn-inverse.disabled,.btn-inverse[disabled]{color:#fff;background-color:#222;*background-color:#151515}.btn-inverse:active,.btn-inverse.active{background-color:#080808 \9}button.btn,input[type="submit"].btn{*padding-top:3px;*padding-bottom:3px}button.btn::-moz-focus-inner,input[type="submit"].btn::-moz-focus-inner{padding:0;border:0}button.btn.btn-large,input[type="submit"].btn.btn-large{*padding-top:7px;*padding-bottom:7px}button.btn.btn-small,input[type="submit"].btn.btn-small{*padding-top:3px;*padding-bottom:3px}button.btn.btn-mini,input[type="submit"].btn.btn-mini{*padding-top:1px;*padding-bottom:1px}.btn-link,.btn-link:active{background-color:transparent;background-image:none;-webkit-box-shadow:none;-moz-box-shadow:none;box-shadow:none}.btn-link{color:#08c;cursor:pointer;border-color:transparent;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-link:hover{color:#005580;text-decoration:underline;background-color:transparent}.btn-group{position:relative;*margin-left:.3em;font-size:0;white-space:nowrap}.btn-group:first-child{*margin-left:0}.btn-group+.btn-group{margin-left:5px}.btn-toolbar{margin-top:10px;margin-bottom:10px;font-size:0}.btn-toolbar .btn-group{display:inline-block;*display:inline;*zoom:1}.btn-toolbar .btn+.btn,.btn-toolbar .btn-group+.btn,.btn-toolbar .btn+.btn-group{margin-left:5px}.btn-group>.btn{position:relative;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group>.btn+.btn{margin-left:-1px}.btn-group>.btn,.btn-group>.dropdown-menu{font-size:14px}.btn-group>.btn-mini{font-size:11px}.btn-group>.btn-small{font-size:12px}.btn-group>.btn-large{font-size:16px}.btn-group>.btn:first-child{margin-left:0;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-bottomleft:4px;-moz-border-radius-topleft:4px}.btn-group>.btn:last-child,.btn-group>.dropdown-toggle{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-bottomright:4px}.btn-group>.btn.large:first-child{margin-left:0;-webkit-border-bottom-left-radius:6px;border-bottom-left-radius:6px;-webkit-border-top-left-radius:6px;border-top-left-radius:6px;-moz-border-radius-bottomleft:6px;-moz-border-radius-topleft:6px}.btn-group>.btn.large:last-child,.btn-group>.large.dropdown-toggle{-webkit-border-top-right-radius:6px;border-top-right-radius:6px;-webkit-border-bottom-right-radius:6px;border-bottom-right-radius:6px;-moz-border-radius-topright:6px;-moz-border-radius-bottomright:6px}.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active{z-index:2}.btn-group .dropdown-toggle:active,.btn-group.open .dropdown-toggle{outline:0}.btn-group>.btn+.dropdown-toggle{*padding-top:5px;padding-right:8px;*padding-bottom:5px;padding-left:8px;-webkit-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 1px 0 0 rgba(255,255,255,0.125),inset 0 1px 0 rgba(255,255,255,0.2),0 1px 2px 
rgba(0,0,0,0.05)}.btn-group>.btn-mini+.dropdown-toggle{*padding-top:2px;padding-right:5px;*padding-bottom:2px;padding-left:5px}.btn-group>.btn-small+.dropdown-toggle{*padding-top:5px;*padding-bottom:4px}.btn-group>.btn-large+.dropdown-toggle{*padding-top:7px;padding-right:12px;*padding-bottom:7px;padding-left:12px}.btn-group.open .dropdown-toggle{background-image:none;-webkit-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05);box-shadow:inset 0 2px 4px rgba(0,0,0,0.15),0 1px 2px rgba(0,0,0,0.05)}.btn-group.open .btn.dropdown-toggle{background-color:#e6e6e6}.btn-group.open .btn-primary.dropdown-toggle{background-color:#04c}.btn-group.open .btn-warning.dropdown-toggle{background-color:#f89406}.btn-group.open .btn-danger.dropdown-toggle{background-color:#bd362f}.btn-group.open .btn-success.dropdown-toggle{background-color:#51a351}.btn-group.open .btn-info.dropdown-toggle{background-color:#2f96b4}.btn-group.open .btn-inverse.dropdown-toggle{background-color:#222}.btn .caret{margin-top:8px;margin-left:0}.btn-mini .caret,.btn-small .caret,.btn-large .caret{margin-top:6px}.btn-large .caret{border-top-width:5px;border-right-width:5px;border-left-width:5px}.dropup .btn-large .caret{border-top:0;border-bottom:5px solid #000}.btn-primary .caret,.btn-warning .caret,.btn-danger .caret,.btn-info .caret,.btn-success .caret,.btn-inverse .caret{border-top-color:#fff;border-bottom-color:#fff}.btn-group-vertical{display:inline-block;*display:inline;*zoom:1}.btn-group-vertical .btn{display:block;float:none;width:100%;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.btn-group-vertical .btn+.btn{margin-top:-1px;margin-left:0}.btn-group-vertical .btn:first-child{-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.btn-group-vertical .btn:last-child{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.btn-group-vertical .btn-large:first-child{-webkit-border-radius:6px 6px 0 0;-moz-border-radius:6px 6px 0 0;border-radius:6px 6px 0 0}.btn-group-vertical .btn-large:last-child{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.alert{padding:8px 35px 8px 14px;margin-bottom:20px;color:#c09853;text-shadow:0 1px 0 rgba(255,255,255,0.5);background-color:#fcf8e3;border:1px solid #fbeed5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.alert h4{margin:0}.alert .close{position:relative;top:-2px;right:-21px;line-height:20px}.alert-success{color:#468847;background-color:#dff0d8;border-color:#d6e9c6}.alert-danger,.alert-error{color:#b94a48;background-color:#f2dede;border-color:#eed3d7}.alert-info{color:#3a87ad;background-color:#d9edf7;border-color:#bce8f1}.alert-block{padding-top:14px;padding-bottom:14px}.alert-block>p,.alert-block>ul{margin-bottom:0}.alert-block p+p{margin-top:5px}.nav{margin-bottom:20px;margin-left:0;list-style:none}.nav>li>a{display:block}.nav>li>a:hover{text-decoration:none;background-color:#eee}.nav>.pull-right{float:right}.nav-header{display:block;padding:3px 15px;font-size:11px;font-weight:bold;line-height:20px;color:#999;text-shadow:0 1px 0 rgba(255,255,255,0.5);text-transform:uppercase}.nav li+.nav-header{margin-top:9px}.nav-list{padding-right:15px;padding-left:15px;margin-bottom:0}.nav-list>li>a,.nav-list .nav-header{margin-right:-15px;margin-left:-15px;text-shadow:0 1px 0 rgba(255,255,255,0.5)}.nav-list>li>a{padding:3px 
15px}.nav-list>.active>a,.nav-list>.active>a:hover{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.2);background-color:#08c}.nav-list [class^="icon-"]{margin-right:2px}.nav-list .divider{*width:100%;height:1px;margin:9px 1px;*margin:-5px 0 5px;overflow:hidden;background-color:#e5e5e5;border-bottom:1px solid #fff}.nav-tabs,.nav-pills{*zoom:1}.nav-tabs:before,.nav-pills:before,.nav-tabs:after,.nav-pills:after{display:table;line-height:0;content:""}.nav-tabs:after,.nav-pills:after{clear:both}.nav-tabs>li,.nav-pills>li{float:left}.nav-tabs>li>a,.nav-pills>li>a{padding-right:12px;padding-left:12px;margin-right:2px;line-height:14px}.nav-tabs{border-bottom:1px solid #ddd}.nav-tabs>li{margin-bottom:-1px}.nav-tabs>li>a{padding-top:8px;padding-bottom:8px;line-height:20px;border:1px solid transparent;-webkit-border-radius:4px 4px 0 0;-moz-border-radius:4px 4px 0 0;border-radius:4px 4px 0 0}.nav-tabs>li>a:hover{border-color:#eee #eee #ddd}.nav-tabs>.active>a,.nav-tabs>.active>a:hover{color:#555;cursor:default;background-color:#fff;border:1px solid #ddd;border-bottom-color:transparent}.nav-pills>li>a{padding-top:8px;padding-bottom:8px;margin-top:2px;margin-bottom:2px;-webkit-border-radius:5px;-moz-border-radius:5px;border-radius:5px}.nav-pills>.active>a,.nav-pills>.active>a:hover{color:#fff;background-color:#08c}.nav-stacked>li{float:none}.nav-stacked>li>a{margin-right:0}.nav-tabs.nav-stacked{border-bottom:0}.nav-tabs.nav-stacked>li>a{border:1px solid #ddd;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.nav-tabs.nav-stacked>li:first-child>a{-webkit-border-top-right-radius:4px;border-top-right-radius:4px;-webkit-border-top-left-radius:4px;border-top-left-radius:4px;-moz-border-radius-topright:4px;-moz-border-radius-topleft:4px}.nav-tabs.nav-stacked>li:last-child>a{-webkit-border-bottom-right-radius:4px;border-bottom-right-radius:4px;-webkit-border-bottom-left-radius:4px;border-bottom-left-radius:4px;-moz-border-radius-bottomright:4px;-moz-border-radius-bottomleft:4px}.nav-tabs.nav-stacked>li>a:hover{z-index:2;border-color:#ddd}.nav-pills.nav-stacked>li>a{margin-bottom:3px}.nav-pills.nav-stacked>li:last-child>a{margin-bottom:1px}.nav-tabs .dropdown-menu{-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px}.nav-pills .dropdown-menu{-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.nav .dropdown-toggle .caret{margin-top:6px;border-top-color:#08c;border-bottom-color:#08c}.nav .dropdown-toggle:hover .caret{border-top-color:#005580;border-bottom-color:#005580}.nav-tabs .dropdown-toggle .caret{margin-top:8px}.nav .active .dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.nav-tabs .active .dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.nav>.dropdown.active>a:hover{cursor:pointer}.nav-tabs .open .dropdown-toggle,.nav-pills .open .dropdown-toggle,.nav>li.dropdown.open.active>a:hover{color:#fff;background-color:#999;border-color:#999}.nav li.dropdown.open .caret,.nav li.dropdown.open.active .caret,.nav li.dropdown.open a:hover .caret{border-top-color:#fff;border-bottom-color:#fff;opacity:1;filter:alpha(opacity=100)}.tabs-stacked 
.open>a:hover{border-color:#999}.tabbable{*zoom:1}.tabbable:before,.tabbable:after{display:table;line-height:0;content:""}.tabbable:after{clear:both}.tab-content{overflow:auto}.tabs-below>.nav-tabs,.tabs-right>.nav-tabs,.tabs-left>.nav-tabs{border-bottom:0}.tab-content>.tab-pane,.pill-content>.pill-pane{display:none}.tab-content>.active,.pill-content>.active{display:block}.tabs-below>.nav-tabs{border-top:1px solid #ddd}.tabs-below>.nav-tabs>li{margin-top:-1px;margin-bottom:0}.tabs-below>.nav-tabs>li>a{-webkit-border-radius:0 0 4px 4px;-moz-border-radius:0 0 4px 4px;border-radius:0 0 4px 4px}.tabs-below>.nav-tabs>li>a:hover{border-top-color:#ddd;border-bottom-color:transparent}.tabs-below>.nav-tabs>.active>a,.tabs-below>.nav-tabs>.active>a:hover{border-color:transparent #ddd #ddd #ddd}.tabs-left>.nav-tabs>li,.tabs-right>.nav-tabs>li{float:none}.tabs-left>.nav-tabs>li>a,.tabs-right>.nav-tabs>li>a{min-width:74px;margin-right:0;margin-bottom:3px}.tabs-left>.nav-tabs{float:left;margin-right:19px;border-right:1px solid #ddd}.tabs-left>.nav-tabs>li>a{margin-right:-1px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;border-radius:4px 0 0 4px}.tabs-left>.nav-tabs>li>a:hover{border-color:#eee #ddd #eee #eee}.tabs-left>.nav-tabs .active>a,.tabs-left>.nav-tabs .active>a:hover{border-color:#ddd transparent #ddd #ddd;*border-right-color:#fff}.tabs-right>.nav-tabs{float:right;margin-left:19px;border-left:1px solid #ddd}.tabs-right>.nav-tabs>li>a{margin-left:-1px;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;border-radius:0 4px 4px 0}.tabs-right>.nav-tabs>li>a:hover{border-color:#eee #eee #eee #ddd}.tabs-right>.nav-tabs .active>a,.tabs-right>.nav-tabs .active>a:hover{border-color:#ddd #ddd #ddd transparent;*border-left-color:#fff}.nav>.disabled>a{color:#999}.nav>.disabled>a:hover{text-decoration:none;cursor:default;background-color:transparent}.navbar{*position:relative;*z-index:2;margin-bottom:20px;overflow:visible;color:#555}.navbar-inner{min-height:40px;padding-right:20px;padding-left:20px;background-color:#fafafa;background-image:-moz-linear-gradient(top,#fff,#d2e2f2);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fff),to(#d2e2f2));background-image:-webkit-linear-gradient(top,#fff,#d2e2f2);background-image:-o-linear-gradient(top,#fff,#d2e2f2);background-image:linear-gradient(to bottom,#fff,#d2e2f2);background-repeat:repeat-x;border:1px solid #d4d4d4;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffffffff',endColorstr='#ffd2e2f2',GradientType=0);-webkit-box-shadow:0 1px 4px rgba(0,0,0,0.065);-moz-box-shadow:0 1px 4px rgba(0,0,0,0.065);box-shadow:0 1px 4px rgba(0,0,0,0.065)}.navbar .container{width:auto}.nav-collapse.collapse{height:auto}.navbar .brand{display:block;float:left;padding:10px 20px 10px;margin-left:-20px;font-size:20px;font-weight:200;color:#555;text-shadow:0 1px 0 #fff}.navbar .brand:hover{text-decoration:none}.navbar-text{margin-bottom:0;line-height:40px}.navbar-link{color:#555}.navbar-link:hover{color:#333}.navbar .divider-vertical{height:40px;margin:0 9px;border-right:1px solid #fff;border-left:1px solid #f2f2f2}.navbar .btn,.navbar .btn-group{margin-top:6px}.navbar .btn-group .btn{margin:0}.navbar-form{margin-bottom:0;*zoom:1}.navbar-form:before,.navbar-form:after{display:table;line-height:0;content:""}.navbar-form:after{clear:both}.navbar-form input,.navbar-form select,.navbar-form .radio,.navbar-form .checkbox{margin-top:5px}.navbar-form
input,.navbar-form select,.navbar-form .btn{display:inline-block;margin-bottom:0}.navbar-form input[type="image"],.navbar-form input[type="checkbox"],.navbar-form input[type="radio"]{margin-top:3px}.navbar-form .input-append,.navbar-form .input-prepend{margin-top:6px;white-space:nowrap}.navbar-form .input-append input,.navbar-form .input-prepend input{margin-top:0}.navbar-search{position:relative;float:left;margin-top:5px;margin-bottom:0}.navbar-search .search-query{padding:4px 14px;margin-bottom:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:13px;font-weight:normal;line-height:1;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.navbar-static-top{position:static;width:100%;margin-bottom:0}.navbar-static-top .navbar-inner{-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-fixed-top,.navbar-fixed-bottom{position:fixed;right:0;left:0;z-index:1030;margin-bottom:0}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner,.navbar-static-top .navbar-inner{border:0}.navbar-fixed-top .navbar-inner,.navbar-fixed-bottom .navbar-inner{padding-right:0;padding-left:0;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.navbar-static-top .container,.navbar-fixed-top .container,.navbar-fixed-bottom .container{width:940px}.navbar-fixed-top{top:0}.navbar-fixed-top .navbar-inner,.navbar-static-top .navbar-inner{-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.1),0 1px 10px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.1),0 1px 10px rgba(0,0,0,0.1);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.1),0 1px 10px rgba(0,0,0,0.1)}.navbar-fixed-bottom{bottom:0}.navbar-fixed-bottom .navbar-inner{-webkit-box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1);box-shadow:inset 0 1px 0 rgba(0,0,0,0.1),0 -1px 10px rgba(0,0,0,0.1)}.navbar .nav{position:relative;left:0;display:block;float:left;margin:0 10px 0 0}.navbar .nav.pull-right{float:right}.navbar .nav>li{float:left}.navbar .nav>li>a{float:none;padding:10px 15px 10px;color:#555;text-decoration:none;text-shadow:0 1px 0 #fff}.navbar .nav .dropdown-toggle .caret{margin-top:8px}.navbar .nav>li>a:focus,.navbar .nav>li>a:hover{color:#333;text-decoration:none;background-color:transparent}.navbar .nav>.active>a,.navbar .nav>.active>a:hover,.navbar .nav>.active>a:focus{color:#555;text-decoration:none;background-color:#e5e5e5;-webkit-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);-moz-box-shadow:inset 0 3px 8px rgba(0,0,0,0.125);box-shadow:inset 0 3px 8px rgba(0,0,0,0.125)}.navbar .btn-navbar{display:none;float:right;padding:7px 10px;margin-right:5px;margin-left:5px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#ededed;*background-color:#e5e5e5;background-image:-webkit-gradient(linear,0 0,0 100%,from(#f2f2f2),to(#e5e5e5));background-image:-webkit-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:-o-linear-gradient(top,#f2f2f2,#e5e5e5);background-image:linear-gradient(to bottom,#f2f2f2,#e5e5e5);background-image:-moz-linear-gradient(top,#f2f2f2,#e5e5e5);background-repeat:repeat-x;border-color:#e5e5e5 #e5e5e5 #bfbfbf;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fff2f2f2',endColorstr='#ffe5e5e5',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false);-webkit-box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);-moz-box-shadow:inset 0 1px 0 
rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075);box-shadow:inset 0 1px 0 rgba(255,255,255,0.1),0 1px 0 rgba(255,255,255,0.075)}.navbar .btn-navbar:hover,.navbar .btn-navbar:active,.navbar .btn-navbar.active,.navbar .btn-navbar.disabled,.navbar .btn-navbar[disabled]{color:#fff;background-color:#e5e5e5;*background-color:#d9d9d9}.navbar .btn-navbar:active,.navbar .btn-navbar.active{background-color:#ccc \9}.navbar .btn-navbar .icon-bar{display:block;width:18px;height:2px;background-color:#f5f5f5;-webkit-border-radius:1px;-moz-border-radius:1px;border-radius:1px;-webkit-box-shadow:0 1px 0 rgba(0,0,0,0.25);-moz-box-shadow:0 1px 0 rgba(0,0,0,0.25);box-shadow:0 1px 0 rgba(0,0,0,0.25)}.btn-navbar .icon-bar+.icon-bar{margin-top:3px}.navbar .nav>li>.dropdown-menu:before{position:absolute;top:-7px;left:9px;display:inline-block;border-right:7px solid transparent;border-bottom:7px solid #ccc;border-left:7px solid transparent;border-bottom-color:rgba(0,0,0,0.2);content:''}.navbar .nav>li>.dropdown-menu:after{position:absolute;top:-6px;left:10px;display:inline-block;border-right:6px solid transparent;border-bottom:6px solid #fff;border-left:6px solid transparent;content:''}.navbar-fixed-bottom .nav>li>.dropdown-menu:before{top:auto;bottom:-7px;border-top:7px solid #ccc;border-bottom:0;border-top-color:rgba(0,0,0,0.2)}.navbar-fixed-bottom .nav>li>.dropdown-menu:after{top:auto;bottom:-6px;border-top:6px solid #fff;border-bottom:0}.navbar .nav li.dropdown.open>.dropdown-toggle,.navbar .nav li.dropdown.active>.dropdown-toggle,.navbar .nav li.dropdown.open.active>.dropdown-toggle{color:#555;background-color:#e5e5e5}.navbar .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .nav li.dropdown.open>.dropdown-toggle .caret,.navbar .nav li.dropdown.active>.dropdown-toggle .caret,.navbar .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#555;border-bottom-color:#555}.navbar .pull-right>li>.dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right{right:0;left:auto}.navbar .pull-right>li>.dropdown-menu:before,.navbar .nav>li>.dropdown-menu.pull-right:before{right:12px;left:auto}.navbar .pull-right>li>.dropdown-menu:after,.navbar .nav>li>.dropdown-menu.pull-right:after{right:13px;left:auto}.navbar .pull-right>li>.dropdown-menu .dropdown-menu,.navbar .nav>li>.dropdown-menu.pull-right .dropdown-menu{right:100%;left:auto;margin-right:-1px;margin-left:0;-webkit-border-radius:6px 0 6px 6px;-moz-border-radius:6px 0 6px 6px;border-radius:6px 0 6px 6px}.navbar-inverse{color:#999}.navbar-inverse .navbar-inner{background-color:#1b1b1b;background-image:-moz-linear-gradient(top,#222,#111);background-image:-webkit-gradient(linear,0 0,0 100%,from(#222),to(#111));background-image:-webkit-linear-gradient(top,#222,#111);background-image:-o-linear-gradient(top,#222,#111);background-image:linear-gradient(to bottom,#222,#111);background-repeat:repeat-x;border-color:#252525;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff222222',endColorstr='#ff111111',GradientType=0)}.navbar-inverse .brand,.navbar-inverse .nav>li>a{color:#999;text-shadow:0 -1px 0 rgba(0,0,0,0.25)}.navbar-inverse .brand:hover,.navbar-inverse .nav>li>a:hover{color:#fff}.navbar-inverse .nav>li>a:focus,.navbar-inverse .nav>li>a:hover{color:#fff;background-color:transparent}.navbar-inverse .nav .active>a,.navbar-inverse .nav .active>a:hover,.navbar-inverse .nav .active>a:focus{color:#fff;background-color:#111}.navbar-inverse .navbar-link{color:#999}.navbar-inverse 
.navbar-link:hover{color:#fff}.navbar-inverse .divider-vertical{border-right-color:#222;border-left-color:#111}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle{color:#fff;background-color:#111}.navbar-inverse .nav li.dropdown>.dropdown-toggle .caret{border-top-color:#999;border-bottom-color:#999}.navbar-inverse .nav li.dropdown.open>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.active>.dropdown-toggle .caret,.navbar-inverse .nav li.dropdown.open.active>.dropdown-toggle .caret{border-top-color:#fff;border-bottom-color:#fff}.navbar-inverse .navbar-search .search-query{color:#fff;background-color:#515151;border-color:#111;-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px 0 rgba(255,255,255,0.15);-webkit-transition:none;-moz-transition:none;-o-transition:none;transition:none}.navbar-inverse .navbar-search .search-query:-moz-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:-ms-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query::-webkit-input-placeholder{color:#ccc}.navbar-inverse .navbar-search .search-query:focus,.navbar-inverse .navbar-search .search-query.focused{padding:5px 15px;color:#333;text-shadow:0 1px 0 #fff;background-color:#fff;border:0;outline:0;-webkit-box-shadow:0 0 3px rgba(0,0,0,0.15);-moz-box-shadow:0 0 3px rgba(0,0,0,0.15);box-shadow:0 0 3px rgba(0,0,0,0.15)}.navbar-inverse .btn-navbar{color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e0e0e;*background-color:#040404;background-image:-webkit-gradient(linear,0 0,0 100%,from(#151515),to(#040404));background-image:-webkit-linear-gradient(top,#151515,#040404);background-image:-o-linear-gradient(top,#151515,#040404);background-image:linear-gradient(to bottom,#151515,#040404);background-image:-moz-linear-gradient(top,#151515,#040404);background-repeat:repeat-x;border-color:#040404 #040404 #000;border-color:rgba(0,0,0,0.1) rgba(0,0,0,0.1) rgba(0,0,0,0.25);filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff151515',endColorstr='#ff040404',GradientType=0);filter:progid:dximagetransform.microsoft.gradient(enabled=false)}.navbar-inverse .btn-navbar:hover,.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active,.navbar-inverse .btn-navbar.disabled,.navbar-inverse .btn-navbar[disabled]{color:#fff;background-color:#040404;*background-color:#000}.navbar-inverse .btn-navbar:active,.navbar-inverse .btn-navbar.active{background-color:#000 \9}.breadcrumb{padding:8px 15px;margin:0 0 20px;list-style:none;background-color:#f5f5f5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.breadcrumb li{display:inline-block;*display:inline;text-shadow:0 1px 0 #fff;*zoom:1}.breadcrumb .divider{padding:0 5px;color:#ccc}.breadcrumb .active{color:#999}.pagination{height:40px;margin:20px 0}.pagination ul{display:inline-block;*display:inline;margin-bottom:0;margin-left:0;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px;*zoom:1;-webkit-box-shadow:0 1px 2px rgba(0,0,0,0.05);-moz-box-shadow:0 1px 2px rgba(0,0,0,0.05);box-shadow:0 1px 2px rgba(0,0,0,0.05)}.pagination li{display:inline}.pagination a,.pagination span{float:left;padding:0 14px;line-height:38px;text-decoration:none;background-color:#fff;border:1px solid #ddd;border-left-width:0}.pagination 
a:hover,.pagination .active a,.pagination .active span{background-color:#f5f5f5}.pagination .active a,.pagination .active span{color:#999;cursor:default}.pagination .disabled span,.pagination .disabled a,.pagination .disabled a:hover{color:#999;cursor:default;background-color:transparent}.pagination li:first-child a,.pagination li:first-child span{border-left-width:1px;-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.pagination li:last-child a,.pagination li:last-child span{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.pagination-centered{text-align:center}.pagination-right{text-align:right}.pager{margin:20px 0;text-align:center;list-style:none;*zoom:1}.pager:before,.pager:after{display:table;line-height:0;content:""}.pager:after{clear:both}.pager li{display:inline}.pager a{display:inline-block;padding:5px 14px;background-color:#fff;border:1px solid #ddd;-webkit-border-radius:15px;-moz-border-radius:15px;border-radius:15px}.pager a:hover{text-decoration:none;background-color:#f5f5f5}.pager .next a{float:right}.pager .previous a{float:left}.pager .disabled a,.pager .disabled a:hover{color:#999;cursor:default;background-color:#fff}.modal-open .dropdown-menu{z-index:2050}.modal-open .dropdown.open{*z-index:2050}.modal-open .popover{z-index:2060}.modal-open .tooltip{z-index:2080}.modal-backdrop{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1040;background-color:#000}.modal-backdrop.fade{opacity:0}.modal-backdrop,.modal-backdrop.fade.in{opacity:.8;filter:alpha(opacity=80)}.modal{position:fixed;top:50%;left:50%;z-index:1050;width:560px;margin:-250px 0 0 -280px;overflow:auto;background-color:#fff;border:1px solid #999;border:1px solid rgba(0,0,0,0.3);*border:1px solid #999;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 3px 7px rgba(0,0,0,0.3);-moz-box-shadow:0 3px 7px rgba(0,0,0,0.3);box-shadow:0 3px 7px rgba(0,0,0,0.3);-webkit-background-clip:padding-box;-moz-background-clip:padding-box;background-clip:padding-box}.modal.fade{top:-25%;-webkit-transition:opacity .3s linear,top .3s ease-out;-moz-transition:opacity .3s linear,top .3s ease-out;-o-transition:opacity .3s linear,top .3s ease-out;transition:opacity .3s linear,top .3s ease-out}.modal.fade.in{top:50%}.modal-header{padding:9px 15px;border-bottom:1px solid #eee}.modal-header .close{margin-top:2px}.modal-header h3{margin:0;line-height:30px}.modal-body{max-height:400px;padding:15px;overflow-y:auto}.modal-form{margin-bottom:0}.modal-footer{padding:14px 15px 15px;margin-bottom:0;text-align:right;background-color:#f5f5f5;border-top:1px solid #ddd;-webkit-border-radius:0 0 6px 6px;-moz-border-radius:0 0 6px 6px;border-radius:0 0 6px 6px;*zoom:1;-webkit-box-shadow:inset 0 1px 0 #fff;-moz-box-shadow:inset 0 1px 0 #fff;box-shadow:inset 0 1px 0 #fff}.modal-footer:before,.modal-footer:after{display:table;line-height:0;content:""}.modal-footer:after{clear:both}.modal-footer .btn+.btn{margin-bottom:0;margin-left:5px}.modal-footer .btn-group .btn+.btn{margin-left:-1px}.tooltip{position:absolute;z-index:1030;display:block;padding:5px;font-size:11px;opacity:0;filter:alpha(opacity=0);visibility:visible}.tooltip.in{opacity:.8;filter:alpha(opacity=80)}.tooltip.top{margin-top:-3px}.tooltip.right{margin-left:3px}.tooltip.bottom{margin-top:3px}.tooltip.left{margin-left:-3px}.tooltip-inner{max-width:200px;padding:3px 
8px;color:#fff;text-align:center;text-decoration:none;background-color:#000;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.tooltip-arrow{position:absolute;width:0;height:0;border-color:transparent;border-style:solid}.tooltip.top .tooltip-arrow{bottom:0;left:50%;margin-left:-5px;border-top-color:#000;border-width:5px 5px 0}.tooltip.right .tooltip-arrow{top:50%;left:0;margin-top:-5px;border-right-color:#000;border-width:5px 5px 5px 0}.tooltip.left .tooltip-arrow{top:50%;right:0;margin-top:-5px;border-left-color:#000;border-width:5px 0 5px 5px}.tooltip.bottom .tooltip-arrow{top:0;left:50%;margin-left:-5px;border-bottom-color:#000;border-width:0 5px 5px}.popover{position:absolute;top:0;left:0;z-index:1010;display:none;width:236px;padding:1px;background-color:#fff;border:1px solid #ccc;border:1px solid rgba(0,0,0,0.2);-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px;-webkit-box-shadow:0 5px 10px rgba(0,0,0,0.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,0.2);box-shadow:0 5px 10px rgba(0,0,0,0.2);-webkit-background-clip:padding-box;-moz-background-clip:padding;background-clip:padding-box}.popover.top{margin-bottom:10px}.popover.right{margin-left:10px}.popover.bottom{margin-top:10px}.popover.left{margin-right:10px}.popover-title{padding:8px 14px;margin:0;font-size:14px;font-weight:normal;line-height:18px;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;-webkit-border-radius:5px 5px 0 0;-moz-border-radius:5px 5px 0 0;border-radius:5px 5px 0 0}.popover-content{padding:9px 14px}.popover-content p,.popover-content ul,.popover-content ol{margin-bottom:0}.popover .arrow,.popover .arrow:after{position:absolute;display:inline-block;width:0;height:0;border-color:transparent;border-style:solid}.popover .arrow:after{z-index:-1;content:""}.popover.top .arrow{bottom:-10px;left:50%;margin-left:-10px;border-top-color:#fff;border-width:10px 10px 0}.popover.top .arrow:after{bottom:-1px;left:-11px;border-top-color:rgba(0,0,0,0.25);border-width:11px 11px 0}.popover.right .arrow{top:50%;left:-10px;margin-top:-10px;border-right-color:#fff;border-width:10px 10px 10px 0}.popover.right .arrow:after{bottom:-11px;left:-1px;border-right-color:rgba(0,0,0,0.25);border-width:11px 11px 11px 0}.popover.bottom .arrow{top:-10px;left:50%;margin-left:-10px;border-bottom-color:#fff;border-width:0 10px 10px}.popover.bottom .arrow:after{top:-1px;left:-11px;border-bottom-color:rgba(0,0,0,0.25);border-width:0 11px 11px}.popover.left .arrow{top:50%;right:-10px;margin-top:-10px;border-left-color:#fff;border-width:10px 0 10px 10px}.popover.left .arrow:after{right:-1px;bottom:-11px;border-left-color:rgba(0,0,0,0.25);border-width:11px 0 11px 11px}.thumbnails{margin-left:-20px;list-style:none;*zoom:1}.thumbnails:before,.thumbnails:after{display:table;line-height:0;content:""}.thumbnails:after{clear:both}.row-fluid .thumbnails{margin-left:0}.thumbnails>li{float:left;margin-bottom:20px;margin-left:20px}.thumbnail{display:block;padding:4px;line-height:20px;border:1px solid #ddd;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;-webkit-box-shadow:0 1px 3px rgba(0,0,0,0.055);-moz-box-shadow:0 1px 3px rgba(0,0,0,0.055);box-shadow:0 1px 3px rgba(0,0,0,0.055);-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;-o-transition:all .2s ease-in-out;transition:all .2s ease-in-out}a.thumbnail:hover{border-color:#08c;-webkit-box-shadow:0 1px 4px rgba(0,105,214,0.25);-moz-box-shadow:0 1px 4px rgba(0,105,214,0.25);box-shadow:0 1px 4px 
rgba(0,105,214,0.25)}.thumbnail>img{display:block;max-width:100%;margin-right:auto;margin-left:auto}.thumbnail .caption{padding:9px;color:#555}.label,.badge{font-size:11.844px;font-weight:bold;line-height:14px;color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,0.25);white-space:nowrap;vertical-align:baseline;background-color:#999}.label{padding:1px 4px 2px;-webkit-border-radius:3px;-moz-border-radius:3px;border-radius:3px}.badge{padding:1px 9px 2px;-webkit-border-radius:9px;-moz-border-radius:9px;border-radius:9px}a.label:hover,a.badge:hover{color:#fff;text-decoration:none;cursor:pointer}.label-important,.badge-important{background-color:#b94a48}.label-important[href],.badge-important[href]{background-color:#953b39}.label-warning,.badge-warning{background-color:#f89406}.label-warning[href],.badge-warning[href]{background-color:#c67605}.label-success,.badge-success{background-color:#468847}.label-success[href],.badge-success[href]{background-color:#356635}.label-info,.badge-info{background-color:#3a87ad}.label-info[href],.badge-info[href]{background-color:#2d6987}.label-inverse,.badge-inverse{background-color:#333}.label-inverse[href],.badge-inverse[href]{background-color:#1a1a1a}.btn .label,.btn .badge{position:relative;top:-1px}.btn-mini .label,.btn-mini .badge{top:0}@-webkit-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-moz-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-ms-keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}@-o-keyframes progress-bar-stripes{from{background-position:0 0}to{background-position:40px 0}}@keyframes progress-bar-stripes{from{background-position:40px 0}to{background-position:0 0}}.progress{height:20px;margin-bottom:20px;overflow:hidden;background-color:#f7f7f7;background-image:-moz-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#f5f5f5),to(#f9f9f9));background-image:-webkit-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:-o-linear-gradient(top,#f5f5f5,#f9f9f9);background-image:linear-gradient(to bottom,#f5f5f5,#f9f9f9);background-repeat:repeat-x;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fff5f5f5',endColorstr='#fff9f9f9',GradientType=0);-webkit-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);-moz-box-shadow:inset 0 1px 2px rgba(0,0,0,0.1);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1)}.progress .bar{float:left;width:0;height:100%;font-size:12px;color:#fff;text-align:center;text-shadow:0 -1px 0 rgba(0,0,0,0.25);background-color:#0e90d2;background-image:-moz-linear-gradient(top,#149bdf,#0480be);background-image:-webkit-gradient(linear,0 0,0 100%,from(#149bdf),to(#0480be));background-image:-webkit-linear-gradient(top,#149bdf,#0480be);background-image:-o-linear-gradient(top,#149bdf,#0480be);background-image:linear-gradient(to bottom,#149bdf,#0480be);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff149bdf',endColorstr='#ff0480be',GradientType=0);-webkit-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-moz-box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 0 -1px 0 rgba(0,0,0,0.15);-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;-webkit-transition:width .6s ease;-moz-transition:width .6s ease;-o-transition:width .6s ease;transition:width .6s ease}.progress .bar+.bar{-webkit-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 
rgba(0,0,0,0.15);-moz-box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15);box-shadow:inset 1px 0 0 rgba(0,0,0,0.15),inset 0 -1px 0 rgba(0,0,0,0.15)}.progress-striped .bar{background-color:#149bdf;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);-webkit-background-size:40px 40px;-moz-background-size:40px 40px;-o-background-size:40px 40px;background-size:40px 40px}.progress.active .bar{-webkit-animation:progress-bar-stripes 2s linear infinite;-moz-animation:progress-bar-stripes 2s linear infinite;-ms-animation:progress-bar-stripes 2s linear infinite;-o-animation:progress-bar-stripes 2s linear infinite;animation:progress-bar-stripes 2s linear infinite}.progress-danger .bar,.progress .bar-danger{background-color:#dd514c;background-image:-moz-linear-gradient(top,#ee5f5b,#c43c35);background-image:-webkit-gradient(linear,0 0,0 100%,from(#ee5f5b),to(#c43c35));background-image:-webkit-linear-gradient(top,#ee5f5b,#c43c35);background-image:-o-linear-gradient(top,#ee5f5b,#c43c35);background-image:linear-gradient(to bottom,#ee5f5b,#c43c35);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ffee5f5b',endColorstr='#ffc43c35',GradientType=0)}.progress-danger.progress-striped .bar,.progress-striped .bar-danger{background-color:#ee5f5b;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-success .bar,.progress .bar-success{background-color:#5eb95e;background-image:-moz-linear-gradient(top,#62c462,#57a957);background-image:-webkit-gradient(linear,0 0,0 
100%,from(#62c462),to(#57a957));background-image:-webkit-linear-gradient(top,#62c462,#57a957);background-image:-o-linear-gradient(top,#62c462,#57a957);background-image:linear-gradient(to bottom,#62c462,#57a957);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff62c462',endColorstr='#ff57a957',GradientType=0)}.progress-success.progress-striped .bar,.progress-striped .bar-success{background-color:#62c462;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-info .bar,.progress .bar-info{background-color:#4bb1cf;background-image:-moz-linear-gradient(top,#5bc0de,#339bb9);background-image:-webkit-gradient(linear,0 0,0 100%,from(#5bc0de),to(#339bb9));background-image:-webkit-linear-gradient(top,#5bc0de,#339bb9);background-image:-o-linear-gradient(top,#5bc0de,#339bb9);background-image:linear-gradient(to bottom,#5bc0de,#339bb9);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#ff5bc0de',endColorstr='#ff339bb9',GradientType=0)}.progress-info.progress-striped .bar,.progress-striped .bar-info{background-color:#5bc0de;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.progress-warning .bar,.progress .bar-warning{background-color:#faa732;background-image:-moz-linear-gradient(top,#fbb450,#f89406);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fbb450),to(#f89406));background-image:-webkit-linear-gradient(top,#fbb450,#f89406);background-image:-o-linear-gradient(top,#fbb450,#f89406);background-image:linear-gradient(to 
bottom,#fbb450,#f89406);background-repeat:repeat-x;filter:progid:dximagetransform.microsoft.gradient(startColorstr='#fffbb450',endColorstr='#fff89406',GradientType=0)}.progress-warning.progress-striped .bar,.progress-striped .bar-warning{background-color:#fbb450;background-image:-webkit-gradient(linear,0 100%,100% 0,color-stop(0.25,rgba(255,255,255,0.15)),color-stop(0.25,transparent),color-stop(0.5,transparent),color-stop(0.5,rgba(255,255,255,0.15)),color-stop(0.75,rgba(255,255,255,0.15)),color-stop(0.75,transparent),to(transparent));background-image:-webkit-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-moz-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:-o-linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent);background-image:linear-gradient(45deg,rgba(255,255,255,0.15) 25%,transparent 25%,transparent 50%,rgba(255,255,255,0.15) 50%,rgba(255,255,255,0.15) 75%,transparent 75%,transparent)}.accordion{margin-bottom:20px}.accordion-group{margin-bottom:2px;border:1px solid #e5e5e5;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.accordion-heading{border-bottom:0}.accordion-heading .accordion-toggle{display:block;padding:8px 15px}.accordion-toggle{cursor:pointer}.accordion-inner{padding:9px 15px;border-top:1px solid #e5e5e5}.carousel{position:relative;margin-bottom:20px;line-height:1}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel .item{position:relative;display:none;-webkit-transition:.6s ease-in-out left;-moz-transition:.6s ease-in-out left;-o-transition:.6s ease-in-out left;transition:.6s ease-in-out left}.carousel .item>img{display:block;line-height:1}.carousel .active,.carousel .next,.carousel .prev{display:block}.carousel .active{left:0}.carousel .next,.carousel .prev{position:absolute;top:0;width:100%}.carousel .next{left:100%}.carousel .prev{left:-100%}.carousel .next.left,.carousel .prev.right{left:0}.carousel .active.left{left:-100%}.carousel .active.right{left:100%}.carousel-control{position:absolute;top:40%;left:15px;width:40px;height:40px;margin-top:-20px;font-size:60px;font-weight:100;line-height:30px;color:#fff;text-align:center;background:#222;border:3px solid #fff;-webkit-border-radius:23px;-moz-border-radius:23px;border-radius:23px;opacity:.5;filter:alpha(opacity=50)}.carousel-control.right{right:15px;left:auto}.carousel-control:hover{color:#fff;text-decoration:none;opacity:.9;filter:alpha(opacity=90)}.carousel-caption{position:absolute;right:0;bottom:0;left:0;padding:15px;background:#333;background:rgba(0,0,0,0.75)}.carousel-caption h4,.carousel-caption p{line-height:20px;color:#fff}.carousel-caption h4{margin:0 0 5px}.carousel-caption p{margin-bottom:0}.hero-unit{padding:60px;margin-bottom:30px;background-color:#eee;-webkit-border-radius:6px;-moz-border-radius:6px;border-radius:6px}.hero-unit h1{margin-bottom:0;font-size:60px;line-height:1;letter-spacing:-1px;color:inherit}.hero-unit p{font-size:18px;font-weight:200;line-height:30px;color:inherit}.pull-right{float:right}.pull-left{float:left}.hide{display:none}.show{display:block}.invisible{visibility:hidden}.affix{position:fixed} diff --git a/docs/ec2-scripts.md b/docs/ec2-scripts.md index eab8a0ff20..da0c06e2a6 
100644 --- a/docs/ec2-scripts.md +++ b/docs/ec2-scripts.md @@ -4,10 +4,11 @@ title: Running Spark on EC2 --- The `spark-ec2` script, located in Spark's `ec2` directory, allows you -to launch, manage and shut down Spark clusters on Amazon EC2. It automatically sets up Mesos, Spark and HDFS -on the cluster for you. -This guide describes how to use `spark-ec2` to launch clusters, how to run jobs on them, and how to shut them down. -It assumes you've already signed up for an EC2 account on the [Amazon Web Services site](http://aws.amazon.com/). +to launch, manage and shut down Spark clusters on Amazon EC2. It automatically +sets up Spark, Shark and HDFS on the cluster for you. This guide describes +how to use `spark-ec2` to launch clusters, how to run jobs on them, and how +to shut them down. It assumes you've already signed up for an EC2 account +on the [Amazon Web Services site](http://aws.amazon.com/). `spark-ec2` is designed to manage multiple named clusters. You can launch a new cluster (telling the script its size and giving it a name), @@ -59,18 +60,22 @@ RAM). Refer to the Amazon pages about [EC2 instance types](http://aws.amazon.com/ec2/instance-types) and [EC2 pricing](http://aws.amazon.com/ec2/#pricing) for information about other instance types. +- `--region=<EC2_REGION>` specifies an EC2 region in which to launch +instances. The default region is `us-east-1`. - `--zone=<EC2_ZONE>` can be used to specify an EC2 availability zone to launch instances in. Sometimes, you will get an error because there is not enough capacity in one zone, and you should try to launch in -another. This happens mostly with the `m1.large` instance types; -extra-large (both `m1.xlarge` and `c1.xlarge`) instances tend to be more -available. +another. - `--ebs-vol-size=GB` will attach an EBS volume with a given amount of space to each node so that you can have a persistent HDFS cluster on your nodes across cluster restarts (see below). - `--spot-price=PRICE` will launch the worker nodes as [Spot Instances](http://aws.amazon.com/ec2/spot-instances/), bidding for the given maximum price (in dollars). +- `--spark-version=VERSION` will pre-load the cluster with the + specified version of Spark. VERSION can be a version number + (e.g. "0.7.3") or a specific git hash. By default, a recent + version will be used. - If one of your launches fails due to e.g. not having the right permissions on your private key file, you can run `launch` with the `--resume` option to restart the setup process on an existing cluster. @@ -99,9 +104,8 @@ permissions on your private key file, you can run `launch` with the `spark-ec2` to attach a persistent EBS volume to each node for storing the persistent HDFS. - Finally, if you get errors while running your jobs, look at the slave's logs - for that job inside of the Mesos work directory (/mnt/mesos-work). You can - also view the status of the cluster using the Mesos web UI - (`http://<master-hostname>:8080`). + for that job inside of the scheduler work directory (/root/spark/work). You can + also view the status of the cluster using the web UI: `http://<master-hostname>:8080`. # Configuration @@ -140,22 +144,14 @@ section. # Limitations -- `spark-ec2` currently only launches machines in the US-East region of EC2. - It should not be hard to make it launch VMs in other zones, but you will need - to create your own AMIs in them. - Support for "cluster compute" nodes is limited -- there's no way to specify a locality group. 
However, you can launch slave nodes in your `<clusterName>-slaves` group manually and then use `spark-ec2 launch --resume` to start a cluster with them. -- Support for spot instances is limited. If you have a patch or suggestion for one of these limitations, feel free to [contribute](contributing-to-spark.html) it! -# Using a Newer Spark Version - -The Spark EC2 machine images may not come with the latest version of Spark. To use a newer version, you can run `git pull` to pull in `/root/spark` to pull in the latest version of Spark from `git`, and build it using `sbt/sbt compile`. You will also need to copy it to all the other nodes in the cluster using `~/spark-ec2/copy-dir /root/spark`. - # Accessing Data in S3 Spark's file interface allows it to process data in Amazon S3 using the same URI formats that are supported for Hadoop. You can specify a path in S3 as input through a URI of the form `s3n://<bucket>/path`. You will also need to set your Amazon security credentials, either by setting the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` before your program or through `SparkContext.hadoopConfiguration`. Full instructions on S3 access using the Hadoop input libraries can be found on the [Hadoop S3 page](http://wiki.apache.org/hadoop/AmazonS3). diff --git a/docs/hardware-provisioning.md b/docs/hardware-provisioning.md new file mode 100644 index 0000000000..e5f054cb14 --- /dev/null +++ b/docs/hardware-provisioning.md @@ -0,0 +1,69 @@ +--- +layout: global +title: Hardware Provisioning +--- + +A common question received by Spark developers is how to configure hardware for it. While the right +hardware will depend on the situation, we make the following recommendations. + +# Storage Systems + +Because most Spark jobs will likely have to read input data from an external storage system (e.g. +the Hadoop File System, or HBase), it is important to place Spark **as close to this system as +possible**. We recommend the following: + +* If at all possible, run Spark on the same nodes as HDFS. The simplest way is to set up a Spark +[standalone mode cluster](spark-standalone.html) on the same nodes, and configure Spark and +Hadoop's memory and CPU usage to avoid interference (for Hadoop, the relevant options are +`mapred.child.java.opts` for the per-task memory and `mapred.tasktracker.map.tasks.maximum` +and `mapred.tasktracker.reduce.tasks.maximum` for the number of tasks). Alternatively, you can run +Hadoop and Spark on a common cluster manager like [Mesos](running-on-mesos.html) or +[Hadoop YARN](running-on-yarn.html). + +* If this is not possible, run Spark on different nodes in the same local-area network as HDFS. + +* For low-latency data stores like HBase, it may be preferable to run computing jobs on different +nodes than the storage system to avoid interference. + +# Local Disks + +While Spark can perform a lot of its computation in memory, it still uses local disks to store +data that doesn't fit in RAM, as well as to preserve intermediate output between stages. We +recommend having **4-8 disks** per node, configured _without_ RAID (just as separate mount points). +In Linux, mount the disks with the [`noatime` option](http://www.centos.org/docs/5/html/Global_File_System/s2-manage-mountnoatime.html) +to reduce unnecessary writes. In Spark, [configure](configuration.html) the `spark.local.dir` +variable to be a comma-separated list of the local disks. If you are running HDFS, it's fine to +use the same disks as HDFS. 
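(Aside, not part of this commit: in this era of Spark, settings such as `spark.local.dir` are plain JVM system properties, so a minimal Scala sketch of pointing Spark at several disks could look like the following -- the object name and mount points are hypothetical.)

{% highlight scala %}
import org.apache.spark.SparkContext

object LocalDirSketch {
  def main(args: Array[String]) {
    // spark.local.dir takes a comma-separated list of directories and must be
    // set before the SparkContext is constructed. Hypothetical mount points:
    System.setProperty("spark.local.dir", "/mnt/disk1/spark,/mnt/disk2/spark")
    val sc = new SparkContext("local[4]", "Local Dir Sketch")
    // Shuffle and spill files are now spread across both disks.
    sc.stop()
  }
}
{% endhighlight %}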
+ +# Memory + +In general, Spark can run well with anywhere from **8 GB to hundreds of gigabytes** of memory per +machine. In all cases, we recommend allocating at most 75% of the memory for Spark; leave the +rest for the operating system and buffer cache. + +How much memory you will need will depend on your application. To determine how much your +application uses for a certain dataset size, load part of your dataset in a Spark RDD and use the +Storage tab of Spark's monitoring UI (`http://<driver-node>:3030`) to see its size in memory. +Note that memory usage is greatly affected by storage level and serialization format -- see +the [tuning guide](tuning.html) for tips on how to reduce it. + +Finally, note that the Java VM does not always behave well with more than 200 GB of RAM. If you +purchase machines with more RAM than this, you can run _multiple worker JVMs per node_. In +Spark's [standalone mode](spark-standalone.html), you can set the number of workers per node +with the `SPARK_WORKER_INSTANCES` variable in `conf/spark-env.sh`, and the number of cores +per worker with `SPARK_WORKER_CORES`. + +# Network + +In our experience, when the data is in memory, a lot of Spark applications are network-bound. +Using a **10 Gigabit** or higher network is the best way to make these applications faster. +This is especially true for "distributed reduce" applications such as group-bys, reduce-bys, and +SQL joins. In any given application, you can see how much data Spark shuffles across the network +from the application's monitoring UI (`http://<driver-node>:3030`). + +# CPU Cores + +Spark scales well to tens of CPU cores per machine because it performs minimal sharing between +threads. You should likely provision at least **8-16 cores** per machine. Depending on the CPU +cost of your workload, you may also need more: once data is in memory, most applications are +either CPU- or network-bound. diff --git a/docs/img/incubator-logo.png b/docs/img/incubator-logo.png Binary files differ new file mode 100644 index 0000000000..33ca7f6227 --- /dev/null +++ b/docs/img/incubator-logo.png diff --git a/docs/index.md b/docs/index.md index 0c4add45dc..3cf9cc1c64 100644 --- a/docs/index.md +++ b/docs/index.md @@ -3,50 +3,52 @@ layout: global title: Spark Overview --- -Spark is a MapReduce-like cluster computing framework designed for low-latency iterative jobs and interactive use from an interpreter. -It provides clean, language-integrated APIs in [Scala](scala-programming-guide.html), [Java](java-programming-guide.html), and [Python](python-programming-guide.html), with a rich array of parallel operators. +Apache Spark is a fast and general-purpose cluster computing system. +It provides high-level APIs in [Scala](scala-programming-guide.html), [Java](java-programming-guide.html), and [Python](python-programming-guide.html) that make parallel jobs easy to write, and an optimized engine that supports general computation graphs. Spark can run on the Apache Mesos cluster manager, Hadoop YARN, Amazon EC2, or without an independent resource manager ("standalone mode"). # Downloading -Get Spark by visiting the [downloads page](http://spark-project.org/downloads.html) of the Spark website. This documentation is for Spark version {{site.SPARK_VERSION}}. +Get Spark by visiting the [downloads page](http://spark.incubator.apache.org/downloads.html) of the Apache Spark site. This documentation is for Spark version {{site.SPARK_VERSION}}. 
# Building -Spark requires [Scala {{site.SCALA_VERSION}}](http://www.scala-lang.org/). You will need to have Scala's `bin` directory in your `PATH`, -or you will need to set the `SCALA_HOME` environment variable to point -to where you've installed Scala. Scala must also be accessible through one -of these methods on slave nodes on your cluster. - Spark uses [Simple Build Tool](http://www.scala-sbt.org), which is bundled with it. To compile the code, go into the top-level Spark directory and run - sbt/sbt package + sbt/sbt assembly -Spark also supports building using Maven. If you would like to build using Maven, see the [instructions for building Spark with Maven](building-with-maven.html). +For its Scala API, Spark {{site.SPARK_VERSION}} depends on Scala {{site.SCALA_VERSION}}. If you write applications in Scala, you will need to use this same version of Scala in your own program -- newer major versions may not work. You can get the right version of Scala from [scala-lang.org](http://www.scala-lang.org/download/). # Testing the Build -Spark comes with a number of sample programs in the `examples` directory. -To run one of the samples, use `./run <class> <params>` in the top-level Spark directory -(the `run` script sets up the appropriate paths and launches that program). -For example, `./run spark.examples.SparkPi` will run a sample program that estimates Pi. Each of the -examples prints usage help if no params are given. +Spark comes with several sample programs in the `examples` directory. +To run one of the samples, use `./run-example <class> <params>` in the top-level Spark directory +(the `run-example` script sets up the appropriate paths and launches that program). +For example, try `./run-example org.apache.spark.examples.SparkPi local`. +Each example prints usage help when run with no parameters. Note that all of the sample programs take a `<master>` parameter specifying the cluster URL to connect to. This can be a [URL for a distributed cluster](scala-programming-guide.html#master-urls), or `local` to run locally with one thread, or `local[N]` to run locally with N threads. You should start by using `local` for testing. -Finally, Spark can be used interactively from a modified version of the Scala interpreter that you can start through -`./spark-shell`. This is a great way to learn Spark. +Finally, Spark can be used interactively through modified versions of the Scala shell (`./spark-shell`) or +Python interpreter (`./pyspark`). These are a great way to learn Spark. # A Note About Hadoop Versions -Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported +Spark uses the Hadoop-client library to talk to HDFS and other Hadoop-supported storage systems. Because the HDFS protocol has changed in different versions of -Hadoop, you must build Spark against the same version that your cluster runs. -You can change the version by setting the `HADOOP_VERSION` variable at the top -of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`). +Hadoop, you must build Spark against the same version that your cluster uses. +By default, Spark links to Hadoop 1.0.4. 
You can change this by setting the +`SPARK_HADOOP_VERSION` variable when compiling: + + SPARK_HADOOP_VERSION=1.2.1 sbt/sbt assembly + +In addition, if you wish to run Spark on [YARN](running-on-yarn.html), set +`SPARK_YARN` to `true`: + + SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly # Where to Go from Here @@ -54,15 +56,20 @@ of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`). * [Quick Start](quick-start.html): a quick introduction to the Spark API; start here! * [Spark Programming Guide](scala-programming-guide.html): an overview of Spark concepts, and details on the Scala API -* [Java Programming Guide](java-programming-guide.html): using Spark from Java -* [Python Programming Guide](python-programming-guide.html): using Spark from Python -* [Spark Streaming Guide](streaming-programming-guide.html): using the alpha release of Spark Streaming + * [Java Programming Guide](java-programming-guide.html): using Spark from Java + * [Python Programming Guide](python-programming-guide.html): using Spark from Python +* [Spark Streaming](streaming-programming-guide.html): using the alpha release of Spark Streaming +* [MLlib (Machine Learning)](mllib-guide.html): Spark's built-in machine learning library +* [Bagel (Pregel on Spark)](bagel-programming-guide.html): simple graph processing model **API Docs:** -* [Spark Java/Scala (Scaladoc)](api/core/index.html) -* [Spark Python (Epydoc)](api/pyspark/index.html) -* [Spark Streaming Java/Scala (Scaladoc)](api/streaming/index.html) +* [Spark for Java/Scala (Scaladoc)](api/core/index.html) +* [Spark for Python (Epydoc)](api/pyspark/index.html) +* [Spark Streaming for Java/Scala (Scaladoc)](api/streaming/index.html) +* [MLlib (Machine Learning) for Java/Scala (Scaladoc)](api/mllib/index.html) +* [Bagel (Pregel on Spark) for Scala (Scaladoc)](api/bagel/index.html) + **Deployment guides:** @@ -74,27 +81,27 @@ of `project/SparkBuild.scala`, then rebuilding Spark (`sbt/sbt clean compile`). **Other documents:** -* [Building Spark With Maven](building-with-maven.html): Build Spark using the Maven build tool * [Configuration](configuration.html): customize Spark via its configuration system * [Tuning Guide](tuning.html): best practices to optimize performance and memory use -* [Bagel](bagel-programming-guide.html): an implementation of Google's Pregel on Spark +* [Hardware Provisioning](hardware-provisioning.html): recommendations for cluster hardware +* [Building Spark with Maven](building-with-maven.html): Build Spark using the Maven build tool * [Contributing to Spark](contributing-to-spark.html) **External resources:** -* [Spark Homepage](http://www.spark-project.org) -* [Mailing List](http://groups.google.com/group/spark-users): ask questions about Spark here -* [AMP Camp](http://ampcamp.berkeley.edu/): a two-day training camp at UC Berkeley that featured talks and exercises - about Spark, Shark, Mesos, and more. [Videos](http://ampcamp.berkeley.edu/agenda-2012), +* [Spark Homepage](http://spark.incubator.apache.org) +* [Mailing Lists](http://spark.incubator.apache.org/mailing-lists.html): ask questions about Spark here +* [AMP Camps](http://ampcamp.berkeley.edu/): a series of training camps at UC Berkeley that featured talks and + exercises about Spark, Shark, Mesos, and more. [Videos](http://ampcamp.berkeley.edu/agenda-2012), [slides](http://ampcamp.berkeley.edu/agenda-2012) and [exercises](http://ampcamp.berkeley.edu/exercises-2012) are available online for free. 
-* [Code Examples](http://spark-project.org/examples.html): more are also available in the [examples subfolder](https://github.com/mesos/spark/tree/master/examples/src/main/scala/spark/examples) of Spark +* [Code Examples](http://spark.incubator.apache.org/examples.html): more are also available in the [examples subfolder](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/) of Spark * [Paper Describing Spark](http://www.cs.berkeley.edu/~matei/papers/2012/nsdi_spark.pdf) * [Paper Describing Spark Streaming](http://www.eecs.berkeley.edu/Pubs/TechRpts/2012/EECS-2012-259.pdf) # Community -To get help using Spark or keep up with Spark development, sign up for the [spark-users mailing list](http://groups.google.com/group/spark-users). +To get help using Spark or keep up with Spark development, sign up for the [user mailing list](http://spark.incubator.apache.org/mailing-lists.html). If you're in the San Francisco Bay Area, there's a regular [Spark meetup](http://www.meetup.com/spark-users/) every few weeks. Come by to meet the developers and other users. diff --git a/docs/java-programming-guide.md b/docs/java-programming-guide.md index ae8257b539..53085cc671 100644 --- a/docs/java-programming-guide.md +++ b/docs/java-programming-guide.md @@ -10,9 +10,9 @@ easy to follow even if you don't know Scala. This guide will show how to use the Spark features described there in Java. The Spark Java API is defined in the -[`spark.api.java`](api/core/index.html#spark.api.java.package) package, and includes -a [`JavaSparkContext`](api/core/index.html#spark.api.java.JavaSparkContext) for -initializing Spark and [`JavaRDD`](api/core/index.html#spark.api.java.JavaRDD) classes, +[`org.apache.spark.api.java`](api/core/index.html#org.apache.spark.api.java.package) package, and includes +a [`JavaSparkContext`](api/core/index.html#org.apache.spark.api.java.JavaSparkContext) for +initializing Spark and [`JavaRDD`](api/core/index.html#org.apache.spark.api.java.JavaRDD) classes, which support the same methods as their Scala counterparts but take Java functions and return Java data and collection types. The main differences have to do with passing functions to RDD operations (e.g. map) and handling RDDs of different types, as discussed next. @@ -23,12 +23,12 @@ There are a few key differences between the Java and Scala APIs: * Java does not support anonymous or first-class functions, so functions must be implemented by extending the - [`spark.api.java.function.Function`](api/core/index.html#spark.api.java.function.Function), - [`Function2`](api/core/index.html#spark.api.java.function.Function2), etc. + [`org.apache.spark.api.java.function.Function`](api/core/index.html#org.apache.spark.api.java.function.Function), + [`Function2`](api/core/index.html#org.apache.spark.api.java.function.Function2), etc. classes. * To maintain type safety, the Java API defines specialized Function and RDD classes for key-value pairs and doubles. For example, - [`JavaPairRDD`](api/core/index.html#spark.api.java.JavaPairRDD) + [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD) stores key-value pairs. * RDD methods like `collect()` and `countByKey()` return Java collections types, such as `java.util.List` and `java.util.Map`. @@ -44,8 +44,8 @@ In the Scala API, these methods are automatically added using Scala's [implicit conversions](http://www.scala-lang.org/node/130) mechanism. 
In the Java API, the extra methods are defined in the -[`JavaPairRDD`](api/core/index.html#spark.api.java.JavaPairRDD) -and [`JavaDoubleRDD`](api/core/index.html#spark.api.java.JavaDoubleRDD) +[`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD) +and [`JavaDoubleRDD`](api/core/index.html#org.apache.spark.api.java.JavaDoubleRDD) classes. RDD methods like `map` are overloaded by specialized `PairFunction` and `DoubleFunction` classes, allowing them to return RDDs of the appropriate types. Common methods like `filter` and `sample` are implemented by @@ -75,7 +75,7 @@ class has a single abstract method, `call()`, that must be implemented. ## Storage Levels RDD [storage level](scala-programming-guide.html#rdd-persistence) constants, such as `MEMORY_AND_DISK`, are -declared in the [spark.api.java.StorageLevels](api/core/index.html#spark.api.java.StorageLevels) class. To +declared in the [org.apache.spark.api.java.StorageLevels](api/core/index.html#org.apache.spark.api.java.StorageLevels) class. To define your own storage level, you can use StorageLevels.create(...). @@ -92,8 +92,8 @@ The Java API supports other Spark features, including As an example, we will implement word count using the Java API. {% highlight java %} -import spark.api.java.*; -import spark.api.java.function.*; +import org.apache.spark.api.java.*; +import org.apache.spark.api.java.function.*; JavaSparkContext sc = new JavaSparkContext(...); JavaRDD<String> lines = sc.textFile("hdfs://..."); @@ -179,7 +179,7 @@ just a matter of style. # Javadoc We currently provide documentation for the Java API as Scaladoc, in the -[`spark.api.java` package](api/core/index.html#spark.api.java.package), because +[`org.apache.spark.api.java` package](api/core/index.html#org.apache.spark.api.java.package), because some of the classes are implemented in Scala. The main downside is that the types and function definitions show Scala syntax (for example, `def reduce(func: Function2[T, T]): T` instead of `T reduce(Function2<T, T> func)`). @@ -189,7 +189,10 @@ We hope to generate documentation with Java-style syntax in the future. # Where to Go from Here Spark includes several sample programs using the Java API in -[`examples/src/main/java`](https://github.com/mesos/spark/tree/master/examples/src/main/java/spark/examples). You can run them by passing the class name to the -`run` script included in Spark -- for example, `./run -spark.examples.JavaWordCount`. Each example program prints usage help when run +[`examples/src/main/java`](https://github.com/apache/incubator-spark/tree/master/examples/src/main/java/org/apache/spark/examples). You can run them by passing the class name to the +`run-example` script included in Spark; for example: + + ./run-example org.apache.spark.examples.JavaWordCount + +Each example program prints usage help when run without any arguments. diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md new file mode 100644 index 0000000000..c897f8b36c --- /dev/null +++ b/docs/mllib-guide.md @@ -0,0 +1,6 @@ +--- +layout: global +title: Machine Learning Library (MLlib) +--- + +Coming soon. diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index e8aaac74d0..8a539fe774 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -10,14 +10,15 @@ To learn the basics of Spark, we recommend reading through the easy to follow even if you don't know Scala. This guide will show how to use the Spark features described there in Python. 
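(Aside, not part of this commit: the Java word-count fragment a few hunks up is excerpted only partially by the diff. For orientation, a hedged Scala sketch of the same computation under the new `org.apache.spark` package names, with the HDFS paths left as placeholders just as in the original.)

{% highlight scala %}
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._  // implicit conversions that add reduceByKey to pair RDDs

val sc = new SparkContext("local", "Word Count")
val counts = sc.textFile("hdfs://...")
  .flatMap(line => line.split(" "))
  .map(word => (word, 1))
  .reduceByKey(_ + _)
counts.saveAsTextFile("hdfs://...")
{% endhighlight %}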
+ + # Key Differences in the Python API There are a few key differences between the Python and Scala APIs: * Python is dynamically typed, so RDDs can hold objects of different types. * PySpark does not currently support the following Spark features: - - Special functions on RDDs of doubles, such as `mean` and `stdev` - - `lookup`, `sample` and `sort` + - `lookup` + - `sort` - `persist` at storage levels other than `MEMORY_ONLY` - Execution on Windows -- this is slated for a future release @@ -50,6 +51,7 @@ PySpark will automatically ship these functions to workers, along with any objec Instances of classes will be serialized and shipped to workers by PySpark, but classes themselves cannot be automatically distributed to workers. The [Standalone Use](#standalone-use) section describes how to ship code dependencies to workers. + # Installing and Configuring PySpark PySpark requires Python 2.6 or higher. @@ -68,7 +70,7 @@ The script automatically adds the `pyspark` package to the `PYTHONPATH`. The `pyspark` script launches a Python interpreter that is configured to run PySpark jobs. To use `pyspark` interactively, first build Spark, then launch it directly from the command line without any options: {% highlight bash %} -$ sbt/sbt package +$ sbt/sbt assembly $ ./pyspark {% endhighlight %} @@ -81,16 +83,41 @@ The Python shell can be used to explore data interactively and is a simple way to l >>> help(pyspark) # Show all pyspark functions {% endhighlight %} -By default, the `pyspark` shell creates SparkContext that runs jobs locally. -To connect to a non-local cluster, set the `MASTER` environment variable. +By default, the `pyspark` shell creates a SparkContext that runs jobs locally on a single core. +To connect to a non-local cluster, or to use multiple cores, set the `MASTER` environment variable. For example, to use the `pyspark` shell with a [standalone Spark cluster](spark-standalone.html): {% highlight bash %} $ MASTER=spark://IP:PORT ./pyspark {% endhighlight %} +Or, to use four cores on the local machine: + +{% highlight bash %} +$ MASTER=local[4] ./pyspark +{% endhighlight %} + + +## IPython + +It is also possible to launch PySpark in [IPython](http://ipython.org), the enhanced Python interpreter. +To do this, simply set the `IPYTHON` variable to `1` when running `pyspark`: + +{% highlight bash %} +$ IPYTHON=1 ./pyspark +{% endhighlight %} + +Alternatively, you can customize the `ipython` command by setting `IPYTHON_OPTS`. For example, to launch +the [IPython Notebook](http://ipython.org/notebook.html) with PyLab graphing support: + +{% highlight bash %} +$ IPYTHON_OPTS="notebook --pylab inline" ./pyspark +{% endhighlight %} + +IPython also works on a cluster or on multiple cores if you set the `MASTER` environment variable. + -# Standalone Use +# Standalone Programs PySpark can also be used from standalone Python scripts by creating a SparkContext in your script and running the script using `pyspark`. The Quick Start guide includes a [complete example](quick-start.html#a-standalone-job-in-python) of a standalone Python job. @@ -105,10 +132,14 @@ sc = SparkContext("local", "Job Name", pyFiles=['MyFile.py', 'lib.zip', 'app.egg Files listed here will be added to the `PYTHONPATH` and shipped to remote worker machines. Code dependencies can be added to an existing SparkContext using its `addPyFile()` method. + # Where to Go from Here -PySpark includes several sample programs in the [`python/examples` folder](https://github.com/mesos/spark/tree/master/python/examples). 
-You can run them by passing the files to the `pyspark` script -- for example `./pyspark python/examples/wordcount.py`. +PySpark includes several sample programs in the [`python/examples` folder](https://github.com/apache/incubator-spark/tree/master/python/examples). +You can run them by passing the files to the `pyspark` script; e.g.: + + ./pyspark python/examples/wordcount.py + Each program prints usage help when run without arguments. We currently provide [API documentation](api/pyspark/index.html) for the Python API as Epydoc. diff --git a/docs/quick-start.md b/docs/quick-start.md index 335643536a..70c3df8095 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -12,7 +12,7 @@ See the [programming guide](scala-programming-guide.html) for a more complete re To follow along with this guide, you only need to have successfully built Spark on one machine. Simply go into your Spark directory and run: {% highlight bash %} -$ sbt/sbt package +$ sbt/sbt assembly {% endhighlight %} # Interactive Analysis with the Spark Shell @@ -53,7 +53,7 @@ scala> textFile.filter(line => line.contains("Spark")).count() // How many lines res3: Long = 15 {% endhighlight %} -## More On RDD Operations +## More on RDD Operations RDD actions and transformations can be used for more complex computations. Let's say we want to find the line with the most words: {% highlight scala %} @@ -108,8 +108,8 @@ We'll create a very simple Spark job in Scala. So simple, in fact, that it's nam {% highlight scala %} /*** SimpleJob.scala ***/ -import spark.SparkContext -import SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ object SimpleJob { def main(args: Array[String]) { @@ -126,7 +126,7 @@ object SimpleJob { This job simply counts the number of lines containing 'a' and the number containing 'b' in the Spark README. Note that you'll need to replace $YOUR_SPARK_HOME with the location where Spark is installed. Unlike the earlier examples with the Spark shell, which initializes its own SparkContext, we initialize a SparkContext as part of the job. We pass the SparkContext constructor four arguments: the type of scheduler we want to use (in this case, a local scheduler), a name for the job, the directory where Spark is installed, and a name for the jar file containing the job's sources. The final two arguments are needed in a distributed setting, where Spark is running across several nodes, so we include them for completeness. Spark will automatically ship the jar files you list to slave nodes. -This file depends on the Spark API, so we'll also include an sbt configuration file, `simple.sbt` which explains that Spark is a dependency. This file also adds two repositories which host Spark dependencies: +This file depends on the Spark API, so we'll also include an sbt configuration file, `simple.sbt`, which explains that Spark is a dependency. 
This file also adds a repository that Spark depends on: {% highlight scala %} name := "Simple Project" @@ -135,14 +135,18 @@ version := "1.0" scalaVersion := "{{site.SCALA_VERSION}}" -libraryDependencies += "org.spark-project" %% "spark-core" % "{{site.SPARK_VERSION}}" +libraryDependencies += "org.apache.spark" %% "spark-core" % "{{site.SPARK_VERSION}}" -resolvers ++= Seq( - "Akka Repository" at "http://repo.akka.io/releases/", - "Spray Repository" at "http://repo.spray.cc/") +resolvers += "Akka Repository" at "http://repo.akka.io/releases/" {% endhighlight %} -Of course, for sbt to work correctly, we'll need to layout `SimpleJob.scala` and `simple.sbt` according to the typical directory structure. Once that is in place, we can create a JAR package containing the job's code, then use `sbt run` to execute our example job. +If you also wish to read data from Hadoop's HDFS, you will need to add a dependency on `hadoop-client` for your version of HDFS: + +{% highlight scala %} +libraryDependencies += "org.apache.hadoop" % "hadoop-client" % "<your-hdfs-version>" +{% endhighlight %} + +Finally, for sbt to work correctly, we'll need to lay out `SimpleJob.scala` and `simple.sbt` according to the typical directory structure. Once that is in place, we can create a JAR package containing the job's code, then use `sbt run` to execute our example job. {% highlight bash %} $ find . @@ -159,8 +163,6 @@ $ sbt run Lines with a: 46, Lines with b: 23 {% endhighlight %} -This example only runs the job locally; for a tutorial on running jobs across several machines, see the [Standalone Mode](spark-standalone.html) documentation, and consider using a distributed input source, such as HDFS. - # A Standalone Job In Java Now say we wanted to write a standalone job using the Java API. We will walk through doing this with Maven. If you are using other build systems, consider using the Spark assembly JAR described in the developer guide. @@ -168,8 +170,8 @@ We'll create a very simple Spark job, `SimpleJob.java`: {% highlight java %} /*** SimpleJob.java ***/ -import spark.api.java.*; -import spark.api.java.function.Function; +import org.apache.spark.api.java.*; +import org.apache.spark.api.java.function.Function; public class SimpleJob { public static void main(String[] args) { @@ -205,17 +207,13 @@ To build the job, we also write a Maven `pom.xml` file that lists Spark as a dep <version>1.0</version> <repositories> <repository> - <id>Spray.cc repository</id> - <url>http://repo.spray.cc</url> - </repository> - <repository> <id>Akka repository</id> <url>http://repo.akka.io/releases</url> </repository> </repositories> <dependencies> <dependency> <!-- Spark dependency --> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-core_{{site.SCALA_VERSION}}</artifactId> <version>{{site.SPARK_VERSION}}</version> </dependency> @@ -223,6 +221,16 @@ To build the job, we also write a Maven `pom.xml` file that lists Spark as a dep </project> {% endhighlight %} +If you also wish to read data from Hadoop's HDFS, you will need to add a dependency on `hadoop-client` for your version of HDFS: + +{% highlight xml %} + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>...</version> + </dependency> +{% endhighlight %} + We lay out these files according to the canonical Maven directory structure: {% highlight bash %} $ find . 
@@ -242,8 +250,6 @@ $ mvn exec:java -Dexec.mainClass="SimpleJob" Lines with a: 46, Lines with b: 23 {% endhighlight %} -This example only runs the job locally; for a tutorial on running jobs across several machines, see the [Standalone Mode](spark-standalone.html) documentation, and consider using a distributed input source, such as HDFS. - # A Standalone Job In Python Now we will show how to write a standalone job using the Python API (PySpark). @@ -280,4 +286,33 @@ $ ./pyspark SimpleJob.py Lines with a: 46, Lines with b: 23 {% endhighlight python %} -This example only runs the job locally; for a tutorial on running jobs across several machines, see the [Standalone Mode](spark-standalone.html) documentation, and consider using a distributed input source, such as HDFS. +# Running Jobs on a Cluster + +There are a few additional considerations when running jobs on a +[Spark](spark-standalone.html), [YARN](running-on-yarn.html), or +[Mesos](running-on-mesos.html) cluster. + +### Including Your Dependencies +If your code depends on other projects, you will need to ensure they are also +present on the slave nodes. A popular approach is to create an +assembly jar (or "uber" jar) containing your code and its dependencies. Both +[sbt](https://github.com/sbt/sbt-assembly) and +[Maven](http://maven.apache.org/plugins/maven-assembly-plugin/) +have assembly plugins. When creating assembly jars, list Spark +itself as a `provided` dependency; it need not be bundled since it is +already present on the slaves. Once you have an assembled jar, +add it to the SparkContext as shown here. It is also possible to submit +your dependent jars one-by-one when creating a SparkContext. + +### Setting Configuration Options +Spark includes several configuration options which influence the behavior +of your job. These should be set as +[JVM system properties](configuration.html#system-properties) in your +program. The options will be captured and shipped to all slave nodes. + +### Accessing Hadoop Filesystems + +The examples here access a local file. To read data from a distributed +filesystem, such as HDFS, include +[Hadoop version information](index.html#a-note-about-hadoop-versions) +in your build file. By default, Spark builds against HDFS 1.0.4. diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index f4a3eb667c..b31f78e8bf 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -9,9 +9,8 @@ Spark can run on private clusters managed by the [Apache Mesos](http://incubator 2. Download Mesos {{site.MESOS_VERSION}} from a [mirror](http://www.apache.org/dyn/closer.cgi/incubator/mesos/mesos-{{site.MESOS_VERSION}}/). 3. Configure Mesos using the `configure` script, passing the location of your `JAVA_HOME` using `--with-java-home`. Mesos comes with "template" configure scripts for different platforms, such as `configure.macosx`, that you can run. See the README file in Mesos for other options. **Note:** If you want to run Mesos without installing it into the default paths on your system (e.g. if you don't have administrative privileges to install it), you should also pass the `--prefix` option to `configure` to tell it where to install. For example, pass `--prefix=/home/user/mesos`. By default the prefix is `/usr/local`. 4. Build Mesos using `make`, and then install it using `make install`. -5. Create a file called `spark-env.sh` in Spark's `conf` directory, by copying `conf/spark-env.sh.template`, and add the following lines in it: +5. 
Create a file called `spark-env.sh` in Spark's `conf` directory, by copying `conf/spark-env.sh.template`, and add the following lines to it: * `export MESOS_NATIVE_LIBRARY=<path to libmesos.so>`. This path is usually `<prefix>/lib/libmesos.so` (where the prefix is `/usr/local` by default). Also, on Mac OS X, the library is called `libmesos.dylib` instead of `.so`. - * `export SCALA_HOME=<path to Scala directory>`. 6. Copy Spark and Mesos to the _same_ paths on all the nodes in the cluster (or, for Mesos, `make install` on every node). 7. Configure Mesos for deployment: * On your master node, edit `<prefix>/var/mesos/deploy/masters` to list your master and `<prefix>/var/mesos/deploy/slaves` to list the slaves, where `<prefix>` is the prefix where you installed Mesos (`/usr/local` by default). diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 66fb8d73e8..fe5334ffdc 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -3,50 +3,33 @@ layout: global title: Launching Spark on YARN --- -Experimental support for running over a [YARN (Hadoop +Support for running on [YARN (Hadoop NextGen)](http://hadoop.apache.org/docs/r2.0.2-alpha/hadoop-yarn/hadoop-yarn-site/YARN.html) -cluster was added to Spark in version 0.6.0. This was merged into master as part of 0.7 effort. -To build spark core with YARN support, please use the hadoop2-yarn profile. -Ex: mvn -Phadoop2-yarn clean install +was added to Spark in version 0.6.0, and improved in 0.7.0 and 0.8.0. -# Building spark core consolidated jar. +# Building a YARN-Enabled Assembly JAR -We need a consolidated spark core jar (which bundles all the required dependencies) to run Spark jobs on a yarn cluster. -This can be built either through sbt or via maven. +We need a consolidated Spark JAR (which bundles all the required dependencies) to run Spark jobs on a YARN cluster. +This can be built by setting the Hadoop version and `SPARK_YARN` environment variable, as follows: -- Building spark assembled jar via sbt. - It is a manual process of enabling it in project/SparkBuild.scala. -Please comment out the - HADOOP_VERSION, HADOOP_MAJOR_VERSION and HADOOP_YARN -variables before the line 'For Hadoop 2 YARN support' -Next, uncomment the subsequent 3 variable declaration lines (for these three variables) which enable hadoop yarn support. + SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt/sbt assembly -Assembly of the jar Ex: +The assembled JAR will be something like this: +`./assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly_{{site.SPARK_VERSION}}-hadoop2.0.5.jar`. - ./sbt/sbt clean assembly - -The assembled jar would typically be something like : -`./core/target/spark-core-assembly-0.8.0-SNAPSHOT.jar` - - -- Building spark assembled jar via Maven. - Use the hadoop2-yarn profile and execute the package target. - -Something like this. Ex: - - mvn -Phadoop2-yarn clean package -DskipTests=true +# Preparations -This will build the shaded (consolidated) jar. Typically something like : -`./repl-bin/target/spark-repl-bin-<VERSION>-shaded-hadoop2-yarn.jar` +- Building a YARN-enabled assembly (see above). +- Your application code must be packaged into a separate JAR file. +If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt assembly`. 
NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different. -# Preparations +# Configuration -- Building spark core assembled jar (see above). -- Your application code must be packaged into a separate JAR file. +Most of the configs are the same for Spark on YARN as for other deploys. See the Configuration page for more information on those. These are configs that are specific to Spark on YARN. -If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt package`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different. +* `SPARK_YARN_USER_ENV`, to add environment variables to the Spark processes launched on YARN. This can be a comma-separated list of environment variables, e.g. `SPARK_YARN_USER_ENV="JAVA_HOME=/jdk64,FOO=bar"`. # Launching Spark on YARN @@ -55,7 +38,7 @@ This would be used to connect to the cluster, write to the dfs and submit jobs t The command to launch the YARN Client is as follows: - SPARK_JAR=<SPARK_YAR_FILE> ./run spark.deploy.yarn.Client \ + SPARK_JAR=<SPARK_YARN_JAR_FILE> ./spark-class spark.deploy.yarn.Client \ --jar <YOUR_APP_JAR_FILE> \ --class <APP_MAIN_CLASS> \ --args <APP_MAIN_ARGUMENTS> \ @@ -63,12 +46,11 @@ The command to launch the YARN Client is as follows: --master-memory <MEMORY_FOR_MASTER> \ --worker-memory <MEMORY_PER_WORKER> \ --worker-cores <CORES_PER_WORKER> \ - --user <hadoop_user> \ --queue <queue_name> For example: - SPARK_JAR=./core/target/spark-core-assembly-{{site.SPARK_VERSION}}.jar ./run spark.deploy.yarn.Client \ + SPARK_JAR=./yarn/target/spark-yarn-assembly-{{site.SPARK_VERSION}}.jar ./spark-class spark.deploy.yarn.Client \ --jar examples/target/scala-{{site.SCALA_VERSION}}/spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}.jar \ --class spark.examples.SparkPi \ --args yarn-standalone \ @@ -83,5 +65,4 @@ The above starts a YARN Client program which periodically polls the Application - When your application instantiates a Spark context it must use a special "yarn-standalone" master url. This starts the scheduler without forcing it to connect to a cluster. A good way to handle this is to pass "yarn-standalone" as an argument to your program, as shown in the example above. - We do not request container resources based on the number of cores. Thus the number of cores given via command line arguments cannot be guaranteed. -- Currently, we have not yet integrated with hadoop security. If --user is present, the hadoop_user specified will be used to run the tasks on the cluster. If unspecified, current user will be used (which should be valid in cluster). - Once hadoop security support is added, and if hadoop cluster is enabled with security, additional restrictions would apply via delegation tokens passed. 
+- The local directories used for spark will be the local directories configured for YARN (Hadoop Yarn config yarn.nodemanager.local-dirs). If the user specifies spark.local.dir, it will be ignored. diff --git a/docs/scala-programming-guide.md b/docs/scala-programming-guide.md index e9cf9ef36f..f7768e55fc 100644 --- a/docs/scala-programming-guide.md +++ b/docs/scala-programming-guide.md @@ -17,19 +17,27 @@ This guide shows each of these features and walks through some samples. It assum # Linking with Spark -To write a Spark application, you will need to add both Spark and its dependencies to your CLASSPATH. If you use sbt or Maven, Spark is available through Maven Central at: +Spark {{site.SPARK_VERSION}} uses Scala {{site.SCALA_VERSION}}. If you write applications in Scala, you'll need to use this same version of Scala in your program -- newer major versions may not work. - groupId = org.spark-project +To write a Spark application, you need to add a dependency on Spark. If you use SBT or Maven, Spark is available through Maven Central at: + + groupId = org.apache.spark artifactId = spark-core_{{site.SCALA_VERSION}} version = {{site.SPARK_VERSION}} -For other build systems or environments, you can run `sbt/sbt assembly` to build both Spark and its dependencies into one JAR (`core/target/spark-core-assembly-0.6.0.jar`), then add this to your CLASSPATH. +In addition, if you wish to access an HDFS cluster, you need to add a dependency on `hadoop-client` for your version of HDFS: + + groupId = org.apache.hadoop + artifactId = hadoop-client + version = <your-hdfs-version> + +For other build systems, you can run `sbt/sbt assembly` to pack Spark and its dependencies into one JAR (`assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop*.jar`), then add this to your CLASSPATH. Set the HDFS version as described [here](index.html#a-note-about-hadoop-versions). -In addition, you'll need to import some Spark classes and implicit conversions. Add the following lines at the top of your program: +Finally, you need to import some Spark classes and implicit conversions into your program. Add the following lines: {% highlight scala %} -import spark.SparkContext -import SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ {% endhighlight %} # Initializing Spark @@ -134,7 +142,7 @@ All transformations in Spark are <i>lazy</i>, in that they do not compute their By default, each transformed RDD is recomputed each time you run an action on it. However, you may also *persist* an RDD in memory using the `persist` (or `cache`) method, in which case Spark will keep the elements around on the cluster for much faster access the next time you query it. There is also support for persisting datasets on disk, or replicated across the cluster. The next section in this document describes these options. -The following tables list the transformations and actions currently supported (see also the [RDD API doc](api/core/index.html#spark.RDD) for details): +The following tables list the transformations and actions currently supported (see also the [RDD API doc](api/core/index.html#org.apache.spark.RDD) for details): ### Transformations @@ -203,7 +211,7 @@ The following tables list the transformations and actions currently supported (s </tr> </table> -A complete list of transformations is available in the [RDD API doc](api/core/index.html#spark.RDD). 
+A complete list of transformations is available in the [RDD API doc](api/core/index.html#org.apache.spark.RDD). ### Actions @@ -251,7 +259,7 @@ A complete list of transformations is available in the [RDD API doc](api/core/in </tr> </table> -A complete list of actions is available in the [RDD API doc](api/core/index.html#spark.RDD). +A complete list of actions is available in the [RDD API doc](api/core/index.html#org.apache.spark.RDD). ## RDD Persistence @@ -259,7 +267,7 @@ One of the most important capabilities in Spark is *persisting* (or *caching*) a You can mark an RDD to be persisted using the `persist()` or `cache()` methods on it. The first time it is computed in an action, it will be kept in memory on the nodes. The cache is fault-tolerant -- if any partition of an RDD is lost, it will automatically be recomputed using the transformations that originally created it. -In addition, each RDD can be stored using a different *storage level*, allowing you, for example, to persist the dataset on disk, or persist it in memory but as serialized Java objects (to save space), or even replicate it across nodes. These levels are chosen by passing a [`spark.storage.StorageLevel`](api/core/index.html#spark.storage.StorageLevel) object to `persist()`. The `cache()` method is a shorthand for using the default storage level, which is `StorageLevel.MEMORY_ONLY` (store deserialized objects in memory). The complete set of available storage levels is: +In addition, each RDD can be stored using a different *storage level*, allowing you, for example, to persist the dataset on disk, or persist it in memory but as serialized Java objects (to save space), or even replicate it across nodes. These levels are chosen by passing a [`org.apache.spark.storage.StorageLevel`](api/core/index.html#org.apache.spark.storage.StorageLevel) object to `persist()`. The `cache()` method is a shorthand for using the default storage level, which is `StorageLevel.MEMORY_ONLY` (store deserialized objects in memory). The complete set of available storage levels is: <table class="table"> <tr><th style="width:23%">Storage Level</th><th>Meaning</th></tr> @@ -310,7 +318,7 @@ We recommend going through the following process to select one: application). *All* the storage levels provide full fault tolerance by recomputing lost data, but the replicated ones let you continue running tasks on the RDD without waiting to recompute a lost partition. -If you want to define your own storage level (say, with replication factor of 3 instead of 2), then use the function factor method `apply()` of the [`StorageLevel`](api/core/index.html#spark.storage.StorageLevel$) singleton object. +If you want to define your own storage level (say, with replication factor of 3 instead of 2), then use the factory method `apply()` of the [`StorageLevel`](api/core/index.html#org.apache.spark.storage.StorageLevel$) singleton object. # Shared Variables @@ -356,7 +364,11 @@ res2: Int = 10 # Where to Go from Here You can see some [example Spark programs](http://www.spark-project.org/examples.html) on the Spark website. -In addition, Spark includes several sample programs in `examples/src/main/scala`. Some of them have both Spark versions and local (non-parallel) versions, allowing you to see what had to be changed to make the program run on a cluster. You can run them using by passing the class name to the `run` script included in Spark -- for example, `./run spark.examples.SparkPi`. Each example program prints usage help when run without any arguments.
+In addition, Spark includes several samples in `examples/src/main/scala`. Some of them have both Spark versions and local (non-parallel) versions, allowing you to see what had to be changed to make the program run on a cluster. You can run them by passing the class name to the `run-example` script included in Spark; for example: + + ./run-example org.apache.spark.examples.SparkPi + +Each example program prints usage help when run without any arguments. For help on optimizing your program, the [configuration](configuration.html) and [tuning](tuning.html) guides provide information on best practices. They are especially important for diff --git a/docs/spark-debugger.md b/docs/spark-debugger.md index f6f0988858..d6315d97f4 100644 --- a/docs/spark-debugger.md +++ b/docs/spark-debugger.md @@ -2,7 +2,7 @@ layout: global title: The Spark Debugger --- -**Summary:** The Spark debugger provides replay debugging for deterministic (logic) errors in Spark programs. It's currently in development, but you can try it out in the [arthur branch](https://github.com/mesos/spark/tree/arthur). +**Summary:** The Spark debugger provides replay debugging for deterministic (logic) errors in Spark programs. It's currently in development, but you can try it out in the [arthur branch](https://github.com/apache/incubator-spark/tree/arthur). ## Introduction @@ -19,7 +19,7 @@ For deterministic errors, debugging a Spark program is now as easy as debugging ## Approach -As your Spark program runs, the slaves report key events back to the master -- for example, RDD creations, RDD contents, and uncaught exceptions. (A full list of event types is in [EventLogging.scala](https://github.com/mesos/spark/blob/arthur/core/src/main/scala/spark/EventLogging.scala).) The master logs those events, and you can load the event log into the debugger after your program is done running. +As your Spark program runs, the slaves report key events back to the master -- for example, RDD creations, RDD contents, and uncaught exceptions. (A full list of event types is in [EventLogging.scala](https://github.com/apache/incubator-spark/blob/arthur/core/src/main/scala/spark/EventLogging.scala).) The master logs those events, and you can load the event log into the debugger after your program is done running. _A note on nondeterminism:_ For fault recovery, Spark requires RDD transformations (for example, the function passed to `RDD.map`) to be deterministic. The Spark debugger also relies on this property, and it can also warn you if your transformation is nondeterministic. This works by checksumming the contents of each RDD and comparing the checksums from the original execution to the checksums after recomputing the RDD in the debugger. diff --git a/docs/spark-simple-tutorial.md b/docs/spark-simple-tutorial.md deleted file mode 100644 index 9875de62bd..0000000000 --- a/docs/spark-simple-tutorial.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: Tutorial - Running a Simple Spark Application ---- - -1. Create directory for spark demo: - - ~$ mkdir SparkTest - -2. Copy the sbt files in ~/spark/sbt directory: - - ~/SparkTest$ cp -r ../spark/sbt . - -3. Edit the ~/SparkTest/sbt/sbt file to look like this: - - #!/bin/bash - java -Xmx800M -XX:MaxPermSize=150m -jar $(dirname $0)/sbt-launch-*.jar "$@" - -4. To build a Spark application, you need Spark and its dependencies in a single Java archive (JAR) file. Create this JAR in Spark's main directory with sbt as: - - ~/spark$ sbt/sbt assembly - -5.
create a source file in ~/SparkTest/src/main/scala directory: - - ~/SparkTest/src/main/scala$ vi Test1.scala - -6. Make the contain of the Test1.scala file like this: - - import spark.SparkContext - import spark.SparkContext._ - object Test1 { - def main(args: Array[String]) { - val sc = new SparkContext("local", "SparkTest") - println(sc.parallelize(1 to 10).reduce(_ + _)) - System.exit(0) - } - } - -7. Run the Test1.scala file: - - ~/SparkTest$ sbt/sbt run diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 3986c0c79d..9ab6ba0830 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -3,32 +3,21 @@ layout: global title: Spark Standalone Mode --- -{% comment %} -TODO(andyk): - - Add a table of contents - - Move configuration towards the end so that it doesn't come first - - Say the scripts will guess the resource amounts (i.e. # cores) automatically -{% endcomment %} - -In addition to running on top of [Mesos](https://github.com/mesos/mesos), Spark also supports a standalone mode, consisting of one Spark master and several Spark worker processes. You can run the Spark standalone mode either locally (for testing) or on a cluster. If you wish to run on a cluster, we have provided [a set of deploy scripts](#cluster-launch-scripts) to launch a whole cluster. - -# Getting Started - -Compile Spark with `sbt package` as described in the [Getting Started Guide](index.html). You do not need to install Mesos on your machine if you are using the standalone mode. +In addition to running on the Mesos or YARN cluster managers, Spark also provides a simple standalone deploy mode. You can launch a standalone cluster either manually, by starting a master and workers by hand, or use our provided [deploy scripts](#cluster-launch-scripts). It is also possible to run these daemons on a single machine for testing. # Starting a Cluster Manually You can start a standalone master server by executing: - ./run spark.deploy.master.Master + ./spark-class spark.deploy.master.Master -Once started, the master will print out a `spark://IP:PORT` URL for itself, which you can use to connect workers to it, -or pass as the "master" argument to `SparkContext` to connect a job to the cluster. You can also find this URL on +Once started, the master will print out a `spark://HOST:PORT` URL for itself, which you can use to connect workers to it, +or pass as the "master" argument to `SparkContext`. You can also find this URL on the master's web UI, which is [http://localhost:8080](http://localhost:8080) by default. Similarly, you can start one or more workers and connect them to the master via: - ./run spark.deploy.worker.Worker spark://IP:PORT + ./spark-class spark.deploy.worker.Worker spark://IP:PORT Once you have started a worker, look at the master's web UI ([http://localhost:8080](http://localhost:8080) by default). You should see the new node listed there, along with its number of CPUs and memory (minus one gigabyte left for the OS). 
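For instance (a minimal sketch; the host name, application name, Spark home path, and JAR path below are placeholders, not values from these docs), a program connects to this cluster by passing the master URL to `SparkContext`:

{% highlight scala %}
import org.apache.spark.SparkContext

// "spark://masterhost:7077" is the URL the master printed at startup;
// the JAR list ships your compiled application code to the workers.
val sc = new SparkContext("spark://masterhost:7077", "MyApp",
  "/path/to/spark", Seq("target/my-app.jar"))
{% endhighlight %}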
@@ -43,7 +32,7 @@ Finally, the following configuration options can be passed to the master and wor </tr> <tr> <td><code>-p PORT</code>, <code>--port PORT</code></td> - <td>IP address or DNS name to listen on (default: 7077 for master, random for worker)</td> + <td>Port for service to listen on (default: 7077 for master, random for worker)</td> </tr> <tr> <td><code>--webui-port PORT</code></td> @@ -68,7 +57,7 @@ Finally, the following configuration options can be passed to the master and wor To launch a Spark standalone cluster with the deploy scripts, you need to create a file called `conf/slaves` in your Spark directory, which should contain the hostnames of all the machines where you would like to start Spark workers, one per line. The master machine must be able to access each of the slave machines via password-less `ssh` (using a private key). For testing, you can just put `localhost` in this file. -Once you've set up this fine, you can launch or stop your cluster with the following shell scripts, based on Hadoop's deploy scripts, and available in `SPARK_HOME/bin`: +Once you've set up this file, you can launch or stop your cluster with the following shell scripts, based on Hadoop's deploy scripts, and available in `SPARK_HOME/bin`: - `bin/start-master.sh` - Starts a master instance on the machine the script is executed on. - `bin/start-slaves.sh` - Starts a slave instance on each machine specified in the `conf/slaves` file. @@ -85,47 +74,56 @@ You can optionally configure the cluster further by setting environment variable <tr><th style="width:21%">Environment Variable</th><th>Meaning</th></tr> <tr> <td><code>SPARK_MASTER_IP</code></td> - <td>Bind the master to a specific IP address, for example a public one</td> + <td>Bind the master to a specific IP address, for example a public one.</td> </tr> <tr> <td><code>SPARK_MASTER_PORT</code></td> - <td>Start the master on a different port (default: 7077)</td> + <td>Start the master on a different port (default: 7077).</td> </tr> <tr> <td><code>SPARK_MASTER_WEBUI_PORT</code></td> - <td>Port for the master web UI (default: 8080)</td> + <td>Port for the master web UI (default: 8080).</td> </tr> <tr> <td><code>SPARK_WORKER_PORT</code></td> - <td>Start the Spark worker on a specific port (default: random)</td> + <td>Start the Spark worker on a specific port (default: random).</td> </tr> <tr> <td><code>SPARK_WORKER_DIR</code></td> - <td>Directory to run jobs in, which will include both logs and scratch space (default: SPARK_HOME/work)</td> + <td>Directory to run jobs in, which will include both logs and scratch space (default: SPARK_HOME/work).</td> </tr> <tr> <td><code>SPARK_WORKER_CORES</code></td> - <td>Total number of cores to allow Spark jobs to use on the machine (default: all available cores)</td> + <td>Total number of cores to allow Spark jobs to use on the machine (default: all available cores).</td> </tr> <tr> <td><code>SPARK_WORKER_MEMORY</code></td> - <td>Total amount of memory to allow Spark jobs to use on the machine, e.g. 1000M, 2G (default: total memory minus 1 GB); note that each job's <i>individual</i> memory is configured using <code>SPARK_MEM</code></td> + <td>Total amount of memory to allow Spark jobs to use on the machine, e.g. 
<code>1000m</code>, <code>2g</code> (default: total memory minus 1 GB); note that each job's <i>individual</i> memory is configured using its <code>spark.executor.memory</code> property.</td> </tr> <tr> <td><code>SPARK_WORKER_WEBUI_PORT</code></td> - <td>Port for the worker web UI (default: 8081)</td> + <td>Port for the worker web UI (default: 8081).</td> + </tr> + <tr> + <td><code>SPARK_WORKER_INSTANCES</code></td> + <td> + Number of worker instances to run on each machine (default: 1). You can make this more than 1 if + you have very large machines and would like multiple Spark worker processes. If you do set + this, make sure to also set <code>SPARK_WORKER_CORES</code> explicitly to limit the cores per worker, + or else each worker will try to use all the cores. + </td> </tr> <tr> <td><code>SPARK_DAEMON_MEMORY</code></td> - <td>Memory to allocate to the Spark master and worker daemons themselves (default: 512m)</td> + <td>Memory to allocate to the Spark master and worker daemons themselves (default: 512m).</td> </tr> <tr> <td><code>SPARK_DAEMON_JAVA_OPTS</code></td> - <td>JVM options for the Spark master and worker daemons themselves (default: none)</td> + <td>JVM options for the Spark master and worker daemons themselves (default: none).</td> </tr> </table> - +**Note:** The launch scripts do not currently support Windows. To run a Spark cluster on Windows, start the master and workers by hand. # Connecting a Job to the Cluster @@ -155,5 +153,5 @@ In addition, detailed log output for each job is also written to the work direct # Running Alongside Hadoop -You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the machines. To access Hadoop data from Spark, just use a hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in the same local area network (e.g. you place a few Spark machines on each rack that you have Hadoop on). +You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the same machines. To access Hadoop data from Spark, just use an hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in the same local area network (e.g. you place a few Spark machines on each rack that you have Hadoop on). diff --git a/docs/streaming-custom-receivers.md b/docs/streaming-custom-receivers.md index 5476c00d02..4e27d6559c 100644 --- a/docs/streaming-custom-receivers.md +++ b/docs/streaming-custom-receivers.md @@ -1,23 +1,53 @@ --- layout: global -title: Tutorial - Spark Streaming, Plugging in a custom receiver. +title: Spark Streaming Custom Receivers --- A "Spark Streaming" receiver can be a simple network stream, streams of messages from a message queue, files etc. A receiver can also assume roles beyond just receiving data, such as filtering and preprocessing, to name a few of the possibilities. The API to plug in any user-defined custom receiver is thus provided to encourage development of receivers which may be well suited to one's specific needs.
This guide shows the programming model and features by walking through a simple sample receiver and corresponding Spark Streaming application. +### Writing a Simple Receiver

This starts with implementing [NetworkReceiver](api/streaming/index.html#org.apache.spark.streaming.dstream.NetworkReceiver).

-## A quick and naive walk-through

-### Write a simple receiver

+The following is a simple socket text-stream receiver. + +{% highlight scala %} + // Imports needed by this receiver (not shown in the original snippet): + import java.io.{BufferedReader, InputStreamReader} + import java.net.Socket + import org.apache.spark.storage.StorageLevel + import org.apache.spark.streaming.dstream.NetworkReceiver + + class SocketTextStreamReceiver(host: String, port: Int) + extends NetworkReceiver[String] + { + protected lazy val blocksGenerator: BlockGenerator = + new BlockGenerator(StorageLevel.MEMORY_ONLY_SER_2) + + protected def onStart() = { + blocksGenerator.start() + val socket = new Socket(host, port) + val dataInputStream = new BufferedReader(new InputStreamReader(socket.getInputStream(), "UTF-8")) + var data: String = dataInputStream.readLine() + while (data != null) { + blocksGenerator += data + data = dataInputStream.readLine() + } + } + + protected def onStop() { + blocksGenerator.stop() + } + } +{% endhighlight %} + + +All we did here is extend NetworkReceiver and call blocksGenerator's API method (i.e. `+=`) to push our blocks of data. Please refer to the scala-docs of NetworkReceiver for more details. + + +### An Actor as Receiver This starts with implementing [Actor](#References) Following is a simple socket text-stream receiver, which is apparently overly simplified using Akka's socket.io api. {% highlight scala %} - class SocketTextStreamReceiver (host:String, port:Int, bytesToString: ByteString => String) extends Actor with Receiver { @@ -29,43 +59,41 @@ Following is a simple socket text-stream receiver, which is appearently overly s } } - - {% endhighlight %} All we did here is mix in the trait Receiver and call the pushBlock API method to push our blocks of data. Please refer to the scala-docs of Receiver for more details. -### A sample spark application +### A Sample Spark Application * First create a Spark streaming context with master URL and batch duration. {% highlight scala %} - val ssc = new StreamingContext(master, "WordCountCustomStreamSource", Seconds(batchDuration)) - {% endhighlight %} -* Plug-in the actor configuration into the spark streaming context and create a DStream. +* Plug in the custom receiver into the spark streaming context and create a DStream. {% highlight scala %} + val lines = ssc.networkStream[String](new SocketTextStreamReceiver( + "localhost", 8445)) +{% endhighlight %} + +* OR plug in the actor as receiver into the spark streaming context and create a DStream. {% highlight scala %} val lines = ssc.actorStream[String](Props(new SocketTextStreamReceiver( "localhost",8445, z => z.utf8String)),"SocketReceiver") - {% endhighlight %} * Process it. {% highlight scala %} - val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _) wordCounts.print() ssc.start() - - {% endhighlight %} * After processing it, the stream can be tested using the netcat utility. @@ -75,12 +103,11 @@ All we did here is mixed in trait Receiver and called pushBlock api method to pu hello hello -## Multiple homogeneous/heterogeneous receivers. +## Multiple Homogeneous/Heterogeneous Receivers A DStream union operation is provided for taking union on multiple input streams.
{% highlight scala %} - val lines = ssc.actorStream[String](Props(new SocketTextStreamReceiver( "localhost",8445, z => z.utf8String)),"SocketReceiver") @@ -89,7 +116,6 @@ A DStream union operation is provided for taking union on multiple input streams "localhost",8446, z => z.utf8String)),"SocketReceiver") val union = lines.union(lines2) - {% endhighlight %} The above stream can be easily processed as described earlier. @@ -99,3 +125,4 @@ _A more comprehensive example is provided in the spark streaming examples_ ## References 1.[Akka Actor documentation](http://doc.akka.io/docs/akka/2.0.5/scala/actors.html) +2.[NetworkReceiver](api/streaming/index.html#org.apache.spark.streaming.dstream.NetworkReceiver) diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index 8cd1b0cd66..c7df172024 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -13,6 +13,14 @@ A Spark Streaming application is very similar to a Spark application; it consist This guide shows how to start programming with DStreams. +# Linking with Spark Streaming + +Add the following SBT or Maven dependency to your project to use Spark Streaming: + + groupId = org.apache.spark + artifactId = spark-streaming_{{site.SCALA_VERSION}} + version = {{site.SPARK_VERSION}} + # Initializing Spark Streaming The first thing a Spark Streaming program must do is create a `StreamingContext` object, which tells Spark how to access a cluster. A `StreamingContext` can be created by using @@ -34,7 +42,7 @@ ssc.textFileStream(directory) // Creates a stream by monitoring and process ssc.socketStream(hostname, port) // Creates a stream that uses a TCP socket to read data from hostname:port {% endhighlight %} -We also provide a input streams for Kafka, Flume, Akka actor, etc. For a complete list of input streams, take a look at the [StreamingContext API documentation](api/streaming/index.html#spark.streaming.StreamingContext). +We also provide input streams for Kafka, Flume, Akka actors, etc. For a complete list of input streams, take a look at the [StreamingContext API documentation](api/streaming/index.html#org.apache.spark.streaming.StreamingContext). @@ -156,7 +164,7 @@ Spark Streaming features windowed computations, which allow you to apply transfo </table> -A complete list of DStream operations is available in the API documentation of [DStream](api/streaming/index.html#spark.streaming.DStream) and [PairDStreamFunctions](api/streaming/index.html#spark.streaming.PairDStreamFunctions). +A complete list of DStream operations is available in the API documentation of [DStream](api/streaming/index.html#org.apache.spark.streaming.DStream) and [PairDStreamFunctions](api/streaming/index.html#org.apache.spark.streaming.PairDStreamFunctions). ## Output Operations When an output operator is called, it triggers the computation of a stream. Currently the following output operators are defined: @@ -203,11 +211,11 @@ ssc.stop() {% endhighlight %} # Example -A simple example to start off is the [NetworkWordCount](https://github.com/mesos/spark/tree/master/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala). This example counts the words received from a network server every second. Given below is the relevant sections of the source code. You can find the full source code in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/NetworkWordCount.scala` .
+A simple example to start off is the [NetworkWordCount](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala). This example counts the words received from a network server every second. Given below are the relevant sections of the source code. You can find the full source code in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/NetworkWordCount.scala`. {% highlight scala %} -import spark.streaming.{Seconds, StreamingContext} -import spark.streaming.StreamingContext._ +import org.apache.spark.streaming.{Seconds, StreamingContext} +import StreamingContext._ ... // Create the context and set up a network input stream to receive from a host:port @@ -234,7 +242,7 @@ $ nc -lk 9999 Then, in a different terminal, you can start NetworkWordCount by using {% highlight bash %} -$ ./run spark.streaming.examples.NetworkWordCount local[2] localhost 9999 +$ ./run-example org.apache.spark.streaming.examples.NetworkWordCount local[2] localhost 9999 {% endhighlight %} This will make NetworkWordCount connect to the netcat server. Any lines typed in the terminal running the netcat server will be counted and printed on screen. @@ -272,7 +280,7 @@ Time: 1357008430000 ms </td> </table> -You can find more examples in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/`. They can be run in the similar manner using `./run spark.streaming.examples....` . Executing without any parameter would give the required parameter list. Further explanation to run them can be found in comments in the files. +You can find more examples in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/`. They can be run in a similar manner using `./run-example org.apache.spark.streaming.examples....`. Executing an example without any parameters prints the required parameter list. Further explanation of how to run them can be found in comments in the files. # DStream Persistence Similar to RDDs, DStreams also allow developers to persist the stream's data in memory. That is, using the `persist()` method on a DStream would automatically persist every RDD of that DStream in memory. This is useful if the data in the DStream will be computed multiple times (e.g., multiple operations on the same data). For window-based operations like `reduceByWindow` and `reduceByKeyAndWindow` and state-based operations like `updateStateByKey`, this is implicitly true. Hence, DStreams generated by window-based operations are automatically persisted in memory, without the developer calling `persist()`. @@ -301,6 +309,9 @@ dstream.checkpoint(checkpointInterval) // checkpointInterval must be a multiple For DStreams that must be checkpointed (that is, DStreams created by `updateStateByKey` and `reduceByKeyAndWindow` with inverse function), the checkpoint interval of the DStream is by default set to a multiple of the DStream's sliding interval such that it's at least 10 seconds. +## Custom Receivers +Spark comes with built-in support for the most common usage scenarios, where the input stream source can be anything from a network socket stream to a message queue. Apart from that, it is also possible to supply your own custom receiver via a convenient API. Find more details in the [Custom Receiver Guide](streaming-custom-receivers.html). + # Performance Tuning Getting the best performance of a Spark Streaming application on a cluster requires a bit of tuning.
This section explains a number of the parameters and configurations that can be tuned to improve the performance of your application. At a high level, you need to consider two things: <ol> @@ -312,7 +323,7 @@ Getting the best performance of a Spark Streaming application on a cluster requi There are a number of optimizations that can be done in Spark to minimize the processing time of each batch. These have been discussed in detail in [Tuning Guide](tuning.html). This section highlights some of the most important ones. ### Level of Parallelism -Cluster resources maybe under-utilized if the number of parallel tasks used in any stage of the computation is not high enough. For example, for distributed reduce operations like `reduceByKey` and `reduceByKeyAndWindow`, the default number of parallel tasks is 8. You can pass the level of parallelism as an argument (see the [`spark.PairDStreamFunctions`](api/streaming/index.html#spark.PairDStreamFunctions) documentation), or set the system property `spark.default.parallelism` to change the default. +Cluster resources may be under-utilized if the number of parallel tasks used in any stage of the computation is not high enough. For example, for distributed reduce operations like `reduceByKey` and `reduceByKeyAndWindow`, the default number of parallel tasks is 8. You can pass the level of parallelism as an argument (see the [`PairDStreamFunctions`](api/streaming/index.html#org.apache.spark.PairDStreamFunctions) documentation), or set the system property `spark.default.parallelism` to change the default. ### Data Serialization The overhead of data serialization can be significant, especially when sub-second batch sizes are to be achieved. There are two aspects to it. @@ -342,7 +353,7 @@ This value is closely tied with any window operation that is being used. Any win ## Memory Tuning Tuning the memory usage and GC behavior of Spark applications has been discussed in great detail in the [Tuning Guide](tuning.html). It is recommended that you read that. In this section, we highlight a few customizations that are strongly recommended to minimize GC-related pauses in Spark Streaming applications and achieve more consistent batch processing times. -* **Default persistence level of DStreams**: Unlike RDDs, the default persistence level of DStreams serializes the data in memory (that is, [StorageLevel.MEMORY_ONLY_SER](api/core/index.html#spark.storage.StorageLevel$) for DStream compared to [StorageLevel.MEMORY_ONLY](api/core/index.html#spark.storage.StorageLevel$) for RDDs). Even though keeping the data serialized incurs a higher serialization overheads, it significantly reduces GC pauses. +* **Default persistence level of DStreams**: Unlike RDDs, the default persistence level of DStreams serializes the data in memory (that is, [StorageLevel.MEMORY_ONLY_SER](api/core/index.html#org.apache.spark.storage.StorageLevel$) for DStream compared to [StorageLevel.MEMORY_ONLY](api/core/index.html#org.apache.spark.storage.StorageLevel$) for RDDs). Even though keeping the data serialized incurs higher serialization overhead, it significantly reduces GC pauses (see the sketch below). * **Concurrent garbage collector**: Using the concurrent mark-and-sweep GC further minimizes the variability of GC pauses. Even though concurrent GC is known to reduce the overall processing throughput of the system, its use is still recommended to achieve more consistent batch processing times.
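As an illustrative sketch of the first point (assuming the `words` DStream from the NetworkWordCount example shown earlier), a stream's storage level can be overridden from the serialized default:

{% highlight scala %}
import org.apache.spark.storage.StorageLevel

// Keep this stream's RDDs as deserialized objects rather than the
// serialized MEMORY_ONLY_SER default for DStreams, trading higher
// GC pressure for faster in-memory access.
words.persist(StorageLevel.MEMORY_ONLY)
{% endhighlight %}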
@@ -464,10 +475,10 @@ If the driver had crashed in the middle of the processing of time 3, then it wil # Java API -Similar to [Spark's Java API](java-programming-guide.html), we also provide a Java API for Spark Streaming which allows all its features to be accessible from a Java program. This is defined in [spark.streaming.api.java] (api/streaming/index.html#spark.streaming.api.java.package) package and includes [JavaStreamingContext](api/streaming/index.html#spark.streaming.api.java.JavaStreamingContext) and [JavaDStream](api/streaming/index.html#spark.streaming.api.java.JavaDStream) classes that provide the same methods as their Scala counterparts, but take Java functions (that is, Function, and Function2) and return Java data and collection types. Some of the key points to note are: +Similar to [Spark's Java API](java-programming-guide.html), we also provide a Java API for Spark Streaming which allows all its features to be accessible from a Java program. This is defined in the [org.apache.spark.streaming.api.java](api/streaming/index.html#org.apache.spark.streaming.api.java.package) package and includes [JavaStreamingContext](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaStreamingContext) and [JavaDStream](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaDStream) classes that provide the same methods as their Scala counterparts, but take Java functions (that is, Function, and Function2) and return Java data and collection types. Some of the key points to note are: -1. Functions for transformations must be implemented as subclasses of [Function](api/core/index.html#spark.api.java.function.Function) and [Function2](api/core/index.html#spark.api.java.function.Function2) -1. Unlike the Scala API, the Java API handles DStreams for key-value pairs using a separate [JavaPairDStream](api/streaming/index.html#spark.streaming.api.java.JavaPairDStream) class(similar to [JavaRDD and JavaPairRDD](java-programming-guide.html#rdd-classes). DStream functions like `map` and `filter` are implemented separately by JavaDStreams and JavaPairDStream to return DStreams of appropriate types. +1. Functions for transformations must be implemented as subclasses of [Function](api/core/index.html#org.apache.spark.api.java.function.Function) and [Function2](api/core/index.html#org.apache.spark.api.java.function.Function2) +1. Unlike the Scala API, the Java API handles DStreams for key-value pairs using a separate [JavaPairDStream](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaPairDStream) class (similar to [JavaRDD and JavaPairRDD](java-programming-guide.html#rdd-classes)). DStream functions like `map` and `filter` are implemented separately by JavaDStreams and JavaPairDStream to return DStreams of appropriate types. Spark's [Java Programming Guide](java-programming-guide.html) gives more ideas about using the Java API. To extend the ideas presented for RDDs to DStreams, we present parts of the Java version of the same NetworkWordCount example presented above. The full source code is given at `<spark repo>/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java` @@ -479,7 +490,7 @@ JavaDStream<String> lines = ssc.socketTextStream(ip, port); {% endhighlight %} -Then the `lines` are split into words by using the `flatMap` function and [FlatMapFunction](api/core/index.html#spark.api.java.function.FlatMapFunction).
+Then the `lines` are split into words by using the `flatMap` function and [FlatMapFunction](api/core/index.html#org.apache.spark.api.java.function.FlatMapFunction). {% highlight java %} JavaDStream<String> words = lines.flatMap( @@ -491,7 +502,7 @@ JavaDStream<String> words = lines.flatMap( }); {% endhighlight %} -The `words` is then mapped to a [JavaPairDStream](api/streaming/index.html#spark.streaming.api.java.JavaPairDStream) of `(word, 1)` pairs using `map` and [PairFunction](api/core/index.html#spark.api.java.function.PairFunction). This is reduced by using `reduceByKey` and [Function2](api/core/index.html#spark.api.java.function.Function2). +The `words` DStream is then mapped to a [JavaPairDStream](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaPairDStream) of `(word, 1)` pairs using `map` and [PairFunction](api/core/index.html#org.apache.spark.api.java.function.PairFunction). This is reduced by using `reduceByKey` and [Function2](api/core/index.html#org.apache.spark.api.java.function.Function2). {% highlight java %} JavaPairDStream<String, Integer> wordCounts = words.map( @@ -510,8 +521,8 @@ JavaPairDStream<String, Integer> wordCounts = words.map( {% endhighlight %} - # Where to Go from Here -* API docs - [Scala](api/streaming/index.html#spark.streaming.package) and [Java](api/streaming/index.html#spark.streaming.api.java.package) -* More examples - [Scala](https://github.com/mesos/spark/tree/master/examples/src/main/scala/spark/streaming/examples) and [Java](https://github.com/mesos/spark/tree/master/examples/src/main/java/spark/streaming/examples) + +* API docs - [Scala](api/streaming/index.html#org.apache.spark.streaming.package) and [Java](api/streaming/index.html#org.apache.spark.streaming.api.java.package) +* More examples - [Scala](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/spark/streaming/examples) and [Java](https://github.com/apache/incubator-spark/tree/master/examples/src/main/java/spark/streaming/examples) * [Paper describing Spark Streaming](http://www.eecs.berkeley.edu/Pubs/TechRpts/2012/EECS-2012-259.pdf) diff --git a/docs/tuning.md b/docs/tuning.md index 5ffca54481..28d88a2659 100644 --- a/docs/tuning.md +++ b/docs/tuning.md @@ -32,24 +32,25 @@ in your operations) and performance. It provides two serialization libraries: [`java.io.Externalizable`](http://docs.oracle.com/javase/6/docs/api/java/io/Externalizable.html). Java serialization is flexible but often quite slow, and leads to large serialized formats for many classes. -* [Kryo serialization](http://code.google.com/p/kryo/wiki/V1Documentation): Spark can also use +* [Kryo serialization](http://code.google.com/p/kryo/): Spark can also use the Kryo library (version 2) to serialize objects more quickly. Kryo is significantly faster and more compact than Java serialization (often as much as 10x), but does not support all `Serializable` types and requires you to *register* the classes you'll use in the program in advance for best performance. -You can switch to using Kryo by calling `System.setProperty("spark.serializer", "spark.KryoSerializer")` +You can switch to using Kryo by calling `System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")` *before* creating your SparkContext. The only reason it is not the default is because of the custom registration requirement, but we recommend trying it in any network-intensive application.
Finally, to register your classes with Kryo, create a public class that extends -[`spark.KryoRegistrator`](api/core/index.html#spark.KryoRegistrator) and set the +[`org.apache.spark.serializer.KryoRegistrator`](api/core/index.html#org.apache.spark.serializer.KryoRegistrator) and set the `spark.kryo.registrator` system property to point to it, as follows: {% highlight scala %} import com.esotericsoftware.kryo.Kryo +import org.apache.spark.serializer.KryoRegistrator -class MyRegistrator extends spark.KryoRegistrator { +class MyRegistrator extends KryoRegistrator { override def registerClasses(kryo: Kryo) { kryo.register(classOf[MyClass1]) kryo.register(classOf[MyClass2]) @@ -57,7 +58,7 @@ class MyRegistrator extends spark.KryoRegistrator { } // Make sure to set these properties *before* creating a SparkContext! -System.setProperty("spark.serializer", "spark.KryoSerializer") +System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") System.setProperty("spark.kryo.registrator", "mypackage.MyRegistrator") val sc = new SparkContext(...) {% endhighlight %} @@ -216,7 +217,7 @@ enough. Spark automatically sets the number of "map" tasks to run on each file a (though you can control it through optional parameters to `SparkContext.textFile`, etc), and for distributed "reduce" operations, such as `groupByKey` and `reduceByKey`, it uses the largest parent RDD's number of partitions. You can pass the level of parallelism as a second argument -(see the [`spark.PairRDDFunctions`](api/core/index.html#spark.PairRDDFunctions) documentation), +(see the [`org.apache.spark.rdd.PairRDDFunctions`](api/core/index.html#org.apache.spark.rdd.PairRDDFunctions) documentation), or set the system property `spark.default.parallelism` to change the default. In general, we recommend 2-3 tasks per CPU core in your cluster. diff --git a/ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh b/ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh deleted file mode 100644 index 50ecf83404..0000000000 --- a/ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -# These variables are automatically filled in by the mesos-ec2 script. -export MESOS_MASTERS="{{master_list}}" -export MESOS_SLAVES="{{slave_list}}" -export MESOS_ZOO_LIST="{{zoo_list}}" -export MESOS_HDFS_DATA_DIRS="{{hdfs_data_dirs}}" -export MESOS_MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}" -export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}" diff --git a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh index 166a884c88..675429c57e 100644 --- a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh +++ b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh @@ -1,11 +1,13 @@ -#!/bin/bash +#!/usr/bin/env bash -# These variables are automatically filled in by the mesos-ec2 script. -export MESOS_MASTERS="{{master_list}}" -export MESOS_SLAVES="{{slave_list}}" -export MESOS_ZOO_LIST="{{zoo_list}}" -export MESOS_HDFS_DATA_DIRS="{{hdfs_data_dirs}}" -export MESOS_MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}" -export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}" +# These variables are automatically filled in by the spark-ec2 script.
+export MASTERS="{{master_list}}" +export SLAVES="{{slave_list}}" +export HDFS_DATA_DIRS="{{hdfs_data_dirs}}" +export MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}" +export SPARK_LOCAL_DIRS="{{spark_local_dirs}}" export MODULES="{{modules}}" +export SPARK_VERSION="{{spark_version}}" +export SHARK_VERSION="{{shark_version}}" +export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}" export SWAP_MB="{{swap}}" diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 2ec3c007fb..932e70db96 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -9,9 +9,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -36,9 +36,8 @@ import boto from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType from boto import ec2 -# A static URL from which to figure out the latest Mesos EC2 AMI -LATEST_AMI_URL = "https://s3.amazonaws.com/mesos-images/ids/latest-spark-0.7" - +# A URL prefix from which to fetch AMI information +AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list" # Configure and parse our command-line arguments def parse_args(): @@ -53,7 +52,7 @@ def parse_args(): help="Seconds to wait for nodes to start (default: 120)") parser.add_option("-k", "--key-pair", help="Key pair to use on instances") - parser.add_option("-i", "--identity-file", + parser.add_option("-i", "--identity-file", help="SSH private key file to use for logging into instances") parser.add_option("-t", "--instance-type", default="m1.large", help="Type of instance to launch (default: m1.large). " + @@ -66,9 +65,14 @@ def parse_args(): help="Availability zone to launch instances in, or 'all' to spread " + "slaves across multiple (an additional $0.01/Gb for bandwidth" + "between zones applies)") - parser.add_option("-a", "--ami", default="latest", - help="Amazon Machine Image ID to use, or 'latest' to use latest " + - "available AMI (default: latest)") + parser.add_option("-a", "--ami", help="Amazon Machine Image ID to use") + parser.add_option("-v", "--spark-version", default="0.7.3", + help="Version of Spark to use: 'X.Y.Z' or a specific git hash") + parser.add_option("--spark-git-repo", + default="https://github.com/mesos/spark", + help="Github repo from which to checkout supplied commit hash") + parser.add_option("--hadoop-major-version", default="1", + help="Major version of Hadoop (default: 1)") parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", help="Use SSH dynamic port forwarding to create a SOCKS proxy at " + "the given local address (for use with login)") @@ -84,22 +88,16 @@ def parse_args(): parser.add_option("--spot-price", metavar="PRICE", type="float", help="If specified, launch slaves as spot instances with the given " + "maximum price (in dollars)") - parser.add_option("--cluster-type", type="choice", metavar="TYPE", - choices=["mesos", "standalone"], default="standalone", - help="'mesos' for a Mesos cluster, 'standalone' for a standalone " + - "Spark cluster (default: standalone)") parser.add_option("--ganglia", action="store_true", default=True, help="Setup Ganglia monitoring on cluster (default: on). 
NOTE: " + "the Ganglia page will be publicly accessible") parser.add_option("--no-ganglia", action="store_false", dest="ganglia", help="Disable Ganglia monitoring for the cluster") - parser.add_option("--old-scripts", action="store_true", default=False, - help="Use old mesos-ec2 scripts, for Spark <= 0.6 AMIs") parser.add_option("-u", "--user", default="root", help="The SSH user you want to connect as (default: root)") parser.add_option("--delete-groups", action="store_true", default=False, help="When destroying a cluster, delete the security groups that were created") - + (opts, args) = parser.parse_args() if len(args) != 2: parser.print_help() @@ -109,9 +107,6 @@ def parse_args(): print >> stderr, ("ERROR: The -i or --identity-file argument is " + "required for " + action) sys.exit(1) - if opts.cluster_type not in ["mesos", "standalone"] and action == "launch": - print >> stderr, ("ERROR: Invalid cluster type: " + opts.cluster_type) - sys.exit(1) # Boto config check # http://boto.cloudhackers.com/en/latest/boto_config_tut.html @@ -158,66 +153,96 @@ def wait_for_instances(conn, instances): def is_active(instance): return (instance.state in ['pending', 'running', 'stopping', 'stopped']) +# Return correct versions of Spark and Shark, given the supplied Spark version +def get_spark_shark_version(opts): + spark_shark_map = {"0.7.3": "0.7.0"} + version = opts.spark_version.replace("v", "") + if version not in spark_shark_map: + print >> stderr, "Don't know about Spark version: %s" % version + sys.exit(1) + return (version, spark_shark_map[version]) + +# Attempt to resolve an appropriate AMI given the architecture and +# region of the request. +def get_spark_ami(opts): + instance_types = { + "m1.small": "pvm", + "m1.medium": "pvm", + "m1.large": "pvm", + "m1.xlarge": "pvm", + "t1.micro": "pvm", + "c1.medium": "pvm", + "c1.xlarge": "pvm", + "m2.xlarge": "pvm", + "m2.2xlarge": "pvm", + "m2.4xlarge": "pvm", + "cc1.4xlarge": "hvm", + "cc2.8xlarge": "hvm", + "cg1.4xlarge": "hvm", + "hs1.8xlarge": "hvm", + "hi1.4xlarge": "hvm", + "m3.xlarge": "hvm", + "m3.2xlarge": "hvm", + "cr1.8xlarge": "hvm" + } + if opts.instance_type in instance_types: + instance_type = instance_types[opts.instance_type] + else: + instance_type = "pvm" + print >> stderr,\ + "Don't recognize %s, assuming type is pvm" % opts.instance_type + + ami_path = "%s/%s/%s" % (AMI_PREFIX, opts.region, instance_type) + try: + ami = urllib2.urlopen(ami_path).read().strip() + print "Spark AMI: " + ami + except: + print >> stderr, "Could not resolve AMI at: " + ami_path + sys.exit(1) + + return ami # Launch a cluster of the given name, by setting up its security groups, # and then starting new instances in them. -# Returns a tuple of EC2 reservation objects for the master, slave -# and zookeeper instances (in that order). +# Returns a tuple of EC2 reservation objects for the master and slaves # Fails if there already instances running in the cluster's groups. def launch_cluster(conn, opts, cluster_name): print "Setting up security groups..." 
master_group = get_or_make_group(conn, cluster_name + "-master") slave_group = get_or_make_group(conn, cluster_name + "-slaves") - zoo_group = get_or_make_group(conn, cluster_name + "-zoo") if master_group.rules == []: # Group was just now created master_group.authorize(src_group=master_group) master_group.authorize(src_group=slave_group) - master_group.authorize(src_group=zoo_group) master_group.authorize('tcp', 22, 22, '0.0.0.0/0') master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0') + master_group.authorize('tcp', 33000, 33000, '0.0.0.0/0') master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0') master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0') master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0') - if opts.cluster_type == "mesos": - master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0') + master_group.authorize('tcp', 3030, 3035, '0.0.0.0/0') if opts.ganglia: master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0') if slave_group.rules == []: # Group was just now created slave_group.authorize(src_group=master_group) slave_group.authorize(src_group=slave_group) - slave_group.authorize(src_group=zoo_group) slave_group.authorize('tcp', 22, 22, '0.0.0.0/0') slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0') slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0') slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0') slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0') slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0') - if zoo_group.rules == []: # Group was just now created - zoo_group.authorize(src_group=master_group) - zoo_group.authorize(src_group=slave_group) - zoo_group.authorize(src_group=zoo_group) - zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0') - zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0') - zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0') - zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0') # Check if instances are already running in our groups active_nodes = get_existing_cluster(conn, opts, cluster_name, die_on_error=False) if any(active_nodes): print >> stderr, ("ERROR: There are already instances running in " + - "group %s, %s or %s" % (master_group.name, slave_group.name, zoo_group.name)) + "group %s or %s" % (master_group.name, slave_group.name)) sys.exit(1) - # Figure out the latest AMI from our static URL - if opts.ami == "latest": - try: - opts.ami = urllib2.urlopen(LATEST_AMI_URL).read().strip() - print "Latest Spark AMI: " + opts.ami - except: - print >> stderr, "Could not read " + LATEST_AMI_URL - sys.exit(1) - + # Figure out Spark AMI + if opts.ami is None: + opts.ami = get_spark_ami(opts) print "Launching instances..." try: @@ -257,7 +282,7 @@ def launch_cluster(conn, opts, cluster_name): block_device_map = block_map) my_req_ids += [req.id for req in slave_reqs] i += 1 - + print "Waiting for spot instances to be granted..." 
try: while True: @@ -284,9 +309,9 @@ def launch_cluster(conn, opts, cluster_name): print "Canceling spot instance requests" conn.cancel_spot_instance_requests(my_req_ids) # Log a warning if any of these requests actually launched instances: - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name, die_on_error=False) - running = len(master_nodes) + len(slave_nodes) + len(zoo_nodes) + running = len(master_nodes) + len(slave_nodes) if running: print >> stderr, ("WARNING: %d instances are still running" % running) sys.exit(0) @@ -327,21 +352,17 @@ def launch_cluster(conn, opts, cluster_name): master_nodes = master_res.instances print "Launched master in %s, regid = %s" % (zone, master_res.id) - zoo_nodes = [] - # Return all the instances - return (master_nodes, slave_nodes, zoo_nodes) + return (master_nodes, slave_nodes) # Get the EC2 instances in an existing cluster if available. -# Returns a tuple of lists of EC2 instance objects for the masters, -# slaves and zookeeper nodes (in that order). +# Returns a tuple of lists of EC2 instance objects for the masters and slaves def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): print "Searching for existing cluster " + cluster_name + "..." reservations = conn.get_all_instances() master_nodes = [] slave_nodes = [] - zoo_nodes = [] for res in reservations: active = [i for i in res.instances if is_active(i)] if len(active) > 0: @@ -350,13 +371,11 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): master_nodes += res.instances elif group_names == [cluster_name + "-slaves"]: slave_nodes += res.instances - elif group_names == [cluster_name + "-zoo"]: - zoo_nodes += res.instances - if any((master_nodes, slave_nodes, zoo_nodes)): - print ("Found %d master(s), %d slaves, %d ZooKeeper nodes" % - (len(master_nodes), len(slave_nodes), len(zoo_nodes))) + if any((master_nodes, slave_nodes)): + print ("Found %d master(s), %d slaves" % + (len(master_nodes), len(slave_nodes))) if (master_nodes != [] and slave_nodes != []) or not die_on_error: - return (master_nodes, slave_nodes, zoo_nodes) + return (master_nodes, slave_nodes) else: if master_nodes == [] and slave_nodes != []: print "ERROR: Could not find master in group " + cluster_name + "-master" @@ -369,7 +388,7 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): # Deploy configuration files and run setup scripts on a newly launched # or started EC2 cluster. -def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_key): +def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): master = master_nodes[0].public_dns_name if deploy_ssh_key: print "Copying SSH key %s to master..." 
% opts.identity_file @@ -377,63 +396,46 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k scp(master, opts, opts.identity_file, '~/.ssh/id_rsa') ssh(master, opts, 'chmod 600 ~/.ssh/id_rsa') - if opts.cluster_type == "mesos": - modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mesos'] - elif opts.cluster_type == "standalone": - modules = ['ephemeral-hdfs', 'persistent-hdfs', 'spark-standalone'] + modules = ['spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', + 'mapreduce', 'spark-standalone'] + + if opts.hadoop_major_version == "1": + modules = filter(lambda x: x != "mapreduce", modules) if opts.ganglia: modules.append('ganglia') - if not opts.old_scripts: - # NOTE: We should clone the repository before running deploy_files to - # prevent ec2-variables.sh from being overwritten - ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git") + # NOTE: We should clone the repository before running deploy_files to + # prevent ec2-variables.sh from being overwritten + ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v2") print "Deploying files to master..." - deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, - zoo_nodes, modules) + deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules) print "Running setup on master..." - if opts.old_scripts: - if opts.cluster_type == "mesos": - setup_mesos_cluster(master, opts) - elif opts.cluster_type == "standalone": - setup_standalone_cluster(master, slave_nodes, opts) - else: - setup_spark_cluster(master, opts) + setup_spark_cluster(master, opts) print "Done!" -def setup_mesos_cluster(master, opts): - ssh(master, opts, "chmod u+x mesos-ec2/setup") - ssh(master, opts, "mesos-ec2/setup %s %s %s %s" % - ("generic", "none", "master", opts.swap)) - def setup_standalone_cluster(master, slave_nodes, opts): slave_ips = '\n'.join([i.public_dns_name for i in slave_nodes]) ssh(master, opts, "echo \"%s\" > spark/conf/slaves" % (slave_ips)) ssh(master, opts, "/root/spark/bin/start-all.sh") - + def setup_spark_cluster(master, opts): ssh(master, opts, "chmod u+x spark-ec2/setup.sh") ssh(master, opts, "spark-ec2/setup.sh") - if opts.cluster_type == "mesos": - print "Mesos cluster started at http://%s:8080" % master - elif opts.cluster_type == "standalone": - print "Spark standalone cluster started at http://%s:8080" % master + print "Spark standalone cluster started at http://%s:8080" % master if opts.ganglia: print "Ganglia started at http://%s:5080/ganglia" % master # Wait for a whole cluster (masters, slaves and ZooKeeper) to start up -def wait_for_cluster(conn, wait_secs, master_nodes, slave_nodes, zoo_nodes): +def wait_for_cluster(conn, wait_secs, master_nodes, slave_nodes): print "Waiting for instances to start up..." time.sleep(5) wait_for_instances(conn, master_nodes) wait_for_instances(conn, slave_nodes) - if zoo_nodes != []: - wait_for_instances(conn, zoo_nodes) print "Waiting %d more seconds..." % wait_secs time.sleep(wait_secs) @@ -454,7 +456,12 @@ def get_num_disks(instance_type): "m2.4xlarge": 2, "cc1.4xlarge": 2, "cc2.8xlarge": 4, - "cg1.4xlarge": 2 + "cg1.4xlarge": 2, + "hs1.8xlarge": 24, + "cr1.8xlarge": 2, + "hi1.4xlarge": 2, + "m3.xlarge": 0, + "m3.2xlarge": 0 } if instance_type in disks_by_instance: return disks_by_instance[instance_type] @@ -469,8 +476,7 @@ def get_num_disks(instance_type): # cluster (e.g. lists of masters and slaves). 
Files are only deployed to # the first master instance in the cluster, and we expect the setup # script to be run on that instance to copy them to other nodes. -def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes, - modules): +def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): active_master = master_nodes[0].public_dns_name num_disks = get_num_disks(opts.instance_type) @@ -483,28 +489,30 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes, mapred_local_dirs += ",/mnt%d/hadoop/mrlocal" % i spark_local_dirs += ",/mnt%d/spark" % i - if zoo_nodes != []: - zoo_list = '\n'.join([i.public_dns_name for i in zoo_nodes]) - cluster_url = "zoo://" + ",".join( - ["%s:2181/mesos" % i.public_dns_name for i in zoo_nodes]) - elif opts.cluster_type == "mesos": - zoo_list = "NONE" - cluster_url = "%s:5050" % active_master - elif opts.cluster_type == "standalone": - zoo_list = "NONE" - cluster_url = "%s:7077" % active_master + cluster_url = "%s:7077" % active_master + + if "." in opts.spark_version: + # Pre-built spark & shark deploy + (spark_v, shark_v) = get_spark_shark_version(opts) + else: + # Spark-only custom deploy + spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version) + shark_v = "" + modules = filter(lambda x: x != "shark", modules) template_vars = { "master_list": '\n'.join([i.public_dns_name for i in master_nodes]), "active_master": active_master, "slave_list": '\n'.join([i.public_dns_name for i in slave_nodes]), - "zoo_list": zoo_list, "cluster_url": cluster_url, "hdfs_data_dirs": hdfs_data_dirs, "mapred_local_dirs": mapred_local_dirs, "spark_local_dirs": spark_local_dirs, "swap": str(opts.swap), - "modules": '\n'.join(modules) + "modules": '\n'.join(modules), + "spark_version": spark_v, + "shark_version": shark_v, + "hadoop_major_version": opts.hadoop_major_version } # Create a temp directory in which we will place all the files to be @@ -528,7 +536,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes, dest.write(text) dest.close() # rsync the whole directory over to the master machine - command = (("rsync -rv -e 'ssh -o StrictHostKeyChecking=no -i %s' " + + command = (("rsync -rv -e 'ssh -o StrictHostKeyChecking=no -i %s' " + "'%s/' '%s@%s:/'") % (opts.identity_file, tmp_dir, opts.user, active_master)) subprocess.check_call(command, shell=True) # Remove the temp directory we created above @@ -554,12 +562,12 @@ def ssh(host, opts, command): except subprocess.CalledProcessError as e: if (tries > 2): raise e - print "Error connecting to host {0}, sleeping 30".format(e) + print "Couldn't connect to host {0}, waiting 30 seconds".format(e) time.sleep(30) tries = tries + 1 - - - + + + # Gets a list of zones to launch instances in @@ -593,20 +601,20 @@ def main(): if action == "launch": if opts.resume: - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name) else: - (master_nodes, slave_nodes, zoo_nodes) = launch_cluster( + (master_nodes, slave_nodes) = launch_cluster( conn, opts, cluster_name) - wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes, zoo_nodes) - setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, True) + wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes) + setup_cluster(conn, master_nodes, slave_nodes, opts, True) elif action == "destroy": response = raw_input("Are you sure you want to destroy the cluster " + cluster_name + "?\nALL DATA ON ALL NODES WILL BE 
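The revised ssh helper above gives up only after three attempts, sleeping 30 seconds between failures so freshly booted instances have time to start accepting connections. A self-contained sketch of that retry pattern; the flag set matches the rsync invocation shown in deploy_files, and the function name is ours:

    import subprocess
    import time

    def ssh_with_retries(host, user, identity_file, command,
                         max_tries=3, wait=30):
        # Retry on CalledProcessError, as the script's ssh() does, and
        # re-raise once the attempts are exhausted.
        cmd = ("ssh -o StrictHostKeyChecking=no -i '%s' %s@%s '%s'"
               % (identity_file, user, host, command))
        for attempt in range(1, max_tries + 1):
            try:
                return subprocess.check_call(cmd, shell=True)
            except subprocess.CalledProcessError:
                if attempt == max_tries:
                    raise
                time.sleep(wait)
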
LOST!!\n" + "Destroy cluster " + cluster_name + " (y/N): ") if response == "y": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name, die_on_error=False) print "Terminating master..." for inst in master_nodes: @@ -614,15 +622,11 @@ def main(): print "Terminating slaves..." for inst in slave_nodes: inst.terminate() - if zoo_nodes != []: - print "Terminating zoo..." - for inst in zoo_nodes: - inst.terminate() # Delete security groups as well if opts.delete_groups: print "Deleting security groups (this will take some time)..." - group_names = [cluster_name + "-master", cluster_name + "-slaves", cluster_name + "-zoo"] + group_names = [cluster_name + "-master", cluster_name + "-slaves"] attempt = 1; while attempt <= 3: @@ -639,7 +643,7 @@ def main(): from_port=rule.from_port, to_port=rule.to_port, src_group=grant) - + # Sleep for AWS eventual-consistency to catch up, and for instances # to terminate time.sleep(30) # Yes, it does have to be this long :-( @@ -650,19 +654,19 @@ def main(): except boto.exception.EC2ResponseError: success = False; print "Failed to delete security group " + group.name - + # Unfortunately, group.revoke() returns True even if a rule was not # deleted, so this needs to be rerun if something fails if success: break; - + attempt += 1 - + if not success: print "Failed to delete all security groups after 3 tries." print "Try re-running in a few minutes." elif action == "login": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Logging into master " + master + "..." @@ -673,17 +677,17 @@ def main(): (opts.identity_file, proxy_opt, opts.user, master), shell=True) elif action == "get-master": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(conn, opts, cluster_name) + (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name) print master_nodes[0].public_dns_name elif action == "stop": response = raw_input("Are you sure you want to stop the cluster " + cluster_name + "?\nDATA ON EPHEMERAL DISKS WILL BE LOST, " + - "BUT THE CLUSTER WILL KEEP USING SPACE ON\n" + + "BUT THE CLUSTER WILL KEEP USING SPACE ON\n" + "AMAZON EBS IF IT IS EBS-BACKED!!\n" + "Stop cluster " + cluster_name + " (y/N): ") if response == "y": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name, die_on_error=False) print "Stopping master..." for inst in master_nodes: @@ -693,15 +697,9 @@ def main(): for inst in slave_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.stop() - if zoo_nodes != []: - print "Stopping zoo..." - for inst in zoo_nodes: - if inst.state not in ["shutting-down", "terminated"]: - inst.stop() elif action == "start": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( - conn, opts, cluster_name) + (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name) print "Starting slaves..." for inst in slave_nodes: if inst.state not in ["shutting-down", "terminated"]: @@ -710,13 +708,8 @@ def main(): for inst in master_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.start() - if zoo_nodes != []: - print "Starting zoo..." 
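The destroy path above retries security-group deletion because AWS is eventually consistent: cross-referencing rules must be revoked and instances must finish terminating before the groups can be removed, and as the comment notes, even a revoke can report success without taking effect. A generic, boto-independent sketch of that three-strikes retry shape:

    import time

    def retry_until_true(action, attempts=3, wait=30):
        # Run `action` (a callable returning True on success) up to
        # `attempts` times, sleeping between tries so AWS state settles.
        for attempt in range(1, attempts + 1):
            if action():
                return True
            if attempt < attempts:
                time.sleep(wait)
        return False

The script inlines this loop with an attempt counter; factoring it out as above just makes the retry policy explicit.
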
- for inst in zoo_nodes: - if inst.state not in ["shutting-down", "terminated"]: - inst.start() - wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes, zoo_nodes) - setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, False) + wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes) + setup_cluster(conn, master_nodes, slave_nodes, opts, False) else: print >> stderr, "Invalid action: %s" % action diff --git a/examples/pom.xml b/examples/pom.xml index 7a8d08fade..224cf6c96c 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -19,22 +19,57 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-parent</artifactId> <version>0.8.0-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-examples</artifactId> <packaging>jar</packaging> <name>Spark Project Examples</name> - <url>http://spark-project.org/</url> + <url>http://spark.incubator.apache.org/</url> <dependencies> <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-library</artifactId> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-streaming</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-mllib</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-bagel</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase</artifactId> + <version>0.94.6</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.eclipse.jetty</groupId> @@ -55,182 +90,100 @@ <artifactId>scalacheck_${scala.version}</artifactId> <scope>test</scope> </dependency> - <dependency> - <groupId>org.apache.cassandra</groupId> - <artifactId>cassandra-all</artifactId> - <version>1.2.5</version> - <exclusions> - <exclusion> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - </exclusion> - <exclusion> - <groupId>com.googlecode.concurrentlinkedhashmap</groupId> - <artifactId>concurrentlinkedhashmap-lru</artifactId> - </exclusion> - <exclusion> - <groupId>com.ning</groupId> - <artifactId>compress-lzf</artifactId> - </exclusion> - <exclusion> - <groupId>io.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>jline</groupId> - <artifactId>jline</artifactId> - </exclusion> - <exclusion> - <groupId>log4j</groupId> - <artifactId>log4j</artifactId> - </exclusion> - <exclusion> - <groupId>org.apache.cassandra.deps</groupId> - <artifactId>avro</artifactId> - </exclusion> - </exclusions> - </dependency> + <dependency> + <groupId>org.apache.cassandra</groupId> + 
<artifactId>cassandra-all</artifactId> + <version>1.2.5</version> + <exclusions> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>com.googlecode.concurrentlinkedhashmap</groupId> + <artifactId>concurrentlinkedhashmap-lru</artifactId> + </exclusion> + <exclusion> + <groupId>com.ning</groupId> + <artifactId>compress-lzf</artifactId> + </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>jline</groupId> + <artifactId>jline</artifactId> + </exclusion> + <exclusion> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.cassandra.deps</groupId> + <artifactId>avro</artifactId> + </exclusion> + </exclusions> + </dependency> </dependencies> - <build> - <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> - <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> - <plugins> - <plugin> - <groupId>org.scalatest</groupId> - <artifactId>scalatest-maven-plugin</artifactId> - </plugin> - </plugins> - </build> <profiles> <profile> - <id>hadoop1</id> - <dependencies> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-streaming</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase</artifactId> - <version>0.94.6</version> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop1</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - <profile> - <id>hadoop2</id> - <dependencies> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop2</classifier> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-streaming</artifactId> - <version>${project.version}</version> - <classifier>hadoop2</classifier> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase</artifactId> - <version>0.94.6</version> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop2</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - <profile> <id>hadoop2-yarn</id> <dependencies> <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-streaming</artifactId> + 
<groupId>org.apache.spark</groupId> + <artifactId>spark-yarn</artifactId> <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> <scope>provided</scope> </dependency> - <dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase</artifactId> - <version>0.94.6</version> - </dependency> </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop2-yarn</classifier> - </configuration> - </plugin> - </plugins> - </build> </profile> </profiles> + + <build> + <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> + <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <configuration> + <shadedArtifactAttached>false</shadedArtifactAttached> + <outputFile>${project.build.directory}/scala-${scala.version}/${project.artifactId}-assembly-${project.version}.jar</outputFile> + <artifactSet> + <includes> + <include>*:*</include> + </includes> + </artifactSet> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> + </configuration> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> + <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> + <resource>reference.conf</resource> + </transformer> + </transformers> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> </project> diff --git a/examples/src/main/java/spark/examples/JavaHdfsLR.java b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java index 9485e0cfa9..be0d38589c 100644 --- a/examples/src/main/java/spark/examples/JavaHdfsLR.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.examples; +package org.apache.spark.examples; -import spark.api.java.JavaRDD; -import spark.api.java.JavaSparkContext; -import spark.api.java.function.Function; -import spark.api.java.function.Function2; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.Function2; import java.io.Serializable; import java.util.Arrays; diff --git a/examples/src/main/java/spark/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java index 2d34776177..5a6afe7eae 100644 --- a/examples/src/main/java/spark/examples/JavaKMeans.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java @@ -15,15 +15,15 @@ * limitations under the License. 
*/ -package spark.examples; +package org.apache.spark.examples; import scala.Tuple2; -import spark.api.java.JavaPairRDD; -import spark.api.java.JavaRDD; -import spark.api.java.JavaSparkContext; -import spark.api.java.function.Function; -import spark.api.java.function.PairFunction; -import spark.util.Vector; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.util.Vector; import java.util.List; import java.util.Map; diff --git a/examples/src/main/java/spark/examples/JavaLogQuery.java b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java index d22684d980..152f029213 100644 --- a/examples/src/main/java/spark/examples/JavaLogQuery.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java @@ -15,16 +15,16 @@ * limitations under the License. */ -package spark.examples; +package org.apache.spark.examples; import com.google.common.collect.Lists; import scala.Tuple2; import scala.Tuple3; -import spark.api.java.JavaPairRDD; -import spark.api.java.JavaRDD; -import spark.api.java.JavaSparkContext; -import spark.api.java.function.Function2; -import spark.api.java.function.PairFunction; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFunction; import java.io.Serializable; import java.util.Collections; diff --git a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java new file mode 100644 index 0000000000..c5603a639b --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples; + +import scala.Tuple2; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFlatMapFunction; +import org.apache.spark.api.java.function.PairFunction; + +import java.util.List; +import java.util.ArrayList; + +/** + * Computes the PageRank of URLs from an input file. Input file should + * be in format of: + * URL neighbor URL + * URL neighbor URL + * URL neighbor URL + * ... 
+ * where URL and their neighbors are separated by space(s). + */ +public class JavaPageRank { + private static class Sum extends Function2<Double, Double, Double> { + @Override + public Double call(Double a, Double b) { + return a + b; + } + } + + public static void main(String[] args) throws Exception { + if (args.length < 3) { + System.err.println("Usage: JavaPageRank <master> <file> <number_of_iterations>"); + System.exit(1); + } + + JavaSparkContext ctx = new JavaSparkContext(args[0], "JavaPageRank", + System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR")); + + // Loads in input file. It should be in format of: + // URL neighbor URL + // URL neighbor URL + // URL neighbor URL + // ... + JavaRDD<String> lines = ctx.textFile(args[1], 1); + + // Loads all URLs from input file and initialize their neighbors. + JavaPairRDD<String, List<String>> links = lines.map(new PairFunction<String, String, String>() { + @Override + public Tuple2<String, String> call(String s) { + String[] parts = s.split("\\s+"); + return new Tuple2<String, String>(parts[0], parts[1]); + } + }).distinct().groupByKey().cache(); + + // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one. + JavaPairRDD<String, Double> ranks = links.mapValues(new Function<List<String>, Double>() { + @Override + public Double call(List<String> rs) throws Exception { + return 1.0; + } + }); + + // Calculates and updates URL ranks continuously using PageRank algorithm. + for (int current = 0; current < Integer.parseInt(args[2]); current++) { + // Calculates URL contributions to the rank of other URLs. + JavaPairRDD<String, Double> contribs = links.join(ranks).values() + .flatMap(new PairFlatMapFunction<Tuple2<List<String>, Double>, String, Double>() { + @Override + public Iterable<Tuple2<String, Double>> call(Tuple2<List<String>, Double> s) { + List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>(); + for (String n : s._1) { + results.add(new Tuple2<String, Double>(n, s._2 / s._1.size())); + } + return results; + } + }); + + // Re-calculates URL ranks based on neighbor contributions. + ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() { + @Override + public Double call(Double sum) throws Exception { + return 0.15 + sum * 0.85; + } + }); + } + + // Collects all URL ranks and dump them to console. + List<Tuple2<String, Double>> output = ranks.collect(); + for (Tuple2 tuple : output) { + System.out.println(tuple._1 + " has rank: " + tuple._2 + "."); + } + + System.exit(0); + } +} diff --git a/examples/src/main/java/spark/examples/JavaSparkPi.java b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java index d5f42fbb38..4a2380caf5 100644 --- a/examples/src/main/java/spark/examples/JavaSparkPi.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java @@ -15,12 +15,12 @@ * limitations under the License. 
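JavaPageRank above (and the Scala SparkPageRank added later in this diff) iterate the same update: every URL divides its rank evenly among its out-links, and each new rank is 0.15 plus 0.85 times the contributions received. A pure-Python sketch of that computation on a small in-memory link table, without Spark, assuming every URL has at least one out-link:

    def pagerank(links, iterations):
        # links: dict mapping a url to the list of urls it points to.
        ranks = dict((url, 1.0) for url in links)
        for _ in range(iterations):
            contribs = dict((url, 0.0) for url in links)
            for url, neighbors in links.items():
                for n in neighbors:
                    contribs[n] = contribs.get(n, 0.0) + ranks[url] / len(neighbors)
            # Same damping as the examples: 0.15 + 0.85 * summed contributions.
            ranks = dict((url, 0.15 + 0.85 * c) for url, c in contribs.items())
        return ranks

    # pagerank({"a": ["b", "c"], "b": ["c"], "c": ["a"]}, 10)
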
*/ -package spark.examples; +package org.apache.spark.examples; -import spark.api.java.JavaRDD; -import spark.api.java.JavaSparkContext; -import spark.api.java.function.Function; -import spark.api.java.function.Function2; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.Function2; import java.util.ArrayList; import java.util.List; diff --git a/examples/src/main/java/spark/examples/JavaTC.java b/examples/src/main/java/org/apache/spark/examples/JavaTC.java index 559d7f9e53..17f21f6b77 100644 --- a/examples/src/main/java/spark/examples/JavaTC.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaTC.java @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.examples; +package org.apache.spark.examples; import scala.Tuple2; -import spark.api.java.JavaPairRDD; -import spark.api.java.JavaSparkContext; -import spark.api.java.function.PairFunction; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.PairFunction; import java.util.ArrayList; import java.util.HashSet; diff --git a/examples/src/main/java/spark/examples/JavaWordCount.java b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java index 1af370c1c3..07d32ad659 100644 --- a/examples/src/main/java/spark/examples/JavaWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java @@ -15,15 +15,15 @@ * limitations under the License. */ -package spark.examples; +package org.apache.spark.examples; import scala.Tuple2; -import spark.api.java.JavaPairRDD; -import spark.api.java.JavaRDD; -import spark.api.java.JavaSparkContext; -import spark.api.java.function.FlatMapFunction; -import spark.api.java.function.Function2; -import spark.api.java.function.PairFunction; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFunction; import java.util.Arrays; import java.util.List; diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java new file mode 100644 index 0000000000..628cb892b6 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.examples; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; + +import org.apache.spark.mllib.recommendation.ALS; +import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; +import org.apache.spark.mllib.recommendation.Rating; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.StringTokenizer; + +import scala.Tuple2; + +/** + * Example using MLLib ALS from Java. + */ +public class JavaALS { + + static class ParseRating extends Function<String, Rating> { + public Rating call(String line) { + StringTokenizer tok = new StringTokenizer(line, ","); + int x = Integer.parseInt(tok.nextToken()); + int y = Integer.parseInt(tok.nextToken()); + double rating = Double.parseDouble(tok.nextToken()); + return new Rating(x, y, rating); + } + } + + static class FeaturesToString extends Function<Tuple2<Object, double[]>, String> { + public String call(Tuple2<Object, double[]> element) { + return element._1().toString() + "," + Arrays.toString(element._2()); + } + } + + public static void main(String[] args) { + + if (args.length != 5 && args.length != 6) { + System.err.println( + "Usage: JavaALS <master> <ratings_file> <rank> <iterations> <output_dir> [<blocks>]"); + System.exit(1); + } + + int rank = Integer.parseInt(args[2]); + int iterations = Integer.parseInt(args[3]); + String outputDir = args[4]; + int blocks = -1; + if (args.length == 6) { + blocks = Integer.parseInt(args[5]); + } + + JavaSparkContext sc = new JavaSparkContext(args[0], "JavaALS", + System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR")); + JavaRDD<String> lines = sc.textFile(args[1]); + + JavaRDD<Rating> ratings = lines.map(new ParseRating()); + + MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks); + + model.userFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile( + outputDir + "/userFeatures"); + model.productFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile( + outputDir + "/productFeatures"); + System.out.println("Final user/product features written to " + outputDir); + + System.exit(0); + } +} diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java new file mode 100644 index 0000000000..cd59a139b9 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
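JavaALS above hands ALS.train a set of (user, product, rating) triples and gets back user and product feature matrices. What such a factorization model does at prediction time is just an inner product; a tiny illustrative sketch, with plain Python lists standing in for the learned feature vectors:

    def predict_rating(user_features, product_features):
        # A matrix-factorization model scores a (user, product) pair as
        # the dot product of their learned latent-feature vectors.
        return sum(u * p for u, p in zip(user_features, product_features))

    # predict_rating([0.9, 1.2], [1.1, 0.4])  -> 1.47
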
+ */ + +package org.apache.spark.mllib.examples; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; + +import org.apache.spark.mllib.clustering.KMeans; +import org.apache.spark.mllib.clustering.KMeansModel; + +import java.util.Arrays; +import java.util.StringTokenizer; + +/** + * Example using MLLib KMeans from Java. + */ +public class JavaKMeans { + + static class ParsePoint extends Function<String, double[]> { + public double[] call(String line) { + StringTokenizer tok = new StringTokenizer(line, " "); + int numTokens = tok.countTokens(); + double[] point = new double[numTokens]; + for (int i = 0; i < numTokens; ++i) { + point[i] = Double.parseDouble(tok.nextToken()); + } + return point; + } + } + + public static void main(String[] args) { + + if (args.length < 4) { + System.err.println( + "Usage: JavaKMeans <master> <input_file> <k> <max_iterations> [<runs>]"); + System.exit(1); + } + + String inputFile = args[1]; + int k = Integer.parseInt(args[2]); + int iterations = Integer.parseInt(args[3]); + int runs = 1; + + if (args.length >= 5) { + runs = Integer.parseInt(args[4]); + } + + JavaSparkContext sc = new JavaSparkContext(args[0], "JavaKMeans", + System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR")); + JavaRDD<String> lines = sc.textFile(args[1]); + + JavaRDD<double[]> points = lines.map(new ParsePoint()); + + KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs); + + System.out.println("Cluster centers:"); + for (double[] center : model.clusterCenters()) { + System.out.println(" " + Arrays.toString(center)); + } + double cost = model.computeCost(points.rdd()); + System.out.println("Cost: " + cost); + + System.exit(0); + } +} diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java new file mode 100644 index 0000000000..258061c8e6 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.examples; + + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; + +import org.apache.spark.mllib.classification.LogisticRegressionWithSGD; +import org.apache.spark.mllib.classification.LogisticRegressionModel; +import org.apache.spark.mllib.regression.LabeledPoint; + +import java.util.Arrays; +import java.util.StringTokenizer; + +/** + * Logistic regression based classification using ML Lib. 
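The JavaKMeans example above parses space-separated points and delegates the actual clustering to KMeans.train. For the mechanics, here is a minimal pure-Python Lloyd iteration, the step that training repeats until the centers converge (helper names are ours; points and centers are lists of floats):

    def closest_center(point, centers):
        # Index of the nearest center by squared Euclidean distance.
        def sqdist(a, b):
            return sum((ai - bi) ** 2 for ai, bi in zip(a, b))
        return min(range(len(centers)), key=lambda i: sqdist(point, centers[i]))

    def kmeans_step(points, centers):
        # One Lloyd iteration: assign every point to its nearest center,
        # then move each center to the mean of its assigned points.
        clusters = [[] for _ in centers]
        for p in points:
            clusters[closest_center(p, centers)].append(p)
        return [[sum(col) / float(len(cluster)) for col in zip(*cluster)]
                if cluster else center
                for cluster, center in zip(clusters, centers)]
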
+ */ +public class JavaLR { + + static class ParsePoint extends Function<String, LabeledPoint> { + public LabeledPoint call(String line) { + String[] parts = line.split(","); + double y = Double.parseDouble(parts[0]); + StringTokenizer tok = new StringTokenizer(parts[1], " "); + int numTokens = tok.countTokens(); + double[] x = new double[numTokens]; + for (int i = 0; i < numTokens; ++i) { + x[i] = Double.parseDouble(tok.nextToken()); + } + return new LabeledPoint(y, x); + } + } + + public static void printWeights(double[] a) { + System.out.println(Arrays.toString(a)); + } + + public static void main(String[] args) { + if (args.length != 4) { + System.err.println("Usage: JavaLR <master> <input_dir> <step_size> <niters>"); + System.exit(1); + } + + JavaSparkContext sc = new JavaSparkContext(args[0], "JavaLR", + System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR")); + JavaRDD<String> lines = sc.textFile(args[1]); + JavaRDD<LabeledPoint> points = lines.map(new ParsePoint()).cache(); + double stepSize = Double.parseDouble(args[2]); + int iterations = Integer.parseInt(args[3]); + + // Another way to configure LogisticRegression + // + // LogisticRegressionWithSGD lr = new LogisticRegressionWithSGD(); + // lr.optimizer().setNumIterations(iterations) + // .setStepSize(stepSize) + // .setMiniBatchFraction(1.0); + // lr.setIntercept(true); + // LogisticRegressionModel model = lr.train(points.rdd()); + + LogisticRegressionModel model = LogisticRegressionWithSGD.train(points.rdd(), + iterations, stepSize); + + System.out.print("Final w: "); + printWeights(model.weights()); + + System.exit(0); + } +} diff --git a/examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java index 096a9ae219..261813bf2f 100644 --- a/examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java +++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.streaming.examples; +package org.apache.spark.streaming.examples; -import spark.api.java.function.Function; -import spark.streaming.*; -import spark.streaming.api.java.*; -import spark.streaming.dstream.SparkFlumeEvent; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.streaming.*; +import org.apache.spark.streaming.api.java.*; +import org.apache.spark.streaming.dstream.SparkFlumeEvent; /** * Produces a count of events received from Flume. diff --git a/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java index c54d3f3d59..def87c199b 100644 --- a/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java @@ -15,17 +15,17 @@ * limitations under the License. 
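JavaLR above trains with LogisticRegressionWithSGD given a step size and an iteration count. A compact pure-Python sketch of the underlying stochastic gradient step for logistic regression, using single-example updates and labels in {0, 1}; the function name and update schedule are ours, not the library's:

    import math
    import random

    def train_lr(points, iterations, step_size):
        # points: list of (label, features) pairs, label 0.0 or 1.0.
        w = [0.0] * len(points[0][1])
        for _ in range(iterations):
            y, x = random.choice(points)
            margin = sum(wi * xi for wi, xi in zip(w, x))
            # Gradient of the logistic loss: (sigmoid(w.x) - y) * x.
            scale = 1.0 / (1.0 + math.exp(-margin)) - y
            w = [wi - step_size * scale * xi for wi, xi in zip(w, x)]
        return w
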
*/ -package spark.streaming.examples; +package org.apache.spark.streaming.examples; import com.google.common.collect.Lists; import scala.Tuple2; -import spark.api.java.function.FlatMapFunction; -import spark.api.java.function.Function2; -import spark.api.java.function.PairFunction; -import spark.streaming.Duration; -import spark.streaming.api.java.JavaDStream; -import spark.streaming.api.java.JavaPairDStream; -import spark.streaming.api.java.JavaStreamingContext; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.streaming.Duration; +import org.apache.spark.streaming.api.java.JavaDStream; +import org.apache.spark.streaming.api.java.JavaPairDStream; +import org.apache.spark.streaming.api.java.JavaStreamingContext; /** * Counts words in UTF8 encoded, '\n' delimited text received from the network every second. diff --git a/examples/src/main/java/spark/streaming/examples/JavaQueueStream.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java index 1f4a991542..c8c7389dd1 100644 --- a/examples/src/main/java/spark/streaming/examples/JavaQueueStream.java +++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java @@ -15,17 +15,17 @@ * limitations under the License. */ -package spark.streaming.examples; +package org.apache.spark.streaming.examples; import com.google.common.collect.Lists; import scala.Tuple2; -import spark.api.java.JavaRDD; -import spark.api.java.function.Function2; -import spark.api.java.function.PairFunction; -import spark.streaming.Duration; -import spark.streaming.api.java.JavaDStream; -import spark.streaming.api.java.JavaPairDStream; -import spark.streaming.api.java.JavaStreamingContext; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.streaming.Duration; +import org.apache.spark.streaming.api.java.JavaDStream; +import org.apache.spark.streaming.api.java.JavaPairDStream; +import org.apache.spark.streaming.api.java.JavaStreamingContext; import java.util.LinkedList; import java.util.List; diff --git a/examples/src/main/scala/spark/examples/BroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala index 911490cb6c..868ff81f67 100644 --- a/examples/src/main/scala/spark/examples/BroadcastTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples -import spark.SparkContext +import org.apache.spark.SparkContext object BroadcastTest { def main(args: Array[String]) { diff --git a/examples/src/main/scala/spark/examples/CassandraTest.scala b/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala index 104bfd5204..33bf7151a7 100644 --- a/examples/src/main/scala/spark/examples/CassandraTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala @@ -15,15 +15,15 @@ * limitations under the License. 
*/ -package spark.examples +package org.apache.spark.examples import org.apache.hadoop.mapreduce.Job import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat import org.apache.cassandra.hadoop.ConfigHelper import org.apache.cassandra.hadoop.ColumnFamilyInputFormat import org.apache.cassandra.thrift._ -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ import java.nio.ByteBuffer import java.util.SortedMap import org.apache.cassandra.db.IColumn diff --git a/examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala b/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala index 67ddaec8d2..92eb96bd8e 100644 --- a/examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples -import spark.SparkContext +import org.apache.spark.SparkContext object ExceptionHandlingTest { def main(args: Array[String]) { diff --git a/examples/src/main/scala/spark/examples/GroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala index 5cee413615..42c2e0e8e1 100644 --- a/examples/src/main/scala/spark/examples/GroupByTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ import java.util.Random object GroupByTest { diff --git a/examples/src/main/scala/spark/examples/HBaseTest.scala b/examples/src/main/scala/org/apache/spark/examples/HBaseTest.scala index 4dd6c243ac..efe2e93b0d 100644 --- a/examples/src/main/scala/spark/examples/HBaseTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/HBaseTest.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples -import spark._ -import spark.rdd.NewHadoopRDD +import org.apache.spark._ +import org.apache.spark.rdd.NewHadoopRDD import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor} import org.apache.hadoop.hbase.client.HBaseAdmin import org.apache.hadoop.hbase.mapreduce.TableInputFormat diff --git a/examples/src/main/scala/spark/examples/HdfsTest.scala b/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala index 23258336e2..d6a88d3032 100644 --- a/examples/src/main/scala/spark/examples/HdfsTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples -import spark._ +import org.apache.spark._ object HdfsTest { def main(args: Array[String]) { diff --git a/examples/src/main/scala/spark/examples/LocalALS.scala b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala index 7a449a9d72..4af45b2b4a 100644 --- a/examples/src/main/scala/spark/examples/LocalALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.examples +package org.apache.spark.examples import scala.math.sqrt import cern.jet.math._ diff --git a/examples/src/main/scala/spark/examples/LocalFileLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala index c1f8d32aa8..fb130ea198 100644 --- a/examples/src/main/scala/spark/examples/LocalFileLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples import java.util.Random -import spark.util.Vector +import org.apache.spark.util.Vector object LocalFileLR { val D = 10 // Numer of dimensions diff --git a/examples/src/main/scala/spark/examples/LocalKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala index 0a0bc6f476..f90ea35cd4 100644 --- a/examples/src/main/scala/spark/examples/LocalKMeans.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples import java.util.Random -import spark.util.Vector -import spark.SparkContext._ +import org.apache.spark.util.Vector +import org.apache.spark.SparkContext._ import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet diff --git a/examples/src/main/scala/spark/examples/LocalLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala index ab99bf1fbe..cd4e9f1af0 100644 --- a/examples/src/main/scala/spark/examples/LocalLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples import java.util.Random -import spark.util.Vector +import org.apache.spark.util.Vector /** * Logistic regression based classification. diff --git a/examples/src/main/scala/spark/examples/LocalPi.scala b/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala index ccd69695df..bb7f22ec8d 100644 --- a/examples/src/main/scala/spark/examples/LocalPi.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples import scala.math.random -import spark._ +import org.apache.spark._ import SparkContext._ object LocalPi { diff --git a/examples/src/main/scala/spark/examples/LogQuery.scala b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala index e815ececf7..17ff3ce764 100644 --- a/examples/src/main/scala/spark/examples/LogQuery.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ /** * Executes a roll up-style query against Apache logs. */ diff --git a/examples/src/main/scala/spark/examples/MultiBroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala index d0b1cf06e5..f79f0142b8 100644 --- a/examples/src/main/scala/spark/examples/MultiBroadcastTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.examples +package org.apache.spark.examples -import spark.SparkContext +import org.apache.spark.SparkContext object MultiBroadcastTest { def main(args: Array[String]) { diff --git a/examples/src/main/scala/spark/examples/SimpleSkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala index d197bbaf7c..37ddfb5db7 100644 --- a/examples/src/main/scala/spark/examples/SimpleSkewedGroupByTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ import java.util.Random object SimpleSkewedGroupByTest { diff --git a/examples/src/main/scala/spark/examples/SkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala index 4641b82444..9c954b2b5b 100644 --- a/examples/src/main/scala/spark/examples/SkewedGroupByTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ import java.util.Random object SkewedGroupByTest { diff --git a/examples/src/main/scala/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala index ba0dfd8f9b..814944ba1c 100644 --- a/examples/src/main/scala/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples import scala.math.sqrt import cern.jet.math._ import cern.colt.matrix._ import cern.colt.matrix.linalg._ -import spark._ +import org.apache.spark._ /** * Alternating least squares matrix factorization. diff --git a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala index ef6e09a8e8..646682878f 100644 --- a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala @@ -15,14 +15,13 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples import java.util.Random import scala.math.exp -import spark.util.Vector -import spark._ -import spark.deploy.SparkHadoopUtil -import spark.scheduler.InputFormatInfo +import org.apache.spark.util.Vector +import org.apache.spark._ +import org.apache.spark.scheduler.InputFormatInfo /** * Logistic regression based classification. 
@@ -52,7 +51,7 @@ object SparkHdfsLR { System.exit(1) } val inputPath = args(1) - val conf = SparkHadoopUtil.newConfiguration() + val conf = SparkEnv.get.hadoop.newConfiguration() val sc = new SparkContext(args(0), "SparkHdfsLR", System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")), Map(), InputFormatInfo.computePreferredLocations( diff --git a/examples/src/main/scala/spark/examples/SparkKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala index 38ed3b149a..f7bf75b4e5 100644 --- a/examples/src/main/scala/spark/examples/SparkKMeans.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples import java.util.Random -import spark.SparkContext -import spark.util.Vector -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.util.Vector +import org.apache.spark.SparkContext._ import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet diff --git a/examples/src/main/scala/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala index 52a0d69744..9ed9fe4d76 100644 --- a/examples/src/main/scala/spark/examples/SparkLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples import java.util.Random import scala.math.exp -import spark.util.Vector -import spark._ +import org.apache.spark.util.Vector +import org.apache.spark._ /** * Logistic regression based classification. diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala new file mode 100644 index 0000000000..2721caf08b --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala @@ -0,0 +1,46 @@ +package org.apache.spark.examples + +import org.apache.spark.SparkContext._ +import org.apache.spark.SparkContext + + +/** + * Computes the PageRank of URLs from an input file. Input file should + * be in format of: + * URL neighbor URL + * URL neighbor URL + * URL neighbor URL + * ... + * where URL and their neighbors are separated by space(s). 
+ */ +object SparkPageRank { + def main(args: Array[String]) { + if (args.length < 3) { + System.err.println("Usage: PageRank <master> <file> <number_of_iterations>") + System.exit(1) + } + var iters = args(2).toInt + val ctx = new SparkContext(args(0), "PageRank", + System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR"))) + val lines = ctx.textFile(args(1), 1) + val links = lines.map{ s => + val parts = s.split("\\s+") + (parts(0), parts(1)) + }.distinct().groupByKey().cache() + var ranks = links.mapValues(v => 1.0) + + for (i <- 1 to iters) { + val contribs = links.join(ranks).values.flatMap{ case (urls, rank) => + val size = urls.size + urls.map(url => (url, rank / size)) + } + ranks = contribs.reduceByKey(_ + _).mapValues(0.15 + 0.85 * _) + } + + val output = ranks.collect() + output.foreach(tup => println(tup._1 + " has rank: " + tup._2 + ".")) + + System.exit(0) + } +} + diff --git a/examples/src/main/scala/spark/examples/SparkPi.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala index 00560ac9d1..5a2bc9b0d0 100644 --- a/examples/src/main/scala/spark/examples/SparkPi.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples import scala.math.random -import spark._ +import org.apache.spark._ import SparkContext._ /** Computes an approximation to pi */ diff --git a/examples/src/main/scala/spark/examples/SparkTC.scala b/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala index bf988a953b..5a7a9d1bd8 100644 --- a/examples/src/main/scala/spark/examples/SparkTC.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.examples +package org.apache.spark.examples -import spark._ +import org.apache.spark._ import SparkContext._ import scala.util.Random import scala.collection.mutable diff --git a/bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala index de65e27fe0..cfafbaf23e 100644 --- a/bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala +++ b/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala @@ -15,13 +15,14 @@ * limitations under the License. */ -package spark.bagel.examples +package org.apache.spark.examples.bagel -import spark._ -import spark.SparkContext._ +import org.apache.spark._ +import org.apache.spark.SparkContext._ +import org.apache.spark.serializer.KryoRegistrator -import spark.bagel._ -import spark.bagel.Bagel._ +import org.apache.spark.bagel._ +import org.apache.spark.bagel.Bagel._ import scala.collection.mutable.ArrayBuffer diff --git a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala index a0c5ac9c18..72b5c7b88e 100644 --- a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala +++ b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala @@ -15,13 +15,13 @@ * limitations under the License. 
*/ -package spark.bagel.examples +package org.apache.spark.examples.bagel -import spark._ -import spark.SparkContext._ +import org.apache.spark._ +import org.apache.spark.SparkContext._ -import spark.bagel._ -import spark.bagel.Bagel._ +import org.apache.spark.bagel._ +import org.apache.spark.bagel.Bagel._ import scala.xml.{XML,NodeSeq} @@ -37,7 +37,7 @@ object WikipediaPageRank { System.exit(-1) } - System.setProperty("spark.serializer", "spark.KryoSerializer") + System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") System.setProperty("spark.kryo.registrator", classOf[PRKryoRegistrator].getName) val inputFile = args(0) diff --git a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala index 3c54a85f42..ddf6855325 100644 --- a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala +++ b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala @@ -15,21 +15,18 @@ * limitations under the License. */ -package spark.bagel.examples +package org.apache.spark.examples.bagel -import spark._ -import serializer.{DeserializationStream, SerializationStream, SerializerInstance} -import spark.SparkContext._ - -import spark.bagel._ -import spark.bagel.Bagel._ - -import scala.xml.{XML,NodeSeq} +import java.io.{InputStream, OutputStream, DataInputStream, DataOutputStream} +import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer +import scala.xml.{XML, NodeSeq} -import java.io.{InputStream, OutputStream, DataInputStream, DataOutputStream} -import java.nio.ByteBuffer +import org.apache.spark._ +import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance} +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.RDD object WikipediaPageRankStandalone { def main(args: Array[String]) { @@ -131,7 +128,7 @@ object WikipediaPageRankStandalone { } } -class WPRSerializer extends spark.serializer.Serializer { +class WPRSerializer extends org.apache.spark.serializer.Serializer { def newInstance(): SerializerInstance = new WPRSerializerInstance() } diff --git a/examples/src/main/scala/spark/streaming/examples/ActorWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala index f97174aeae..cd3423a07b 100644 --- a/examples/src/main/scala/spark/streaming/examples/ActorWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.streaming.examples +package org.apache.spark.streaming.examples import scala.collection.mutable.LinkedList import scala.util.Random @@ -25,11 +25,11 @@ import akka.actor.ActorRef import akka.actor.Props import akka.actor.actorRef2Scala -import spark.streaming.Seconds -import spark.streaming.StreamingContext -import spark.streaming.StreamingContext.toPairDStreamFunctions -import spark.streaming.receivers.Receiver -import spark.util.AkkaUtils +import org.apache.spark.streaming.Seconds +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions +import org.apache.spark.streaming.receivers.Receiver +import org.apache.spark.util.AkkaUtils case class SubscribeReceiver(receiverActor: ActorRef) case class UnsubscribeReceiver(receiverActor: ActorRef) @@ -80,7 +80,7 @@ class FeederActor extends Actor { * goes and subscribe to a typical publisher/feeder actor and receives * data. * - * @see [[spark.streaming.examples.FeederActor]] + * @see [[org.apache.spark.streaming.examples.FeederActor]] */ class SampleActorReceiver[T: ClassManifest](urlOfPublisher: String) extends Actor with Receiver { @@ -132,9 +132,9 @@ object FeederActor { * <hostname> and <port> describe the AkkaSystem that Spark Sample feeder is running on. * * To run this example locally, you may run Feeder Actor as - * `$ ./run spark.streaming.examples.FeederActor 127.0.1.1 9999` + * `$ ./run-example spark.streaming.examples.FeederActor 127.0.1.1 9999` * and then run the example - * `$ ./run spark.streaming.examples.ActorWordCount local[2] 127.0.1.1 9999` + * `$ ./run-example spark.streaming.examples.ActorWordCount local[2] 127.0.1.1 9999` */ object ActorWordCount { def main(args: Array[String]) { diff --git a/examples/src/main/scala/spark/streaming/examples/FlumeEventCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala index 3ab4fc2c37..9f6e163454 100644 --- a/examples/src/main/scala/spark/streaming/examples/FlumeEventCount.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.streaming.examples +package org.apache.spark.streaming.examples -import spark.util.IntParam -import spark.storage.StorageLevel -import spark.streaming._ +import org.apache.spark.util.IntParam +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming._ /** * Produces a count of events received from Flume. diff --git a/examples/src/main/scala/spark/streaming/examples/HdfsWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala index f5baec242d..bc8564b3ba 100644 --- a/examples/src/main/scala/spark/streaming/examples/HdfsWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.examples +package org.apache.spark.streaming.examples -import spark.streaming.{Seconds, StreamingContext} -import spark.streaming.StreamingContext._ +import org.apache.spark.streaming.{Seconds, StreamingContext} +import org.apache.spark.streaming.StreamingContext._ /** @@ -28,7 +28,7 @@ import spark.streaming.StreamingContext._ * <directory> is the directory that Spark Streaming will use to find and read new text files. 
* * To run this on your local machine on directory `localdir`, run this example - * `$ ./run spark.streaming.examples.HdfsWordCount local[2] localdir` + * `$ ./run-example spark.streaming.examples.HdfsWordCount local[2] localdir` * Then create a text file in `localdir` and the words in the file will get counted. */ object HdfsWordCount { diff --git a/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala index 4929703ba2..12f939d5a7 100644 --- a/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala @@ -15,17 +15,17 @@ * limitations under the License. */ -package spark.streaming.examples +package org.apache.spark.streaming.examples import java.util.Properties import kafka.message.Message import kafka.producer.SyncProducerConfig import kafka.producer._ -import spark.SparkContext -import spark.streaming._ -import spark.streaming.StreamingContext._ -import spark.storage.StorageLevel -import spark.streaming.util.RawTextHelper._ +import org.apache.spark.SparkContext +import org.apache.spark.streaming._ +import org.apache.spark.streaming.StreamingContext._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.util.RawTextHelper._ /** * Consumes messages from one or more topics in Kafka and does wordcount. @@ -37,7 +37,7 @@ import spark.streaming.util.RawTextHelper._ * <numThreads> is the number of threads the kafka consumer should use * * Example: - * `./run spark.streaming.examples.KafkaWordCount local[2] zoo01,zoo02,zoo03 my-consumer-group topic1,topic2 1` + * `./run-example spark.streaming.examples.KafkaWordCount local[2] zoo01,zoo02,zoo03 my-consumer-group topic1,topic2 1` */ object KafkaWordCount { def main(args: Array[String]) { diff --git a/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala index 150fb5eb9c..e2487dca5f 100644 --- a/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.examples +package org.apache.spark.streaming.examples -import spark.streaming.{Seconds, StreamingContext} -import spark.streaming.StreamingContext._ +import org.apache.spark.streaming.{Seconds, StreamingContext} +import org.apache.spark.streaming.StreamingContext._ /** * Counts words in UTF8 encoded, '\n' delimited text received from the network every second. @@ -29,7 +29,7 @@ import spark.streaming.StreamingContext._ * To run this on your local machine, you need to first run a Netcat server * `$ nc -lk 9999` * and then run the example - * `$ ./run spark.streaming.examples.NetworkWordCount local[2] localhost 9999` + * `$ ./run-example spark.streaming.examples.NetworkWordCount local[2] localhost 9999` */ object NetworkWordCount { def main(args: Array[String]) { diff --git a/examples/src/main/scala/spark/streaming/examples/QueueStream.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/QueueStream.scala index da36c8c23c..fad512eeba 100644 --- a/examples/src/main/scala/spark/streaming/examples/QueueStream.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/QueueStream.scala @@ -15,11 +15,11 @@ * limitations under the License. 
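HdfsWordCount, KafkaWordCount and NetworkWordCount above all share one skeleton under the new package names: construct a StreamingContext, derive DStreams from an input source, and start the context. A hedged sketch of that skeleton (hypothetical app name, host and port; one-second batches):

```scala
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.StreamingContext._ // brings in pair-DStream operations

val ssc = new StreamingContext("local[2]", "WordCountSketch", Seconds(1))
val words = ssc.socketTextStream("localhost", 9999).flatMap(_.split(" "))
words.map(w => (w, 1)).reduceByKey(_ + _).print()
ssc.start()
```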
*/ -package spark.streaming.examples +package org.apache.spark.streaming.examples -import spark.RDD -import spark.streaming.{Seconds, StreamingContext} -import spark.streaming.StreamingContext._ +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming.{Seconds, StreamingContext} +import org.apache.spark.streaming.StreamingContext._ import scala.collection.mutable.SynchronizedQueue diff --git a/examples/src/main/scala/spark/streaming/examples/RawNetworkGrep.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala index 7fb680bcc3..0b45c30d20 100644 --- a/examples/src/main/scala/spark/streaming/examples/RawNetworkGrep.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala @@ -15,20 +15,20 @@ * limitations under the License. */ -package spark.streaming.examples +package org.apache.spark.streaming.examples -import spark.util.IntParam -import spark.storage.StorageLevel +import org.apache.spark.util.IntParam +import org.apache.spark.storage.StorageLevel -import spark.streaming._ -import spark.streaming.util.RawTextHelper +import org.apache.spark.streaming._ +import org.apache.spark.streaming.util.RawTextHelper /** * Receives text from multiple rawNetworkStreams and counts how many '\n' delimited * lines have the word 'the' in them. This is useful for benchmarking purposes. This * will only work with spark.streaming.util.RawTextSender running on all worker nodes * and with Spark using Kryo serialization (set Java property "spark.serializer" to - * "spark.KryoSerializer"). + * "org.apache.spark.serializer.KryoSerializer"). * Usage: RawNetworkGrep <master> <numStreams> <host> <port> <batchMillis> * <master> is the Spark master URL * <numStream> is the number rawNetworkStreams, which should be same as number diff --git a/examples/src/main/scala/spark/streaming/examples/StatefulNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala index 33ab324732..cb30c4edb3 100644 --- a/examples/src/main/scala/spark/streaming/examples/StatefulNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.examples +package org.apache.spark.streaming.examples -import spark.streaming._ -import spark.streaming.StreamingContext._ +import org.apache.spark.streaming._ +import org.apache.spark.streaming.StreamingContext._ /** * Counts words cumulatively in UTF8 encoded, '\n' delimited text received from the network every second. @@ -29,7 +29,7 @@ import spark.streaming.StreamingContext._ * To run this on your local machine, you need to first run a Netcat server * `$ nc -lk 9999` * and then run the example - * `$ ./run spark.streaming.examples.StatefulNetworkWordCount local[2] localhost 9999` + * `$ ./run-example spark.streaming.examples.StatefulNetworkWordCount local[2] localhost 9999` */ object StatefulNetworkWordCount { def main(args: Array[String]) { diff --git a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdCMS.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala index 8770abd57e..35b6329ab3 100644 --- a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdCMS.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala @@ -15,13 +15,13 @@ * limitations under the License. 
*/ -package spark.streaming.examples +package org.apache.spark.streaming.examples -import spark.streaming.{Seconds, StreamingContext} -import spark.storage.StorageLevel +import org.apache.spark.streaming.{Seconds, StreamingContext} +import org.apache.spark.storage.StorageLevel import com.twitter.algebird._ -import spark.streaming.StreamingContext._ -import spark.SparkContext._ +import org.apache.spark.streaming.StreamingContext._ +import org.apache.spark.SparkContext._ /** * Illustrates the use of the Count-Min Sketch, from Twitter's Algebird library, to compute diff --git a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdHLL.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala index cba5c986be..8bfde2a829 100644 --- a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdHLL.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.streaming.examples +package org.apache.spark.streaming.examples -import spark.streaming.{Seconds, StreamingContext} -import spark.storage.StorageLevel +import org.apache.spark.streaming.{Seconds, StreamingContext} +import org.apache.spark.storage.StorageLevel import com.twitter.algebird.HyperLogLog._ import com.twitter.algebird.HyperLogLogMonoid -import spark.streaming.dstream.TwitterInputDStream +import org.apache.spark.streaming.dstream.TwitterInputDStream /** * Illustrates the use of the HyperLogLog algorithm, from Twitter's Algebird library, to compute diff --git a/examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala index 682b99f75e..27aa6b14bf 100644 --- a/examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.streaming.examples +package org.apache.spark.streaming.examples -import spark.streaming.{Seconds, StreamingContext} +import org.apache.spark.streaming.{Seconds, StreamingContext} import StreamingContext._ -import spark.SparkContext._ +import org.apache.spark.SparkContext._ /** * Calculates popular hashtags (topics) over sliding 10 and 60 second windows from a Twitter diff --git a/examples/src/main/scala/spark/streaming/examples/ZeroMQWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala index e264fae609..c8743b9e25 100644 --- a/examples/src/main/scala/spark/streaming/examples/ZeroMQWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.streaming.examples +package org.apache.spark.streaming.examples import akka.actor.ActorSystem import akka.actor.actorRef2Scala import akka.zeromq._ -import spark.streaming.{ Seconds, StreamingContext } -import spark.streaming.StreamingContext._ +import org.apache.spark.streaming.{ Seconds, StreamingContext } +import org.apache.spark.streaming.StreamingContext._ import akka.zeromq.Subscribe /** @@ -60,9 +60,9 @@ object SimpleZeroMQPublisher { * <zeroMQurl> and <topic> describe where zeroMq publisher is running. 
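TwitterPopularTags, moved above, ranks hashtags over sliding windows. Assuming `statuses` is a DStream[String] of tweet texts (the Twitter input stream itself needs credentials and is omitted here), the windowed count is roughly:

```scala
// Hypothetical sketch: count hashtags over a sliding 60-second window.
val hashTags = statuses.flatMap(_.split(" ")).filter(_.startsWith("#"))
val counts = hashTags.map(tag => (tag, 1)).reduceByKeyAndWindow(_ + _, Seconds(60))
counts.print()
```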
* * To run this example locally, you may run publisher as - * `$ ./run spark.streaming.examples.SimpleZeroMQPublisher tcp://127.0.1.1:1234 foo.bar` + * `$ ./run-example spark.streaming.examples.SimpleZeroMQPublisher tcp://127.0.1.1:1234 foo.bar` * and run the example as - * `$ ./run spark.streaming.examples.ZeroMQWordCount local[2] tcp://127.0.1.1:1234 foo` + * `$ ./run-example spark.streaming.examples.ZeroMQWordCount local[2] tcp://127.0.1.1:1234 foo` */ object ZeroMQWordCount { def main(args: Array[String]) { diff --git a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewGenerator.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewGenerator.scala index 375d5c9d22..884d6d6f34 100644 --- a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewGenerator.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewGenerator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.streaming.examples.clickstream +package org.apache.spark.streaming.examples.clickstream import java.net.{InetAddress,ServerSocket,Socket,SocketException} import java.io.{InputStreamReader, BufferedReader, PrintWriter} @@ -37,8 +37,8 @@ object PageView { /** Generates streaming events to simulate page views on a website. * * This should be used in tandem with PageViewStream.scala. Example: - * $ ./run spark.streaming.examples.clickstream.PageViewGenerator 44444 10 - * $ ./run spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444 + * $ ./run-example spark.streaming.examples.clickstream.PageViewGenerator 44444 10 + * $ ./run-example spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444 * */ object PageViewGenerator { val pages = Map("http://foo.com/" -> .7, diff --git a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewStream.scala index a24342bebf..8282cc9269 100644 --- a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewStream.scala @@ -15,19 +15,19 @@ * limitations under the License. */ -package spark.streaming.examples.clickstream +package org.apache.spark.streaming.examples.clickstream -import spark.streaming.{Seconds, StreamingContext} -import spark.streaming.StreamingContext._ -import spark.SparkContext._ +import org.apache.spark.streaming.{Seconds, StreamingContext} +import org.apache.spark.streaming.StreamingContext._ +import org.apache.spark.SparkContext._ /** Analyses a streaming dataset of web page views. This class demonstrates several types of * operators available in Spark streaming. * * This should be used in tandem with PageViewStream.scala. 
Example: - * $ ./run spark.streaming.examples.clickstream.PageViewGenerator 44444 10 - * $ ./run spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444 - * */ + * $ ./run-example spark.streaming.examples.clickstream.PageViewGenerator 44444 10 + * $ ./run-example spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444 + */ object PageViewStream { def main(args: Array[String]) { if (args.length != 3) { diff --git a/make-distribution.sh b/make-distribution.sh index 0116215163..91f6278491 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Licensed to the Apache Software Foundation (ASF) under one or more @@ -24,6 +24,11 @@ # so it is completely self contained. # It does not contain source or *.class files. # +# Optional Arguments +# --tgz: Additionally creates spark-$VERSION-bin.tar.gz +# --hadoop VERSION: Builds against specified version of Hadoop. +# --with-yarn: Enables support for Hadoop YARN. +# # Recommended deploy/testing procedure (standalone mode): # 1) Rsync / deploy the dist/ dir to one host # 2) cd to deploy dir; ./bin/start-master.sh @@ -38,21 +43,70 @@ DISTDIR="$FWDIR/dist" # Get version from SBT export TERM=dumb # Prevents color codes in SBT output -VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2) -echo "Making distribution for Spark $VERSION in $DISTDIR..." +VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/') + +# Initialize defaults +SPARK_HADOOP_VERSION=1.0.4 +SPARK_YARN=false +MAKE_TGZ=false + +# Parse arguments +while (( "$#" )); do + case $1 in + --hadoop) + SPARK_HADOOP_VERSION="$2" + shift + ;; + --with-yarn) + SPARK_YARN=true + ;; + --tgz) + MAKE_TGZ=true + ;; + esac + shift +done + +if [ "$MAKE_TGZ" == "true" ]; then + echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" +else + echo "Making distribution for Spark $VERSION in $DISTDIR..." +fi + +echo "Hadoop version set to $SPARK_HADOOP_VERSION" +if [ "$SPARK_YARN" == "true" ]; then + echo "YARN enabled" +else + echo "YARN disabled" +fi # Build fat JAR -$FWDIR/sbt/sbt "repl/assembly" +export SPARK_HADOOP_VERSION +export SPARK_YARN +"$FWDIR/sbt/sbt" "assembly/assembly" # Make directories rm -rf "$DISTDIR" mkdir -p "$DISTDIR/jars" -echo "$VERSION" >$DISTDIR/RELEASE +echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE" # Copy jars -cp $FWDIR/repl/target/*.jar "$DISTDIR/jars/" +cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/jars/" # Copy other things +mkdir "$DISTDIR"/conf +cp "$FWDIR"/conf/*.template "$DISTDIR/conf" cp -r "$FWDIR/bin" "$DISTDIR" -cp -r "$FWDIR/conf" "$DISTDIR" -cp "$FWDIR/run" "$FWDIR/spark-shell" "$DISTDIR" +cp -r "$FWDIR/python" "$DISTDIR" +cp "$FWDIR/spark-class" "$DISTDIR" +cp "$FWDIR/spark-shell" "$DISTDIR" +cp "$FWDIR/spark-executor" "$DISTDIR" +cp "$FWDIR/pyspark" "$DISTDIR" + + +if [ "$MAKE_TGZ" == "true" ]; then + TARDIR="$FWDIR/spark-$VERSION" + cp -r "$DISTDIR" "$TARDIR" + tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION" + rm -rf "$TARDIR" +fi diff --git a/mllib/pom.xml b/mllib/pom.xml new file mode 100644 index 0000000000..966caf6835 --- /dev/null +++ b/mllib/pom.xml @@ -0,0 +1,75 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. 
See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. + --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.spark</groupId> + <artifactId>spark-parent</artifactId> + <version>0.8.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.spark</groupId> + <artifactId>spark-mllib</artifactId> + <packaging>jar</packaging> + <name>Spark Project ML Library</name> + <url>http://spark.incubator.apache.org/</url> + + <dependencies> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.eclipse.jetty</groupId> + <artifactId>jetty-server</artifactId> + </dependency> + <dependency> + <groupId>org.jblas</groupId> + <artifactId>jblas</artifactId> + <version>1.2.3</version> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_${scala.version}</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalacheck</groupId> + <artifactId>scalacheck_${scala.version}</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>com.novocode</groupId> + <artifactId>junit-interface</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + <build> + <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> + <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> + <plugins> + <plugin> + <groupId>org.scalatest</groupId> + <artifactId>scalatest-maven-plugin</artifactId> + </plugin> + </plugins> + </build> +</project> diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala new file mode 100644 index 0000000000..60cb44ce89 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala @@ -0,0 +1,21 @@ +package org.apache.spark.mllib.classification + +import org.apache.spark.rdd.RDD + +trait ClassificationModel extends Serializable { + /** + * Predict values for the given data set using the model trained. + * + * @param testData RDD representing data points to be predicted + * @return RDD[Int] where each entry contains the corresponding prediction + */ + def predict(testData: RDD[Array[Double]]): RDD[Double] + + /** + * Predict values for a single data point using the model trained. 
+ * + * @param testData array representing a single data point + * @return Double prediction from the trained model + */ + def predict(testData: Array[Double]): Double +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala new file mode 100644 index 0000000000..50aede9c07 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.classification + +import scala.math.round + +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.regression._ +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.mllib.util.DataValidators + +import org.jblas.DoubleMatrix + +/** + * Classification model trained using Logistic Regression. + * + * @param weights Weights computed for every feature. + * @param intercept Intercept computed for this model. + */ +class LogisticRegressionModel( + override val weights: Array[Double], + override val intercept: Double) + extends GeneralizedLinearModel(weights, intercept) + with ClassificationModel with Serializable { + + override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix, + intercept: Double) = { + val margin = dataMatrix.mmul(weightMatrix).get(0) + intercept + round(1.0 / (1.0 + math.exp(margin * -1))) + } +} + +/** + * Train a classification model for Logistic Regression using Stochastic Gradient Descent. + * NOTE: Labels used in Logistic Regression should be {0, 1} + */ +class LogisticRegressionWithSGD private ( + var stepSize: Double, + var numIterations: Int, + var regParam: Double, + var miniBatchFraction: Double) + extends GeneralizedLinearAlgorithm[LogisticRegressionModel] + with Serializable { + + val gradient = new LogisticGradient() + val updater = new SimpleUpdater() + override val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) + .setNumIterations(numIterations) + .setRegParam(regParam) + .setMiniBatchFraction(miniBatchFraction) + override val validators = List(DataValidators.classificationLabels) + + /** + * Construct a LogisticRegression object with default parameters + */ + def this() = this(1.0, 100, 0.0, 1.0) + + def createModel(weights: Array[Double], intercept: Double) = { + new LogisticRegressionModel(weights, intercept) + } +} + +/** + * Top-level methods for calling Logistic Regression. 
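Assuming a SparkContext named `sc` and this era's LabeledPoint(label, features) case class from org.apache.spark.mllib.regression, training and querying the model defined above might look like the following sketch (hypothetical toy data; it uses the train overload documented below):

```scala
import org.apache.spark.mllib.classification.LogisticRegressionWithSGD
import org.apache.spark.mllib.regression.LabeledPoint

val training = sc.parallelize(Seq(
  LabeledPoint(1.0, Array(2.0, 1.0)),   // label 1 with two features
  LabeledPoint(0.0, Array(-1.0, -1.5))  // label 0
))
// 100 iterations of SGD with step size 1.0.
val model = LogisticRegressionWithSGD.train(training, 100, 1.0)
val prediction: Double = model.predict(Array(1.0, 0.5)) // single-point predict
```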
+ * NOTE: Labels used in Logistic Regression should be {0, 1} + */ +object LogisticRegressionWithSGD { + // NOTE(shivaram): We use multiple train methods instead of default arguments to support + // Java programs. + + /** + * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed + * number of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in + * gradient descent are initialized using the initial weights provided. + * NOTE: Labels used in Logistic Regression should be {0, 1} + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + * @param miniBatchFraction Fraction of data to be used per iteration. + * @param initialWeights Initial set of weights to be used. Array should be equal in size to + * the number of features in the data. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double, + initialWeights: Array[Double]) + : LogisticRegressionModel = + { + new LogisticRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction).run( + input, initialWeights) + } + + /** + * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed + * number of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. + * NOTE: Labels used in Logistic Regression should be {0, 1} + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + + * @param miniBatchFraction Fraction of data to be used per iteration. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double) + : LogisticRegressionModel = + { + new LogisticRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction).run( + input) + } + + /** + * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed + * number of iterations of gradient descent using the specified step size. We use the entire data + * set to update the gradient in each iteration. + * NOTE: Labels used in Logistic Regression should be {0, 1} + * + * @param input RDD of (label, array of features) pairs. + * @param stepSize Step size to be used for each iteration of Gradient Descent. + + * @param numIterations Number of iterations of gradient descent to run. + * @return a LogisticRegressionModel which has the weights and offset from training. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double) + : LogisticRegressionModel = + { + train(input, numIterations, stepSize, 1.0) + } + + /** + * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed + * number of iterations of gradient descent using a step size of 1.0. We use the entire data set + * to update the gradient in each iteration. + * NOTE: Labels used in Logistic Regression should be {0, 1} + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @return a LogisticRegressionModel which has the weights and offset from training. 
+ */ + def train( + input: RDD[LabeledPoint], + numIterations: Int) + : LogisticRegressionModel = + { + train(input, numIterations, 1.0, 1.0) + } + + def main(args: Array[String]) { + if (args.length != 4) { + println("Usage: LogisticRegression <master> <input_dir> <step_size> " + + "<niters>") + System.exit(1) + } + val sc = new SparkContext(args(0), "LogisticRegression") + val data = MLUtils.loadLabeledData(sc, args(1)) + val model = LogisticRegressionWithSGD.train(data, args(3).toInt, args(2).toDouble) + + sc.stop() + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala new file mode 100644 index 0000000000..3511e24bce --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.classification + +import scala.math.signum + +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.regression._ +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.mllib.util.DataValidators + +import org.jblas.DoubleMatrix + +/** + * Model built using SVM. + * + * @param weights Weights computed for every feature. + * @param intercept Intercept computed for this model. + */ +class SVMModel( + override val weights: Array[Double], + override val intercept: Double) + extends GeneralizedLinearModel(weights, intercept) + with ClassificationModel with Serializable { + + override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix, + intercept: Double) = { + val margin = dataMatrix.dot(weightMatrix) + intercept + if (margin < 0) 0.0 else 1.0 + } +} + +/** + * Train an SVM using Stochastic Gradient Descent. + * NOTE: Labels used in SVM should be {0, 1} + */ +class SVMWithSGD private ( + var stepSize: Double, + var numIterations: Int, + var regParam: Double, + var miniBatchFraction: Double) + extends GeneralizedLinearAlgorithm[SVMModel] with Serializable { + + val gradient = new HingeGradient() + val updater = new SquaredL2Updater() + override val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) + .setNumIterations(numIterations) + .setRegParam(regParam) + .setMiniBatchFraction(miniBatchFraction) + + override val validators = List(DataValidators.classificationLabels) + + /** + * Construct a SVM object with default parameters + */ + def this() = this(1.0, 100, 1.0, 1.0) + + def createModel(weights: Array[Double], intercept: Double) = { + new SVMModel(weights, intercept) + } +} + +/** + * Top-level methods for calling SVM. 
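The two predictPoint implementations above differ only in how the raw margin (the dot product plus intercept) is thresholded: logistic regression rounds the sigmoid of the margin, while the SVM takes its sign. In scalar form:

```scala
// Scalar restatements of the predictPoint bodies above, margin = w.x + intercept.
def logisticPredict(margin: Double): Double =
  math.round(1.0 / (1.0 + math.exp(-margin))).toDouble // sigmoid, rounded to 0 or 1

def svmPredict(margin: Double): Double =
  if (margin < 0) 0.0 else 1.0 // sign test
```

Both agree on every input, since the sigmoid crosses 0.5 exactly where the margin crosses zero.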
NOTE: Labels used in SVM should be {0, 1} + */ +object SVMWithSGD { + + /** + * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in + * gradient descent are initialized using the initial weights provided. + * NOTE: Labels used in SVM should be {0, 1} + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + * @param regParam Regularization parameter. + * @param miniBatchFraction Fraction of data to be used per iteration. + * @param initialWeights Initial set of weights to be used. Array should be equal in size to + * the number of features in the data. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + regParam: Double, + miniBatchFraction: Double, + initialWeights: Array[Double]) + : SVMModel = + { + new SVMWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input, + initialWeights) + } + + /** + * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. + * NOTE: Labels used in SVM should be {0, 1} + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + * @param regParam Regularization parameter. + * @param miniBatchFraction Fraction of data to be used per iteration. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + regParam: Double, + miniBatchFraction: Double) + : SVMModel = + { + new SVMWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input) + } + + /** + * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. We use the entire data set to + * update the gradient in each iteration. + * NOTE: Labels used in SVM should be {0, 1} + * + * @param input RDD of (label, array of features) pairs. + * @param stepSize Step size to be used for each iteration of Gradient Descent. + * @param regParam Regularization parameter. + * @param numIterations Number of iterations of gradient descent to run. + * @return a SVMModel which has the weights and offset from training. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + regParam: Double) + : SVMModel = + { + train(input, numIterations, stepSize, regParam, 1.0) + } + + /** + * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using a step size of 1.0. We use the entire data set to + * update the gradient in each iteration. + * NOTE: Labels used in SVM should be {0, 1} + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @return a SVMModel which has the weights and offset from training. 
+ */ + def train( + input: RDD[LabeledPoint], + numIterations: Int) + : SVMModel = + { + train(input, numIterations, 1.0, 1.0, 1.0) + } + + def main(args: Array[String]) { + if (args.length != 5) { + println("Usage: SVM <master> <input_dir> <step_size> <regularization_parameter> <niters>") + System.exit(1) + } + val sc = new SparkContext(args(0), "SVM") + val data = MLUtils.loadLabeledData(sc, args(1)) + val model = SVMWithSGD.train(data, args(4).toInt, args(2).toDouble, args(3).toDouble) + + sc.stop() + } +} diff --git a/mllib/src/main/scala/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index d875d6de50..edbf77dbcc 100644 --- a/mllib/src/main/scala/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -15,15 +15,16 @@ * limitations under the License. */ -package spark.mllib.clustering +package org.apache.spark.mllib.clustering import scala.collection.mutable.ArrayBuffer import scala.util.Random -import spark.{SparkContext, RDD} -import spark.SparkContext._ -import spark.Logging -import spark.mllib.util.MLUtils +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.RDD +import org.apache.spark.Logging +import org.apache.spark.mllib.util.MLUtils import org.jblas.DoubleMatrix @@ -112,7 +113,7 @@ class KMeans private ( * Train a K-means model on the given set of points; `data` should be cached for high * performance, because this is an iterative algorithm. */ - def train(data: RDD[Array[Double]]): KMeansModel = { + def run(data: RDD[Array[Double]]): KMeansModel = { // TODO: check whether data is persistent; this needs RDD.storageLevel to be publicly readable val sc = data.sparkContext @@ -194,8 +195,8 @@ class KMeans private ( */ private def initRandom(data: RDD[Array[Double]]): Array[ClusterCenters] = { // Sample all the cluster centers in one pass to avoid repeated scans - val sample = data.takeSample(true, runs * k, new Random().nextInt()) - Array.tabulate(runs)(r => sample.slice(r * k, (r + 1) * k)) + val sample = data.takeSample(true, runs * k, new Random().nextInt()).toSeq + Array.tabulate(runs)(r => sample.slice(r * k, (r + 1) * k).toArray) } /** @@ -210,7 +211,7 @@ class KMeans private ( private def initKMeansParallel(data: RDD[Array[Double]]): Array[ClusterCenters] = { // Initialize each run's center to a random point val seed = new Random().nextInt() - val sample = data.takeSample(true, runs, seed) + val sample = data.takeSample(true, runs, seed).toSeq val centers = Array.tabulate(runs)(r => ArrayBuffer(sample(r))) // On each step, sample 2 * k points on average for each run with probability proportional @@ -271,7 +272,7 @@ object KMeans { .setMaxIterations(maxIterations) .setRuns(runs) .setInitializationMode(initializationMode) - .train(data) + .run(data) } def train(data: RDD[Array[Double]], k: Int, maxIterations: Int, runs: Int): KMeansModel = { @@ -315,14 +316,15 @@ object KMeans { } def main(args: Array[String]) { - if (args.length != 4) { - println("Usage: KMeans <master> <input_file> <k> <max_iterations>") + if (args.length < 4) { + println("Usage: KMeans <master> <input_file> <k> <max_iterations> [<runs>]") System.exit(1) } val (master, inputFile, k, iters) = (args(0), args(1), args(2).toInt, args(3).toInt) + val runs = if (args.length >= 5) args(4).toInt else 1 val sc = new SparkContext(master, "KMeans") - val data = sc.textFile(inputFile).map(line => line.split(' ').map(_.toDouble)) - val model = 
KMeans.train(data, k, iters) + val data = sc.textFile(inputFile).map(line => line.split(' ').map(_.toDouble)).cache() + val model = KMeans.train(data, k, iters, runs) val cost = model.computeCost(data) println("Cluster centers:") for (c <- model.clusterCenters) { diff --git a/mllib/src/main/scala/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index b8f80e80cd..cfc81c985a 100644 --- a/mllib/src/main/scala/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.mllib.clustering +package org.apache.spark.mllib.clustering -import spark.RDD -import spark.SparkContext._ -import spark.mllib.util.MLUtils +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ +import org.apache.spark.mllib.util.MLUtils /** diff --git a/mllib/src/main/scala/spark/mllib/clustering/LocalKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala index 89fe7d7e85..baf8251d8f 100644 --- a/mllib/src/main/scala/spark/mllib/clustering/LocalKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.mllib.clustering +package org.apache.spark.mllib.clustering import scala.util.Random diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala new file mode 100644 index 0000000000..749e7364f4 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.optimization + +import org.jblas.DoubleMatrix + +/** + * Class used to compute the gradient for a loss function, given a single data point. + */ +abstract class Gradient extends Serializable { + /** + * Compute the gradient and loss given features of a single data point. + * + * @param data - Feature values for one data point. Column matrix of size nx1 + * where n is the number of features. + * @param label - Label for this data item. + * @param weights - Column matrix containing weights for every feature. + * + * @return A tuple of 2 elements. The first element is a column matrix containing the computed + * gradient and the second element is the loss computed at this data point. + * + */ + def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix): + (DoubleMatrix, Double) +} + +/** + * Compute gradient and loss for a logistic loss function. 
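Returning to the clustering diffs above: KMeans.train gains an optional runs argument, train on the class becomes run, and the input is now cached explicitly because the algorithm is iterative. Usage following the updated main() (hypothetical input path):

```scala
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeans

val sc = new SparkContext("local", "KMeansSketch")
val data = sc.textFile("kmeans_data.txt") // hypothetical space-separated vectors
  .map(_.split(' ').map(_.toDouble))
  .cache() // reused every iteration, as in the updated example
val model = KMeans.train(data, 2, 20, 1) // k = 2, 20 iterations, 1 run
println("Cost: " + model.computeCost(data))
```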
+ */ +class LogisticGradient extends Gradient { + override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix): + (DoubleMatrix, Double) = { + val margin: Double = -1.0 * data.dot(weights) + val gradientMultiplier = (1.0 / (1.0 + math.exp(margin))) - label + + val gradient = data.mul(gradientMultiplier) + val loss = + if (margin > 0) { + math.log(1 + math.exp(0 - margin)) + } else { + math.log(1 + math.exp(margin)) - margin + } + + (gradient, loss) + } +} + +/** + * Compute gradient and loss for a Least-squared loss function. + */ +class SquaredGradient extends Gradient { + override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix): + (DoubleMatrix, Double) = { + val diff: Double = data.dot(weights) - label + + val loss = 0.5 * diff * diff + val gradient = data.mul(diff) + + (gradient, loss) + } +} + +/** + * Compute gradient and loss for a Hinge loss function. + * NOTE: This assumes that the labels are {0,1} + */ +class HingeGradient extends Gradient { + override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix): + (DoubleMatrix, Double) = { + + val dotProduct = data.dot(weights) + + // Our loss function with {0, 1} labels is max(0, 1 - (2y - 1) (f_w(x))) + // Therefore the gradient is -(2y - 1)*x + val labelScaled = 2 * label - 1.0 + + if (1.0 > labelScaled * dotProduct) { + (data.mul(-labelScaled), 1.0 - labelScaled * dotProduct) + } else { + (DoubleMatrix.zeros(1, weights.length), 0.0) + } + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala new file mode 100644 index 0000000000..b77364e08d --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.optimization + +import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ + +import org.jblas.DoubleMatrix + +import scala.collection.mutable.ArrayBuffer + +/** + * Class used to solve an optimization problem using Gradient Descent. + * @param gradient Gradient function to be used. + * @param updater Updater to be used to update weights after every iteration. + */ +class GradientDescent(var gradient: Gradient, var updater: Updater) + extends Optimizer with Logging +{ + private var stepSize: Double = 1.0 + private var numIterations: Int = 100 + private var regParam: Double = 0.0 + private var miniBatchFraction: Double = 1.0 + + /** + * Set the step size per-iteration of SGD. Default 1.0. 
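For HingeGradient above, the {0, 1} labels are first rescaled to {-1, +1}, after which the usual hinge loss applies; collapsed to a single feature the computation reads:

```scala
// Scalar form of HingeGradient.compute above: feature x, weight w, label in {0, 1}.
def hingeGradient(x: Double, label: Double, w: Double): (Double, Double) = {
  val dotProduct = x * w
  val labelScaled = 2 * label - 1.0 // {0, 1} -> {-1, +1}
  if (1.0 > labelScaled * dotProduct)
    (-labelScaled * x, 1.0 - labelScaled * dotProduct) // inside the margin
  else
    (0.0, 0.0) // correctly classified with margin: zero gradient, zero loss
}
```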
+ */ + def setStepSize(step: Double): this.type = { + this.stepSize = step + this + } + + /** + * Set fraction of data to be used for each SGD iteration. Default 1.0. + */ + def setMiniBatchFraction(fraction: Double): this.type = { + this.miniBatchFraction = fraction + this + } + + /** + * Set the number of iterations for SGD. Default 100. + */ + def setNumIterations(iters: Int): this.type = { + this.numIterations = iters + this + } + + /** + * Set the regularization parameter used for SGD. Default 0.0. + */ + def setRegParam(regParam: Double): this.type = { + this.regParam = regParam + this + } + + /** + * Set the gradient function to be used for SGD. + */ + def setGradient(gradient: Gradient): this.type = { + this.gradient = gradient + this + } + + + /** + * Set the updater function to be used for SGD. + */ + def setUpdater(updater: Updater): this.type = { + this.updater = updater + this + } + + def optimize(data: RDD[(Double, Array[Double])], initialWeights: Array[Double]) + : Array[Double] = { + + val (weights, stochasticLossHistory) = GradientDescent.runMiniBatchSGD( + data, + gradient, + updater, + stepSize, + numIterations, + regParam, + miniBatchFraction, + initialWeights) + weights + } + +} + +// Top-level method to run gradient descent. +object GradientDescent extends Logging { + /** + * Run gradient descent in parallel using mini batches. + * + * @param data - Input data for SGD. RDD of form (label, [feature values]). + * @param gradient - Gradient object that will be used to compute the gradient. + * @param updater - Updater object that will be used to update the model. + * @param stepSize - stepSize to be used during update. + * @param numIterations - number of iterations that SGD should be run. + * @param regParam - regularization parameter + * @param miniBatchFraction - fraction of the input data set that should be used for + * one iteration of SGD. Default value 1.0. + * + * @return A tuple containing two elements. The first element is a column matrix containing + * weights for every feature, and the second element is an array containing the stochastic + * loss computed for every iteration. + */ + def runMiniBatchSGD( + data: RDD[(Double, Array[Double])], + gradient: Gradient, + updater: Updater, + stepSize: Double, + numIterations: Int, + regParam: Double, + miniBatchFraction: Double, + initialWeights: Array[Double]) : (Array[Double], Array[Double]) = { + + val stochasticLossHistory = new ArrayBuffer[Double](numIterations) + + val nexamples: Long = data.count() + val miniBatchSize = nexamples * miniBatchFraction + + // Initialize weights as a column vector + var weights = new DoubleMatrix(initialWeights.length, 1, initialWeights:_*) + var regVal = 0.0 + + for (i <- 1 to numIterations) { + val (gradientSum, lossSum) = data.sample(false, miniBatchFraction, 42+i).map { + case (y, features) => + val featuresCol = new DoubleMatrix(features.length, 1, features:_*) + val (grad, loss) = gradient.compute(featuresCol, y, weights) + (grad, loss) + }.reduce((a, b) => (a._1.addi(b._1), a._2 + b._2)) + + /** + * NOTE(Xinghao): lossSum is computed using the weights from the previous iteration + * and regVal is the regularization value computed in the previous iteration as well. + */ + stochasticLossHistory.append(lossSum / miniBatchSize + regVal) + val update = updater.compute( + weights, gradientSum.div(miniBatchSize), stepSize, i, regParam) + weights = update._1 + regVal = update._2 + } + + logInfo("GradientDescent finished. 
Last 10 stochastic losses %s".format( + stochasticLossHistory.takeRight(10).mkString(", "))) + + (weights.toArray, stochasticLossHistory.toArray) + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala new file mode 100644 index 0000000000..94d30b56f2 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.optimization + +import org.apache.spark.rdd.RDD + +trait Optimizer { + + /** + * Solve the provided convex optimization problem. + */ + def optimize(data: RDD[(Double, Array[Double])], initialWeights: Array[Double]): Array[Double] + +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala new file mode 100644 index 0000000000..4c51f4f881 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.optimization + +import scala.math._ +import org.jblas.DoubleMatrix + +/** + * Class used to update weights used in Gradient Descent. + */ +abstract class Updater extends Serializable { + /** + * Compute an updated value for weights given the gradient, stepSize, iteration number and + * regularization parameter. Also returns the regularization value computed using the + * *updated* weights. + * + * @param weightsOld - Column matrix of size nx1 where n is the number of features. + * @param gradient - Column matrix of size nx1 where n is the number of features. + * @param stepSize - step size across iterations + * @param iter - Iteration number + * @param regParam - Regularization parameter + * + * @return A tuple of 2 elements. 
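runMiniBatchSGD above is the workhorse that the classifiers' optimizer fields delegate to; it can also be driven directly. A sketch, assuming `data: RDD[(Double, Array[Double])]` with two features per point:

```scala
import org.apache.spark.mllib.optimization.{GradientDescent, LogisticGradient, SimpleUpdater}

val (weights, lossHistory) = GradientDescent.runMiniBatchSGD(
  data,                     // assumed RDD of (label, features) pairs
  new LogisticGradient(),
  new SimpleUpdater(),
  1.0,                      // stepSize
  100,                      // numIterations
  0.0,                      // regParam (ignored by SimpleUpdater)
  1.0,                      // miniBatchFraction: full data set each iteration
  Array(0.0, 0.0))          // initialWeights, one entry per feature
```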
The first element is a column matrix containing updated weights, + * and the second element is the regularization value computed using updated weights. + */ + def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, stepSize: Double, iter: Int, + regParam: Double): (DoubleMatrix, Double) +} + +/** + * A simple updater that adaptively decreases the learning rate with the + * square root of the number of iterations. Does not perform any regularization. + */ +class SimpleUpdater extends Updater { + override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, + stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = { + val thisIterStepSize = stepSize / math.sqrt(iter) + val normGradient = gradient.mul(thisIterStepSize) + (weightsOld.sub(normGradient), 0) + } +} + +/** + * Updater that adjusts learning rate and performs L1 regularization. + * + * The corresponding proximal operator used is the soft-thresholding function. + * That is, each weight component is shrunk towards 0 by shrinkageVal. + * + * If w > shrinkageVal, set weight component to w-shrinkageVal. + * If w < -shrinkageVal, set weight component to w+shrinkageVal. + * If -shrinkageVal < w < shrinkageVal, set weight component to 0. + * + * Equivalently, set weight component to signum(w) * max(0.0, abs(w) - shrinkageVal) + */ +class L1Updater extends Updater { + override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, + stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = { + val thisIterStepSize = stepSize / math.sqrt(iter) + val normGradient = gradient.mul(thisIterStepSize) + // Take gradient step + val newWeights = weightsOld.sub(normGradient) + // Soft thresholding + val shrinkageVal = regParam * thisIterStepSize + (0 until newWeights.length).foreach { i => + val wi = newWeights.get(i) + newWeights.put(i, signum(wi) * max(0.0, abs(wi) - shrinkageVal)) + } + (newWeights, newWeights.norm1 * regParam) + } +} + +/** + * Updater that adjusts the learning rate and performs L2 regularization. + */ +class SquaredL2Updater extends Updater { + override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, + stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = { + val thisIterStepSize = stepSize / math.sqrt(iter) + val normGradient = gradient.mul(thisIterStepSize) + val newWeights = weightsOld.sub(normGradient).div(2.0 * thisIterStepSize * regParam + 1.0) + (newWeights, pow(newWeights.norm2, 2.0) * regParam) + } +} + diff --git a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index 7da96397a6..be002d02bc 100644 --- a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -15,16 +15,17 @@ * limitations under the License. 
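Reduced to a single weight component w with gradient entry g, the updaters above compute:

```scala
import scala.math.{abs, max, signum, sqrt}

// SimpleUpdater: step size decays with the square root of the iteration number.
def simpleUpdate(w: Double, g: Double, stepSize: Double, iter: Int): Double =
  w - (stepSize / sqrt(iter)) * g

// L1Updater: gradient step, then soft-thresholding towards zero.
def l1Update(w: Double, g: Double, stepSize: Double, iter: Int, regParam: Double): Double = {
  val step = stepSize / sqrt(iter)
  val shrinkageVal = regParam * step
  val wNew = w - step * g
  signum(wNew) * max(0.0, abs(wNew) - shrinkageVal)
}
```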
*/ -package spark.mllib.recommendation +package org.apache.spark.mllib.recommendation import scala.collection.mutable.{ArrayBuffer, BitSet} import scala.util.Random import scala.util.Sorting -import spark.{HashPartitioner, Partitioner, SparkContext, RDD} -import spark.storage.StorageLevel -import spark.KryoRegistrator -import spark.SparkContext._ +import org.apache.spark.{HashPartitioner, Partitioner, SparkContext} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.rdd.RDD +import org.apache.spark.serializer.KryoRegistrator +import org.apache.spark.SparkContext._ import com.esotericsoftware.kryo.Kryo import org.jblas.{DoubleMatrix, SimpleBlas, Solve} @@ -35,8 +36,7 @@ import org.jblas.{DoubleMatrix, SimpleBlas, Solve} * of the elements within this block, and the list of destination blocks that each user or * product will need to send its feature vector to. */ -private[recommendation] case class OutLinkBlock( - elementIds: Array[Int], shouldSend: Array[BitSet]) +private[recommendation] case class OutLinkBlock(elementIds: Array[Int], shouldSend: Array[BitSet]) /** @@ -56,8 +56,7 @@ private[recommendation] case class InLinkBlock( /** * A more compact class to represent a rating than Tuple3[Int, Int, Double]. */ -private[recommendation] case class Rating(user: Int, product: Int, rating: Double) - +case class Rating(val user: Int, val product: Int, val rating: Double) /** * Alternating Least Squares matrix factorization. @@ -105,10 +104,10 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l } /** - * Run ALS with the configured parmeters on an input RDD of (user, product, rating) triples. + * Run ALS with the configured parameters on an input RDD of (user, product, rating) triples. * Returns a MatrixFactorizationModel with feature vectors for each user and product. 
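Since Rating is now public and the entry points take RDD[Rating] rather than bare tuples, callers construct ratings explicitly. A usage sketch against the new API (local master and data values are illustrative):

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.recommendation.{ALS, Rating}

    // Illustrative sketch, not part of this commit.
    object ALSExample {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "ALSExample")
        val ratings = sc.parallelize(Seq(
          Rating(0, 0, 4.0), Rating(0, 1, 1.0),
          Rating(1, 0, 5.0), Rating(1, 1, 2.0)))
        // rank = 2, 10 iterations, lambda = 0.01
        val model = ALS.train(ratings, 2, 10, 0.01)
        model.userFeatures.collect().foreach { case (id, vec) =>
          println(id + " -> " + vec.mkString(" "))
        }
        sc.stop()
      }
    }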
*/ - def train(ratings: RDD[(Int, Int, Double)]): MatrixFactorizationModel = { + def run(ratings: RDD[Rating]): MatrixFactorizationModel = { val numBlocks = if (this.numBlocks == -1) { math.max(ratings.context.defaultParallelism, ratings.partitions.size / 2) } else { @@ -117,16 +116,36 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l val partitioner = new HashPartitioner(numBlocks) - val ratingsByUserBlock = ratings.map{ case (u, p, r) => (u % numBlocks, Rating(u, p, r)) } - val ratingsByProductBlock = ratings.map{ case (u, p, r) => (p % numBlocks, Rating(p, u, r)) } + val ratingsByUserBlock = ratings.map{ rating => (rating.user % numBlocks, rating) } + val ratingsByProductBlock = ratings.map{ rating => + (rating.product % numBlocks, Rating(rating.product, rating.user, rating.rating)) + } val (userInLinks, userOutLinks) = makeLinkRDDs(numBlocks, ratingsByUserBlock) val (productInLinks, productOutLinks) = makeLinkRDDs(numBlocks, ratingsByProductBlock) - // Initialize user and product factors randomly - val seed = new Random().nextInt() - var users = userOutLinks.mapValues(_.elementIds.map(u => randomFactor(rank, seed ^ u))) - var products = productOutLinks.mapValues(_.elementIds.map(p => randomFactor(rank, seed ^ ~p))) + // Initialize user and product factors randomly, but use a deterministic seed for each partition + // so that fault recovery works + val seedGen = new Random() + val seed1 = seedGen.nextInt() + val seed2 = seedGen.nextInt() + // Hash an integer to propagate random bits at all positions, similar to java.util.HashTable + def hash(x: Int): Int = { + val r = x ^ (x >>> 20) ^ (x >>> 12) + r ^ (r >>> 7) ^ (r >>> 4) + } + var users = userOutLinks.mapPartitionsWithIndex { (index, itr) => + val rand = new Random(hash(seed1 ^ index)) + itr.map { case (x, y) => + (x, y.elementIds.map(_ => randomFactor(rank, rand))) + } + } + var products = productOutLinks.mapPartitionsWithIndex { (index, itr) => + val rand = new Random(hash(seed2 ^ index)) + itr.map { case (x, y) => + (x, y.elementIds.map(_ => randomFactor(rank, rand))) + } + } for (iter <- 0 until iterations) { // perform ALS update @@ -213,11 +232,9 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l } /** - * Make a random factor vector with the given seed. - * TODO: Initialize things using mapPartitionsWithIndex to make it faster? + * Make a random factor vector with the given random. 
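The initialization change above swaps one Random per element for one deterministically seeded Random per partition, so a recomputed partition regenerates exactly the same factors after a failure. The same pattern in isolation (a sketch; the bit-spreading hash is the one from the patch, everything else is illustrative):

    import scala.util.Random
    import org.apache.spark.SparkContext

    // Illustrative sketch, not part of this commit.
    object SeededInitDemo {
      // Spread an integer's bits so nearby partition indices do not
      // produce correlated seeds (same function as in ALS above).
      def hash(x: Int): Int = {
        val r = x ^ (x >>> 20) ^ (x >>> 12)
        r ^ (r >>> 7) ^ (r >>> 4)
      }

      def main(args: Array[String]) {
        val sc = new SparkContext("local", "SeededInitDemo")
        val seed = new Random().nextInt()
        val factors = sc.parallelize(0 until 100, 4).mapPartitionsWithIndex { (index, itr) =>
          // The Random depends only on (seed, index), not on execution order,
          // so recomputation after a failure is deterministic.
          val rand = new Random(hash(seed ^ index))
          itr.map(i => (i, Array.fill(5)(rand.nextDouble)))
        }
        println(factors.count())
        sc.stop()
      }
    }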
*/ - private def randomFactor(rank: Int, seed: Int): Array[Double] = { - val rand = new Random(seed) + private def randomFactor(rank: Int, rand: Random): Array[Double] = { Array.fill(rank)(rand.nextDouble) } @@ -357,14 +374,14 @@ object ALS { * @param blocks level of parallelism to split computation into */ def train( - ratings: RDD[(Int, Int, Double)], + ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double, blocks: Int) : MatrixFactorizationModel = { - new ALS(blocks, rank, iterations, lambda).train(ratings) + new ALS(blocks, rank, iterations, lambda).run(ratings) } /** @@ -379,7 +396,7 @@ object ALS { * @param iterations number of iterations of ALS (recommended: 10-20) * @param lambda regularization factor (recommended: 0.01) */ - def train(ratings: RDD[(Int, Int, Double)], rank: Int, iterations: Int, lambda: Double) + def train(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double) : MatrixFactorizationModel = { train(ratings, rank, iterations, lambda, -1) @@ -396,7 +413,7 @@ object ALS { * @param rank number of features to use * @param iterations number of iterations of ALS (recommended: 10-20) */ - def train(ratings: RDD[(Int, Int, Double)], rank: Int, iterations: Int) + def train(ratings: RDD[Rating], rank: Int, iterations: Int) : MatrixFactorizationModel = { train(ratings, rank, iterations, 0.01, -1) @@ -416,14 +433,15 @@ object ALS { val (master, ratingsFile, rank, iters, outputDir) = (args(0), args(1), args(2).toInt, args(3).toInt, args(4)) val blocks = if (args.length == 6) args(5).toInt else -1 - System.setProperty("spark.serializer", "spark.KryoSerializer") + System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") System.setProperty("spark.kryo.registrator", classOf[ALSRegistrator].getName) System.setProperty("spark.kryo.referenceTracking", "false") + System.setProperty("spark.kryoserializer.buffer.mb", "8") System.setProperty("spark.locality.wait", "10000") val sc = new SparkContext(master, "ALS") val ratings = sc.textFile(ratingsFile).map { line => val fields = line.split(',') - (fields(0).toInt, fields(1).toInt, fields(2).toDouble) + Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble) } val model = ALS.train(ratings, rank, iters, 0.01, blocks) model.userFeatures.map{ case (id, vec) => id + "," + vec.mkString(" ") } diff --git a/mllib/src/main/scala/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index 38637b3dd1..af43d89c70 100644 --- a/mllib/src/main/scala/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -15,13 +15,22 @@ * limitations under the License. */ -package spark.mllib.recommendation +package org.apache.spark.mllib.recommendation -import spark.RDD -import spark.SparkContext._ +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ import org.jblas._ +/** + * Model representing the result of matrix factorization. + * + * @param rank Rank for the features in this model. + * @param userFeatures RDD of tuples where each tuple represents the userId and + * the features computed for this user. + * @param productFeatures RDD of tuples where each tuple represents the productId + * and the features computed for this product. 
+ */ class MatrixFactorizationModel( val rank: Int, val userFeatures: RDD[(Int, Array[Double])], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala new file mode 100644 index 0000000000..f98b0b536d --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *    http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.regression + +import org.apache.spark.{Logging, SparkException} +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.optimization._ + +import org.jblas.DoubleMatrix + +/** + * GeneralizedLinearModel (GLM) represents a model trained using + * GeneralizedLinearAlgorithm. GLMs consist of a weight vector and + * an intercept. + * + * @param weights Weights computed for every feature. + * @param intercept Intercept computed for this model. + */ +abstract class GeneralizedLinearModel(val weights: Array[Double], val intercept: Double) + extends Serializable { + + // Create a column vector that can be used for predictions + private val weightsMatrix = new DoubleMatrix(weights.length, 1, weights:_*) + + /** + * Predict the result given a data point and the weights learned. + * + * @param dataMatrix Row vector containing the features for this data point + * @param weightMatrix Column vector containing the weights of the model + * @param intercept Intercept of the model. + */ + def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix, + intercept: Double): Double + + /** + * Predict values for the given data set using the model trained. + * + * @param testData RDD representing data points to be predicted + * @return RDD[Double] where each entry contains the corresponding prediction + */ + def predict(testData: RDD[Array[Double]]): RDD[Double] = { + // A small optimization to avoid serializing the entire model. Only the weightsMatrix + // and intercept are needed. + val localWeights = weightsMatrix + val localIntercept = intercept + + testData.map { x => + val dataMatrix = new DoubleMatrix(1, x.length, x:_*) + predictPoint(dataMatrix, localWeights, localIntercept) + } + } + + /** + * Predict values for a single data point using the model trained. + * + * @param testData array representing a single data point + * @return Double prediction from the trained model + */ + def predict(testData: Array[Double]): Double = { + val dataMat = new DoubleMatrix(1, testData.length, testData:_*) + predictPoint(dataMat, weightsMatrix, intercept) + } +} + +/** + * GeneralizedLinearAlgorithm implements methods to train a Generalized Linear Model (GLM).
+ * This class should be extended with an Optimizer to create a new GLM. + */ +abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] + extends Logging with Serializable { + + protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List() + + val optimizer: Optimizer + + protected var addIntercept: Boolean = true + + protected var validateData: Boolean = true + + /** + * Create a model given the weights and intercept. + */ + protected def createModel(weights: Array[Double], intercept: Double): M + + /** + * Set if the algorithm should add an intercept. Default true. + */ + def setIntercept(addIntercept: Boolean): this.type = { + this.addIntercept = addIntercept + this + } + + /** + * Set if the algorithm should validate data before training. Default true. + */ + def setValidateData(validateData: Boolean): this.type = { + this.validateData = validateData + this + } + + /** + * Run the algorithm with the configured parameters on an input + * RDD of LabeledPoint entries. + */ + def run(input: RDD[LabeledPoint]) : M = { + val nfeatures: Int = input.first().features.length + val initialWeights = Array.fill(nfeatures)(1.0) + run(input, initialWeights) + } + + /** + * Run the algorithm with the configured parameters on an input RDD + * of LabeledPoint entries starting from the initial weights provided. + */ + def run(input: RDD[LabeledPoint], initialWeights: Array[Double]) : M = { + + // Check the data properties before running the optimizer + if (validateData && !validators.forall(func => func(input))) { + throw new SparkException("Input validation failed.") + } + + // Add an extra variable consisting of all 1.0's for the intercept. + val data = if (addIntercept) { + input.map(labeledPoint => (labeledPoint.label, Array(1.0, labeledPoint.features:_*))) + } else { + input.map(labeledPoint => (labeledPoint.label, labeledPoint.features)) + } + + val initialWeightsWithIntercept = if (addIntercept) { + Array(1.0, initialWeights:_*) + } else { + initialWeights + } + + val weights = optimizer.optimize(data, initialWeightsWithIntercept) + val intercept = weights(0) + val weightsScaled = weights.tail + + val model = createModel(weightsScaled, intercept) + + logInfo("Final model weights " + model.weights.mkString(",")) + logInfo("Final model intercept " + model.intercept) + model + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala new file mode 100644 index 0000000000..63240e24dc --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *    http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
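GeneralizedLinearAlgorithm reduces each concrete algorithm to an Optimizer plus a createModel factory. A minimal hypothetical subclass pair (the names PlainModel and PlainAlgorithm are illustrative; GradientDescent, SquaredGradient and SimpleUpdater come from the optimization package introduced in this commit):

    import org.jblas.DoubleMatrix
    import org.apache.spark.mllib.optimization.{GradientDescent, SimpleUpdater, SquaredGradient}
    import org.apache.spark.mllib.regression.{GeneralizedLinearAlgorithm, GeneralizedLinearModel}

    // Illustrative sketch, not part of this commit: unregularized least squares.
    class PlainModel(override val weights: Array[Double], override val intercept: Double)
      extends GeneralizedLinearModel(weights, intercept) {

      override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix,
          intercept: Double): Double = {
        dataMatrix.dot(weightMatrix) + intercept
      }
    }

    class PlainAlgorithm extends GeneralizedLinearAlgorithm[PlainModel] {
      // The two required pieces: an optimizer and a model factory.
      val optimizer = new GradientDescent(new SquaredGradient(), new SimpleUpdater())
        .setStepSize(0.1)
        .setNumIterations(50)

      override protected def createModel(weights: Array[Double], intercept: Double) =
        new PlainModel(weights, intercept)
    }

Everything else (validation, intercept handling, logging) is inherited from the template.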
+ */ + +package org.apache.spark.mllib.regression + +/** + * Class that represents the features and labels of a data point. + * + * @param label Label for this data point. + * @param features List of features for this data point. + */ +case class LabeledPoint(val label: Double, val features: Array[Double]) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala new file mode 100644 index 0000000000..d959695325 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.regression + +import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.util.MLUtils + +import org.jblas.DoubleMatrix + +/** + * Regression model trained using Lasso. + * + * @param weights Weights computed for every feature. + * @param intercept Intercept computed for this model. + */ +class LassoModel( + override val weights: Array[Double], + override val intercept: Double) + extends GeneralizedLinearModel(weights, intercept) + with RegressionModel with Serializable { + + override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix, + intercept: Double) = { + dataMatrix.dot(weightMatrix) + intercept + } +} + +/** + * Train a regression model with L1-regularization using Stochastic Gradient Descent. + */ +class LassoWithSGD private ( + var stepSize: Double, + var numIterations: Int, + var regParam: Double, + var miniBatchFraction: Double) + extends GeneralizedLinearAlgorithm[LassoModel] + with Serializable { + + val gradient = new SquaredGradient() + val updater = new L1Updater() + @transient val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize) + .setNumIterations(numIterations) + .setRegParam(regParam) + .setMiniBatchFraction(miniBatchFraction) + + // We don't want to penalize the intercept, so set this to false. 
+ setIntercept(false) + + var yMean = 0.0 + var xColMean: DoubleMatrix = _ + var xColSd: DoubleMatrix = _ + + /** + * Construct a Lasso object with default parameters + */ + def this() = this(1.0, 100, 1.0, 1.0) + + def createModel(weights: Array[Double], intercept: Double) = { + val weightsMat = new DoubleMatrix(weights.length + 1, 1, (Array(intercept) ++ weights):_*) + val weightsScaled = weightsMat.div(xColSd) + val interceptScaled = yMean - (weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0)) + + new LassoModel(weightsScaled.data, interceptScaled) + } + + override def run( + input: RDD[LabeledPoint], + initialWeights: Array[Double]) + : LassoModel = + { + val nfeatures: Int = input.first.features.length + val nexamples: Long = input.count() + + // To avoid penalizing the intercept, we center and scale the data. + val stats = MLUtils.computeStats(input, nfeatures, nexamples) + yMean = stats._1 + xColMean = stats._2 + xColSd = stats._3 + + val normalizedData = input.map { point => + val yNormalized = point.label - yMean + val featuresMat = new DoubleMatrix(nfeatures, 1, point.features:_*) + val featuresNormalized = featuresMat.sub(xColMean).divi(xColSd) + LabeledPoint(yNormalized, featuresNormalized.toArray) + } + + super.run(normalizedData, initialWeights) + } +} + +/** + * Top-level methods for calling Lasso. + */ +object LassoWithSGD { + + /** + * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in + * gradient descent are initialized using the initial weights provided. + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + * @param regParam Regularization parameter. + * @param miniBatchFraction Fraction of data to be used per iteration. + * @param initialWeights Initial set of weights to be used. Array should be equal in size to + * the number of features in the data. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + regParam: Double, + miniBatchFraction: Double, + initialWeights: Array[Double]) + : LassoModel = + { + new LassoWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input, + initialWeights) + } + + /** + * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + * @param regParam Regularization parameter. + * @param miniBatchFraction Fraction of data to be used per iteration. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + regParam: Double, + miniBatchFraction: Double) + : LassoModel = + { + new LassoWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input) + } + + /** + * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. 
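Because run trains on centered, scaled features, createModel has to map the learned weights back to the original space: w_orig = w_scaled / xColSd and intercept = yMean - w_scaled dot (xColMean / xColSd). A driver-side check of that arithmetic (a sketch with made-up numbers, jblas only):

    import org.jblas.DoubleMatrix

    // Illustrative sketch, not part of this commit.
    object RescaleCheck {
      def main(args: Array[String]) {
        val wScaled = new DoubleMatrix(2, 1, 0.6, -0.2)   // weights learned on normalized data
        val xColMean = new DoubleMatrix(2, 1, 10.0, 4.0)  // per-feature mean
        val xColSd = new DoubleMatrix(2, 1, 2.0, 0.5)     // per-feature standard deviation
        val yMean = 3.0

        val wOrig = wScaled.div(xColSd)
        val intercept = yMean - wScaled.transpose().mmul(xColMean.div(xColSd)).get(0)
        // Predicting on raw x with (wOrig, intercept) now equals predicting on
        // (x - mean) / sd with wScaled and adding back yMean.
        println(wOrig.toArray.mkString(", ") + "  intercept = " + intercept)
      }
    }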
We use the entire data set to + * update the gradient in each iteration. + * + * @param input RDD of (label, array of features) pairs. + * @param stepSize Step size to be used for each iteration of Gradient Descent. + * @param regParam Regularization parameter. + * @param numIterations Number of iterations of gradient descent to run. + * @return a LassoModel which has the weights and offset from training. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + regParam: Double) + : LassoModel = + { + train(input, numIterations, stepSize, regParam, 1.0) + } + + /** + * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using a step size of 1.0. We use the entire data set to + * update the gradient in each iteration. + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @return a LassoModel which has the weights and offset from training. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int) + : LassoModel = + { + train(input, numIterations, 1.0, 1.0, 1.0) + } + + def main(args: Array[String]) { + if (args.length != 5) { + println("Usage: Lasso <master> <input_dir> <step_size> <regularization_parameter> <niters>") + System.exit(1) + } + val sc = new SparkContext(args(0), "Lasso") + val data = MLUtils.loadLabeledData(sc, args(1)) + val model = LassoWithSGD.train(data, args(4).toInt, args(2).toDouble, args(3).toDouble) + + sc.stop() + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala new file mode 100644 index 0000000000..ae95ea24fc --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.regression + +import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.util.MLUtils + +import org.jblas.DoubleMatrix + +/** + * Regression model trained using LinearRegression. + * + * @param weights Weights computed for every feature. + * @param intercept Intercept computed for this model. 
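End to end, the entry points above are used as follows; a sketch where the input path is illustrative and the file holds MLUtils-format (label, features) lines:

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.regression.LassoWithSGD
    import org.apache.spark.mllib.util.MLUtils

    // Illustrative sketch, not part of this commit.
    object LassoExample {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "LassoExample")
        val data = MLUtils.loadLabeledData(sc, "/path/to/lasso_data.txt") // illustrative path
        // 100 iterations, step size 1.0, regParam 0.1, full-batch gradients
        val model = LassoWithSGD.train(data, 100, 1.0, 0.1, 1.0)
        println("weights: " + model.weights.mkString(",") + " intercept: " + model.intercept)
        sc.stop()
      }
    }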
+ */ +class LinearRegressionModel( + override val weights: Array[Double], + override val intercept: Double) + extends GeneralizedLinearModel(weights, intercept) + with RegressionModel with Serializable { + + override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix, + intercept: Double) = { + dataMatrix.dot(weightMatrix) + intercept + } +} + +/** + * Train a regression model with no regularization using Stochastic Gradient Descent. + */ +class LinearRegressionWithSGD private ( + var stepSize: Double, + var numIterations: Int, + var miniBatchFraction: Double) + extends GeneralizedLinearAlgorithm[LinearRegressionModel] + with Serializable { + + val gradient = new SquaredGradient() + val updater = new SimpleUpdater() + val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize) + .setNumIterations(numIterations) + .setMiniBatchFraction(miniBatchFraction) + + /** + * Construct a LinearRegression object with default parameters + */ + def this() = this(1.0, 100, 1.0) + + def createModel(weights: Array[Double], intercept: Double) = { + new LinearRegressionModel(weights, intercept) + } +} + +/** + * Top-level methods for calling LinearRegression. + */ +object LinearRegressionWithSGD { + + /** + * Train a Linear Regression model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in + * gradient descent are initialized using the initial weights provided. + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + * @param miniBatchFraction Fraction of data to be used per iteration. + * @param initialWeights Initial set of weights to be used. Array should be equal in size to + * the number of features in the data. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double, + initialWeights: Array[Double]) + : LinearRegressionModel = + { + new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction).run(input, + initialWeights) + } + + /** + * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + * @param miniBatchFraction Fraction of data to be used per iteration. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double) + : LinearRegressionModel = + { + new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction).run(input) + } + + /** + * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. We use the entire data set to + * update the gradient in each iteration. + * + * @param input RDD of (label, array of features) pairs. + * @param stepSize Step size to be used for each iteration of Gradient Descent. 
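The trained model exposes the two predict overloads inherited from GeneralizedLinearModel, one for an RDD of points and one for a single point. A sketch (path illustrative):

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.regression.LinearRegressionWithSGD
    import org.apache.spark.mllib.util.MLUtils

    // Illustrative sketch, not part of this commit.
    object LinearRegressionExample {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "LinearRegressionExample")
        val data = MLUtils.loadLabeledData(sc, "/path/to/data.txt") // illustrative path
        // 100 iterations, step size 0.5, full-batch gradients
        val model = LinearRegressionWithSGD.train(data, 100, 0.5, 1.0)
        val one = model.predict(data.first().features)   // single point
        val all = model.predict(data.map(_.features))    // whole RDD
        println("first prediction: " + one + ", total: " + all.count())
        sc.stop()
      }
    }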
+ * @param numIterations Number of iterations of gradient descent to run. + * @return a LinearRegressionModel which has the weights and offset from training. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double) + : LinearRegressionModel = + { + train(input, numIterations, stepSize, 1.0) + } + + /** + * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using a step size of 1.0. We use the entire data set to + * update the gradient in each iteration. + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @return a LinearRegressionModel which has the weights and offset from training. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int) + : LinearRegressionModel = + { + train(input, numIterations, 1.0, 1.0) + } + + def main(args: Array[String]) { + if (args.length != 4) { + println("Usage: LinearRegression <master> <input_dir> <step_size> <niters>") + System.exit(1) + } + val sc = new SparkContext(args(0), "LinearRegression") + val data = MLUtils.loadLabeledData(sc, args(1)) + val model = LinearRegressionWithSGD.train(data, args(3).toInt, args(2).toDouble) + + sc.stop() + } +} diff --git a/mllib/src/main/scala/spark/mllib/regression/Regression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala index 645204ddf3..423afc32d6 100644 --- a/mllib/src/main/scala/spark/mllib/regression/Regression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.mllib.regression +package org.apache.spark.mllib.regression -import spark.RDD +import org.apache.spark.rdd.RDD -trait RegressionModel { +trait RegressionModel extends Serializable { /** * Predict values for the given data set using the model trained. * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala new file mode 100644 index 0000000000..b29508d2b9 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *    http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.regression + +import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.util.MLUtils + +import org.jblas.DoubleMatrix + +/** + * Regression model trained using RidgeRegression. + * + * @param weights Weights computed for every feature. + * @param intercept Intercept computed for this model.
+ */ +class RidgeRegressionModel( + override val weights: Array[Double], + override val intercept: Double) + extends GeneralizedLinearModel(weights, intercept) + with RegressionModel with Serializable { + + override def predictPoint(dataMatrix: DoubleMatrix, weightMatrix: DoubleMatrix, + intercept: Double) = { + dataMatrix.dot(weightMatrix) + intercept + } +} + +/** + * Train a regression model with L2-regularization using Stochastic Gradient Descent. + */ +class RidgeRegressionWithSGD private ( + var stepSize: Double, + var numIterations: Int, + var regParam: Double, + var miniBatchFraction: Double) + extends GeneralizedLinearAlgorithm[RidgeRegressionModel] + with Serializable { + + val gradient = new SquaredGradient() + val updater = new SquaredL2Updater() + + @transient val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize) + .setNumIterations(numIterations) + .setRegParam(regParam) + .setMiniBatchFraction(miniBatchFraction) + + // We don't want to penalize the intercept in RidgeRegression, so set this to false. + setIntercept(false) + + var yMean = 0.0 + var xColMean: DoubleMatrix = _ + var xColSd: DoubleMatrix = _ + + /** + * Construct a RidgeRegression object with default parameters + */ + def this() = this(1.0, 100, 1.0, 1.0) + + def createModel(weights: Array[Double], intercept: Double) = { + val weightsMat = new DoubleMatrix(weights.length + 1, 1, (Array(intercept) ++ weights):_*) + val weightsScaled = weightsMat.div(xColSd) + val interceptScaled = yMean - (weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0)) + + new RidgeRegressionModel(weightsScaled.data, interceptScaled) + } + + override def run( + input: RDD[LabeledPoint], + initialWeights: Array[Double]) + : RidgeRegressionModel = + { + val nfeatures: Int = input.first.features.length + val nexamples: Long = input.count() + + // To avoid penalizing the intercept, we center and scale the data. + val stats = MLUtils.computeStats(input, nfeatures, nexamples) + yMean = stats._1 + xColMean = stats._2 + xColSd = stats._3 + + val normalizedData = input.map { point => + val yNormalized = point.label - yMean + val featuresMat = new DoubleMatrix(nfeatures, 1, point.features:_*) + val featuresNormalized = featuresMat.sub(xColMean).divi(xColSd) + LabeledPoint(yNormalized, featuresNormalized.toArray) + } + + super.run(normalizedData, initialWeights) + } +} + +/** + * Top-level methods for calling RidgeRegression. + */ +object RidgeRegressionWithSGD { + + /** + * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in + * gradient descent are initialized using the initial weights provided. + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + * @param regParam Regularization parameter. + * @param miniBatchFraction Fraction of data to be used per iteration. + * @param initialWeights Initial set of weights to be used. Array should be equal in size to + * the number of features in the data. 
+ */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + regParam: Double, + miniBatchFraction: Double, + initialWeights: Array[Double]) + : RidgeRegressionModel = + { + new RidgeRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run( + input, initialWeights) + } + + /** + * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. Each iteration uses + * `miniBatchFraction` fraction of the data to calculate the gradient. + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @param stepSize Step size to be used for each iteration of gradient descent. + * @param regParam Regularization parameter. + * @param miniBatchFraction Fraction of data to be used per iteration. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + regParam: Double, + miniBatchFraction: Double) + : RidgeRegressionModel = + { + new RidgeRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input) + } + + /** + * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using the specified step size. We use the entire data set to + * update the gradient in each iteration. + * + * @param input RDD of (label, array of features) pairs. + * @param stepSize Step size to be used for each iteration of Gradient Descent. + * @param regParam Regularization parameter. + * @param numIterations Number of iterations of gradient descent to run. + * @return a RidgeRegressionModel which has the weights and offset from training. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int, + stepSize: Double, + regParam: Double) + : RidgeRegressionModel = + { + train(input, numIterations, stepSize, regParam, 1.0) + } + + /** + * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number + * of iterations of gradient descent using a step size of 1.0. We use the entire data set to + * update the gradient in each iteration. + * + * @param input RDD of (label, array of features) pairs. + * @param numIterations Number of iterations of gradient descent to run. + * @return a RidgeRegressionModel which has the weights and offset from training. + */ + def train( + input: RDD[LabeledPoint], + numIterations: Int) + : RidgeRegressionModel = + { + train(input, numIterations, 1.0, 1.0, 1.0) + } + + def main(args: Array[String]) { + if (args.length != 5) { + println("Usage: RidgeRegression <master> <input_dir> <step_size> <regularization_parameter>" + + " <niters>") + System.exit(1) + } + val sc = new SparkContext(args(0), "RidgeRegression") + val data = MLUtils.loadLabeledData(sc, args(1)) + val model = RidgeRegressionWithSGD.train(data, args(4).toInt, args(2).toDouble, + args(3).toDouble) + + sc.stop() + } +} diff --git a/core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala index df55be1254..8b55bce7c4 100644 --- a/core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala @@ -15,30 +15,29 @@ * limitations under the License. 
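RidgeRegressionWithSGD mirrors the Lasso driver, differing only in the SquaredL2Updater behind the optimizer; a usage sketch (path illustrative):

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.regression.RidgeRegressionWithSGD
    import org.apache.spark.mllib.util.MLUtils

    // Illustrative sketch, not part of this commit.
    object RidgeExample {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "RidgeExample")
        val data = MLUtils.loadLabeledData(sc, "/path/to/data.txt") // illustrative path
        // 200 iterations, step size 1.0, regParam 0.05, full-batch gradients
        val model = RidgeRegressionWithSGD.train(data, 200, 1.0, 0.05, 1.0)
        println("weights: " + model.weights.mkString(",") + " intercept: " + model.intercept)
        sc.stop()
      }
    }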
*/ -package spark.deploy -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.mapred.JobConf +package org.apache.spark.mllib.util +import org.apache.spark.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.regression.LabeledPoint /** - * Contains util methods to interact with Hadoop from spark. + * A collection of methods used to validate data before applying ML algorithms. */ -object SparkHadoopUtil { - - def getUserNameFromEnvironment(): String = { - // defaulting to -D ... - System.getProperty("user.name") - } - - def runAsUser(func: (Product) => Unit, args: Product) { - - // Add support, if exists - for now, simply run func ! - func(args) +object DataValidators extends Logging { + + /** + * Function to check if labels used for classification are either zero or one. + * + * @param data - input data set that needs to be checked + * + * @return True if labels are all zero or one, false otherwise. + */ + val classificationLabels: RDD[LabeledPoint] => Boolean = { data => + val numInvalid = data.filter(x => x.label != 1.0 && x.label != 0.0).count() + if (numInvalid != 0) { + logError("Classification labels should be 0 or 1. Found " + numInvalid + " invalid labels") + } + numInvalid == 0 } - - // Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems - def newConfiguration(): Configuration = new Configuration() - - // add any user credentials to the job conf which are necessary for running on a secure Hadoop cluster - def addCredentials(conf: JobConf) {} } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala new file mode 100644 index 0000000000..9109189dff --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.util + +import scala.util.Random + +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD + +/** + * Generate test data for KMeans. This class first chooses k cluster centers + * from a d-dimensional Gaussian distribution scaled by factor r and then creates a Gaussian + * cluster with scale 1 around each center. + */ + +object KMeansDataGenerator { + + /** + * Generate an RDD containing test data for KMeans. 
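classificationLabels is a plain function value of type RDD[LabeledPoint] => Boolean, so besides being installed in an algorithm's validators list it can be called directly. A sketch:

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.mllib.util.DataValidators

    // Illustrative sketch, not part of this commit.
    object ValidatorCheck {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "ValidatorCheck")
        val ok = sc.parallelize(Seq(LabeledPoint(0.0, Array(1.0)), LabeledPoint(1.0, Array(2.0))))
        val bad = sc.parallelize(Seq(LabeledPoint(2.0, Array(1.0))))
        println(DataValidators.classificationLabels(ok))   // true
        println(DataValidators.classificationLabels(bad))  // false, after logging an error
        sc.stop()
      }
    }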
* + * @param sc SparkContext to use for creating the RDD + * @param numPoints Number of points that will be contained in the RDD + * @param k Number of clusters + * @param d Number of dimensions + * @param r Scaling factor for the distribution of the initial centers + * @param numPartitions Number of partitions of the generated RDD; default 2 + */ + def generateKMeansRDD( + sc: SparkContext, + numPoints: Int, + k: Int, + d: Int, + r: Double, + numPartitions: Int = 2) + : RDD[Array[Double]] = + { + // First, generate some centers + val rand = new Random(42) + val centers = Array.fill(k)(Array.fill(d)(rand.nextGaussian() * r)) + // Then generate points around each center + sc.parallelize(0 until numPoints, numPartitions).map { idx => + val center = centers(idx % k) + val rand2 = new Random(42 + idx) + Array.tabulate(d)(i => center(i) + rand2.nextGaussian()) + } + } + + def main(args: Array[String]) { + if (args.length < 6) { + println("Usage: KMeansGenerator " + + "<master> <output_dir> <num_points> <k> <d> <r> [<num_partitions>]") + System.exit(1) + } + + val sparkMaster = args(0) + val outputPath = args(1) + val numPoints = args(2).toInt + val k = args(3).toInt + val d = args(4).toInt + val r = args(5).toDouble + val parts = if (args.length >= 7) args(6).toInt else 2 + + val sc = new SparkContext(sparkMaster, "KMeansDataGenerator") + val data = generateKMeansRDD(sc, numPoints, k, d, r, parts) + data.map(_.mkString(" ")).saveAsTextFile(outputPath) + + System.exit(0) + } +} + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala new file mode 100644 index 0000000000..bc5045fb05 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *    http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.util + +import scala.collection.JavaConversions._ +import scala.util.Random + +import org.jblas.DoubleMatrix + +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.regression.LabeledPoint + +/** + * Generate sample data for linear regression. This class generates + * uniformly random values for every feature and adds zero-mean Gaussian noise scaled by `eps` + * to the response variable `Y`. + */ +object LinearDataGenerator { + + /** + * Return a Java List of synthetic data randomly generated according to a + * multicollinear model. + * @param intercept Data intercept + * @param weights Weights to be applied. + * @param nPoints Number of points in sample. + * @param seed Random seed + * @return Java List of input.
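generateKMeansRDD can also be called programmatically rather than through main; a sketch with made-up sizes:

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.util.KMeansDataGenerator

    // Illustrative sketch, not part of this commit.
    object KMeansDataDemo {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "KMeansDataDemo")
        // 1000 points around 5 centers in 3 dimensions; centers scaled by r = 4.0
        val points = KMeansDataGenerator.generateKMeansRDD(sc, 1000, 5, 3, 4.0)
        points.take(2).foreach(p => println(p.mkString(" ")))
        sc.stop()
      }
    }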
+ */ + def generateLinearInputAsList( + intercept: Double, + weights: Array[Double], + nPoints: Int, + seed: Int, + eps: Double): java.util.List[LabeledPoint] = { + seqAsJavaList(generateLinearInput(intercept, weights, nPoints, seed, eps)) + } + + /** + * + * @param intercept Data intercept + * @param weights Weights to be applied. + * @param nPoints Number of points in sample. + * @param seed Random seed + * @param eps Epsilon scaling factor. + * @return Seq of LabeledPoint containing the generated data. + */ + def generateLinearInput( + intercept: Double, + weights: Array[Double], + nPoints: Int, + seed: Int, + eps: Double = 0.1): Seq[LabeledPoint] = { + + val rnd = new Random(seed) + val weightsMat = new DoubleMatrix(1, weights.length, weights:_*) + val x = Array.fill[Array[Double]](nPoints)( + Array.fill[Double](weights.length)(2 * rnd.nextDouble - 1.0)) + val y = x.map { xi => + (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) + intercept + eps * rnd.nextGaussian() + } + y.zip(x).map(p => LabeledPoint(p._1, p._2)) + } + + /** + * Generate an RDD containing sample data for Linear Regression models - including Ridge, Lasso, + * and unregularized variants. + * + * @param sc SparkContext to be used for generating the RDD. + * @param nexamples Number of examples that will be contained in the RDD. + * @param nfeatures Number of features to generate for each example. + * @param eps Scaling factor for the Gaussian noise added to the response. + * @param nparts Number of partitions in the RDD. Default value is 2. + * + * @return RDD of LabeledPoint containing sample data. + */ + def generateLinearRDD( + sc: SparkContext, + nexamples: Int, + nfeatures: Int, + eps: Double, + nparts: Int = 2, + intercept: Double = 0.0) : RDD[LabeledPoint] = { + org.jblas.util.Random.seed(42) + // Random values distributed uniformly in [-0.5, 0.5] + val w = DoubleMatrix.rand(nfeatures, 1).subi(0.5) + + val data: RDD[LabeledPoint] = sc.parallelize(0 until nparts, nparts).flatMap { p => + val seed = 42 + p + val examplesInPartition = nexamples / nparts + generateLinearInput(intercept, w.toArray, examplesInPartition, seed, eps) + } + data + } + + def main(args: Array[String]) { + if (args.length < 2) { + println("Usage: LinearDataGenerator " + + "<master> <output_dir> [num_examples] [num_features] [num_partitions]") + System.exit(1) + } + + val sparkMaster: String = args(0) + val outputPath: String = args(1) + val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 + val nfeatures: Int = if (args.length > 3) args(3).toInt else 100 + val parts: Int = if (args.length > 4) args(4).toInt else 2 + val eps = 10 + + val sc = new SparkContext(sparkMaster, "LinearDataGenerator") + val data = generateLinearRDD(sc, nexamples, nfeatures, eps, nparts = parts) + + MLUtils.saveLabeledData(data, outputPath) + sc.stop() + } +} diff --git a/mllib/src/main/scala/spark/mllib/regression/LogisticRegressionGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala index 8094d22405..52c4a71d62 100644 --- a/mllib/src/main/scala/spark/mllib/regression/LogisticRegressionGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala @@ -15,16 +15,49 @@ * limitations under the License.
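Both LinearDataGenerator entry points in one sketch (sizes, seed and weights are made up):

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.util.LinearDataGenerator

    // Illustrative sketch, not part of this commit.
    object LinearDataDemo {
      def main(args: Array[String]) {
        // Driver-side generation with a fixed seed; eps defaults to 0.1.
        val local = LinearDataGenerator.generateLinearInput(0.0, Array(1.0, -0.5), 100, 42)
        println(local.head.label)
        // Distributed generation: 1000 examples, 10 features, noise scale 0.1.
        val sc = new SparkContext("local", "LinearDataDemo")
        val data = LinearDataGenerator.generateLinearRDD(sc, 1000, 10, 0.1)
        println(data.count())
        sc.stop()
      }
    }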
*/ -package spark.mllib.regression +package org.apache.spark.mllib.util import scala.util.Random -import org.jblas.DoubleMatrix +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.regression.LabeledPoint -import spark.{RDD, SparkContext} -import spark.mllib.util.MLUtils +/** + * Generate test data for LogisticRegression. This class chooses positive labels + * with probability `probOne` and scales features for positive examples by `eps`. + */ + +object LogisticRegressionDataGenerator { + + /** + * Generate an RDD containing test data for LogisticRegression. + * + * @param sc SparkContext to use for creating the RDD. + * @param nexamples Number of examples that will be contained in the RDD. + * @param nfeatures Number of features to generate for each example. + * @param eps Epsilon factor by which positive examples are scaled. + * @param nparts Number of partitions of the generated RDD. Default value is 2. + * @param probOne Probability that a label is 1 (and not 0). Default value is 0.5. + */ + def generateLogisticRDD( + sc: SparkContext, + nexamples: Int, + nfeatures: Int, + eps: Double, + nparts: Int = 2, + probOne: Double = 0.5): RDD[LabeledPoint] = { + val data = sc.parallelize(0 until nexamples, nparts).map { idx => + val rnd = new Random(42 + idx) -object LogisticRegressionGenerator { + val y = if (idx % 2 == 0) 0.0 else 1.0 + val x = Array.fill[Double](nfeatures) { + rnd.nextGaussian() + (y * eps) + } + LabeledPoint(y, x) + } + data + } def main(args: Array[String]) { if (args.length != 5) { @@ -40,17 +73,8 @@ object LogisticRegressionGenerator { val parts: Int = if (args.length > 4) args(4).toInt else 2 val eps = 3 - val sc = new SparkContext(sparkMaster, "LogisticRegressionGenerator") - - val data: RDD[(Double, Array[Double])] = sc.parallelize(0 until nexamples, parts).map { idx => - val rnd = new Random(42 + idx) - - val y = if (idx % 2 == 0) 0 else 1 - val x = Array.fill[Double](nfeatures) { - rnd.nextGaussian() + (y * eps) - } - (y, x) - } + val sc = new SparkContext(sparkMaster, "LogisticRegressionDataGenerator") + val data = generateLogisticRDD(sc, nexamples, nfeatures, eps, parts) MLUtils.saveLabeledData(data, outputPath) sc.stop() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala new file mode 100644 index 0000000000..5aec867257 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
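A usage sketch for the renamed generator (sizes are made up):

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.util.LogisticRegressionDataGenerator

    // Illustrative sketch, not part of this commit.
    object LogisticDataDemo {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "LogisticDataDemo")
        // 1000 examples, 10 features; positive-class features shifted by eps = 3.0
        val data = LogisticRegressionDataGenerator.generateLogisticRDD(sc, 1000, 10, 3.0)
        data.take(3).foreach(p => println(p.label + " " + p.features.mkString(" ")))
        sc.stop()
      }
    }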
+ */ + +package org.apache.spark.mllib.recommendation + +import scala.util.Random + +import org.jblas.DoubleMatrix + +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.util.MLUtils + +/** +* Generate RDD(s) containing data for Matrix Factorization. +* +* This method samples training entries according to the oversampling factor +* 'trainSampFact', which is a multiplicative factor of the number of +* degrees of freedom of the matrix: rank*(m+n-rank). +* +* It optionally samples entries for a testing matrix using +* 'testSampFact', the percentage of the number of training entries +* to use for testing. +* +* This method takes the following inputs: +*   sparkMaster    (String) The master URL. +*   outputPath     (String) Directory to save output. +*   m              (Int) Number of rows in data matrix. +*   n              (Int) Number of columns in data matrix. +*   rank           (Int) Underlying rank of data matrix. +*   trainSampFact  (Double) Oversampling factor. +*   noise          (Boolean) Whether to add gaussian noise to training data. +*   sigma          (Double) Standard deviation of added gaussian noise. +*   test           (Boolean) Whether to create testing RDD. +*   testSampFact   (Double) Percentage of training data to use as test data. +*/ + +object MFDataGenerator { + + def main(args: Array[String]) { + if (args.length < 2) { + println("Usage: MFDataGenerator " + + "<master> <outputDir> [m] [n] [rank] [trainSampFact] [noise] [sigma] [test] [testSampFact]") + System.exit(1) + } + + val sparkMaster: String = args(0) + val outputPath: String = args(1) + val m: Int = if (args.length > 2) args(2).toInt else 100 + val n: Int = if (args.length > 3) args(3).toInt else 100 + val rank: Int = if (args.length > 4) args(4).toInt else 10 + val trainSampFact: Double = if (args.length > 5) args(5).toDouble else 1.0 + val noise: Boolean = if (args.length > 6) args(6).toBoolean else false + val sigma: Double = if (args.length > 7) args(7).toDouble else 0.1 + val test: Boolean = if (args.length > 8) args(8).toBoolean else false + val testSampFact: Double = if (args.length > 9) args(9).toDouble else 0.1 + + val sc = new SparkContext(sparkMaster, "MFDataGenerator") + + val A = DoubleMatrix.randn(m, rank) + val B = DoubleMatrix.randn(rank, n) + val z = 1 / (scala.math.sqrt(scala.math.sqrt(rank))) + A.mmuli(z) + B.mmuli(z) + val fullData = A.mmul(B) + + val df = rank * (m + n - rank) + val sampSize = scala.math.min(scala.math.round(trainSampFact * df), + scala.math.round(.99 * m * n)).toInt + val rand = new Random() + val mn = m * n + val shuffled = rand.shuffle((1 to mn).toIterable) + + val omega = shuffled.slice(0, sampSize) + val ordered = omega.sortWith(_ < _).toArray + val trainData: RDD[(Int, Int, Double)] = sc.parallelize(ordered) + .map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1))) + + // optionally add gaussian noise (keep the noisy RDD, rather than discarding it) + val trainOut = if (noise) { + trainData.map(x => (x._1, x._2, x._3 + Random.nextGaussian * sigma)) + } else { + trainData + } + + trainOut.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath) + + // optionally generate testing data + if (test) { + val testSampSize = scala.math + .min(scala.math.round(sampSize * testSampFact), scala.math.round(mn - sampSize)).toInt + val testOmega = shuffled.slice(sampSize, sampSize + testSampSize) + val testOrdered = testOmega.sortWith(_ < _).toArray + val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered) + .map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1))) + testData.map(x => x._1 + "," + x._2 + "," +
x._3).saveAsTextFile(outputPath) + } + + sc.stop() + + } +} diff --git a/mllib/src/main/scala/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index b5e564df6d..d91b74c3ac 100644 --- a/mllib/src/main/scala/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -15,38 +15,49 @@ * limitations under the License. */ -package spark.mllib.util +package org.apache.spark.mllib.util -import spark.{RDD, SparkContext} -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ import org.jblas.DoubleMatrix +import org.apache.spark.mllib.regression.LabeledPoint /** - * Helper methods to load and save data - * Data format: - * <l>, <f1> <f2> ... - * where <f1>, <f2> are feature values in Double and <l> is the corresponding label as Double. + * Helper methods to load, save and pre-process data used in ML Lib. */ object MLUtils { /** + * Load labeled data from a file. The data format used here is + * <L>, <f1> <f2> ... + * where <f1>, <f2> are feature values in Double and <L> is the corresponding label as Double. + * * @param sc SparkContext * @param dir Directory to the input data files. - * @return An RDD of tuples. For each tuple, the first element is the label, and the second - * element represents the feature values (an array of Double). + * @return An RDD of LabeledPoint. Each labeled point has two elements: the first element is + * the label, and the second element represents the feature values (an array of Double). */ - def loadLabeledData(sc: SparkContext, dir: String): RDD[(Double, Array[Double])] = { + def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = { sc.textFile(dir).map { line => - val parts = line.split(",") + val parts = line.split(',') val label = parts(0).toDouble - val features = parts(1).trim().split(" ").map(_.toDouble) - (label, features) + val features = parts(1).trim().split(' ').map(_.toDouble) + LabeledPoint(label, features) } } - def saveLabeledData(data: RDD[(Double, Array[Double])], dir: String) { - val dataStr = data.map(x => x._1 + "," + x._2.mkString(" ")) + /** + * Save labeled data to a file. The data format used here is + * <L>, <f1> <f2> ... + * where <f1>, <f2> are feature values in Double and <L> is the corresponding label as Double. + * + * @param data An RDD of LabeledPoints containing data to be saved. + * @param dir Directory to save the data. + */ + def saveLabeledData(data: RDD[LabeledPoint], dir: String) { + val dataStr = data.map(x => x.label + "," + x.features.mkString(" ")) dataStr.saveAsTextFile(dir) } @@ -62,16 +73,16 @@ object MLUtils { * xColMean - Row vector with mean for every column (or feature) of the input data * xColSd - Row vector standard deviation for every column (or feature) of the input data. */ - def computeStats(data: RDD[(Double, Array[Double])], nfeatures: Int, nexamples: Long): + def computeStats(data: RDD[LabeledPoint], nfeatures: Int, nexamples: Long): (Double, DoubleMatrix, DoubleMatrix) = { - val yMean: Double = data.map { case (y, features) => y }.reduce(_ + _) / nexamples + val yMean: Double = data.map { labeledPoint => labeledPoint.label }.reduce(_ + _) / nexamples // NOTE: We shuffle X by column here to compute column sum and sum of squares. 
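The helpers above round-trip the plain text format and feed computeStats, which the Lasso and ridge normalizers rely on. A sketch (paths illustrative):

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.util.MLUtils

    // Illustrative sketch, not part of this commit.
    object MLUtilsDemo {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "MLUtilsDemo")
        // An input line such as "1.0, 2.5 3.1 -0.4" becomes
        // LabeledPoint(1.0, Array(2.5, 3.1, -0.4)).
        val data = MLUtils.loadLabeledData(sc, "/path/to/input") // illustrative path
        val nfeatures = data.first().features.length
        val (yMean, xColMean, xColSd) = MLUtils.computeStats(data, nfeatures, data.count())
        println("label mean: " + yMean)
        MLUtils.saveLabeledData(data, "/path/to/output") // illustrative path
        sc.stop()
      }
    }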
- val xColSumSq: RDD[(Int, (Double, Double))] = data.flatMap { case(y, features) => - val nCols = features.length + val xColSumSq: RDD[(Int, (Double, Double))] = data.flatMap { labeledPoint => + val nCols = labeledPoint.features.length // Traverse over every column and emit (col, value, value^2) Iterator.tabulate(nCols) { i => - (i, (features(i), features(i)*features(i))) + (i, (labeledPoint.features(i), labeledPoint.features(i)*labeledPoint.features(i))) } }.reduceByKey { case(x1, x2) => (x1._1 + x2._1, x1._2 + x2._2) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala new file mode 100644 index 0000000000..6e9f667635 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala @@ -0,0 +1,51 @@ +package org.apache.spark.mllib.util + +import scala.util.Random + +import org.jblas.DoubleMatrix + +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.regression.LabeledPoint + +/** + * Generate sample data used for SVM. This class generates uniform random values + * for the features and adds Gaussian noise with weight 0.1 to generate labels. + */ +object SVMDataGenerator { + + def main(args: Array[String]) { + if (args.length < 2) { + println("Usage: SVMGenerator " + + "<master> <output_dir> [num_examples] [num_features] [num_partitions]") + System.exit(1) + } + + val sparkMaster: String = args(0) + val outputPath: String = args(1) + val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 + val nfeatures: Int = if (args.length > 3) args(3).toInt else 2 + val parts: Int = if (args.length > 4) args(4).toInt else 2 + + val sc = new SparkContext(sparkMaster, "SVMGenerator") + + val globalRnd = new Random(94720) + val trueWeights = new DoubleMatrix(1, nfeatures + 1, + Array.fill[Double](nfeatures + 1)(globalRnd.nextGaussian()):_*) + + val data: RDD[LabeledPoint] = sc.parallelize(0 until nexamples, parts).map { idx => + val rnd = new Random(42 + idx) + + val x = Array.fill[Double](nfeatures) { + rnd.nextDouble() * 2.0 - 1.0 + } + val yD = (new DoubleMatrix(1, x.length, x:_*)).dot(trueWeights) + rnd.nextGaussian() * 0.1 + val y = if (yD < 0) 0.0 else 1.0 + LabeledPoint(y, x) + } + + MLUtils.saveLabeledData(data, outputPath) + + sc.stop() + } +} diff --git a/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala deleted file mode 100644 index 2fb0c8136f..0000000000 --- a/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
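One detail worth noting in the LogisticGradient deleted below (removed here as part of the spark.* to org.apache.spark.* rename): the loss log(1 + e^x) is evaluated piecewise, because e^x overflows a Double for x near 710. A self-contained sketch of the stable form, using the identity log(1 + e^x) = x + log(1 + e^-x):

    // Stable evaluation of log(1 + exp(x)); the naive expression overflows
    // for large positive x, so that branch is rewritten via the identity above.
    def log1pExp(x: Double): Double =
      if (x > 0) x + math.log(1 + math.exp(-x))
      else math.log(1 + math.exp(x))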
- */ - -package spark.mllib.optimization - -import org.jblas.DoubleMatrix - -abstract class Gradient extends Serializable { - /** - * Compute the gradient for a given row of data. - * - * @param data - One row of data. Row matrix of size 1xn where n is the number of features. - * @param label - Label for this data item. - * @param weights - Column matrix containing weights for every feature. - */ - def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix): - (DoubleMatrix, Double) -} - -class LogisticGradient extends Gradient { - override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix): - (DoubleMatrix, Double) = { - val margin: Double = -1.0 * data.dot(weights) - val gradientMultiplier = (1.0 / (1.0 + math.exp(margin))) - label - - val gradient = data.mul(gradientMultiplier) - val loss = - if (margin > 0) { - math.log(1 + math.exp(0 - margin)) - } else { - math.log(1 + math.exp(margin)) - margin - } - - (gradient, loss) - } -} diff --git a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala deleted file mode 100644 index e1b73bc25e..0000000000 --- a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.mllib.optimization - -import spark.{Logging, RDD, SparkContext} -import spark.SparkContext._ - -import org.jblas.DoubleMatrix - -import scala.collection.mutable.ArrayBuffer - - -object GradientDescent { - - /** - * Run gradient descent in parallel using mini batches. - * Based on Matlab code written by John Duchi. - * - * @param data - Input data for SGD. RDD of form (label, [feature values]). - * @param gradient - Gradient object that will be used to compute the gradient. - * @param updater - Updater object that will be used to update the model. - * @param stepSize - stepSize to be used during update. - * @param numIters - number of iterations that SGD should be run. - * @param miniBatchFraction - fraction of the input data set that should be used for - * one iteration of SGD. Default value 1.0. - * - * @return weights - Column matrix containing weights for every feature. - * @return lossHistory - Array containing the loss computed for every iteration. 
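The runMiniBatchSGD body follows. Its loop can be summarized: each iteration samples a miniBatchFraction of the data, averages the per-example gradients, and steps by stepSize / sqrt(iteration) via the updater. A toy local sketch of that loop on a 1-D least-squares problem (all names and the toy objective are illustrative, not the MLlib API):

    import scala.util.Random

    object MiniBatchSgdSketch {
      def main(args: Array[String]) {
        val rnd = new Random(42)
        // Toy data from y = 3x + small Gaussian noise.
        val data = Array.fill(1000) {
          val x = rnd.nextDouble()
          (x, 3.0 * x + 0.01 * rnd.nextGaussian())
        }
        var w = 1.0
        val stepSize = 0.5
        val miniBatchFraction = 0.1
        for (i <- 1 to 100) {
          val batch = data.filter(_ => rnd.nextDouble() < miniBatchFraction)
          if (batch.nonEmpty) {
            // Gradient of 0.5 * (w*x - y)^2 with respect to w is (w*x - y) * x.
            val grad = batch.map { case (x, y) => (w * x - y) * x }.sum / batch.length
            // Step size decays like 1 / sqrt(iteration), as in SimpleUpdater.
            w -= stepSize / math.sqrt(i) * grad
          }
        }
        println("learned w = " + w)   // drifts toward 3.0
      }
    }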
- */ - def runMiniBatchSGD( - data: RDD[(Double, Array[Double])], - gradient: Gradient, - updater: Updater, - stepSize: Double, - numIters: Int, - miniBatchFraction: Double=1.0) : (DoubleMatrix, Array[Double]) = { - - val lossHistory = new ArrayBuffer[Double](numIters) - - val nfeatures: Int = data.take(1)(0)._2.length - val nexamples: Long = data.count() - val miniBatchSize = nexamples * miniBatchFraction - - // Initialize weights as a column matrix - var weights = DoubleMatrix.ones(nfeatures) - var reg_val = 0.0 - - for (i <- 1 to numIters) { - val (gradientSum, lossSum) = data.sample(false, miniBatchFraction, 42+i).map { - case (y, features) => - val featuresRow = new DoubleMatrix(features.length, 1, features:_*) - val (grad, loss) = gradient.compute(featuresRow, y, weights) - (grad, loss) - }.reduce((a, b) => (a._1.addi(b._1), a._2 + b._2)) - - lossHistory.append(lossSum / miniBatchSize + reg_val) - val update = updater.compute(weights, gradientSum.div(miniBatchSize), stepSize, i) - weights = update._1 - reg_val = update._2 - } - - (weights, lossHistory.toArray) - } -} diff --git a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala deleted file mode 100644 index b864fd4634..0000000000 --- a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.mllib.optimization - -import org.jblas.DoubleMatrix - -abstract class Updater extends Serializable { - /** - * Compute an updated value for weights given the gradient, stepSize and iteration number. - * - * @param weightsOld - Column matrix of size nx1 where n is the number of features. - * @param gradient - Column matrix of size nx1 where n is the number of features. 
- * @param stepSize - step size across iterations - * @param iter - Iteration number - * - * @return weightsNew - Column matrix containing updated weights - * @return reg_val - regularization value - */ - def compute(weightsOlds: DoubleMatrix, gradient: DoubleMatrix, stepSize: Double, iter: Int): - (DoubleMatrix, Double) -} - -class SimpleUpdater extends Updater { - override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, - stepSize: Double, iter: Int): (DoubleMatrix, Double) = { - val normGradient = gradient.mul(stepSize / math.sqrt(iter)) - (weightsOld.sub(normGradient), 0) - } -} diff --git a/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala b/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala deleted file mode 100644 index bb294c2257..0000000000 --- a/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.mllib.regression - -import spark.{Logging, RDD, SparkContext} -import spark.mllib.optimization._ -import spark.mllib.util.MLUtils - -import org.jblas.DoubleMatrix - -/** - * Logistic Regression using Stochastic Gradient Descent. - * Based on Matlab code written by John Duchi. - */ -class LogisticRegressionModel( - val weights: DoubleMatrix, - val intercept: Double, - val losses: Array[Double]) extends RegressionModel { - - override def predict(testData: spark.RDD[Array[Double]]) = { - testData.map { x => - val margin = new DoubleMatrix(1, x.length, x:_*).mmul(this.weights).get(0) + this.intercept - 1.0/ (1.0 + math.exp(margin * -1)) - } - } - - override def predict(testData: Array[Double]): Double = { - val dataMat = new DoubleMatrix(1, testData.length, testData:_*) - val margin = dataMat.mmul(this.weights).get(0) + this.intercept - 1.0/ (1.0 + math.exp(margin * -1)) - } -} - -class LogisticRegression private (var stepSize: Double, var miniBatchFraction: Double, - var numIters: Int) - extends Logging { - - /** - * Construct a LogisticRegression object with default parameters - */ - def this() = this(1.0, 1.0, 100) - - /** - * Set the step size per-iteration of SGD. Default 1.0. - */ - def setStepSize(step: Double) = { - this.stepSize = step - this - } - - /** - * Set fraction of data to be used for each SGD iteration. Default 1.0. - */ - def setMiniBatchFraction(fraction: Double) = { - this.miniBatchFraction = fraction - this - } - - /** - * Set the number of iterations for SGD. Default 100. - */ - def setNumIterations(iters: Int) = { - this.numIters = iters - this - } - - def train(input: RDD[(Double, Array[Double])]): LogisticRegressionModel = { - // Add a extra variable consisting of all 1.0's for the intercept. 
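The train method continuing below folds the intercept into the weight vector by prepending a constant 1.0 feature to every example, so that after optimization weights(0) is the intercept and the remaining entries are the feature weights. A two-line sketch of the trick (the helper name is illustrative):

    // With a leading 1.0, w.dot(augmented) == w(0) + featureWeights.dot(features),
    // so the intercept is learned like any other weight.
    def withInterceptColumn(features: Array[Double]): Array[Double] =
      Array(1.0, features: _*)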
- val data = input.map { case (y, features) => - (y, Array(1.0, features:_*)) - } - - val (weights, losses) = GradientDescent.runMiniBatchSGD( - data, new LogisticGradient(), new SimpleUpdater(), stepSize, numIters, miniBatchFraction) - - val weightsScaled = weights.getRange(1, weights.length) - val intercept = weights.get(0) - - val model = new LogisticRegressionModel(weightsScaled, intercept, losses) - - logInfo("Final model weights " + model.weights) - logInfo("Final model intercept " + model.intercept) - logInfo("Last 10 losses " + model.losses.takeRight(10).mkString(", ")) - model - } -} - -/** - * Top-level methods for calling Logistic Regression. - */ -object LogisticRegression { - - /** - * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. Each iteration uses - * `miniBatchFraction` fraction of the data to calculate the gradient. - * - * @param input RDD of (label, array of features) pairs. - * @param numIterations Number of iterations of gradient descent to run. - * @param stepSize Step size to be used for each iteration of gradient descent. - * @param miniBatchFraction Fraction of data to be used per iteration. - */ - def train( - input: RDD[(Double, Array[Double])], - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double) - : LogisticRegressionModel = - { - new LogisticRegression(stepSize, miniBatchFraction, numIterations).train(input) - } - - /** - * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. We use the entire data set to update - * the gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. - * @param stepSize Step size to be used for each iteration of Gradient Descent. - * @param numIterations Number of iterations of gradient descent to run. - * @return a LogisticRegressionModel which has the weights and offset from training. - */ - def train( - input: RDD[(Double, Array[Double])], - numIterations: Int, - stepSize: Double) - : LogisticRegressionModel = - { - train(input, numIterations, stepSize, 1.0) - } - - /** - * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using a step size of 1.0. We use the entire data set to update - * the gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. - * @param numIterations Number of iterations of gradient descent to run. - * @return a LogisticRegressionModel which has the weights and offset from training. 
- */ - def train( - input: RDD[(Double, Array[Double])], - numIterations: Int) - : LogisticRegressionModel = - { - train(input, numIterations, 1.0, 1.0) - } - - def main(args: Array[String]) { - if (args.length != 4) { - println("Usage: LogisticRegression <master> <input_dir> <step_size> <niters>") - System.exit(1) - } - val sc = new SparkContext(args(0), "LogisticRegression") - val data = MLUtils.loadLabeledData(sc, args(1)) - val model = LogisticRegression.train(data, args(3).toInt, args(2).toDouble) - - sc.stop() - } -} diff --git a/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala deleted file mode 100644 index 7c7f912b43..0000000000 --- a/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.mllib.regression - -import spark.{Logging, RDD, SparkContext} -import spark.mllib.util.MLUtils - -import org.jblas.DoubleMatrix -import org.jblas.Solve - -import scala.annotation.tailrec -import scala.collection.mutable - -/** - * Ridge Regression from Joseph Gonzalez's implementation in MLBase - */ -class RidgeRegressionModel( - val weights: DoubleMatrix, - val intercept: Double, - val lambdaOpt: Double, - val lambdas: Seq[(Double, Double, DoubleMatrix)]) - extends RegressionModel { - - override def predict(testData: RDD[Array[Double]]): RDD[Double] = { - testData.map { x => - (new DoubleMatrix(1, x.length, x:_*).mmul(this.weights)).get(0) + this.intercept - } - } - - override def predict(testData: Array[Double]): Double = { - (new DoubleMatrix(1, testData.length, testData:_*).mmul(this.weights)).get(0) + this.intercept - } -} - -class RidgeRegression private (var lambdaLow: Double, var lambdaHigh: Double) - extends Logging { - - def this() = this(0.0, 100.0) - - /** - * Set the lower bound on binary search for lambda's. Default is 0. - */ - def setLowLambda(low: Double) = { - this.lambdaLow = low - this - } - - /** - * Set the upper bound on binary search for lambda's. Default is 100.0. 
- */ - def setHighLambda(hi: Double) = { - this.lambdaHigh = hi - this - } - - def train(input: RDD[(Double, Array[Double])]): RidgeRegressionModel = { - val nfeatures: Int = input.take(1)(0)._2.length - val nexamples: Long = input.count() - - val (yMean, xColMean, xColSd) = MLUtils.computeStats(input, nfeatures, nexamples) - - val data = input.map { case(y, features) => - val yNormalized = y - yMean - val featuresMat = new DoubleMatrix(nfeatures, 1, features:_*) - val featuresNormalized = featuresMat.sub(xColMean).divi(xColSd) - (yNormalized, featuresNormalized.toArray) - } - - // Compute XtX - Size of XtX is nfeatures by nfeatures - val XtX: DoubleMatrix = data.map { case (y, features) => - val x = new DoubleMatrix(1, features.length, features:_*) - x.transpose().mmul(x) - }.reduce(_.addi(_)) - - // Compute Xt*y - Size of Xty is nfeatures by 1 - val Xty: DoubleMatrix = data.map { case (y, features) => - new DoubleMatrix(features.length, 1, features:_*).mul(y) - }.reduce(_.addi(_)) - - // Define a function to compute the leave one out cross validation error - // for a single example - def crossValidate(lambda: Double): (Double, Double, DoubleMatrix) = { - // Compute the MLE ridge regression parameter value - - // Ridge Regression parameter = inv(XtX + \lambda*I) * Xty - val XtXlambda = DoubleMatrix.eye(nfeatures).muli(lambda).addi(XtX) - val w = Solve.solveSymmetric(XtXlambda, Xty) - - val invXtX = Solve.solveSymmetric(XtXlambda, DoubleMatrix.eye(nfeatures)) - - // compute the generalized cross validation score - val cverror = data.map { - case (y, features) => - val x = new DoubleMatrix(features.length, 1, features:_*) - val yhat = w.transpose().mmul(x).get(0) - val H_ii = x.transpose().mmul(invXtX).mmul(x).get(0) - val residual = (y - yhat) / (1.0 - H_ii) - residual * residual - }.reduce(_ + _) / nexamples - - (lambda, cverror, w) - } - - // Binary search for the best assignment to lambda. - def binSearch(low: Double, high: Double): Seq[(Double, Double, DoubleMatrix)] = { - val buffer = mutable.ListBuffer.empty[(Double, Double, DoubleMatrix)] - - @tailrec - def loop(low: Double, high: Double): Seq[(Double, Double, DoubleMatrix)] = { - val mid = (high - low) / 2 + low - val lowValue = crossValidate((mid - low) / 2 + low) - val highValue = crossValidate((high - mid) / 2 + mid) - val (newLow, newHigh) = if (lowValue._2 < highValue._2) { - (low, mid + (high-low)/4) - } else { - (mid - (high-low)/4, high) - } - if (newHigh - newLow > 1.0E-7) { - buffer += lowValue += highValue - loop(newLow, newHigh) - } else { - buffer += lowValue += highValue - buffer.result() - } - } - - loop(low, high) - } - - // Actually compute the best lambda - val lambdas = binSearch(lambdaLow, lambdaHigh).sortBy(_._1) - - // Find the best parameter set by taking the lowest cverror. - val (lambdaOpt, cverror, weights) = lambdas.reduce((a, b) => if (a._2 < b._2) a else b) - - // Return the model which contains the solution - val weightsScaled = weights.div(xColSd) - val intercept = yMean - (weights.transpose().mmul(xColMean.div(xColSd)).get(0)) - val model = new RidgeRegressionModel(weightsScaled, intercept, lambdaOpt, lambdas) - - logInfo("RidgeRegression: optimal lambda " + model.lambdaOpt) - logInfo("RidgeRegression: optimal weights " + model.weights) - logInfo("RidgeRegression: optimal intercept " + model.intercept) - logInfo("RidgeRegression: cross-validation error " + cverror) - - model - } -} - -/** - * Top-level methods for calling Ridge Regression. 
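The crossValidate helper above solves the ridge system in closed form, w = (X'X + lambda*I)^-1 X'y, with X'X and X'y accumulated once over the RDD. A minimal jblas sketch of that solve step, assuming XtX and Xty have already been computed as in the deleted train method:

    import org.jblas.{DoubleMatrix, Solve}

    // Closed-form ridge solution for a given lambda; solveSymmetric exploits
    // the symmetry of X'X + lambda*I.
    def ridgeSolve(XtX: DoubleMatrix, Xty: DoubleMatrix, lambda: Double): DoubleMatrix = {
      val regularized = DoubleMatrix.eye(XtX.rows).muli(lambda).addi(XtX)
      Solve.solveSymmetric(regularized, Xty)
    }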
- */ -object RidgeRegression { - - /** - * Train a ridge regression model given an RDD of (response, features) pairs. - * We use the closed form solution to compute the cross-validation score for - * a given lambda. The optimal lambda is computed by performing binary search - * between the provided bounds of lambda. - * - * @param input RDD of (response, array of features) pairs. - * @param lambdaLow lower bound used in binary search for lambda - * @param lambdaHigh upper bound used in binary search for lambda - */ - def train( - input: RDD[(Double, Array[Double])], - lambdaLow: Double, - lambdaHigh: Double) - : RidgeRegressionModel = - { - new RidgeRegression(lambdaLow, lambdaHigh).train(input) - } - - /** - * Train a ridge regression model given an RDD of (response, features) pairs. - * We use the closed form solution to compute the cross-validation score for - * a given lambda. The optimal lambda is computed by performing binary search - * between lambda values of 0 and 100. - * - * @param input RDD of (response, array of features) pairs. - */ - def train(input: RDD[(Double, Array[Double])]) : RidgeRegressionModel = { - train(input, 0.0, 100.0) - } - - def main(args: Array[String]) { - if (args.length != 2) { - println("Usage: RidgeRegression <master> <input_dir>") - System.exit(1) - } - val sc = new SparkContext(args(0), "RidgeRegression") - val data = MLUtils.loadLabeledData(sc, args(1)) - val model = RidgeRegression.train(data, 0, 1000) - sc.stop() - } -} diff --git a/mllib/src/main/scala/spark/mllib/regression/RidgeRegressionGenerator.scala b/mllib/src/main/scala/spark/mllib/regression/RidgeRegressionGenerator.scala deleted file mode 100644 index c2260ae286..0000000000 --- a/mllib/src/main/scala/spark/mllib/regression/RidgeRegressionGenerator.scala +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package spark.mllib.regression - -import scala.util.Random - -import org.jblas.DoubleMatrix - -import spark.{RDD, SparkContext} -import spark.mllib.util.MLUtils - - -object RidgeRegressionGenerator { - - def main(args: Array[String]) { - if (args.length != 5) { - println("Usage: RidgeRegressionGenerator " + - "<master> <output_dir> <num_examples> <num_features> <num_partitions>") - System.exit(1) - } - - val sparkMaster: String = args(0) - val outputPath: String = args(1) - val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 - val nfeatures: Int = if (args.length > 3) args(3).toInt else 100 - val parts: Int = if (args.length > 4) args(4).toInt else 2 - val eps = 10 - - org.jblas.util.Random.seed(42) - val sc = new SparkContext(sparkMaster, "RidgeRegressionGenerator") - - // Random values distributed uniformly in [-0.5, 0.5] - val w = DoubleMatrix.rand(nfeatures, 1).subi(0.5) - w.put(0, 0, 10) - w.put(1, 0, 10) - - val data: RDD[(Double, Array[Double])] = sc.parallelize(0 until parts, parts).flatMap { p => - org.jblas.util.Random.seed(42 + p) - val examplesInPartition = nexamples / parts - - val X = DoubleMatrix.rand(examplesInPartition, nfeatures) - val y = X.mmul(w) - - val rnd = new Random(42 + p) - - val normalValues = Array.fill[Double](examplesInPartition)(rnd.nextGaussian() * eps) - val yObs = new DoubleMatrix(normalValues).addi(y) - - Iterator.tabulate(examplesInPartition) { i => - (yObs.get(i, 0), X.getRow(i).toArray) - } - } - - MLUtils.saveLabeledData(data, outputPath) - sc.stop() - } -} diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java new file mode 100644 index 0000000000..e18e3bc6a8 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.classification; + +import java.io.Serializable; +import java.util.List; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; + +import org.apache.spark.mllib.regression.LabeledPoint; + +public class JavaLogisticRegressionSuite implements Serializable { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaLogisticRegressionSuite"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + System.clearProperty("spark.driver.port"); + } + + int validatePrediction(List<LabeledPoint> validationData, LogisticRegressionModel model) { + int numAccurate = 0; + for (LabeledPoint point: validationData) { + Double prediction = model.predict(point.features()); + if (prediction == point.label()) { + numAccurate++; + } + } + return numAccurate; + } + + @Test + public void runLRUsingConstructor() { + int nPoints = 10000; + double A = 2.0; + double B = -1.5; + + JavaRDD<LabeledPoint> testRDD = sc.parallelize( + LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 42), 2).cache(); + List<LabeledPoint> validationData = + LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 17); + + LogisticRegressionWithSGD lrImpl = new LogisticRegressionWithSGD(); + lrImpl.optimizer().setStepSize(1.0) + .setRegParam(1.0) + .setNumIterations(100); + LogisticRegressionModel model = lrImpl.run(testRDD.rdd()); + + int numAccurate = validatePrediction(validationData, model); + Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); + } + + @Test + public void runLRUsingStaticMethods() { + int nPoints = 10000; + double A = 2.0; + double B = -1.5; + + JavaRDD<LabeledPoint> testRDD = sc.parallelize( + LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 42), 2).cache(); + List<LabeledPoint> validationData = + LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 17); + + LogisticRegressionModel model = LogisticRegressionWithSGD.train( + testRDD.rdd(), 100, 1.0, 1.0); + + int numAccurate = validatePrediction(validationData, model); + Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); + } + +} diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java new file mode 100644 index 0000000000..117e5eaa8b --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaSVMSuite.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.classification; + + +import java.io.Serializable; +import java.util.List; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; + +import org.apache.spark.mllib.regression.LabeledPoint; + +public class JavaSVMSuite implements Serializable { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaSVMSuite"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + System.clearProperty("spark.driver.port"); + } + + int validatePrediction(List<LabeledPoint> validationData, SVMModel model) { + int numAccurate = 0; + for (LabeledPoint point: validationData) { + Double prediction = model.predict(point.features()); + if (prediction == point.label()) { + numAccurate++; + } + } + return numAccurate; + } + + @Test + public void runSVMUsingConstructor() { + int nPoints = 10000; + double A = 2.0; + double[] weights = {-1.5, 1.0}; + + JavaRDD<LabeledPoint> testRDD = sc.parallelize(SVMSuite.generateSVMInputAsList(A, + weights, nPoints, 42), 2).cache(); + List<LabeledPoint> validationData = + SVMSuite.generateSVMInputAsList(A, weights, nPoints, 17); + + SVMWithSGD svmSGDImpl = new SVMWithSGD(); + svmSGDImpl.optimizer().setStepSize(1.0) + .setRegParam(1.0) + .setNumIterations(100); + SVMModel model = svmSGDImpl.run(testRDD.rdd()); + + int numAccurate = validatePrediction(validationData, model); + Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); + } + + @Test + public void runSVMUsingStaticMethods() { + int nPoints = 10000; + double A = 2.0; + double[] weights = {-1.5, 1.0}; + + JavaRDD<LabeledPoint> testRDD = sc.parallelize(SVMSuite.generateSVMInputAsList(A, + weights, nPoints, 42), 2).cache(); + List<LabeledPoint> validationData = + SVMSuite.generateSVMInputAsList(A, weights, nPoints, 17); + + SVMModel model = SVMWithSGD.train(testRDD.rdd(), 100, 1.0, 1.0, 1.0); + + int numAccurate = validatePrediction(validationData, model); + Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); + } + +} diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java new file mode 100644 index 0000000000..32d3934ac1 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.mllib.clustering;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+public class JavaKMeansSuite implements Serializable {
+  private transient JavaSparkContext sc;
+
+  @Before
+  public void setUp() {
+    sc = new JavaSparkContext("local", "JavaKMeans");
+  }
+
+  @After
+  public void tearDown() {
+    sc.stop();
+    sc = null;
+    System.clearProperty("spark.driver.port");
+  }
+
+  // L-infinity distance (max absolute coordinate difference) between two points
+  double distance1(double[] v1, double[] v2) {
+    double distance = 0.0;
+    for (int i = 0; i < v1.length; ++i) {
+      distance = Math.max(distance, Math.abs(v1[i] - v2[i]));
+    }
+    return distance;
+  }
+
+  // Assert that two sets of points are equal, within EPSILON tolerance
+  void assertSetsEqual(double[][] v1, double[][] v2) {
+    double EPSILON = 1e-4;
+    Assert.assertTrue(v1.length == v2.length);
+    for (int i = 0; i < v1.length; ++i) {
+      double minDistance = Double.MAX_VALUE;
+      for (int j = 0; j < v2.length; ++j) {
+        minDistance = Math.min(minDistance, distance1(v1[i], v2[j]));
+      }
+      Assert.assertTrue(minDistance <= EPSILON);
+    }
+
+    for (int i = 0; i < v2.length; ++i) {
+      double minDistance = Double.MAX_VALUE;
+      for (int j = 0; j < v1.length; ++j) {
+        minDistance = Math.min(minDistance, distance1(v2[i], v1[j]));
+      }
+      Assert.assertTrue(minDistance <= EPSILON);
+    }
+  }
+
+  @Test
+  public void runKMeansUsingStaticMethods() {
+    List<double[]> points = new ArrayList<double[]>();
+    points.add(new double[]{1.0, 2.0, 6.0});
+    points.add(new double[]{1.0, 3.0, 0.0});
+    points.add(new double[]{1.0, 4.0, 6.0});
+
+    double[][] expectedCenter = { {1.0, 3.0, 4.0} };
+
+    JavaRDD<double[]> data = sc.parallelize(points, 2);
+    KMeansModel model = KMeans.train(data.rdd(), 1, 1);
+    assertSetsEqual(model.clusterCenters(), expectedCenter);
+
+    model = KMeans.train(data.rdd(), 1, 1, 1, KMeans.RANDOM());
+    assertSetsEqual(model.clusterCenters(), expectedCenter);
+  }
+
+  @Test
+  public void runKMeansUsingConstructor() {
+    List<double[]> points = new ArrayList<double[]>();
+    points.add(new double[]{1.0, 2.0, 6.0});
+    points.add(new double[]{1.0, 3.0, 0.0});
+    points.add(new double[]{1.0, 4.0, 6.0});
+
+    double[][] expectedCenter = { {1.0, 3.0, 4.0} };
+
+    JavaRDD<double[]> data = sc.parallelize(points, 2);
+    KMeansModel model = new KMeans().setK(1).setMaxIterations(5).run(data.rdd());
+    assertSetsEqual(model.clusterCenters(), expectedCenter);
+
+    model = new KMeans().setK(1)
+      .setMaxIterations(1)
+      .setRuns(1)
+      .setInitializationMode(KMeans.RANDOM())
+      .run(data.rdd());
+    assertSetsEqual(model.clusterCenters(), expectedCenter);
+  }
+}
diff --git a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
new file mode 100644
index 0000000000..3323f6cee2
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
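The JavaALSSuite introduced by this new file checks a factorization by multiplying the learned factors back together: with user factors U (users x rank) and product factors P (products x rank), every predicted rating is an entry of U * P'. A one-method jblas sketch of that reconstruction (the method name is illustrative, not Spark API):

    import org.jblas.DoubleMatrix

    // Rebuild the full predicted rating matrix from the two factor matrices;
    // entry (u, p) is the model's prediction for user u and product p.
    def predictedRatings(userFactors: DoubleMatrix, productFactors: DoubleMatrix): DoubleMatrix =
      userFactors.mmul(productFactors.transpose())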
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.recommendation; + +import java.io.Serializable; +import java.util.List; + +import scala.Tuple2; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; + +import org.jblas.DoubleMatrix; + +public class JavaALSSuite implements Serializable { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaALS"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + System.clearProperty("spark.driver.port"); + } + + void validatePrediction(MatrixFactorizationModel model, int users, int products, int features, + DoubleMatrix trueRatings, double matchThreshold) { + DoubleMatrix predictedU = new DoubleMatrix(users, features); + List<scala.Tuple2<Object, double[]>> userFeatures = model.userFeatures().toJavaRDD().collect(); + for (int i = 0; i < features; ++i) { + for (scala.Tuple2<Object, double[]> userFeature : userFeatures) { + predictedU.put((Integer)userFeature._1(), i, userFeature._2()[i]); + } + } + DoubleMatrix predictedP = new DoubleMatrix(products, features); + + List<scala.Tuple2<Object, double[]>> productFeatures = + model.productFeatures().toJavaRDD().collect(); + for (int i = 0; i < features; ++i) { + for (scala.Tuple2<Object, double[]> productFeature : productFeatures) { + predictedP.put((Integer)productFeature._1(), i, productFeature._2()[i]); + } + } + + DoubleMatrix predictedRatings = predictedU.mmul(predictedP.transpose()); + + for (int u = 0; u < users; ++u) { + for (int p = 0; p < products; ++p) { + double prediction = predictedRatings.get(u, p); + double correct = trueRatings.get(u, p); + Assert.assertTrue(Math.abs(prediction - correct) < matchThreshold); + } + } + } + + @Test + public void runALSUsingStaticMethods() { + int features = 1; + int iterations = 15; + int users = 10; + int products = 10; + scala.Tuple2<List<Rating>, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList( + users, products, features, 0.7); + + JavaRDD<Rating> data = sc.parallelize(testData._1()); + MatrixFactorizationModel model = ALS.train(data.rdd(), features, iterations); + validatePrediction(model, users, products, features, testData._2(), 0.3); + } + + @Test + public void runALSUsingConstructor() { + int features = 2; + int iterations = 15; + int users = 20; + int products = 30; + scala.Tuple2<List<Rating>, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList( + users, products, features, 0.7); + + JavaRDD<Rating> data = sc.parallelize(testData._1()); + + MatrixFactorizationModel model = new ALS().setRank(features) + .setIterations(iterations) + .run(data.rdd()); + validatePrediction(model, users, products, features, testData._2(), 0.3); + } +} diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java new file mode 100644 index 0000000000..f44b25cd44 --- /dev/null +++ 
b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.regression; + +import java.io.Serializable; +import java.util.List; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.util.LinearDataGenerator; + +public class JavaLassoSuite implements Serializable { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaLassoSuite"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + System.clearProperty("spark.driver.port"); + } + + int validatePrediction(List<LabeledPoint> validationData, LassoModel model) { + int numAccurate = 0; + for (LabeledPoint point: validationData) { + Double prediction = model.predict(point.features()); + // A prediction is off if the prediction is more than 0.5 away from expected value. 
+ if (Math.abs(prediction - point.label()) <= 0.5) { + numAccurate++; + } + } + return numAccurate; + } + + @Test + public void runLassoUsingConstructor() { + int nPoints = 10000; + double A = 2.0; + double[] weights = {-1.5, 1.0e-2}; + + JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A, + weights, nPoints, 42, 0.1), 2).cache(); + List<LabeledPoint> validationData = + LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1); + + LassoWithSGD lassoSGDImpl = new LassoWithSGD(); + lassoSGDImpl.optimizer().setStepSize(1.0) + .setRegParam(0.01) + .setNumIterations(20); + LassoModel model = lassoSGDImpl.run(testRDD.rdd()); + + int numAccurate = validatePrediction(validationData, model); + Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); + } + + @Test + public void runLassoUsingStaticMethods() { + int nPoints = 10000; + double A = 2.0; + double[] weights = {-1.5, 1.0e-2}; + + JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A, + weights, nPoints, 42, 0.1), 2).cache(); + List<LabeledPoint> validationData = + LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1); + + LassoModel model = LassoWithSGD.train(testRDD.rdd(), 100, 1.0, 0.01, 1.0); + + int numAccurate = validatePrediction(validationData, model); + Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); + } + +} diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java new file mode 100644 index 0000000000..5a4410a632 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.regression; + +import java.io.Serializable; +import java.util.List; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.util.LinearDataGenerator; + +public class JavaLinearRegressionSuite implements Serializable { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaLinearRegressionSuite"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + System.clearProperty("spark.driver.port"); + } + + int validatePrediction(List<LabeledPoint> validationData, LinearRegressionModel model) { + int numAccurate = 0; + for (LabeledPoint point: validationData) { + Double prediction = model.predict(point.features()); + // A prediction is off if the prediction is more than 0.5 away from expected value. + if (Math.abs(prediction - point.label()) <= 0.5) { + numAccurate++; + } + } + return numAccurate; + } + + @Test + public void runLinearRegressionUsingConstructor() { + int nPoints = 100; + double A = 3.0; + double[] weights = {10, 10}; + + JavaRDD<LabeledPoint> testRDD = sc.parallelize( + LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache(); + List<LabeledPoint> validationData = + LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1); + + LinearRegressionWithSGD linSGDImpl = new LinearRegressionWithSGD(); + LinearRegressionModel model = linSGDImpl.run(testRDD.rdd()); + + int numAccurate = validatePrediction(validationData, model); + Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); + } + + @Test + public void runLinearRegressionUsingStaticMethods() { + int nPoints = 100; + double A = 3.0; + double[] weights = {10, 10}; + + JavaRDD<LabeledPoint> testRDD = sc.parallelize( + LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache(); + List<LabeledPoint> validationData = + LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1); + + LinearRegressionModel model = LinearRegressionWithSGD.train(testRDD.rdd(), 100); + + int numAccurate = validatePrediction(validationData, model); + Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); + } + +} diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java new file mode 100644 index 0000000000..2fdd5fc8fd --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.regression; + +import java.io.Serializable; +import java.util.List; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.jblas.DoubleMatrix; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.util.LinearDataGenerator; + +public class JavaRidgeRegressionSuite implements Serializable { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaRidgeRegressionSuite"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + System.clearProperty("spark.driver.port"); + } + + double predictionError(List<LabeledPoint> validationData, RidgeRegressionModel model) { + double errorSum = 0; + for (LabeledPoint point: validationData) { + Double prediction = model.predict(point.features()); + errorSum += (prediction - point.label()) * (prediction - point.label()); + } + return errorSum / validationData.size(); + } + + List<LabeledPoint> generateRidgeData(int numPoints, int nfeatures, double eps) { + org.jblas.util.Random.seed(42); + // Pick weights as random values distributed uniformly in [-0.5, 0.5] + DoubleMatrix w = DoubleMatrix.rand(nfeatures, 1).subi(0.5); + // Set first two weights to eps + w.put(0, 0, eps); + w.put(1, 0, eps); + return LinearDataGenerator.generateLinearInputAsList(0.0, w.data, numPoints, 42, eps); + } + + @Test + public void runRidgeRegressionUsingConstructor() { + int nexamples = 200; + int nfeatures = 20; + double eps = 10.0; + List<LabeledPoint> data = generateRidgeData(2*nexamples, nfeatures, eps); + + JavaRDD<LabeledPoint> testRDD = sc.parallelize(data.subList(0, nexamples)); + List<LabeledPoint> validationData = data.subList(nexamples, 2*nexamples); + + RidgeRegressionWithSGD ridgeSGDImpl = new RidgeRegressionWithSGD(); + ridgeSGDImpl.optimizer().setStepSize(1.0) + .setRegParam(0.0) + .setNumIterations(200); + RidgeRegressionModel model = ridgeSGDImpl.run(testRDD.rdd()); + double unRegularizedErr = predictionError(validationData, model); + + ridgeSGDImpl.optimizer().setRegParam(0.1); + model = ridgeSGDImpl.run(testRDD.rdd()); + double regularizedErr = predictionError(validationData, model); + + Assert.assertTrue(regularizedErr < unRegularizedErr); + } + + @Test + public void runRidgeRegressionUsingStaticMethods() { + int nexamples = 200; + int nfeatures = 20; + double eps = 10.0; + List<LabeledPoint> data = generateRidgeData(2*nexamples, nfeatures, eps); + + JavaRDD<LabeledPoint> testRDD = sc.parallelize(data.subList(0, nexamples)); + List<LabeledPoint> validationData = data.subList(nexamples, 2*nexamples); + + RidgeRegressionModel model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.0); + double unRegularizedErr = predictionError(validationData, model); + + model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.1); + double regularizedErr = predictionError(validationData, model); + + Assert.assertTrue(regularizedErr < unRegularizedErr); + } +} diff --git a/mllib/src/test/resources/log4j.properties b/mllib/src/test/resources/log4j.properties index a112e0b506..4265ba6e5d 100644 --- a/mllib/src/test/resources/log4j.properties +++ b/mllib/src/test/resources/log4j.properties @@ -19,7 +19,7 @@ log4j.rootCategory=INFO, file log4j.appender.file=org.apache.log4j.FileAppender log4j.appender.file.append=false -log4j.appender.file.file=ml/target/unit-tests.log +log4j.appender.file.file=mllib/target/unit-tests.log 
log4j.appender.file.layout=org.apache.log4j.PatternLayout log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala new file mode 100644 index 0000000000..34c67294e9 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.classification + +import scala.util.Random +import scala.collection.JavaConversions._ + +import org.scalatest.BeforeAndAfterAll +import org.scalatest.FunSuite +import org.scalatest.matchers.ShouldMatchers + +import org.apache.spark.SparkContext +import org.apache.spark.mllib.regression._ + +object LogisticRegressionSuite { + + def generateLogisticInputAsList( + offset: Double, + scale: Double, + nPoints: Int, + seed: Int): java.util.List[LabeledPoint] = { + seqAsJavaList(generateLogisticInput(offset, scale, nPoints, seed)) + } + + // Generate input of the form Y = logistic(offset + scale*X) + def generateLogisticInput( + offset: Double, + scale: Double, + nPoints: Int, + seed: Int): Seq[LabeledPoint] = { + val rnd = new Random(seed) + val x1 = Array.fill[Double](nPoints)(rnd.nextGaussian()) + + // NOTE: if U is uniform[0, 1] then ln(u) - ln(1-u) is Logistic(0,1) + val unifRand = new scala.util.Random(45) + val rLogis = (0 until nPoints).map { i => + val u = unifRand.nextDouble() + math.log(u) - math.log(1.0-u) + } + + // y <- A + B*x + rLogis() + // y <- as.numeric(y > 0) + val y: Seq[Int] = (0 until nPoints).map { i => + val yVal = offset + scale * x1(i) + rLogis(i) + if (yVal > 0) 1 else 0 + } + + val testData = (0 until nPoints).map(i => LabeledPoint(y(i), Array(x1(i)))) + testData + } + +} + +class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll with ShouldMatchers { + @transient private var sc: SparkContext = _ + + override def beforeAll() { + sc = new SparkContext("local", "test") + } + + + override def afterAll() { + sc.stop() + System.clearProperty("spark.driver.port") + } + + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { + val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) => + (prediction != expected.label) + }.size + // At least 83% of the predictions should be on. 
+ ((input.length - numOffPredictions).toDouble / input.length) should be > 0.83 + } + + // Test if we can correctly learn A, B where Y = logistic(A + B*X) + test("logistic regression") { + val nPoints = 10000 + val A = 2.0 + val B = -1.5 + + val testData = LogisticRegressionSuite.generateLogisticInput(A, B, nPoints, 42) + + val testRDD = sc.parallelize(testData, 2) + testRDD.cache() + val lr = new LogisticRegressionWithSGD() + lr.optimizer.setStepSize(10.0).setNumIterations(20) + + val model = lr.run(testRDD) + + // Test the weights + val weight0 = model.weights(0) + assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]") + assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]") + + val validationData = LogisticRegressionSuite.generateLogisticInput(A, B, nPoints, 17) + val validationRDD = sc.parallelize(validationData, 2) + // Test prediction on RDD. + validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) + + // Test prediction on Array. + validatePrediction(validationData.map(row => model.predict(row.features)), validationData) + } + + test("logistic regression with initial weights") { + val nPoints = 10000 + val A = 2.0 + val B = -1.5 + + val testData = LogisticRegressionSuite.generateLogisticInput(A, B, nPoints, 42) + + val initialB = -1.0 + val initialWeights = Array(initialB) + + val testRDD = sc.parallelize(testData, 2) + testRDD.cache() + + // Use half as many iterations as the previous test. + val lr = new LogisticRegressionWithSGD() + lr.optimizer.setStepSize(10.0).setNumIterations(10) + + val model = lr.run(testRDD, initialWeights) + + val weight0 = model.weights(0) + assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]") + assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]") + + val validationData = LogisticRegressionSuite.generateLogisticInput(A, B, nPoints, 17) + val validationRDD = sc.parallelize(validationData, 2) + // Test prediction on RDD. + validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) + + // Test prediction on Array. + validatePrediction(validationData.map(row => model.predict(row.features)), validationData) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala new file mode 100644 index 0000000000..6a957e3ddc --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.classification + +import scala.util.Random +import scala.math.signum +import scala.collection.JavaConversions._ + +import org.scalatest.BeforeAndAfterAll +import org.scalatest.FunSuite + +import org.jblas.DoubleMatrix + +import org.apache.spark.{SparkException, SparkContext} +import org.apache.spark.mllib.regression._ + +object SVMSuite { + + def generateSVMInputAsList( + intercept: Double, + weights: Array[Double], + nPoints: Int, + seed: Int): java.util.List[LabeledPoint] = { + seqAsJavaList(generateSVMInput(intercept, weights, nPoints, seed)) + } + + // Generate noisy input of the form Y = signum(x.dot(weights) + intercept + noise) + def generateSVMInput( + intercept: Double, + weights: Array[Double], + nPoints: Int, + seed: Int): Seq[LabeledPoint] = { + val rnd = new Random(seed) + val weightsMat = new DoubleMatrix(1, weights.length, weights:_*) + val x = Array.fill[Array[Double]](nPoints)( + Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0)) + val y = x.map { xi => + val yD = (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) + + intercept + 0.01 * rnd.nextGaussian() + if (yD < 0) 0.0 else 1.0 + } + y.zip(x).map(p => LabeledPoint(p._1, p._2)) + } + +} + +class SVMSuite extends FunSuite with BeforeAndAfterAll { + @transient private var sc: SparkContext = _ + + override def beforeAll() { + sc = new SparkContext("local", "test") + } + + override def afterAll() { + sc.stop() + System.clearProperty("spark.driver.port") + } + + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { + val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) => + (prediction != expected.label) + }.size + // At least 80% of the predictions should be on. + assert(numOffPredictions < input.length / 5) + } + + + test("SVM using local random SGD") { + val nPoints = 10000 + + // NOTE: Intercept should be small for generating equal 0s and 1s + val A = 0.01 + val B = -1.5 + val C = 1.0 + + val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42) + + val testRDD = sc.parallelize(testData, 2) + testRDD.cache() + + val svm = new SVMWithSGD() + svm.optimizer.setStepSize(1.0).setRegParam(1.0).setNumIterations(100) + + val model = svm.run(testRDD) + + val validationData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 17) + val validationRDD = sc.parallelize(validationData, 2) + + // Test prediction on RDD. + validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) + + // Test prediction on Array. + validatePrediction(validationData.map(row => model.predict(row.features)), validationData) + } + + test("SVM local random SGD with initial weights") { + val nPoints = 10000 + + // NOTE: Intercept should be small for generating equal 0s and 1s + val A = 0.01 + val B = -1.5 + val C = 1.0 + + val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42) + + val initialB = -1.0 + val initialC = -1.0 + val initialWeights = Array(initialB,initialC) + + val testRDD = sc.parallelize(testData, 2) + testRDD.cache() + + val svm = new SVMWithSGD() + svm.optimizer.setStepSize(1.0).setRegParam(1.0).setNumIterations(100) + + val model = svm.run(testRDD, initialWeights) + + val validationData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 17) + val validationRDD = sc.parallelize(validationData,2) + + // Test prediction on RDD. 
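// Aside: generateSVMInput above labels each point by thresholding a noisy linear score.
// A minimal jblas sketch of that rule for one point (wRow, xRow and their values are
// illustrative, not part of this patch):
import org.jblas.DoubleMatrix
val wRow = new DoubleMatrix(1, 2, -1.5, 1.0)  // weights as a 1 x 2 row vector
val xRow = new DoubleMatrix(1, 2, 0.3, -0.7)  // one point sampled from [-1, 1]^2
val score = xRow.dot(wRow) + 0.01             // x . w + intercept (noise term omitted)
val label = if (score < 0) 0.0 else 1.0       // same thresholding as the generator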
+ validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) + + // Test prediction on Array. + validatePrediction(validationData.map(row => model.predict(row.features)), validationData) + } + + test("SVM with invalid labels") { + val nPoints = 10000 + + // NOTE: Intercept should be small for generating equal 0s and 1s + val A = 0.01 + val B = -1.5 + val C = 1.0 + + val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42) + val testRDD = sc.parallelize(testData, 2) + + val testRDDInvalid = testRDD.map { lp => + if (lp.label == 0.0) { + LabeledPoint(-1.0, lp.features) + } else { + lp + } + } + + intercept[SparkException] { + val model = SVMWithSGD.train(testRDDInvalid, 100) + } + + // Turning off data validation should not throw an exception + val noValidationModel = new SVMWithSGD().setValidateData(false).run(testRDDInvalid) + } +} diff --git a/mllib/src/test/scala/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala index bebade9afb..94245f6027 100644 --- a/mllib/src/test/scala/spark/mllib/clustering/KMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala @@ -15,21 +15,24 @@ * limitations under the License. */ -package spark.mllib.clustering +package org.apache.spark.mllib.clustering import scala.util.Random import org.scalatest.BeforeAndAfterAll import org.scalatest.FunSuite -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ import org.jblas._ - class KMeansSuite extends FunSuite with BeforeAndAfterAll { - val sc = new SparkContext("local", "test") + @transient private var sc: SparkContext = _ + + override def beforeAll() { + sc = new SparkContext("local", "test") + } override def afterAll() { sc.stop() diff --git a/mllib/src/test/scala/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala index f98590b8d9..347ef238f4 100644 --- a/mllib/src/test/scala/spark/mllib/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala @@ -15,21 +15,62 @@ * limitations under the License. 
*/ -package spark.mllib.recommendation +package org.apache.spark.mllib.recommendation +import scala.collection.JavaConversions._ import scala.util.Random import org.scalatest.BeforeAndAfterAll import org.scalatest.FunSuite -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ import org.jblas._ +object ALSSuite { + + def generateRatingsAsJavaList( + users: Int, + products: Int, + features: Int, + samplingRate: Double): (java.util.List[Rating], DoubleMatrix) = { + val (sampledRatings, trueRatings) = generateRatings(users, products, features, samplingRate) + (seqAsJavaList(sampledRatings), trueRatings) + } + + def generateRatings( + users: Int, + products: Int, + features: Int, + samplingRate: Double): (Seq[Rating], DoubleMatrix) = { + val rand = new Random(42) + + // Create a random matrix with uniform values from -1 to 1 + def randomMatrix(m: Int, n: Int) = + new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): _*) + + val userMatrix = randomMatrix(users, features) + val productMatrix = randomMatrix(features, products) + val trueRatings = userMatrix.mmul(productMatrix) + + val sampledRatings = { + for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < samplingRate) + yield Rating(u, p, trueRatings.get(u, p)) + } + + (sampledRatings, trueRatings) + } + +} + class ALSSuite extends FunSuite with BeforeAndAfterAll { - val sc = new SparkContext("local", "test") + @transient private var sc: SparkContext = _ + + override def beforeAll() { + sc = new SparkContext("local", "test") + } override def afterAll() { sc.stop() @@ -57,21 +98,8 @@ class ALSSuite extends FunSuite with BeforeAndAfterAll { def testALS(users: Int, products: Int, features: Int, iterations: Int, samplingRate: Double, matchThreshold: Double) { - val rand = new Random(42) - - // Create a random matrix with uniform values from -1 to 1 - def randomMatrix(m: Int, n: Int) = - new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): _*) - - val userMatrix = randomMatrix(users, features) - val productMatrix = randomMatrix(features, products) - val trueRatings = userMatrix.mmul(productMatrix) - - val sampledRatings = { - for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < samplingRate) - yield (u, p, trueRatings.get(u, p)) - } - + val (sampledRatings, trueRatings) = ALSSuite.generateRatings(users, products, + features, samplingRate) val model = ALS.train(sc.parallelize(sampledRatings), features, iterations) val predictedU = new DoubleMatrix(users, features) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala new file mode 100644 index 0000000000..db980c7bae --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.regression + +import scala.collection.JavaConversions._ +import scala.util.Random + +import org.scalatest.BeforeAndAfterAll +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext +import org.apache.spark.mllib.util.LinearDataGenerator + + +class LassoSuite extends FunSuite with BeforeAndAfterAll { + @transient private var sc: SparkContext = _ + + override def beforeAll() { + sc = new SparkContext("local", "test") + } + + + override def afterAll() { + sc.stop() + System.clearProperty("spark.driver.port") + } + + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { + val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) => + // A prediction is off if the prediction is more than 0.5 away from expected value. + math.abs(prediction - expected.label) > 0.5 + }.size + // At least 80% of the predictions should be on. + assert(numOffPredictions < input.length / 5) + } + + test("Lasso local random SGD") { + val nPoints = 10000 + + val A = 2.0 + val B = -1.5 + val C = 1.0e-2 + + val testData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 42) + + val testRDD = sc.parallelize(testData, 2) + testRDD.cache() + + val ls = new LassoWithSGD() + ls.optimizer.setStepSize(1.0).setRegParam(0.01).setNumIterations(20) + + val model = ls.run(testRDD) + + val weight0 = model.weights(0) + val weight1 = model.weights(1) + assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]") + assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]") + assert(weight1 >= -1.0e-3 && weight1 <= 1.0e-3, weight1 + " not in [-0.001, 0.001]") + + val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17) + val validationRDD = sc.parallelize(validationData, 2) + + // Test prediction on RDD. + validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) + + // Test prediction on Array. 
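// Aside: the tight [-0.001, 0.001] bound on weight1 above is the L1 penalty at work:
// Lasso shrinks the small true coefficient C = 1.0e-2 essentially to zero. L1 updates
// are commonly implemented with soft-thresholding; a minimal sketch (softThreshold is
// a hypothetical helper, not part of this patch):
def softThreshold(w: Double, shrinkage: Double): Double =
  math.signum(w) * math.max(0.0, math.abs(w) - shrinkage)
// e.g. softThreshold(0.012, 0.01) == 0.002, while a large weight such as -1.5 is
// barely moved.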
+ validatePrediction(validationData.map(row => model.predict(row.features)), validationData) + } + + test("Lasso local random SGD with initial weights") { + val nPoints = 10000 + + val A = 2.0 + val B = -1.5 + val C = 1.0e-2 + + val testData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 42) + + val initialB = -1.0 + val initialC = -1.0 + val initialWeights = Array(initialB,initialC) + + val testRDD = sc.parallelize(testData, 2) + testRDD.cache() + + val ls = new LassoWithSGD() + ls.optimizer.setStepSize(1.0).setRegParam(0.01).setNumIterations(20) + + val model = ls.run(testRDD, initialWeights) + + val weight0 = model.weights(0) + val weight1 = model.weights(1) + assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]") + assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]") + assert(weight1 >= -1.0e-3 && weight1 <= 1.0e-3, weight1 + " not in [-0.001, 0.001]") + + val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17) + val validationRDD = sc.parallelize(validationData,2) + + // Test prediction on RDD. + validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) + + // Test prediction on Array. + validatePrediction(validationData.map(row => model.predict(row.features)), validationData) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala new file mode 100644 index 0000000000..ef500c704c --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.regression + +import org.scalatest.BeforeAndAfterAll +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.mllib.util.LinearDataGenerator + +class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll { + @transient private var sc: SparkContext = _ + + override def beforeAll() { + sc = new SparkContext("local", "test") + } + + override def afterAll() { + sc.stop() + System.clearProperty("spark.driver.port") + } + + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { + val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) => + // A prediction is off if the prediction is more than 0.5 away from expected value. + math.abs(prediction - expected.label) > 0.5 + }.size + // At least 80% of the predictions should be on. 
+ assert(numOffPredictions < input.length / 5) + } + + // Test if we can correctly learn Y = 3 + 10*X1 + 10*X2 + test("linear regression") { + val testRDD = sc.parallelize(LinearDataGenerator.generateLinearInput( + 3.0, Array(10.0, 10.0), 100, 42), 2).cache() + val linReg = new LinearRegressionWithSGD() + linReg.optimizer.setNumIterations(1000).setStepSize(1.0) + + val model = linReg.run(testRDD) + + assert(model.intercept >= 2.5 && model.intercept <= 3.5) + assert(model.weights.length === 2) + assert(model.weights(0) >= 9.0 && model.weights(0) <= 11.0) + assert(model.weights(1) >= 9.0 && model.weights(1) <= 11.0) + + val validationData = LinearDataGenerator.generateLinearInput( + 3.0, Array(10.0, 10.0), 100, 17) + val validationRDD = sc.parallelize(validationData, 2).cache() + + // Test prediction on RDD. + validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) + + // Test prediction on Array. + validatePrediction(validationData.map(row => model.predict(row.features)), validationData) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala new file mode 100644 index 0000000000..c18092d804 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.regression + +import scala.collection.JavaConversions._ +import scala.util.Random + +import org.jblas.DoubleMatrix +import org.scalatest.BeforeAndAfterAll +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.mllib.util.LinearDataGenerator + +class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll { + @transient private var sc: SparkContext = _ + + override def beforeAll() { + sc = new SparkContext("local", "test") + } + + override def afterAll() { + sc.stop() + System.clearProperty("spark.driver.port") + } + + def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]) = { + predictions.zip(input).map { case (prediction, expected) => + (prediction - expected.label) * (prediction - expected.label) + }.reduceLeft(_ + _) / predictions.size + } + + test("regularization with skewed weights") { + val nexamples = 200 + val nfeatures = 20 + val eps = 10 + + org.jblas.util.Random.seed(42) + // Pick weights as random values distributed uniformly in [-0.5, 0.5] + val w = DoubleMatrix.rand(nfeatures, 1).subi(0.5) + // Set first two weights to eps + w.put(0, 0, eps) + w.put(1, 0, eps) + + // Use half of data for training and other half for validation + val data = LinearDataGenerator.generateLinearInput(3.0, w.toArray, 2*nexamples, 42, eps) + val testData = data.take(nexamples) + val validationData = data.takeRight(nexamples) + + val testRDD = sc.parallelize(testData, 2).cache() + val validationRDD = sc.parallelize(validationData, 2).cache() + + // First run without regularization. + val linearReg = new LinearRegressionWithSGD() + linearReg.optimizer.setNumIterations(200) + .setStepSize(1.0) + + val linearModel = linearReg.run(testRDD) + val linearErr = predictionError( + linearModel.predict(validationRDD.map(_.features)).collect(), validationData) + + val ridgeReg = new RidgeRegressionWithSGD() + ridgeReg.optimizer.setNumIterations(200) + .setRegParam(0.1) + .setStepSize(1.0) + val ridgeModel = ridgeReg.run(testRDD) + val ridgeErr = predictionError( + ridgeModel.predict(validationRDD.map(_.features)).collect(), validationData) + + // Ridge CV-error should be lower than linear regression + assert(ridgeErr < linearErr, + "ridgeError (" + ridgeErr + ") was not less than linearError(" + linearErr + ")") + } +} diff --git a/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala b/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala deleted file mode 100644 index bc9bfd054f..0000000000 --- a/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package spark.mllib.regression - -import scala.util.Random - -import org.scalatest.BeforeAndAfterAll -import org.scalatest.FunSuite - -import spark.SparkContext -import spark.SparkContext._ - - -class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll { - val sc = new SparkContext("local", "test") - - override def afterAll() { - sc.stop() - System.clearProperty("spark.driver.port") - } - - // Test if we can correctly learn A, B where Y = logistic(A + B*X) - test("logistic regression") { - val nPoints = 10000 - val rnd = new Random(42) - - val x1 = Array.fill[Double](nPoints)(rnd.nextGaussian()) - - val A = 2.0 - val B = -1.5 - - // NOTE: if U is uniform[0, 1] then ln(u) - ln(1-u) is Logistic(0,1) - val unifRand = new scala.util.Random(45) - val rLogis = (0 until nPoints).map { i => - val u = unifRand.nextDouble() - math.log(u) - math.log(1.0-u) - } - - // y <- A + B*x + rlogis(100) - // y <- as.numeric(y > 0) - val y = (0 until nPoints).map { i => - val yVal = A + B * x1(i) + rLogis(i) - if (yVal > 0) 1.0 else 0.0 - } - - val testData = (0 until nPoints).map(i => (y(i).toDouble, Array(x1(i)))).toArray - - val testRDD = sc.parallelize(testData, 2) - testRDD.cache() - val lr = new LogisticRegression().setStepSize(10.0) - .setNumIterations(20) - - val model = lr.train(testRDD) - - val weight0 = model.weights.get(0) - assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]") - assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]") - } -} diff --git a/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala deleted file mode 100644 index 3c588c6162..0000000000 --- a/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.mllib.regression - -import scala.util.Random - -import org.scalatest.BeforeAndAfterAll -import org.scalatest.FunSuite - -import spark.SparkContext -import spark.SparkContext._ - - -class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll { - val sc = new SparkContext("local", "test") - - override def afterAll() { - sc.stop() - System.clearProperty("spark.driver.port") - } - - // Test if we can correctly learn Y = 3 + X1 + X2 when - // X1 and X2 are collinear. 
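// Aside: the collinearity this removed test exercised is exactly the failure mode the
// ridge penalty guards against. With nearly collinear columns, X'X is close to
// singular, but the closed form w = (X'X + lambda*I)^-1 X'y stays well conditioned. A
// minimal jblas sketch (X, y and lambda are illustrative values, not part of this
// patch):
import org.jblas.{DoubleMatrix, Solve}
val X = new DoubleMatrix(Array(Array(1.0, 1.01), Array(2.0, 2.02), Array(3.0, 3.01)))
val y = new DoubleMatrix(Array(5.01, 7.02, 9.01))      // response values for the sketch
val lambda = 0.1
val gram = X.transpose.mmul(X).add(DoubleMatrix.eye(2).mul(lambda))
val wRidge = Solve.solve(gram, X.transpose.mmul(y))    // solvable despite collinearity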
- test("multi-collinear variables") { - val rnd = new Random(43) - val x1 = Array.fill[Double](20)(rnd.nextGaussian()) - - // Pick a mean close to mean of x1 - val rnd1 = new Random(42) //new NormalDistribution(0.1, 0.01) - val x2 = Array.fill[Double](20)(0.1 + rnd1.nextGaussian() * 0.01) - - val xMat = (0 until 20).map(i => Array(x1(i), x2(i))).toArray - - val y = xMat.map(i => 3 + i(0) + i(1)) - val testData = (0 until 20).map(i => (y(i), xMat(i))).toArray - - val testRDD = sc.parallelize(testData, 2) - testRDD.cache() - val ridgeReg = new RidgeRegression().setLowLambda(0) - .setHighLambda(10) - - val model = ridgeReg.train(testRDD) - - assert(model.intercept >= 2.9 && model.intercept <= 3.1) - assert(model.weights.length === 2) - assert(model.weights.get(0) >= 0.9 && model.weights.get(0) <= 1.1) - assert(model.weights.get(1) >= 0.9 && model.weights.get(1) <= 1.1) - } -} diff --git a/pagerank_data.txt b/pagerank_data.txt new file mode 100644 index 0000000000..95755ab8f5 --- /dev/null +++ b/pagerank_data.txt @@ -0,0 +1,6 @@ +1 2 +1 3 +1 4 +2 1 +3 1 +4 1 @@ -18,22 +18,22 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-parent</artifactId> <version>0.8.0-SNAPSHOT</version> <packaging>pom</packaging> <name>Spark Project Parent POM</name> - <url>http://spark-project.org/</url> + <url>http://spark.incubator.apache.org/</url> <licenses> <license> - <name>BSD License</name> - <url>https://github.com/mesos/spark/blob/master/LICENSE</url> + <name>Apache 2.0 License</name> + <url>http://www.apache.org/licenses/LICENSE-2.0.html</url> <distribution>repo</distribution> </license> </licenses> <scm> - <connection>scm:git:git@github.com:mesos/spark.git</connection> - <url>scm:git:git@github.com:mesos/spark.git</url> + <connection>scm:git:git@github.com:apache/incubator-spark.git</connection> + <url>scm:git:git@github.com:apache/incubator-spark.git</url> </scm> <developers> <developer> @@ -41,12 +41,12 @@ <name>Matei Zaharia</name> <email>matei.zaharia@gmail.com</email> <url>http://www.cs.berkeley.edu/~matei</url> - <organization>U.C. 
Berkeley Computer Science</organization> - <organizationUrl>http://www.cs.berkeley.edu/</organizationUrl> + <organization>Apache Software Foundation</organization> + <organizationUrl>http://spark.incubator.apache.org</organizationUrl> </developer> </developers> <issueManagement> - <system>github</system> + <system>JIRA</system> <url>https://spark-project.atlassian.net/browse/SPARK</url> </issueManagement> @@ -58,9 +58,11 @@ <module>core</module> <module>bagel</module> <module>examples</module> + <module>mllib</module> + <module>tools</module> <module>streaming</module> <module>repl</module> - <module>repl-bin</module> + <module>assembly</module> </modules> <properties> @@ -69,11 +71,12 @@ <java.version>1.5</java.version> <scala.version>2.9.3</scala.version> - <mesos.version>0.9.0-incubating</mesos.version> - <akka.version>2.0.3</akka.version> + <mesos.version>0.12.1</mesos.version> + <akka.version>2.0.5</akka.version> <slf4j.version>1.7.2</slf4j.version> - <cdh.version>4.1.2</cdh.version> <log4j.version>1.2.17</log4j.version> + <hadoop.version>1.0.4</hadoop.version> + <hbase.version>0.94.6</hbase.version> <PermGen>64m</PermGen> <MaxPermGen>512m</MaxPermGen> @@ -155,12 +158,17 @@ <dependency> <groupId>org.eclipse.jetty</groupId> <artifactId>jetty-server</artifactId> - <version>7.5.3.v20111011</version> + <version>7.6.8.v20121106</version> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> - <version>11.0.1</version> + <version>14.0.1</version> + </dependency> + <dependency> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + <version>1.3.9</version> </dependency> <dependency> <groupId>org.slf4j</groupId> @@ -183,6 +191,11 @@ <version>0.8.4</version> </dependency> <dependency> + <groupId>org.xerial.snappy</groupId> + <artifactId>snappy-java</artifactId> + <version>1.0.5</version> + </dependency> + <dependency> <groupId>org.ow2.asm</groupId> <artifactId>asm</artifactId> <version>4.0</version> @@ -193,9 +206,14 @@ <version>2.4.1</version> </dependency> <dependency> - <groupId>de.javakaffee</groupId> - <artifactId>kryo-serializers</artifactId> - <version>0.22</version> + <groupId>com.twitter</groupId> + <artifactId>chill_2.9.3</artifactId> + <version>0.3.1</version> + </dependency> + <dependency> + <groupId>com.twitter</groupId> + <artifactId>chill-java</artifactId> + <version>0.3.1</version> </dependency> <dependency> <groupId>com.typesafe.akka</groupId> @@ -249,6 +267,21 @@ <version>2.5</version> </dependency> <dependency> + <groupId>com.codahale.metrics</groupId> + <artifactId>metrics-core</artifactId> + <version>3.0.0</version> + </dependency> + <dependency> + <groupId>com.codahale.metrics</groupId> + <artifactId>metrics-jvm</artifactId> + <version>3.0.0</version> + </dependency> + <dependency> + <groupId>com.codahale.metrics</groupId> + <artifactId>metrics-json</artifactId> + <version>3.0.0</version> + </dependency> + <dependency> <groupId>org.scala-lang</groupId> <artifactId>scala-compiler</artifactId> <version>${scala.version}</version> @@ -296,9 +329,57 @@ <dependency> <groupId>com.novocode</groupId> <artifactId>junit-interface</artifactId> - <version>0.8</version> + <version>0.9</version> <scope>test</scope> </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + 
<artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> + </dependency> + <!-- Specify Avro version because Kafka also has it as a dependency --> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <version>1.7.4</version> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro-ipc</artifactId> + <version>1.7.4</version> + <exclusions> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + </exclusions> + </dependency> </dependencies> </dependencyManagement> @@ -367,6 +448,7 @@ <args> <arg>-unchecked</arg> <arg>-optimise</arg> + <arg>-deprecation</arg> </args> <jvmArgs> <jvmArg>-Xms64m</jvmArg> @@ -411,7 +493,7 @@ <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory> <junitxml>.</junitxml> <filereports>${project.build.directory}/SparkTestSuite.txt</filereports> - <argLine>-Xms64m -Xmx1024m</argLine> + <argLine>-Xms64m -Xmx3g</argLine> <stderr/> </configuration> <executions> @@ -505,68 +587,18 @@ <profiles> <profile> - <id>hadoop1</id> - <properties> - <hadoop.major.version>1</hadoop.major.version> - </properties> - <dependencyManagement> - <dependencies> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <version>1.0.4</version> - </dependency> - </dependencies> - </dependencyManagement> - </profile> - - <profile> - <id>hadoop2</id> - <properties> - <hadoop.major.version>2</hadoop.major.version> - </properties> - <dependencyManagement> - <dependencies> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <version>2.0.0-mr1-cdh${cdh.version}</version> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>2.0.0-mr1-cdh${cdh.version}</version> - </dependency> - <!-- Specify Avro version because Kafka also has it as a dependency --> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <version>1.7.4</version> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-ipc</artifactId> - <version>1.7.4</version> - <exclusions> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - </exclusions> - </dependency> - </dependencies> - </dependencyManagement> - </profile> - - <profile> <id>hadoop2-yarn</id> <properties> <hadoop.major.version>2</hadoop.major.version> <!-- 0.23.* is same as 2.0.* - except hardened to run production jobs --> - <!-- <yarn.version>0.23.7</yarn.version> --> - <yarn.version>2.0.2-alpha</yarn.version> + <!-- <hadoop.version>0.23.7</hadoop.version> --> + <hadoop.version>2.0.5-alpha</hadoop.version> </properties> + <modules> + <module>yarn</module> + </modules> + <repositories> <repository> <id>maven-root</id> @@ -587,36 +619,138 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> - <version>${yarn.version}</version> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + 
<groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-api</artifactId> - <version>${yarn.version}</version> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-common</artifactId> - <version>${yarn.version}</version> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-client</artifactId> - <version>${yarn.version}</version> - </dependency> - <!-- Specify Avro version because Kafka also has it as a dependency --> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <version>1.7.4</version> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-ipc</artifactId> - <version>1.7.4</version> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> </dependency> </dependencies> 
</dependencyManagement> </profile> + <profile> + <id>repl-bin</id> + <activation> + <activeByDefault>false</activeByDefault> + </activation> + <modules> + <module>repl-bin</module> + </modules> + </profile> </profiles> </project> diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 7a4d4c4575..b1b99b37c4 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -24,42 +24,57 @@ import AssemblyKeys._ //import com.jsuereth.pgp.sbtplugin.PgpKeys._ object SparkBuild extends Build { - // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or - // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. - val HADOOP_VERSION = "1.0.4" - val HADOOP_MAJOR_VERSION = "1" - val HADOOP_YARN = false + // Hadoop version to build against. For example, "1.0.4" for Apache releases, or + // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set + // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN. + val DEFAULT_HADOOP_VERSION = "1.0.4" + val DEFAULT_YARN = false - // For Hadoop 2 versions such as "2.0.0-mr1-cdh4.1.1", set the HADOOP_MAJOR_VERSION to "2" - //val HADOOP_VERSION = "2.0.0-mr1-cdh4.1.1" - //val HADOOP_MAJOR_VERSION = "2" - //val HADOOP_YARN = false + // HBase version; set as appropriate. + val HBASE_VERSION = "0.94.6" - // For Hadoop 2 YARN support - //val HADOOP_VERSION = "2.0.2-alpha" - //val HADOOP_MAJOR_VERSION = "2" - //val HADOOP_YARN = true - - lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib) + lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects: _*) lazy val core = Project("core", file("core"), settings = coreSettings) - lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn (core) + lazy val repl = Project("repl", file("repl"), settings = replSettings) + .dependsOn(core, bagel, mllib) dependsOn(maybeYarn: _*) + + lazy val examples = Project("examples", file("examples"), settings = examplesSettings) + .dependsOn(core, mllib, bagel, streaming) dependsOn(maybeYarn: _*) + + lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) - lazy val examples = Project("examples", file("examples"), settings = examplesSettings) dependsOn (core) dependsOn (streaming) + lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn(core) - lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn (core) + lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn(core) - lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core) + lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core) - lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn (core) + lazy val yarn = Project("yarn", file("yarn"), settings = yarnSettings) dependsOn(core) + + lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings) + .dependsOn(core, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) // A configuration to set an alternative publishLocalConfiguration lazy val MavenCompile = config("m2r") extend(Compile) lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy") + // Allows build configuration to be set through environment variables + lazy val 
hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION) + lazy val isYarnEnabled = scala.util.Properties.envOrNone("SPARK_YARN") match { + case None => DEFAULT_YARN + case Some(v) => v.toBoolean + } + + // Conditionally include the yarn sub-project + lazy val maybeYarn = if(isYarnEnabled) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]() + lazy val maybeYarnRef = if(isYarnEnabled) Seq[ProjectReference](yarn) else Seq[ProjectReference]() + lazy val allProjects = Seq[ProjectReference]( + core, repl, examples, bagel, streaming, mllib, tools, assemblyProj) ++ maybeYarnRef + def sharedSettings = Defaults.defaultSettings ++ Seq( - organization := "org.spark-project", + organization := "org.apache.spark", version := "0.8.0-SNAPSHOT", scalaVersion := "2.9.3", scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"), @@ -71,7 +86,7 @@ object SparkBuild extends Build { // Fork new JVMs for tests and set Java options for those fork := true, - javaOptions += "-Xmx2500m", + javaOptions += "-Xmx3g", // Only allow one test at a time, even across projects, since they run in the same JVM concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), @@ -88,17 +103,17 @@ object SparkBuild extends Build { //useGpg in Global := true, pomExtra := ( - <url>http://spark-project.org/</url> + <url>http://spark.incubator.apache.org/</url> <licenses> <license> - <name>BSD License</name> - <url>https://github.com/mesos/spark/blob/master/LICENSE</url> + <name>Apache 2.0 License</name> + <url>http://www.apache.org/licenses/LICENSE-2.0.html</url> <distribution>repo</distribution> </license> </licenses> <scm> - <connection>scm:git:git@github.com:mesos/spark.git</connection> - <url>scm:git:git@github.com:mesos/spark.git</url> + <connection>scm:git:git@github.com:apache/incubator-spark.git</connection> + <url>scm:git:git@github.com:apache/incubator-spark.git</url> </scm> <developers> <developer> @@ -106,10 +121,14 @@ object SparkBuild extends Build { <name>Matei Zaharia</name> <email>matei.zaharia@gmail.com</email> <url>http://www.cs.berkeley.edu/~matei</url> - <organization>U.C. 
Berkeley Computer Science</organization> - <organizationUrl>http://www.cs.berkeley.edu/</organizationUrl> + <organization>Apache Software Foundation</organization> + <organizationUrl>http://spark.incubator.apache.org</organizationUrl> </developer> </developers> + <issueManagement> + <system>JIRA</system> + <url>https://spark-project.atlassian.net/browse/SPARK</url> + </issueManagement> ), /* @@ -124,7 +143,6 @@ object SparkBuild extends Build { */ libraryDependencies ++= Seq( - "io.netty" % "netty" % "3.5.3.Final", "org.eclipse.jetty" % "jetty-server" % "7.6.8.v20121106", "org.scalatest" %% "scalatest" % "1.9.1" % "test", "org.scalacheck" %% "scalacheck" % "1.10.0" % "test", @@ -149,62 +167,45 @@ object SparkBuild extends Build { val excludeJackson = ExclusionRule(organization = "org.codehaus.jackson") val excludeNetty = ExclusionRule(organization = "org.jboss.netty") val excludeAsm = ExclusionRule(organization = "asm") + val excludeSnappy = ExclusionRule(organization = "org.xerial.snappy") def coreSettings = sharedSettings ++ Seq( name := "spark-core", resolvers ++= Seq( "JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/", - "Spray Repository" at "http://repo.spray.cc/", "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/" ), libraryDependencies ++= Seq( "com.google.guava" % "guava" % "14.0.1", "com.google.code.findbugs" % "jsr305" % "1.3.9", - "log4j" % "log4j" % "1.2.16", + "log4j" % "log4j" % "1.2.17", "org.slf4j" % "slf4j-api" % slf4jVersion, "org.slf4j" % "slf4j-log4j12" % slf4jVersion, - "commons-daemon" % "commons-daemon" % "1.0.10", + "commons-daemon" % "commons-daemon" % "1.0.10", // workaround for bug HADOOP-9407 "com.ning" % "compress-lzf" % "0.8.4", + "org.xerial.snappy" % "snappy-java" % "1.0.5", "org.ow2.asm" % "asm" % "4.0", "com.google.protobuf" % "protobuf-java" % "2.4.1", - "de.javakaffee" % "kryo-serializers" % "0.22", - "com.typesafe.akka" % "akka-actor" % "2.0.3" excludeAll(excludeNetty), - "com.typesafe.akka" % "akka-remote" % "2.0.3" excludeAll(excludeNetty), - "com.typesafe.akka" % "akka-slf4j" % "2.0.3" excludeAll(excludeNetty), + "com.typesafe.akka" % "akka-actor" % "2.0.5" excludeAll(excludeNetty), + "com.typesafe.akka" % "akka-remote" % "2.0.5" excludeAll(excludeNetty), + "com.typesafe.akka" % "akka-slf4j" % "2.0.5" excludeAll(excludeNetty), "it.unimi.dsi" % "fastutil" % "6.4.4", "colt" % "colt" % "1.2.0", "net.liftweb" % "lift-json_2.9.2" % "2.5", - "org.apache.mesos" % "mesos" % "0.9.0-incubating", + "org.apache.mesos" % "mesos" % "0.12.1", "io.netty" % "netty-all" % "4.0.0.Beta2", - "org.apache.derby" % "derby" % "10.4.2.0" % "test" - ) ++ ( - if (HADOOP_MAJOR_VERSION == "2") { - if (HADOOP_YARN) { - Seq( - // Exclude rule required for all ? 
- "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), - "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), - "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), - "org.apache.hadoop" % "hadoop-yarn-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) - ) - } else { - Seq( - "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), - "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) - ) - } - } else { - Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) ) - }), - unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / - ( if (HADOOP_YARN && HADOOP_MAJOR_VERSION == "2") { - "src/hadoop2-yarn/scala" - } else { - "src/hadoop" + HADOOP_MAJOR_VERSION + "/scala" - } ) - } - ) ++ assemblySettings ++ extraAssemblySettings + "org.apache.derby" % "derby" % "10.4.2.0" % "test", + "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.avro" % "avro" % "1.7.4", + "org.apache.avro" % "avro-ipc" % "1.7.4" excludeAll(excludeNetty), + "com.codahale.metrics" % "metrics-core" % "3.0.0", + "com.codahale.metrics" % "metrics-jvm" % "3.0.0", + "com.codahale.metrics" % "metrics-json" % "3.0.0", + "com.twitter" % "chill_2.9.3" % "0.3.1", + "com.twitter" % "chill-java" % "0.3.1" + ) + ) def rootSettings = sharedSettings ++ Seq( publish := {} @@ -213,14 +214,14 @@ object SparkBuild extends Build { def replSettings = sharedSettings ++ Seq( name := "spark-repl", libraryDependencies <+= scalaVersion("org.scala-lang" % "scala-compiler" % _) - ) ++ assemblySettings ++ extraAssemblySettings + ) def examplesSettings = sharedSettings ++ Seq( name := "spark-examples", libraryDependencies ++= Seq( "com.twitter" % "algebird-core_2.9.2" % "0.1.11", - "org.apache.hbase" % "hbase" % "0.94.6" excludeAll(excludeNetty, excludeAsm), + "org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeNetty, excludeAsm), "org.apache.cassandra" % "cassandra-all" % "1.2.5" exclude("com.google.guava", "guava") @@ -230,10 +231,17 @@ object SparkBuild extends Build { exclude("jline","jline") exclude("log4j","log4j") exclude("org.apache.cassandra.deps", "avro") + excludeAll(excludeSnappy) ) + ) ++ assemblySettings ++ extraAssemblySettings + + def toolsSettings = sharedSettings ++ Seq( + name := "spark-tools" ) - def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel") + def bagelSettings = sharedSettings ++ Seq( + name := "spark-bagel" + ) def mllibSettings = sharedSettings ++ Seq( name := "spark-mllib", @@ -248,14 +256,38 @@ object SparkBuild extends Build { "Akka Repository" at "http://repo.akka.io/releases/" ), libraryDependencies ++= Seq( - "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty), + "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty, excludeSnappy), "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty), "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty), - "com.typesafe.akka" % "akka-zeromq" % "2.0.3" excludeAll(excludeNetty) + "com.typesafe.akka" % "akka-zeromq" % "2.0.5" excludeAll(excludeNetty) ) + ) + + def yarnSettings = sharedSettings ++ Seq( + name := "spark-yarn" + ) ++ extraYarnSettings + + // Conditionally include the YARN dependencies because some tools look at 
all sub-projects and will complain + // if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN). + def extraYarnSettings = if(isYarnEnabled) yarnEnabledSettings else Seq() + + def yarnEnabledSettings = Seq( + libraryDependencies ++= Seq( + // Exclude rule required for all ? + "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm) + ) + ) + + def assemblyProjSettings = sharedSettings ++ Seq( + name := "spark-assembly", + jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" } ) ++ assemblySettings ++ extraAssemblySettings - def extraAssemblySettings() = Seq(test in assembly := {}) ++ Seq( + def extraAssemblySettings() = Seq( + test in assembly := {}, mergeStrategy in assembly := { case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard diff --git a/project/build.properties b/project/build.properties index 08e17131f6..9647277162 100644 --- a/project/build.properties +++ b/project/build.properties @@ -15,4 +15,4 @@ # limitations under the License. # -sbt.version=0.12.3 +sbt.version=0.12.4 diff --git a/project/plugins.sbt b/project/plugins.sbt index 1b0f879b94..cfcd85082a 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -4,11 +4,11 @@ resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/release resolvers += "Spray Repository" at "http://repo.spray.cc/" -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.5") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.1") -addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.1.1") +addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0") -addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.2.0") +addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.5.1") // For Sonatype publishing //resolvers += Resolver.url("sbt-plugin-releases", new URL("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases/"))(Resolver.ivyStylePatterns) @@ -23,11 +23,17 @@ FWDIR="$(cd `dirname $0`; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" +SCALA_VERSION=2.9.3 + # Exit if the user hasn't compiled Spark -if [ ! -e "$SPARK_HOME/repl/target" ]; then - echo "Failed to find Spark classes in $SPARK_HOME/repl/target" >&2 - echo "You need to compile Spark before running this program" >&2 - exit 1 +if [ ! -f "$FWDIR/RELEASE" ]; then + # Exit if the user hasn't compiled Spark + ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null + if [[ $? 
!= 0 ]]; then + echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2 + echo "You need to build Spark with sbt/sbt assembly before running this program" >&2 + exit 1 + fi fi # Load environment variables from conf/spark-env.sh, if it exists @@ -48,14 +54,13 @@ export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH export OLD_PYTHONSTARTUP=$PYTHONSTARTUP export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py -# Launch with `scala` by default: -if [[ "$SPARK_LAUNCH_WITH_SCALA" != "0" ]] ; then - export SPARK_LAUNCH_WITH_SCALA=1 +if [ -n "$IPYTHON_OPTS" ]; then + IPYTHON=1 fi if [[ "$IPYTHON" = "1" ]] ; then - export PYSPARK_PYTHON="ipython" - exec "$PYSPARK_PYTHON" -i -c "%run $PYTHONSTARTUP" + IPYTHON_OPTS=${IPYTHON_OPTS:--i} + exec ipython "$IPYTHON_OPTS" -c "%run $PYTHONSTARTUP" else - exec "$PYSPARK_PYTHON" "$@" + exec "$PYSPARK_PYTHON" "$@" fi diff --git a/python/examples/als.py b/python/examples/als.py index f2b2eee64c..a77dfb2577 100755 --- a/python/examples/als.py +++ b/python/examples/als.py @@ -48,8 +48,7 @@ def update(i, vec, mat, ratings): if __name__ == "__main__": if len(sys.argv) < 2: - print >> sys.stderr, \ - "Usage: PythonALS <master> <M> <U> <F> <iters> <slices>" + print >> sys.stderr, "Usage: als <master> <M> <U> <F> <iters> <slices>" exit(-1) sc = SparkContext(sys.argv[1], "PythonALS", pyFiles=[realpath(__file__)]) M = int(sys.argv[2]) if len(sys.argv) > 2 else 100 @@ -84,5 +83,5 @@ if __name__ == "__main__": usb = sc.broadcast(us) error = rmse(R, ms, us) - print "Iteration %d:" % i + print "Iteration %d:" % i print "\nRMSE: %5.4f\n" % error diff --git a/python/examples/kmeans.py b/python/examples/kmeans.py index c670556f2b..ba31af92fc 100644..100755 --- a/python/examples/kmeans.py +++ b/python/examples/kmeans.py @@ -41,8 +41,7 @@ def closestPoint(p, centers): if __name__ == "__main__": if len(sys.argv) < 5: - print >> sys.stderr, \ - "Usage: PythonKMeans <master> <file> <k> <convergeDist>" + print >> sys.stderr, "Usage: kmeans <master> <file> <k> <convergeDist>" exit(-1) sc = SparkContext(sys.argv[1], "PythonKMeans") lines = sc.textFile(sys.argv[2]) diff --git a/python/examples/logistic_regression.py b/python/examples/logistic_regression.py index 54d227d0d3..1117dea538 100755 --- a/python/examples/logistic_regression.py +++ b/python/examples/logistic_regression.py @@ -16,7 +16,8 @@ # """ -This example requires numpy (http://www.numpy.org/) +A logistic regression implementation that uses NumPy (http://www.numpy.org) to act on batches +of input data using efficient matrix operations. """ from collections import namedtuple from math import exp @@ -27,48 +28,45 @@ import numpy as np from pyspark import SparkContext -N = 100000 # Number of data points D = 10 # Number of dimensions -R = 0.7 # Scaling factor -ITERATIONS = 5 -np.random.seed(42) -DataPoint = namedtuple("DataPoint", ['x', 'y']) -from lr import DataPoint # So that DataPoint is properly serialized - - -def generateData(): - def generatePoint(i): - y = -1 if i % 2 == 0 else 1 - x = np.random.normal(size=D) + (y * R) - return DataPoint(x, y) - return [generatePoint(i) for i in range(N)] - +# Read a batch of points from the input file into a NumPy matrix object. We operate on batches to +# make further computations faster. +# The data file contains lines of the form <label> <x1> <x2> ... <xD>. We load each block of these +# into a NumPy array of size numLines * (D + 1) and pull out column 0 vs the others in gradient(). 
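# For instance (illustrative two-feature line; the script itself expects D = 10), the
# input line "1 0.5 -0.3" -- or "1,0.5,-0.3", since commas are normalized to spaces --
# becomes the matrix row [1.0, 0.5, -0.3]: gradient() then reads column 0 as the label
# y and the remaining columns as the feature vector x.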
+def readPointBatch(iterator):
+    strs = list(iterator)
+    matrix = np.zeros((len(strs), D + 1))
+    for i in xrange(len(strs)):
+        matrix[i] = np.fromstring(strs[i].replace(',', ' '), dtype=np.float32, sep=' ')
+    return [matrix]
 
 if __name__ == "__main__":
-    if len(sys.argv) == 1:
-        print >> sys.stderr, \
-            "Usage: PythonLR <master> [<slices>]"
+    if len(sys.argv) != 4:
+        print >> sys.stderr, "Usage: logistic_regression <master> <file> <iters>"
         exit(-1)
     sc = SparkContext(sys.argv[1], "PythonLR", pyFiles=[realpath(__file__)])
-    slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
-    points = sc.parallelize(generateData(), slices).cache()
+    points = sc.textFile(sys.argv[2]).mapPartitions(readPointBatch).cache()
+    iterations = int(sys.argv[3])
 
     # Initialize w to a random value
     w = 2 * np.random.ranf(size=D) - 1
     print "Initial w: " + str(w)
 
+    # Compute logistic regression gradient for a matrix of data points
+    def gradient(matrix, w):
+        Y = matrix[:,0]    # point labels (first column of input file)
+        X = matrix[:,1:]   # point coordinates
+        # For each point (x, y), compute gradient function, then sum these up
+        return ((1.0 / (1.0 + np.exp(-Y * X.dot(w))) - 1.0) * Y * X.T).sum(1)
+
     def add(x, y):
         x += y
         return x
 
-    for i in range(1, ITERATIONS + 1):
-        print "On iteration %i" % i
-
-        gradient = points.map(lambda p:
-            (1.0 / (1.0 + exp(-p.y * np.dot(w, p.x)))) * p.y * p.x
-        ).reduce(add)
-        w -= gradient
+    for i in range(iterations):
+        print "On iteration %i" % (i + 1)
+        w -= points.map(lambda m: gradient(m, w)).reduce(add)
 
     print "Final w: " + str(w)
diff --git a/python/examples/pagerank.py b/python/examples/pagerank.py
new file mode 100755
index 0000000000..cd774cf3a3
--- /dev/null
+++ b/python/examples/pagerank.py
@@ -0,0 +1,70 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#!/usr/bin/env python
+
+import re, sys
+from operator import add
+
+from pyspark import SparkContext
+
+
+def computeContribs(urls, rank):
+    """Calculates URL contributions to the rank of other URLs."""
+    num_urls = len(urls)
+    for url in urls: yield (url, rank / num_urls)
+
+
+def parseNeighbors(urls):
+    """Parses a pair of URLs from a whitespace-separated string."""
+    parts = re.split(r'\s+', urls)
+    return parts[0], parts[1]
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print >> sys.stderr, "Usage: pagerank <master> <file> <number_of_iterations>"
+        exit(-1)
+
+    # Initialize the Spark context.
+    sc = SparkContext(sys.argv[1], "PythonPageRank")
+
+    # Loads the input file, whose lines should have the format:
+    #     URL         neighbor URL
+    #     URL         neighbor URL
+    #     URL         neighbor URL
+    #     ...
+    lines = sc.textFile(sys.argv[2], 1)
+
+    # Loads all URLs from the input file and initializes their neighbor lists.
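# With the sample pagerank_data.txt added earlier in this patch (edges 1 2, 1 3, 1 4,
# 2 1, 3 1 and 4 1), the next line produces groups like ('1', ['2', '3', '4']) and
# ('2', ['1']).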
+    # Loads all URLs from the input file and initializes their neighbor lists.
+    links = lines.map(lambda urls: parseNeighbors(urls)).distinct().groupByKey().cache()
+
+    # Initializes the rank of each URL in the links table to one.
+    ranks = links.map(lambda (url, neighbors): (url, 1.0))
+
+    # Calculates and updates URL ranks continuously using the PageRank algorithm.
+    for iteration in xrange(int(sys.argv[3])):
+        # Calculates URL contributions to the rank of other URLs.
+        contribs = links.join(ranks).flatMap(lambda (url, (urls, rank)):
+            computeContribs(urls, rank))
+
+        # Re-calculates URL ranks based on neighbor contributions.
+        ranks = contribs.reduceByKey(add).mapValues(lambda rank: rank * 0.85 + 0.15)
+
+    # Collects all URL ranks and dumps them to the console.
+    for (link, rank) in ranks.collect():
+        print "%s has rank: %s." % (link, rank)
diff --git a/python/examples/pi.py b/python/examples/pi.py
index 33c026e824..ab0645fc2f 100644..100755
--- a/python/examples/pi.py
+++ b/python/examples/pi.py
@@ -24,8 +24,7 @@ from pyspark import SparkContext
 
 if __name__ == "__main__":
     if len(sys.argv) == 1:
-        print >> sys.stderr, \
-            "Usage: PythonPi <master> [<slices>]"
+        print >> sys.stderr, "Usage: pi <master> [<slices>]"
         exit(-1)
     sc = SparkContext(sys.argv[1], "PythonPi")
     slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
diff --git a/python/examples/transitive_closure.py b/python/examples/transitive_closure.py
index 40be3b5000..744cce6651 100644..100755
--- a/python/examples/transitive_closure.py
+++ b/python/examples/transitive_closure.py
@@ -37,10 +37,9 @@ def generateGraph():
 
 if __name__ == "__main__":
     if len(sys.argv) == 1:
-        print >> sys.stderr, \
-            "Usage: PythonTC <master> [<slices>]"
+        print >> sys.stderr, "Usage: transitive_closure <master> [<slices>]"
         exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonTC")
+    sc = SparkContext(sys.argv[1], "PythonTransitiveClosure")
     slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
     tc = sc.parallelize(generateGraph(), slices).cache()
diff --git a/python/examples/wordcount.py b/python/examples/wordcount.py
index 41c846ba79..b9139b9d76 100644..100755
--- a/python/examples/wordcount.py
+++ b/python/examples/wordcount.py
@@ -23,8 +23,7 @@ from pyspark import SparkContext
 
 if __name__ == "__main__":
     if len(sys.argv) < 3:
-        print >> sys.stderr, \
-            "Usage: PythonWordCount <master> <file>"
+        print >> sys.stderr, "Usage: wordcount <master> <file>"
         exit(-1)
     sc = SparkContext(sys.argv[1], "PythonWordCount")
     lines = sc.textFile(sys.argv[2], 1)
@@ -33,4 +32,4 @@ if __name__ == "__main__":
         .reduceByKey(add)
     output = counts.collect()
     for (word, count) in output:
-        print "%s : %i" % (word, count)
+        print "%s: %i" % (word, count)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 2f741cb345..8fbf296509 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -46,6 +46,7 @@ class SparkContext(object):
     _next_accum_id = 0
     _active_spark_context = None
     _lock = Lock()
+    _python_includes = None # zip and egg files that need to be added to PYTHONPATH
 
     def __init__(self, master, jobName, sparkHome=None, pyFiles=None,
             environment=None, batchSize=1024):
@@ -103,16 +104,19 @@ class SparkContext(object):
         # send.
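Further down in this hunk, parallelize() starts deriving the batch size from the collection length so that small collections still spread across all slices; the arithmetic, with hypothetical sizes (illustrative only):

c = range(5)              # collection to distribute
numSlices = 5
defaultBatchSize = 1024
batchSize = min(len(c) // numSlices, defaultBatchSize)    # = 1
# One element per batch, so each of the 5 slices receives one element instead
# of the whole collection being pickled into a single 1024-element batch.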
self._pickled_broadcast_vars = set() + SparkFiles._sc = self + root_dir = SparkFiles.getRootDirectory() + sys.path.append(root_dir) + # Deploy any code dependencies specified in the constructor + self._python_includes = list() for path in (pyFiles or []): self.addPyFile(path) - SparkFiles._sc = self - sys.path.append(SparkFiles.getRootDirectory()) # Create a temporary directory inside spark.local.dir: - local_dir = self._jvm.spark.Utils.getLocalDir() + local_dir = self._jvm.org.apache.spark.util.Utils.getLocalDir() self._temp_dir = \ - self._jvm.spark.Utils.createTempDir(local_dir).getAbsolutePath() + self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir).getAbsolutePath() @property def defaultParallelism(self): @@ -141,14 +145,21 @@ class SparkContext(object): def parallelize(self, c, numSlices=None): """ Distribute a local Python collection to form an RDD. + + >>> sc.parallelize(range(5), 5).glom().collect() + [[0], [1], [2], [3], [4]] """ numSlices = numSlices or self.defaultParallelism # Calling the Java parallelize() method with an ArrayList is too slow, # because it sends O(n) Py4J commands. As an alternative, serialized # objects are written to a file and loaded through textFile(). tempFile = NamedTemporaryFile(delete=False, dir=self._temp_dir) - if self.batchSize != 1: - c = batched(c, self.batchSize) + # Make sure we distribute data evenly if it's smaller than self.batchSize + if "__len__" not in dir(c): + c = list(c) # Make it a list so we can compute its length + batchSize = min(len(c) // numSlices, self.batchSize) + if batchSize > 1: + c = batched(c, batchSize) for x in c: write_with_length(dump_pickle(x), tempFile) tempFile.close() @@ -250,7 +261,11 @@ class SparkContext(object): HTTP, HTTPS or FTP URI. """ self.addFile(path) - filename = path.split("/")[-1] + (dirname, filename) = os.path.split(path) # dirname may be directory or HDFS/S3 prefix + + if filename.endswith('.zip') or filename.endswith('.ZIP') or filename.endswith('.egg'): + self._python_includes.append(filename) + sys.path.append(os.path.join(SparkFiles.getRootDirectory(), filename)) # for tests in local mode def setCheckpointDir(self, dirName, useExisting=False): """ diff --git a/python/pyspark/files.py b/python/pyspark/files.py index 89bcbcfe06..57ee14eeb7 100644 --- a/python/pyspark/files.py +++ b/python/pyspark/files.py @@ -52,4 +52,4 @@ class SparkFiles(object): return cls._root_directory else: # This will have to change if we support multiple SparkContexts: - return cls._sc._jvm.spark.SparkFiles.getRootDirectory() + return cls._sc._jvm.org.apache.spark.SparkFiles.getRootDirectory() diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index e503fb7621..26fbe0f080 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -17,6 +17,7 @@ import os import sys +import signal from subprocess import Popen, PIPE from threading import Thread from py4j.java_gateway import java_import, JavaGateway, GatewayClient @@ -28,9 +29,12 @@ SPARK_HOME = os.environ["SPARK_HOME"] def launch_gateway(): # Launch the Py4j gateway using Spark's run command so that we pick up the # proper classpath and SPARK_MEM settings from spark-env.sh - command = [os.path.join(SPARK_HOME, "run"), "py4j.GatewayServer", + command = [os.path.join(SPARK_HOME, "spark-class"), "py4j.GatewayServer", "--die-on-broken-pipe", "0"] - proc = Popen(command, stdout=PIPE, stdin=PIPE) + # Don't send ctrl-c / SIGINT to the Java gateway: + def preexec_function(): + signal.signal(signal.SIGINT, signal.SIG_IGN) 
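The preexec_fn hook runs in the forked child between fork() and exec(), so a Ctrl-C at the terminal interrupts only the Python parent, not the Java gateway. The same pattern in isolation (POSIX only; "sleep" is a hypothetical stand-in for the gateway process):

import signal
from subprocess import Popen, PIPE

def ignore_sigint():
    signal.signal(signal.SIGINT, signal.SIG_IGN)    # child ignores Ctrl-C

# The child survives a SIGINT sent to the foreground process group,
# while the parent still catches it.
child = Popen(["sleep", "60"], stdout=PIPE, preexec_fn=ignore_sigint)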
+ proc = Popen(command, stdout=PIPE, stdin=PIPE, preexec_fn=preexec_function) # Determine which ephemeral port the server started on: port = int(proc.stdout.readline()) # Create a thread to echo output from the GatewayServer, which is required @@ -49,7 +53,7 @@ def launch_gateway(): # Connect to the gateway gateway = JavaGateway(GatewayClient(port=port), auto_convert=False) # Import the classes used by PySpark - java_import(gateway.jvm, "spark.api.java.*") - java_import(gateway.jvm, "spark.api.python.*") + java_import(gateway.jvm, "org.apache.spark.api.java.*") + java_import(gateway.jvm, "org.apache.spark.api.python.*") java_import(gateway.jvm, "scala.Tuple2") return gateway diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index c6a6b24c5a..914118ccdd 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -21,6 +21,7 @@ from collections import defaultdict from itertools import chain, ifilter, imap, product import operator import os +import sys import shlex from subprocess import Popen, PIPE from tempfile import NamedTemporaryFile @@ -31,6 +32,8 @@ from pyspark.serializers import batched, Batch, dump_pickle, load_pickle, \ read_from_pickle_file from pyspark.join import python_join, python_left_outer_join, \ python_right_outer_join, python_cogroup +from pyspark.statcounter import StatCounter +from pyspark.rddsampler import RDDSampler from py4j.java_collections import ListConverter, MapConverter @@ -160,18 +163,64 @@ class RDD(object): >>> sorted(sc.parallelize([1, 1, 2, 3]).distinct().collect()) [1, 2, 3] """ - return self.map(lambda x: (x, "")) \ + return self.map(lambda x: (x, None)) \ .reduceByKey(lambda x, _: x) \ .map(lambda (x, _): x) - # TODO: sampling needs to be re-implemented due to Batch - #def sample(self, withReplacement, fraction, seed): - # jrdd = self._jrdd.sample(withReplacement, fraction, seed) - # return RDD(jrdd, self.ctx) + def sample(self, withReplacement, fraction, seed): + """ + Return a sampled subset of this RDD (relies on numpy and falls back + on default random generator if numpy is unavailable). + + >>> sc.parallelize(range(0, 100)).sample(False, 0.1, 2).collect() #doctest: +SKIP + [2, 3, 20, 21, 24, 41, 42, 66, 67, 89, 90, 98] + """ + return self.mapPartitionsWithSplit(RDDSampler(withReplacement, fraction, seed).func, True) + + # this is ported from scala/spark/RDD.scala + def takeSample(self, withReplacement, num, seed): + """ + Return a fixed-size sampled subset of this RDD (currently requires numpy). + + >>> sc.parallelize(range(0, 10)).takeSample(True, 10, 1) #doctest: +SKIP + [4, 2, 1, 8, 2, 7, 0, 4, 1, 4] + """ + + fraction = 0.0 + total = 0 + multiplier = 3.0 + initialCount = self.count() + maxSelected = 0 + + if (num < 0): + raise ValueError - #def takeSample(self, withReplacement, num, seed): - # vals = self._jrdd.takeSample(withReplacement, num, seed) - # return [load_pickle(bytes(x)) for x in vals] + if initialCount > sys.maxint - 1: + maxSelected = sys.maxint - 1 + else: + maxSelected = initialCount + + if num > initialCount and not withReplacement: + total = maxSelected + fraction = multiplier * (maxSelected + 1) / initialCount + else: + fraction = multiplier * (num + 1) / initialCount + total = num + + samples = self.sample(withReplacement, fraction, seed).collect() + + # If the first sample didn't turn out large enough, keep trying to take samples; + # this shouldn't happen often because we use a big multiplier for their initial size. 
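The 3x multiplier is what keeps that retry loop rare; with hypothetical sizes, the expected sample comfortably exceeds the request:

num, initialCount, multiplier = 10, 1000, 3.0
fraction = multiplier * (num + 1) / initialCount    # 0.033
expectedSampleSize = fraction * initialCount        # ~33, about 3x the 10 requested
assert expectedSampleSize > num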
+ # See: scala/spark/RDD.scala + while len(samples) < total: + if seed > sys.maxint - 2: + seed = -1 + seed += 1 + samples = self.sample(withReplacement, fraction, seed).collect() + + sampler = RDDSampler(withReplacement, fraction, seed+1) + sampler.shuffle(samples) + return samples[0:total] def union(self, other): """ @@ -267,7 +316,11 @@ class RDD(object): >>> def f(x): print x >>> sc.parallelize([1, 2, 3, 4, 5]).foreach(f) """ - self.map(f).collect() # Force evaluation + def processPartition(iterator): + for x in iterator: + f(x) + yield None + self.mapPartitions(processPartition).collect() # Force evaluation def collect(self): """ @@ -353,6 +406,63 @@ class RDD(object): 3 """ return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum() + + def stats(self): + """ + Return a L{StatCounter} object that captures the mean, variance + and count of the RDD's elements in one operation. + """ + def redFunc(left_counter, right_counter): + return left_counter.mergeStats(right_counter) + + return self.mapPartitions(lambda i: [StatCounter(i)]).reduce(redFunc) + + def mean(self): + """ + Compute the mean of this RDD's elements. + + >>> sc.parallelize([1, 2, 3]).mean() + 2.0 + """ + return self.stats().mean() + + def variance(self): + """ + Compute the variance of this RDD's elements. + + >>> sc.parallelize([1, 2, 3]).variance() + 0.666... + """ + return self.stats().variance() + + def stdev(self): + """ + Compute the standard deviation of this RDD's elements. + + >>> sc.parallelize([1, 2, 3]).stdev() + 0.816... + """ + return self.stats().stdev() + + def sampleStdev(self): + """ + Compute the sample standard deviation of this RDD's elements (which corrects for bias in + estimating the standard deviation by dividing by N-1 instead of N). + + >>> sc.parallelize([1, 2, 3]).sampleStdev() + 1.0 + """ + return self.stats().sampleStdev() + + def sampleVariance(self): + """ + Compute the sample variance of this RDD's elements (which corrects for bias in + estimating the variance by dividing by N-1 instead of N). + + >>> sc.parallelize([1, 2, 3]).sampleVariance() + 1.0 + """ + return self.stats().sampleVariance() def countByValue(self): """ @@ -386,13 +496,16 @@ class RDD(object): >>> sc.parallelize([2, 3, 4, 5, 6]).take(10) [2, 3, 4, 5, 6] """ + def takeUpToNum(iterator): + taken = 0 + while taken < num: + yield next(iterator) + taken += 1 + # Take only up to num elements from each partition we try + mapped = self.mapPartitions(takeUpToNum) items = [] - for partition in range(self._jrdd.splits().size()): - iterator = self.ctx._takePartition(self._jrdd.rdd(), partition) - # Each item in the iterator is a string, Python object, batch of - # Python objects. Regardless, it is sufficient to take `num` - # of these objects in order to collect `num` Python objects: - iterator = iterator.take(num) + for partition in range(mapped._jrdd.splits().size()): + iterator = self.ctx._takePartition(mapped._jrdd.rdd(), partition) items.extend(self._collect_iterator_through_file(iterator)) if len(items) >= num: break @@ -689,6 +802,43 @@ class RDD(object): """ return python_cogroup(self, other, numPartitions) + def subtractByKey(self, other, numPartitions=None): + """ + Return each (key, value) pair in C{self} that has no pair with matching key + in C{other}. 
+ + >>> x = sc.parallelize([("a", 1), ("b", 4), ("b", 5), ("a", 2)]) + >>> y = sc.parallelize([("a", 3), ("c", None)]) + >>> sorted(x.subtractByKey(y).collect()) + [('b', 4), ('b', 5)] + """ + filter_func = lambda tpl: len(tpl[1][0]) > 0 and len(tpl[1][1]) == 0 + map_func = lambda tpl: [(tpl[0], val) for val in tpl[1][0]] + return self.cogroup(other, numPartitions).filter(filter_func).flatMap(map_func) + + def subtract(self, other, numPartitions=None): + """ + Return each value in C{self} that is not contained in C{other}. + + >>> x = sc.parallelize([("a", 1), ("b", 4), ("b", 5), ("a", 3)]) + >>> y = sc.parallelize([("a", 3), ("c", None)]) + >>> sorted(x.subtract(y).collect()) + [('a', 1), ('b', 4), ('b', 5)] + """ + rdd = other.map(lambda x: (x, True)) # note: here 'True' is just a placeholder + return self.map(lambda x: (x, True)).subtractByKey(rdd).map(lambda tpl: tpl[0]) # note: here 'True' is just a placeholder + + def keyBy(self, f): + """ + Creates tuples of the elements in this RDD by applying C{f}. + + >>> x = sc.parallelize(range(0,3)).keyBy(lambda x: x*x) + >>> y = sc.parallelize(zip(range(0,5), range(0,5))) + >>> sorted(x.cogroup(y).collect()) + [(0, ([0], [0])), (1, ([1], [1])), (2, ([], [2])), (3, ([], [3])), (4, ([2], [4]))] + """ + return self.map(lambda x: (f(x), x)) + # TODO: `lookup` is disabled because we can't make direct comparisons based # on the key; we need to compare the hash of the key to the hash of the # keys in the pairs. This could be an expensive operation, since those @@ -749,11 +899,12 @@ class PipelinedRDD(RDD): self.ctx._gateway._gateway_client) self.ctx._pickled_broadcast_vars.clear() class_manifest = self._prev_jrdd.classManifest() - env = copy.copy(self.ctx.environment) - env['PYTHONPATH'] = os.environ.get("PYTHONPATH", "") - env = MapConverter().convert(env, self.ctx._gateway._gateway_client) + env = MapConverter().convert(self.ctx.environment, + self.ctx._gateway._gateway_client) + includes = ListConverter().convert(self.ctx._python_includes, + self.ctx._gateway._gateway_client) python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), - pipe_command, env, self.preservesPartitioning, self.ctx.pythonExec, + pipe_command, env, includes, self.preservesPartitioning, self.ctx.pythonExec, broadcast_vars, self.ctx._javaAccumulator, class_manifest) self._jrdd_val = python_rdd.asJavaRDD() return self._jrdd_val @@ -769,7 +920,7 @@ def _test(): # The small batch size here ensures that we see multiple batches, # even in these small test examples: globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs) + (failure_count, test_count) = doctest.testmod(globs=globs,optionflags=doctest.ELLIPSIS) globs['sc'].stop() if failure_count: exit(-1) diff --git a/python/pyspark/rddsampler.py b/python/pyspark/rddsampler.py new file mode 100644 index 0000000000..aca2ef3b51 --- /dev/null +++ b/python/pyspark/rddsampler.py @@ -0,0 +1,112 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import random
+
+class RDDSampler(object):
+    def __init__(self, withReplacement, fraction, seed):
+        try:
+            import numpy
+            self._use_numpy = True
+        except ImportError:
+            print >> sys.stderr, "NumPy does not appear to be installed. Falling back to default random generator for sampling."
+            self._use_numpy = False
+
+        self._seed = seed
+        self._withReplacement = withReplacement
+        self._fraction = fraction
+        self._random = None
+        self._split = None
+        self._rand_initialized = False
+
+    def initRandomGenerator(self, split):
+        if self._use_numpy:
+            import numpy
+            self._random = numpy.random.RandomState(self._seed)
+            for _ in range(0, split):
+                # discard the next few values in the sequence to have a
+                # different seed for the different splits
+                self._random.randint(sys.maxint)
+        else:
+            import random
+            random.seed(self._seed)
+            for _ in range(0, split):
+                # discard the next few values in the sequence to have a
+                # different seed for the different splits
+                random.randint(0, sys.maxint)
+        self._split = split
+        self._rand_initialized = True
+
+    def getUniformSample(self, split):
+        if not self._rand_initialized or split != self._split:
+            self.initRandomGenerator(split)
+
+        if self._use_numpy:
+            return self._random.random_sample()
+        else:
+            return random.uniform(0.0, 1.0)
+
+    def getPoissonSample(self, split, mean):
+        if not self._rand_initialized or split != self._split:
+            self.initRandomGenerator(split)
+
+        if self._use_numpy:
+            return self._random.poisson(mean)
+        else:
+            # here we simulate drawing numbers n_i ~ Poisson(mean) by drawing
+            # a sequence of inter-arrival times delta_j ~ Exp(rate = mean)
+            num_arrivals = 1
+            cur_time = 0.0
+
+            cur_time += random.expovariate(mean)
+
+            if cur_time > 1.0:
+                return 0
+
+            while(cur_time <= 1.0):
+                cur_time += random.expovariate(mean)
+                num_arrivals += 1
+
+            return (num_arrivals - 1)
+
+    def shuffle(self, vals):
+        if not self._rand_initialized:
+            self.initRandomGenerator(0)  # this should only ever be called on the
+                                         # master, so the split does not matter
+
+        if self._use_numpy:
+            self._random.shuffle(vals)
+        else:
+            random.shuffle(vals, self._random)
+
+    def func(self, split, iterator):
+        if self._withReplacement:
+            for obj in iterator:
+                # For large datasets, the expected number of occurrences of each element in a sample with
+                # replacement is Poisson(frac). We use that to get a count for each element.
+                count = self.getPoissonSample(split, mean = self._fraction)
+                for _ in range(0, count):
+                    yield obj
+        else:
+            for obj in iterator:
+                if self.getUniformSample(split) <= self._fraction:
+                    yield obj
diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index cc8cd9e3c4..54823f8037 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -21,13 +21,31 @@
 An interactive shell.
 
 This file is designed to be launched as a PYTHONSTARTUP script.
""" import os +import platform import pyspark from pyspark.context import SparkContext +# this is the equivalent of ADD_JARS +add_files = os.environ.get("ADD_FILES").split(',') if os.environ.get("ADD_FILES") != None else None -sc = SparkContext(os.environ.get("MASTER", "local"), "PySparkShell") +sc = SparkContext(os.environ.get("MASTER", "local"), "PySparkShell", pyFiles=add_files) + +print """Welcome to + ____ __ + / __/__ ___ _____/ /__ + _\ \/ _ \/ _ `/ __/ '_/ + /__ / .__/\_,_/_/ /_/\_\ version 0.8.0 + /_/ +""" +print "Using Python version %s (%s, %s)" % ( + platform.python_version(), + platform.python_build()[0], + platform.python_build()[1]) print "Spark context avaiable as sc." +if add_files != None: + print "Adding files: [%s]" % ", ".join(add_files) + # The ./pyspark script stores the old PYTHONSTARTUP value in OLD_PYTHONSTARTUP, # which allows us to execute the user's PYTHONSTARTUP file: _pythonstartup = os.environ.get('OLD_PYTHONSTARTUP') diff --git a/python/pyspark/statcounter.py b/python/pyspark/statcounter.py new file mode 100644 index 0000000000..8e1cbd4ad9 --- /dev/null +++ b/python/pyspark/statcounter.py @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is ported from spark/util/StatCounter.scala + +import copy +import math + +class StatCounter(object): + + def __init__(self, values=[]): + self.n = 0L # Running count of our values + self.mu = 0.0 # Running mean of our values + self.m2 = 0.0 # Running variance numerator (sum of (x - mean)^2) + + for v in values: + self.merge(v) + + # Add a value into this StatCounter, updating the internal statistics. + def merge(self, value): + delta = value - self.mu + self.n += 1 + self.mu += delta / self.n + self.m2 += delta * (value - self.mu) + return self + + # Merge another StatCounter into this one, adding up the internal statistics. 
+ def mergeStats(self, other): + if not isinstance(other, StatCounter): + raise Exception("Can only merge Statcounters!") + + if other is self: # reference equality holds + self.merge(copy.deepcopy(other)) # Avoid overwriting fields in a weird order + else: + if self.n == 0: + self.mu = other.mu + self.m2 = other.m2 + self.n = other.n + elif other.n != 0: + delta = other.mu - self.mu + if other.n * 10 < self.n: + self.mu = self.mu + (delta * other.n) / (self.n + other.n) + elif self.n * 10 < other.n: + self.mu = other.mu - (delta * self.n) / (self.n + other.n) + else: + self.mu = (self.mu * self.n + other.mu * other.n) / (self.n + other.n) + + self.m2 += other.m2 + (delta * delta * self.n * other.n) / (self.n + other.n) + self.n += other.n + return self + + # Clone this StatCounter + def copy(self): + return copy.deepcopy(self) + + def count(self): + return self.n + + def mean(self): + return self.mu + + def sum(self): + return self.n * self.mu + + # Return the variance of the values. + def variance(self): + if self.n == 0: + return float('nan') + else: + return self.m2 / self.n + + # + # Return the sample variance, which corrects for bias in estimating the variance by dividing + # by N-1 instead of N. + # + def sampleVariance(self): + if self.n <= 1: + return float('nan') + else: + return self.m2 / (self.n - 1) + + # Return the standard deviation of the values. + def stdev(self): + return math.sqrt(self.variance()) + + # + # Return the sample standard deviation of the values, which corrects for bias in estimating the + # variance by dividing by N-1 instead of N. + # + def sampleStdev(self): + return math.sqrt(self.sampleVariance()) + + def __repr__(self): + return "(count: %s, mean: %s, stdev: %s)" % (self.count(), self.mean(), self.stdev()) + diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index dfd841b10a..29d6a128f6 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -64,7 +64,7 @@ class TestCheckpoint(PySparkTestCase): flatMappedRDD = parCollection.flatMap(lambda x: range(1, x + 1)) self.assertFalse(flatMappedRDD.isCheckpointed()) - self.assertIsNone(flatMappedRDD.getCheckpointFile()) + self.assertTrue(flatMappedRDD.getCheckpointFile() is None) flatMappedRDD.checkpoint() result = flatMappedRDD.collect() @@ -79,13 +79,13 @@ class TestCheckpoint(PySparkTestCase): flatMappedRDD = parCollection.flatMap(lambda x: [x]) self.assertFalse(flatMappedRDD.isCheckpointed()) - self.assertIsNone(flatMappedRDD.getCheckpointFile()) + self.assertTrue(flatMappedRDD.getCheckpointFile() is None) flatMappedRDD.checkpoint() flatMappedRDD.count() # forces a checkpoint to be computed time.sleep(1) # 1 second - self.assertIsNotNone(flatMappedRDD.getCheckpointFile()) + self.assertTrue(flatMappedRDD.getCheckpointFile() is not None) recovered = self.sc._checkpointFile(flatMappedRDD.getCheckpointFile()) self.assertEquals([1, 2, 3, 4], recovered.collect()) @@ -125,6 +125,17 @@ class TestAddFile(PySparkTestCase): from userlibrary import UserClass self.assertEqual("Hello World!", UserClass().hello()) + def test_add_egg_file_locally(self): + # To ensure that we're actually testing addPyFile's effects, check that + # this fails due to `userlibrary` not being on the Python path: + def func(): + from userlib import UserClass + self.assertRaises(ImportError, func) + path = os.path.join(SPARK_HOME, "python/test_support/userlib-0.1-py2.7.egg") + self.sc.addPyFile(path) + from userlib import UserClass + self.assertEqual("Hello World from inside a package!", UserClass().hello()) + class 
TestIO(PySparkTestCase): @@ -164,9 +175,12 @@ class TestDaemon(unittest.TestCase): time.sleep(1) # daemon should no longer accept connections - with self.assertRaises(EnvironmentError) as trap: + try: self.connect(port) - self.assertEqual(trap.exception.errno, ECONNREFUSED) + except EnvironmentError as exception: + self.assertEqual(exception.errno, ECONNREFUSED) + else: + self.fail("Expected EnvironmentError to be raised") def test_termination_stdin(self): """Ensure that daemon and workers terminate when stdin is closed.""" diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 75d692beeb..695f6dfb84 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -49,15 +49,26 @@ def main(infile, outfile): split_index = read_int(infile) if split_index == -1: # for unit tests return + + # fetch name of workdir spark_files_dir = load_pickle(read_with_length(infile)) SparkFiles._root_directory = spark_files_dir SparkFiles._is_running_on_worker = True - sys.path.append(spark_files_dir) + + # fetch names and values of broadcast variables num_broadcast_variables = read_int(infile) for _ in range(num_broadcast_variables): bid = read_long(infile) value = read_with_length(infile) _broadcastRegistry[bid] = Broadcast(bid, load_pickle(value)) + + # fetch names of includes (*.zip and *.egg files) and construct PYTHONPATH + sys.path.append(spark_files_dir) # *.py files that were added will be copied here + num_python_includes = read_int(infile) + for _ in range(num_python_includes): + sys.path.append(os.path.join(spark_files_dir, load_pickle(read_with_length(infile)))) + + # now load function func = load_obj(infile) bypassSerializer = load_obj(infile) if bypassSerializer: diff --git a/python/run-tests b/python/run-tests index 1ee947d414..cbc554ea9d 100755 --- a/python/run-tests +++ b/python/run-tests @@ -21,22 +21,23 @@ # Figure out where the Spark framework is installed FWDIR="$(cd `dirname $0`; cd ../; pwd)" -FAILED=0 - -$FWDIR/pyspark pyspark/rdd.py -FAILED=$(($?||$FAILED)) +# CD into the python directory to find things on the right path +cd "$FWDIR/python" -$FWDIR/pyspark pyspark/context.py -FAILED=$(($?||$FAILED)) +FAILED=0 -$FWDIR/pyspark -m doctest pyspark/broadcast.py -FAILED=$(($?||$FAILED)) +rm -f unit-tests.log -$FWDIR/pyspark -m doctest pyspark/accumulators.py -FAILED=$(($?||$FAILED)) +function run_test() { + $FWDIR/pyspark $1 2>&1 | tee -a unit-tests.log + FAILED=$((PIPESTATUS[0]||$FAILED)) +} -$FWDIR/pyspark -m unittest pyspark.tests -FAILED=$(($?||$FAILED)) +run_test "pyspark/rdd.py" +run_test "pyspark/context.py" +run_test "-m doctest pyspark/broadcast.py" +run_test "-m doctest pyspark/accumulators.py" +run_test "pyspark/tests.py" if [[ $FAILED != 0 ]]; then echo -en "\033[31m" # Red diff --git a/python/test_support/userlib-0.1-py2.7.egg b/python/test_support/userlib-0.1-py2.7.egg Binary files differnew file mode 100644 index 0000000000..1674c9cb22 --- /dev/null +++ b/python/test_support/userlib-0.1-py2.7.egg diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index 7c4e722cc1..d61b36a61a 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -19,24 +19,44 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-parent</artifactId> <version>0.8.0-SNAPSHOT</version> 
<relativePath>../pom.xml</relativePath> </parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-repl-bin</artifactId> <packaging>pom</packaging> <name>Spark Project REPL binary packaging</name> - <url>http://spark-project.org/</url> + <url>http://spark.incubator.apache.org/</url> <properties> - <deb.pkg.name>spark-${classifier}</deb.pkg.name> - <deb.install.path>/usr/share/spark-${classifier}</deb.install.path> + <deb.pkg.name>spark</deb.pkg.name> + <deb.install.path>/usr/share/spark</deb.install.path> <deb.user>root</deb.user> </properties> + <dependencies> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-bagel</artifactId> + <version>${project.version}</version> + <scope>runtime</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-repl</artifactId> + <version>${project.version}</version> + <scope>runtime</scope> + </dependency> + </dependencies> + <build> <plugins> <plugin> @@ -44,7 +64,7 @@ <artifactId>maven-shade-plugin</artifactId> <configuration> <shadedArtifactAttached>false</shadedArtifactAttached> - <outputFile>${project.build.directory}/${project.artifactId}-${project.version}-shaded-${classifier}.jar</outputFile> + <outputFile>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</outputFile> <artifactSet> <includes> <include>*:*</include> @@ -86,142 +106,12 @@ <profiles> <profile> - <id>hadoop1</id> - <properties> - <classifier>hadoop1</classifier> - </properties> - <dependencies> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-bagel</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-examples</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-repl</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>runtime</scope> - </dependency> - </dependencies> - </profile> - <profile> - <id>hadoop2</id> - <properties> - <classifier>hadoop2</classifier> - </properties> - <dependencies> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop2</classifier> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-bagel</artifactId> - <version>${project.version}</version> - <classifier>hadoop2</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-examples</artifactId> - <version>${project.version}</version> - <classifier>hadoop2</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-repl</artifactId> - <version>${project.version}</version> - 
<classifier>hadoop2</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>runtime</scope> - </dependency> - </dependencies> - </profile> - <profile> <id>hadoop2-yarn</id> - <properties> - <classifier>hadoop2-yarn</classifier> - </properties> <dependencies> <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-bagel</artifactId> + <groupId>org.apache.spark</groupId> + <artifactId>spark-yarn</artifactId> <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-examples</artifactId> - <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-repl</artifactId> - <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-client</artifactId> - <scope>runtime</scope> </dependency> </dependencies> </profile> @@ -261,7 +151,7 @@ <compression>gzip</compression> <dataSet> <data> - <src>${project.build.directory}/${project.artifactId}-${project.version}-shaded-${classifier}.jar</src> + <src>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</src> <type>file</type> <mapper> <type>perm</type> diff --git a/repl-bin/src/deb/bin/run b/repl-bin/src/deb/bin/run index 0e5100e00b..8b5d8300f2 100755 --- a/repl-bin/src/deb/bin/run +++ b/repl-bin/src/deb/bin/run @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Licensed to the Apache Software Foundation (ASF) under one or more diff --git a/repl-bin/src/deb/bin/spark-executor b/repl-bin/src/deb/bin/spark-executor index d3c1c23413..bcfae22677 100755 --- a/repl-bin/src/deb/bin/spark-executor +++ b/repl-bin/src/deb/bin/spark-executor @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Licensed to the Apache Software Foundation (ASF) under one or more diff --git a/repl-bin/src/deb/bin/spark-shell b/repl-bin/src/deb/bin/spark-shell index 8b258543db..ec7e33e1e3 100755 --- a/repl-bin/src/deb/bin/spark-shell +++ b/repl-bin/src/deb/bin/spark-shell @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Licensed to the Apache Software Foundation (ASF) under one or more diff --git a/repl/pom.xml b/repl/pom.xml index 7d8da03254..a1c87d7618 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -19,17 +19,17 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-parent</artifactId> <version>0.8.0-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-repl</artifactId> <packaging>jar</packaging> <name>Spark Project REPL</name> - <url>http://spark-project.org/</url> + <url>http://spark.incubator.apache.org/</url> <properties> <deb.install.path>/usr/share/spark</deb.install.path> @@ -38,6 +38,23 @@ <dependencies> <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-bagel</artifactId> + <version>${project.version}</version> + <scope>runtime</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-mllib</artifactId> + <version>${project.version}</version> + <scope>runtime</scope> + </dependency> + <dependency> <groupId>org.eclipse.jetty</groupId> <artifactId>jetty-server</artifactId> </dependency> @@ -57,7 +74,6 @@ <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> </dependency> - <dependency> <groupId>org.scalatest</groupId> <artifactId>scalatest_${scala.version}</artifactId> @@ -74,192 +90,57 @@ <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> <plugins> <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <phase>test</phase> + <goals> + <goal>run</goal> + </goals> + <configuration> + <exportAntProperties>true</exportAntProperties> + <tasks> + <property name="spark.classpath" refid="maven.test.classpath"/> + <property environment="env"/> + <fail message="Please set the SCALA_HOME (or SCALA_LIBRARY_PATH if scala is on the path) environment variables and retry."> + <condition> + <not> + <or> + <isset property="env.SCALA_HOME"/> + <isset property="env.SCALA_LIBRARY_PATH"/> + </or> + </not> + </condition> + </fail> + </tasks> + </configuration> + </execution> + </executions> + </plugin> + <plugin> <groupId>org.scalatest</groupId> <artifactId>scalatest-maven-plugin</artifactId> <configuration> <environmentVariables> <SPARK_HOME>${basedir}/..</SPARK_HOME> <SPARK_TESTING>1</SPARK_TESTING> + <SPARK_CLASSPATH>${spark.classpath}</SPARK_CLASSPATH> </environmentVariables> </configuration> </plugin> </plugins> </build> - <profiles> <profile> - <id>hadoop1</id> - <properties> - <classifier>hadoop1</classifier> - </properties> - <dependencies> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-bagel</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-examples</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>provided</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - 
<groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop1</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - <profile> - <id>hadoop2</id> - <properties> - <classifier>hadoop2</classifier> - </properties> - <dependencies> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop2</classifier> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-bagel</artifactId> - <version>${project.version}</version> - <classifier>hadoop2</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-examples</artifactId> - <version>${project.version}</version> - <classifier>hadoop2</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-ipc</artifactId> - <scope>provided</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop2</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - <profile> <id>hadoop2-yarn</id> - <properties> - <classifier>hadoop2-yarn</classifier> - </properties> <dependencies> <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-bagel</artifactId> + <groupId>org.apache.spark</groupId> + <artifactId>spark-yarn</artifactId> <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-examples</artifactId> - <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-streaming</artifactId> - <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - <scope>runtime</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-ipc</artifactId> - <scope>provided</scope> </dependency> </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - 
<artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop2-yarn</classifier> - </configuration> - </plugin> - </plugins> - </build> </profile> </profiles> </project> diff --git a/repl/src/main/scala/spark/repl/ExecutorClassLoader.scala b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala index 274bc585db..3e171849e3 100644 --- a/repl/src/main/scala/spark/repl/ExecutorClassLoader.scala +++ b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.repl +package org.apache.spark.repl import java.io.{ByteArrayOutputStream, InputStream} import java.net.{URI, URL, URLClassLoader, URLEncoder} diff --git a/repl/src/main/scala/spark/repl/Main.scala b/repl/src/main/scala/org/apache/spark/repl/Main.scala index d824d62fd1..17e149f8ab 100644 --- a/repl/src/main/scala/spark/repl/Main.scala +++ b/repl/src/main/scala/org/apache/spark/repl/Main.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.repl +package org.apache.spark.repl import scala.collection.mutable.Set diff --git a/repl/src/main/scala/spark/repl/SparkHelper.scala b/repl/src/main/scala/org/apache/spark/repl/SparkHelper.scala index d8fb7191b4..d8fb7191b4 100644 --- a/repl/src/main/scala/spark/repl/SparkHelper.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkHelper.scala diff --git a/repl/src/main/scala/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala index 59f9d05683..193ccb48ee 100644 --- a/repl/src/main/scala/spark/repl/SparkILoop.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -3,7 +3,7 @@ * @author Alexander Spoon */ -package spark.repl +package org.apache.spark.repl import scala.tools.nsc._ import scala.tools.nsc.interpreter._ @@ -22,8 +22,8 @@ import util.{ ClassPath, Exceptional, stringFromWriter, stringFromStream } import interpreter._ import io.{ File, Sources } -import spark.Logging -import spark.SparkContext +import org.apache.spark.Logging +import org.apache.spark.SparkContext /** The Scala interactive shell. It provides a read-eval-print loop * around the Interpreter class. 
@@ -816,13 +816,13 @@ class SparkILoop(in0: Option[BufferedReader], val out: PrintWriter, val master: def initializeSpark() { intp.beQuietDuring { command(""" - spark.repl.Main.interp.out.println("Creating SparkContext..."); - spark.repl.Main.interp.out.flush(); - @transient val sc = spark.repl.Main.interp.createSparkContext(); - spark.repl.Main.interp.out.println("Spark context available as sc."); - spark.repl.Main.interp.out.flush(); + org.apache.spark.repl.Main.interp.out.println("Creating SparkContext..."); + org.apache.spark.repl.Main.interp.out.flush(); + @transient val sc = org.apache.spark.repl.Main.interp.createSparkContext(); + org.apache.spark.repl.Main.interp.out.println("Spark context available as sc."); + org.apache.spark.repl.Main.interp.out.flush(); """) - command("import spark.SparkContext._") + command("import org.apache.spark.SparkContext._") } echo("Type in expressions to have them evaluated.") echo("Type :help for more information.") @@ -831,6 +831,10 @@ class SparkILoop(in0: Option[BufferedReader], val out: PrintWriter, val master: var sparkContext: SparkContext = null def createSparkContext(): SparkContext = { + val uri = System.getenv("SPARK_EXECUTOR_URI") + if (uri != null) { + System.setProperty("spark.executor.uri", uri) + } val master = this.master match { case Some(m) => m case None => { diff --git a/repl/src/main/scala/spark/repl/SparkIMain.scala b/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala index 43b6a6c950..e6e35c9b5d 100644 --- a/repl/src/main/scala/spark/repl/SparkIMain.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala @@ -3,7 +3,7 @@ * @author Martin Odersky */ -package spark.repl +package org.apache.spark.repl import scala.tools.nsc._ import scala.tools.nsc.interpreter._ @@ -27,9 +27,9 @@ import scala.util.control.Exception.{ ultimately } import scala.reflect.NameTransformer import SparkIMain._ -import spark.HttpServer -import spark.Utils -import spark.SparkEnv +import org.apache.spark.HttpServer +import org.apache.spark.util.Utils +import org.apache.spark.SparkEnv /** An interpreter for Scala code. 
* @@ -883,7 +883,7 @@ class SparkIMain(val settings: Settings, protected val out: PrintWriter) extends val execution = lineManager.set(originalLine) { // MATEI: set the right SparkEnv for our SparkContext, because // this execution will happen in a separate thread - val sc = spark.repl.Main.interp.sparkContext + val sc = org.apache.spark.repl.Main.interp.sparkContext if (sc != null && sc.env != null) SparkEnv.set(sc.env) // Execute the line diff --git a/repl/src/main/scala/spark/repl/SparkISettings.scala b/repl/src/main/scala/org/apache/spark/repl/SparkISettings.scala index 8ebb01d146..605b7b259b 100644 --- a/repl/src/main/scala/spark/repl/SparkISettings.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkISettings.scala @@ -3,7 +3,7 @@ * @author Alexander Spoon */ -package spark.repl +package org.apache.spark.repl import scala.tools.nsc._ import scala.tools.nsc.interpreter._ diff --git a/repl/src/main/scala/spark/repl/SparkImports.scala b/repl/src/main/scala/org/apache/spark/repl/SparkImports.scala index 5caf5ca51a..41a1731d60 100644 --- a/repl/src/main/scala/spark/repl/SparkImports.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkImports.scala @@ -3,7 +3,7 @@ * @author Paul Phillips */ -package spark.repl +package org.apache.spark.repl import scala.tools.nsc._ import scala.tools.nsc.interpreter._ diff --git a/repl/src/main/scala/spark/repl/SparkJLineCompletion.scala b/repl/src/main/scala/org/apache/spark/repl/SparkJLineCompletion.scala index 0069d8b2f4..fdc172d753 100644 --- a/repl/src/main/scala/spark/repl/SparkJLineCompletion.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkJLineCompletion.scala @@ -3,7 +3,7 @@ * @author Paul Phillips */ -package spark.repl +package org.apache.spark.repl import scala.tools.nsc._ import scala.tools.nsc.interpreter._ diff --git a/repl/src/main/scala/spark/repl/SparkJLineReader.scala b/repl/src/main/scala/org/apache/spark/repl/SparkJLineReader.scala index ef6b6e092e..d9e1de105c 100644 --- a/repl/src/main/scala/spark/repl/SparkJLineReader.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkJLineReader.scala @@ -3,7 +3,7 @@ * @author Stepan Koltsov */ -package spark.repl +package org.apache.spark.repl import scala.tools.nsc._ import scala.tools.nsc.interpreter._ diff --git a/repl/src/main/scala/spark/repl/SparkMemberHandlers.scala b/repl/src/main/scala/org/apache/spark/repl/SparkMemberHandlers.scala index 2980dfcd76..a3409bf665 100644 --- a/repl/src/main/scala/spark/repl/SparkMemberHandlers.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkMemberHandlers.scala @@ -3,7 +3,7 @@ * @author Martin Odersky */ -package spark.repl +package org.apache.spark.repl import scala.tools.nsc._ import scala.tools.nsc.interpreter._ diff --git a/repl/src/test/scala/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 80ae605558..8f9b632c0e 100644 --- a/repl/src/test/scala/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.repl +package org.apache.spark.repl import java.io._ import java.net.URLClassLoader @@ -41,10 +41,10 @@ class ReplSuite extends FunSuite { } } val interp = new SparkILoop(in, new PrintWriter(out), master) - spark.repl.Main.interp = interp + org.apache.spark.repl.Main.interp = interp val separator = System.getProperty("path.separator") interp.process(Array("-classpath", paths.mkString(separator))) - spark.repl.Main.interp = null + org.apache.spark.repl.Main.interp = null if (interp.sparkContext != null) { interp.sparkContext.stop() } diff --git a/run-example b/run-example new file mode 100755 index 0000000000..24d83ba5cf --- /dev/null +++ b/run-example @@ -0,0 +1,81 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SCALA_VERSION=2.9.3 + +# Figure out where the Scala framework is installed +FWDIR="$(cd `dirname $0`; pwd)" + +# Export this as SPARK_HOME +export SPARK_HOME="$FWDIR" + +# Load environment variables from conf/spark-env.sh, if it exists +if [ -e $FWDIR/conf/spark-env.sh ] ; then + . $FWDIR/conf/spark-env.sh +fi + +if [ -z "$1" ]; then + echo "Usage: run-example <example-class> [<args>]" >&2 + exit 1 +fi + +# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack +# to avoid the -sources and -doc packages that are built by publish-local. +EXAMPLES_DIR="$FWDIR"/examples +SPARK_EXAMPLES_JAR="" +if [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9T].jar ]; then + # Use the JAR from the SBT build + export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9T].jar` +fi +if [ -e "$EXAMPLES_DIR"/target/spark-examples*[0-9T].jar ]; then + # Use the JAR from the Maven build + # TODO: this also needs to become an assembly! + export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/spark-examples*[0-9T].jar` +fi +if [[ -z $SPARK_EXAMPLES_JAR ]]; then + echo "Failed to find Spark examples assembly in $FWDIR/examples/target" >&2 + echo "You need to build Spark with sbt/sbt assembly before running this program" >&2 + exit 1 +fi + +# Since the examples JAR ideally shouldn't include spark-core (that dependency should be +# "provided"), also add our standard Spark classpath, built using compute-classpath.sh. 
+CLASSPATH=`$FWDIR/bin/compute-classpath.sh` +CLASSPATH="$SPARK_EXAMPLES_JAR:$CLASSPATH" + +# Find java binary +if [ -n "${JAVA_HOME}" ]; then + RUNNER="${JAVA_HOME}/bin/java" +else + if [ `command -v java` ]; then + RUNNER="java" + else + echo "JAVA_HOME is not set" >&2 + exit 1 + fi +fi + +if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then + echo -n "Spark Command: " + echo "$RUNNER" -cp "$CLASSPATH" "$@" + echo "========================================" + echo +fi + +exec "$RUNNER" -cp "$CLASSPATH" "$@" @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Licensed to the Apache Software Foundation (ASF) under one or more @@ -22,7 +22,7 @@ if [ "$MESOS_HOME" != "" ]; then EXTRA_ARGS="-Djava.library.path=$MESOS_HOME/lib/java" fi -export SPARK_HOME=$(cd "$(dirname $0)/.."; pwd) +export SPARK_HOME=$(cd "$(dirname $0)/.." 2>&1 >/dev/null ; pwd) export SPARK_TESTING=1 # To put test classes on classpath -java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=128m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" +java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m $EXTRA_ARGS $SBT_OPTS -jar "$SPARK_HOME"/sbt/sbt-launch-*.jar "$@" @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,6 +17,8 @@ # limitations under the License. # +SCALA_VERSION=2.9.3 + # Figure out where the Scala framework is installed FWDIR="$(cd `dirname $0`; pwd)" @@ -64,48 +66,20 @@ case "$1" in ;; esac -# Figure out whether to run our class with java or with the scala launcher. -# In most cases, we'd prefer to execute our process with java because scala -# creates a shell script as the parent of its Java process, which makes it -# hard to kill the child with stuff like Process.destroy(). However, for -# the Spark shell, the wrapper is necessary to properly reset the terminal -# when we exit, so we allow it to set a variable to launch with scala. -if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then - if [ "$SCALA_HOME" ]; then - RUNNER="${SCALA_HOME}/bin/scala" - else - if [ `command -v scala` ]; then - RUNNER="scala" - else - echo "SCALA_HOME is not set and scala is not in PATH" >&2 - exit 1 - fi - fi +# Find the java binary +if [ -n "${JAVA_HOME}" ]; then + RUNNER="${JAVA_HOME}/bin/java" else - if [ -n "${JAVA_HOME}" ]; then - RUNNER="${JAVA_HOME}/bin/java" + if [ `command -v java` ]; then + RUNNER="java" else - if [ `command -v java` ]; then - RUNNER="java" - else - echo "JAVA_HOME is not set" >&2 - exit 1 - fi - fi - if [[ ! -f "$FWDIR/RELEASE" && -z "$SCALA_LIBRARY_PATH" ]]; then - if [ -z "$SCALA_HOME" ]; then - echo "SCALA_HOME is not set" >&2 - exit 1 - fi - SCALA_LIBRARY_PATH="$SCALA_HOME/lib" + echo "JAVA_HOME is not set" >&2 + exit 1 fi fi -# Figure out how much memory to use per executor and set it as an environment -# variable so that our process sees it and can report it to Mesos -if [ -z "$SPARK_MEM" ] ; then - SPARK_MEM="512m" -fi +# Set SPARK_MEM if it isn't already set since we also use it for this process +SPARK_MEM=${SPARK_MEM:-512m} export SPARK_MEM # Set JAVA_OPTS to be able to load native libraries and to set heap size @@ -120,20 +94,11 @@ export JAVA_OPTS # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala! if [ ! -f "$FWDIR/RELEASE" ]; then - CORE_DIR="$FWDIR/core" - EXAMPLES_DIR="$FWDIR/examples" - REPL_DIR="$FWDIR/repl" - # Exit if the user hasn't compiled Spark - if [ ! 
-e "$CORE_DIR/target" ]; then - echo "Failed to find Spark classes in $CORE_DIR/target" >&2 - echo "You need to compile Spark before running this program" >&2 - exit 1 - fi - - if [[ "$@" = *repl* && ! -e "$REPL_DIR/target" ]]; then - echo "Failed to find Spark classes in $REPL_DIR/target" >&2 - echo "You need to compile Spark repl module before running this program" >&2 + ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null + if [[ $? != 0 ]]; then + echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2 + echo "You need to build Spark with sbt/sbt assembly before running this program" >&2 exit 1 fi fi @@ -142,10 +107,11 @@ fi CLASSPATH=`$FWDIR/bin/compute-classpath.sh` export CLASSPATH -if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then - EXTRA_ARGS="" # Java options will be passed to scala as JAVA_OPTS -else - # The JVM doesn't read JAVA_OPTS by default so we need to pass it in - EXTRA_ARGS="$JAVA_OPTS" +if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then + echo -n "Spark Command: " + echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" + echo "========================================" + echo fi -exec "$RUNNER" -cp "$CLASSPATH" $EXTRA_ARGS "$@" + +exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" diff --git a/spark-executor b/spark-executor index feccbf5cc2..2c07c54843 100755 --- a/spark-executor +++ b/spark-executor @@ -19,4 +19,4 @@ FWDIR="`dirname $0`" echo "Running spark-executor with framework dir = $FWDIR" -exec $FWDIR/run spark.executor.MesosExecutorBackend +exec $FWDIR/spark-class org.apache.spark.executor.MesosExecutorBackend diff --git a/spark-shell b/spark-shell index 5371fc540a..9608bd3f30 100755 --- a/spark-shell +++ b/spark-shell @@ -1,4 +1,4 @@ -#!/bin/bash --posix +#!/usr/bin/env bash # # Licensed to the Apache Software Foundation (ASF) under one or more @@ -25,6 +25,9 @@ # -c <cores> Set the number of cores for REPL to use # +# Enter posix mode for bash +set -o posix + FWDIR="`dirname $0`" for o in "$@"; do @@ -76,7 +79,7 @@ if [[ ! $? ]]; then saved_stty="" fi -$FWDIR/run $OPTIONS spark.repl.Main "$@" +$FWDIR/spark-class $OPTIONS org.apache.spark.repl.Main "$@" # record the exit status lest it be overwritten: # then reenable echo and propagate the code. 
diff --git a/spark-shell.cmd b/spark-shell.cmd index ec65eabb74..b9b4d4bfb2 100644 --- a/spark-shell.cmd +++ b/spark-shell.cmd @@ -19,4 +19,4 @@ rem set FWDIR=%~dp0 set SPARK_LAUNCH_WITH_SCALA=1 -cmd /V /E /C %FWDIR%run2.cmd spark.repl.Main %* +cmd /V /E /C %FWDIR%run2.cmd org.apache.spark.repl.Main %* diff --git a/streaming/pom.xml b/streaming/pom.xml index 7e6b06d772..7bea069b61 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -19,17 +19,17 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-parent</artifactId> <version>0.8.0-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <groupId>org.spark-project</groupId> + <groupId>org.apache.spark</groupId> <artifactId>spark-streaming</artifactId> <packaging>jar</packaging> <name>Spark Project Streaming</name> - <url>http://spark-project.org/</url> + <url>http://spark.incubator.apache.org/</url> <repositories> <!-- A repository in the local filesystem for the Kafka JAR, which we modified for Scala 2.9 --> @@ -41,6 +41,11 @@ <dependencies> <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> <groupId>org.eclipse.jetty</groupId> <artifactId>jetty-server</artifactId> </dependency> @@ -115,103 +120,4 @@ </plugin> </plugins> </build> - - <profiles> - <profile> - <id>hadoop1</id> - <dependencies> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop1</classifier> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>provided</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop1</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - <profile> - <id>hadoop2</id> - <dependencies> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop2</classifier> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>provided</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop2</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.spark-project</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - <classifier>hadoop2-yarn</classifier> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <scope>provided</scope> - </dependency> - 
<dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - <scope>provided</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <configuration> - <classifier>hadoop2-yarn</classifier> - </configuration> - </plugin> - </plugins> - </build> - </profile> - </profiles> </project> diff --git a/streaming/src/main/scala/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index 1e4c1e3742..2d8f072624 100644 --- a/streaming/src/main/scala/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -15,18 +15,19 @@ * limitations under the License. */ -package spark.streaming - -import spark.{Logging, Utils} - -import org.apache.hadoop.fs.{FileUtil, Path} -import org.apache.hadoop.conf.Configuration +package org.apache.spark.streaming import java.io._ -import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} import java.util.concurrent.Executors import java.util.concurrent.RejectedExecutionException +import org.apache.hadoop.fs.Path +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.Logging +import org.apache.spark.io.CompressionCodec + + private[streaming] class Checkpoint(@transient ssc: StreamingContext, val checkpointTime: Time) extends Logging with Serializable { @@ -49,6 +50,7 @@ class Checkpoint(@transient ssc: StreamingContext, val checkpointTime: Time) } } + /** * Convenience class to speed up the writing of graph checkpoint to file */ @@ -66,6 +68,8 @@ class CheckpointWriter(checkpointDir: String) extends Logging { val maxAttempts = 3 val executor = Executors.newFixedThreadPool(1) + private val compressionCodec = CompressionCodec.createCodec() + // Removed code which validates whether there is only one CheckpointWriter per path 'file' since // I did not notice any errors - reintroduce it ? @@ -103,7 +107,7 @@ class CheckpointWriter(checkpointDir: String) extends Logging { def write(checkpoint: Checkpoint) { val bos = new ByteArrayOutputStream() - val zos = new LZFOutputStream(bos) + val zos = compressionCodec.compressedOutputStream(bos) val oos = new ObjectOutputStream(zos) oos.writeObject(checkpoint) oos.close() @@ -137,6 +141,8 @@ object CheckpointReader extends Logging { val fs = new Path(path).getFileSystem(new Configuration()) val attempts = Seq(new Path(path, "graph"), new Path(path, "graph.bk"), new Path(path), new Path(path + ".bk")) + val compressionCodec = CompressionCodec.createCodec() + attempts.foreach(file => { if (fs.exists(file)) { logInfo("Attempting to load checkpoint from file '" + file + "'") @@ -147,7 +153,7 @@ object CheckpointReader extends Logging { // of ObjectInputStream is used to explicitly use the current thread's default class // loader to find and load classes. 
This is a well known Java issue and has popped up
      // in other places (e.g., http://jira.codehaus.org/browse/GROOVY-1627)
-      val zis = new LZFInputStream(fis)
+      val zis = compressionCodec.compressedInputStream(fis)
       val ois = new ObjectInputStreamWithLoader(zis, Thread.currentThread().getContextClassLoader)
       val cp = ois.readObject.asInstanceOf[Checkpoint]
       ois.close()
@@ -170,7 +176,9 @@ object CheckpointReader extends Logging {
 }
 
 private[streaming]
-class ObjectInputStreamWithLoader(inputStream_ : InputStream, loader: ClassLoader) extends ObjectInputStream(inputStream_) {
+class ObjectInputStreamWithLoader(inputStream_ : InputStream, loader: ClassLoader)
+  extends ObjectInputStream(inputStream_) {
+
   override def resolveClass(desc: ObjectStreamClass): Class[_] = {
     try {
       return loader.loadClass(desc.getName())
diff --git a/streaming/src/main/scala/spark/streaming/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStream.scala
index 684d3abb56..80da6bd30b 100644
--- a/streaming/src/main/scala/spark/streaming/DStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/DStream.scala
@@ -15,14 +15,17 @@
  * limitations under the License.
  */
 
-package spark.streaming
+package org.apache.spark.streaming
 
-import spark.streaming.dstream._
+import org.apache.spark.streaming.dstream._
 import StreamingContext._
+import org.apache.spark.util.MetadataCleaner
+
 //import Time._
 
-import spark.{RDD, Logging}
-import spark.storage.StorageLevel
+import org.apache.spark.Logging
+import org.apache.spark.rdd.RDD
+import org.apache.spark.storage.StorageLevel
 
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.HashMap
@@ -34,7 +37,7 @@ import org.apache.hadoop.conf.Configuration
 
 /**
  * A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous
- * sequence of RDDs (of the same type) representing a continuous stream of data (see [[spark.RDD]]
+ * sequence of RDDs (of the same type) representing a continuous stream of data (see [[org.apache.spark.RDD]]
 * for more details on RDDs). DStreams can either be created from live data (such as data from
 * HDFS, Kafka or Flume) or they can be generated by transforming existing DStreams using operations
 * such as `map`, `window` and `reduceByKeyAndWindow`. While a Spark Streaming program is running, each
@@ -42,7 +45,7 @@ import org.apache.hadoop.conf.Configuration
 * by a parent DStream.
 *
 * This class contains the basic operations available on all DStreams, such as `map`, `filter` and
- * `window`. In addition, [[spark.streaming.PairDStreamFunctions]] contains operations available
+ * `window`. In addition, [[org.apache.spark.streaming.PairDStreamFunctions]] contains operations available
 * only on DStreams of key-value pairs, such as `groupByKeyAndWindow` and `join`. These operations
 * are automatically available on any DStream of the right type (e.g., DStream[(Int, Int)] through
 * implicit conversions when `spark.streaming.StreamingContext._` is imported.
@@ -209,7 +212,7 @@ abstract class DStream[T: ClassManifest] (
 checkpointDuration + "). Please set it to higher than " + checkpointDuration + "."
) - val metadataCleanerDelay = spark.util.MetadataCleaner.getDelaySeconds + val metadataCleanerDelay = MetadataCleaner.getDelaySeconds logInfo("metadataCleanupDelay = " + metadataCleanerDelay) assert( metadataCleanerDelay < 0 || rememberDuration.milliseconds < metadataCleanerDelay * 1000, diff --git a/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamCheckpointData.scala index 399ca1c63d..58a0da2870 100644 --- a/streaming/src/main/scala/spark/streaming/DStreamCheckpointData.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamCheckpointData.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming import org.apache.hadoop.fs.Path import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.conf.Configuration import collection.mutable.HashMap -import spark.Logging +import org.apache.spark.Logging diff --git a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala index c09a332d44..b9a58fded6 100644 --- a/streaming/src/main/scala/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming import dstream.InputDStream import java.io.{ObjectInputStream, IOException, ObjectOutputStream} import collection.mutable.ArrayBuffer -import spark.Logging +import org.apache.spark.Logging final private[streaming] class DStreamGraph extends Serializable with Logging { initLogging() diff --git a/streaming/src/main/scala/spark/streaming/Duration.scala b/streaming/src/main/scala/org/apache/spark/streaming/Duration.scala index 12a14e233d..6bf275f5af 100644 --- a/streaming/src/main/scala/spark/streaming/Duration.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Duration.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming -import spark.Utils +import org.apache.spark.util.Utils case class Duration (private val millis: Long) { @@ -57,7 +57,7 @@ case class Duration (private val millis: Long) { } /** - * Helper object that creates instance of [[spark.streaming.Duration]] representing + * Helper object that creates instance of [[org.apache.spark.streaming.Duration]] representing * a given number of milliseconds. */ object Milliseconds { @@ -65,7 +65,7 @@ object Milliseconds { } /** - * Helper object that creates instance of [[spark.streaming.Duration]] representing + * Helper object that creates instance of [[org.apache.spark.streaming.Duration]] representing * a given number of seconds. */ object Seconds { @@ -73,7 +73,7 @@ object Seconds { } /** - * Helper object that creates instance of [[spark.streaming.Duration]] representing + * Helper object that creates instance of [[org.apache.spark.streaming.Duration]] representing * a given number of minutes. */ object Minutes { diff --git a/streaming/src/main/scala/spark/streaming/Interval.scala b/streaming/src/main/scala/org/apache/spark/streaming/Interval.scala index b30cd969e9..04c994c136 100644 --- a/streaming/src/main/scala/spark/streaming/Interval.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Interval.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.streaming +package org.apache.spark.streaming private[streaming] class Interval(val beginTime: Time, val endTime: Time) { diff --git a/streaming/src/main/scala/spark/streaming/Job.scala b/streaming/src/main/scala/org/apache/spark/streaming/Job.scala index ceb3f92b65..2128b7c7a6 100644 --- a/streaming/src/main/scala/spark/streaming/Job.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Job.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming import java.util.concurrent.atomic.AtomicLong diff --git a/streaming/src/main/scala/spark/streaming/JobManager.scala b/streaming/src/main/scala/org/apache/spark/streaming/JobManager.scala index a31230689f..5233129506 100644 --- a/streaming/src/main/scala/spark/streaming/JobManager.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/JobManager.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming -import spark.Logging -import spark.SparkEnv +import org.apache.spark.Logging +import org.apache.spark.SparkEnv import java.util.concurrent.Executors import collection.mutable.HashMap import collection.mutable.ArrayBuffer diff --git a/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/NetworkInputTracker.scala index d4cf2e568c..aae79a4e6f 100644 --- a/streaming/src/main/scala/spark/streaming/NetworkInputTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/NetworkInputTracker.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming -import spark.streaming.dstream.{NetworkInputDStream, NetworkReceiver} -import spark.streaming.dstream.{StopReceiver, ReportBlock, ReportError} -import spark.Logging -import spark.SparkEnv -import spark.SparkContext._ +import org.apache.spark.streaming.dstream.{NetworkInputDStream, NetworkReceiver} +import org.apache.spark.streaming.dstream.{StopReceiver, ReportBlock, ReportError} +import org.apache.spark.Logging +import org.apache.spark.SparkEnv +import org.apache.spark.SparkContext._ import scala.collection.mutable.HashMap import scala.collection.mutable.Queue diff --git a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/PairDStreamFunctions.scala index 47bf07bee1..757bc98981 100644 --- a/streaming/src/main/scala/spark/streaming/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/PairDStreamFunctions.scala @@ -15,16 +15,17 @@ * limitations under the License. 
*/ -package spark.streaming +package org.apache.spark.streaming -import spark.streaming.StreamingContext._ -import spark.streaming.dstream.{ReducedWindowedDStream, StateDStream} -import spark.streaming.dstream.{CoGroupedDStream, ShuffledDStream} -import spark.streaming.dstream.{MapValuedDStream, FlatMapValuedDStream} +import org.apache.spark.streaming.StreamingContext._ +import org.apache.spark.streaming.dstream.{ReducedWindowedDStream, StateDStream} +import org.apache.spark.streaming.dstream.{CoGroupedDStream, ShuffledDStream} +import org.apache.spark.streaming.dstream.{MapValuedDStream, FlatMapValuedDStream} -import spark.{Manifests, RDD, Partitioner, HashPartitioner} -import spark.SparkContext._ -import spark.storage.StorageLevel +import org.apache.spark.{Partitioner, HashPartitioner} +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.{Manifests, RDD, PairRDDFunctions} +import org.apache.spark.storage.StorageLevel import scala.collection.mutable.ArrayBuffer @@ -60,7 +61,7 @@ extends Serializable { } /** - * Return a new DStream by applying `groupByKey` on each RDD. The supplied [[spark.Partitioner]] + * Return a new DStream by applying `groupByKey` on each RDD. The supplied [[org.apache.spark.Partitioner]] * is used to control the partitioning of each RDD. */ def groupByKey(partitioner: Partitioner): DStream[(K, Seq[V])] = { @@ -91,7 +92,7 @@ extends Serializable { /** * Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are - * merged using the supplied reduce function. [[spark.Partitioner]] is used to control the + * merged using the supplied reduce function. [[org.apache.spark.Partitioner]] is used to control the * partitioning of each RDD. */ def reduceByKey(reduceFunc: (V, V) => V, partitioner: Partitioner): DStream[(K, V)] = { @@ -101,8 +102,8 @@ extends Serializable { /** * Combine elements of each key in DStream's RDDs using custom functions. This is similar to the - * combineByKey for RDDs. Please refer to combineByKey in [[spark.PairRDDFunctions]] for more - * information. + * combineByKey for RDDs. Please refer to combineByKey in + * [[org.apache.spark.rdd.PairRDDFunctions]] for more information. */ def combineByKey[C: ClassManifest]( createCombiner: V => C, @@ -360,7 +361,7 @@ extends Serializable { /** * Create a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of the key. - * [[spark.Partitioner]] is used to control the partitioning of each RDD. + * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD. * @param updateFunc State update function. If `this` function returns None, then * corresponding state key-value pair will be eliminated. * @param partitioner Partitioner for controlling the partitioning of each RDD in the new DStream. @@ -379,7 +380,7 @@ extends Serializable { /** * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of each key. - * [[spark.Paxrtitioner]] is used to control the partitioning of each RDD. + * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD. * @param updateFunc State update function. If `this` function returns None, then * corresponding state key-value pair will be eliminated. 
Note that
 * this function may generate a different tuple with a different key
diff --git a/streaming/src/main/scala/spark/streaming/Scheduler.scala b/streaming/src/main/scala/org/apache/spark/streaming/Scheduler.scala
index 252cc2a303..ed892e33e6 100644
--- a/streaming/src/main/scala/spark/streaming/Scheduler.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Scheduler.scala
@@ -15,11 +15,11 @@
  * limitations under the License.
  */
 
-package spark.streaming
+package org.apache.spark.streaming
 
 import util.{ManualClock, RecurringTimer, Clock}
-import spark.SparkEnv
-import spark.Logging
+import org.apache.spark.SparkEnv
+import org.apache.spark.Logging
 
 private[streaming]
 class Scheduler(ssc: StreamingContext) extends Logging {
@@ -34,7 +34,8 @@ class Scheduler(ssc: StreamingContext) extends Logging {
     null
   }
 
-  val clockClass = System.getProperty("spark.streaming.clock", "spark.streaming.util.SystemClock")
+  val clockClass = System.getProperty(
+    "spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock")
   val clock = Class.forName(clockClass).newInstance().asInstanceOf[Clock]
   val timer = new RecurringTimer(clock, ssc.graph.batchDuration.milliseconds,
     longTime => generateJobs(new Time(longTime)))
diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index ffd656227d..878725c705 100644
--- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -15,21 +15,22 @@
  * limitations under the License.
  */
 
-package spark.streaming
+package org.apache.spark.streaming
 
 import akka.actor.Props
 import akka.actor.SupervisorStrategy
 import akka.zeromq.Subscribe
 
-import spark.streaming.dstream._
+import org.apache.spark.streaming.dstream._
 
-import spark._
-import spark.streaming.receivers.ActorReceiver
-import spark.streaming.receivers.ReceiverSupervisorStrategy
-import spark.streaming.receivers.ZeroMQReceiver
-import spark.storage.StorageLevel
-import spark.util.MetadataCleaner
-import spark.streaming.receivers.ActorReceiver
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.receivers.ActorReceiver
+import org.apache.spark.streaming.receivers.ReceiverSupervisorStrategy
+import org.apache.spark.streaming.receivers.ZeroMQReceiver
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.MetadataCleaner
+import org.apache.spark.streaming.receivers.ActorReceiver
 
 import scala.collection.mutable.Queue
 import scala.collection.Map
@@ -183,6 +184,7 @@ class StreamingContext private (
 
   /**
    * Create an input stream with any arbitrary user implemented network receiver.
+   * Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html
    * @param receiver Custom implementation of NetworkReceiver
    */
   def networkStream[T: ClassManifest](
@@ -195,6 +197,7 @@ class StreamingContext private (
 
   /**
    * Create an input stream with any arbitrary user implemented actor receiver.
+   * Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html
    * @param props Props object defining creation of the actor
    * @param name Name of the actor
    * @param storageLevel RDD storage level. Defaults to memory-only.
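The updateStateByKey overloads touched above take an explicit Partitioner to control how the state RDDs are partitioned. A minimal sketch of that usage under the renamed packages; the host, port, batch interval and parallelism are assumed values, not taken from this patch:

    import org.apache.spark.HashPartitioner
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.StreamingContext._  // pair-DStream implicits

    val ssc = new StreamingContext("local[2]", "StatefulWordCount", Seconds(1))
    ssc.checkpoint("checkpoint")  // stateful DStreams require a checkpoint directory

    val words = ssc.socketTextStream("localhost", 9999).flatMap(_.split(" "))

    // Per the docs above: returning None from the update function
    // eliminates the corresponding state key-value pair.
    val updateFunc = (values: Seq[Int], state: Option[Int]) =>
      Some(values.sum + state.getOrElse(0))

    val counts = words.map((_, 1)).updateStateByKey[Int](updateFunc, new HashPartitioner(2))
    counts.print()
    ssc.start()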
diff --git a/streaming/src/main/scala/spark/streaming/Time.scala b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala
index ad5eab9dd2..2678334f53 100644
--- a/streaming/src/main/scala/spark/streaming/Time.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package spark.streaming
+package org.apache.spark.streaming
 
 /**
  * This is a simple class that represents an absolute instant of time.
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala
index 7dcb1d713d..d1932b6b05 100644
--- a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala
@@ -15,17 +15,17 @@
  * limitations under the License.
  */
 
-package spark.streaming.api.java
+package org.apache.spark.streaming.api.java
 
-import spark.streaming.{Duration, Time, DStream}
-import spark.api.java.function.{Function => JFunction}
-import spark.api.java.JavaRDD
-import spark.storage.StorageLevel
-import spark.RDD
+import org.apache.spark.streaming.{Duration, Time, DStream}
+import org.apache.spark.api.java.function.{Function => JFunction}
+import org.apache.spark.api.java.JavaRDD
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.rdd.RDD
 
 /**
  * A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous
- * sequence of RDDs (of the same type) representing a continuous stream of data (see [[spark.RDD]]
+ * sequence of RDDs (of the same type) representing a continuous stream of data (see [[org.apache.spark.RDD]]
 * for more details on RDDs). DStreams can either be created from live data (such as data from
 * HDFS, Kafka or Flume) or they can be generated by transforming existing DStreams using operations
 * such as `map`, `window` and `reduceByKeyAndWindow`. While a Spark Streaming program is running, each
@@ -33,7 +33,7 @@ import spark.RDD
 * by a parent DStream.
 *
 * This class contains the basic operations available on all DStreams, such as `map`, `filter` and
- * `window`. In addition, [[spark.streaming.api.java.JavaPairDStream]] contains operations available
+ * `window`. In addition, [[org.apache.spark.streaming.api.java.JavaPairDStream]] contains operations available
 * only on DStreams of key-value pairs, such as `groupByKeyAndWindow` and `join`.
 *
 * DStreams internally are characterized by a few basic properties:
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala
index 3ab5c1fdde..459695b7ca 100644
--- a/streaming/src/main/scala/spark/streaming/api/java/JavaDStreamLike.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala
@@ -15,18 +15,18 @@
 * limitations under the License.
*/ -package spark.streaming.api.java +package org.apache.spark.streaming.api.java import java.util.{List => JList} import java.lang.{Long => JLong} import scala.collection.JavaConversions._ -import spark.streaming._ -import spark.api.java.{JavaPairRDD, JavaRDDLike, JavaRDD} -import spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _} +import org.apache.spark.streaming._ +import org.apache.spark.api.java.{JavaPairRDD, JavaRDDLike, JavaRDD} +import org.apache.spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _} import java.util -import spark.RDD +import org.apache.spark.rdd.RDD import JavaDStream._ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T, R]] diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index ccd15563b0..978fca33ad 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -15,24 +15,25 @@ * limitations under the License. */ -package spark.streaming.api.java +package org.apache.spark.streaming.api.java import java.util.{List => JList} import java.lang.{Long => JLong} import scala.collection.JavaConversions._ -import spark.streaming._ -import spark.streaming.StreamingContext._ -import spark.api.java.function.{Function => JFunction, Function2 => JFunction2} -import spark.{RDD, Partitioner} +import org.apache.spark.streaming._ +import org.apache.spark.streaming.StreamingContext._ +import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2} +import org.apache.spark.Partitioner import org.apache.hadoop.mapred.{JobConf, OutputFormat} import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.hadoop.conf.Configuration -import spark.api.java.{JavaRDD, JavaPairRDD} -import spark.storage.StorageLevel +import org.apache.spark.api.java.{JavaUtils, JavaRDD, JavaPairRDD} +import org.apache.spark.storage.StorageLevel import com.google.common.base.Optional -import spark.RDD +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.PairRDDFunctions class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( implicit val kManifiest: ClassManifest[K], @@ -114,7 +115,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( /** * Return a new DStream by applying `groupByKey` on each RDD of `this` DStream. * Therefore, the values for each key in `this` DStream's RDDs are grouped into a - * single sequence to generate the RDDs of the new DStream. [[spark.Partitioner]] + * single sequence to generate the RDDs of the new DStream. [[org.apache.spark.Partitioner]] * is used to control the partitioning of each RDD. */ def groupByKey(partitioner: Partitioner): JavaPairDStream[K, JList[V]] = @@ -138,7 +139,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( /** * Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are - * merged using the supplied reduce function. [[spark.Partitioner]] is used to control the + * merged using the supplied reduce function. [[org.apache.spark.Partitioner]] is used to control the * partitioning of each RDD. */ def reduceByKey(func: JFunction2[V, V, V], partitioner: Partitioner): JavaPairDStream[K, V] = { @@ -147,7 +148,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( /** * Combine elements of each key in DStream's RDDs using custom function. 
This is similar to the - * combineByKey for RDDs. Please refer to combineByKey in [[spark.PairRDDFunctions]] for more + * combineByKey for RDDs. Please refer to combineByKey in [[PairRDDFunctions]] for more * information. */ def combineByKey[C](createCombiner: JFunction[V, C], @@ -401,10 +402,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( (Seq[V], Option[S]) => Option[S] = { val scalaFunc: (Seq[V], Option[S]) => Option[S] = (values, state) => { val list: JList[V] = values - val scalaState: Optional[S] = state match { - case Some(s) => Optional.of(s) - case _ => Optional.absent() - } + val scalaState: Optional[S] = JavaUtils.optionToOptional(state) val result: Optional[S] = in.apply(list, scalaState) result.isPresent match { case true => Some(result.get()) @@ -448,7 +446,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( /** * Create a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of the key. - * [[spark.Partitioner]] is used to control the partitioning of each RDD. + * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD. * @param updateFunc State update function. If `this` function returns None, then * corresponding state key-value pair will be eliminated. * @param partitioner Partitioner for controlling the partitioning of each RDD in the new DStream. diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index b7720ad0ea..54ba3e6025 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -15,25 +15,29 @@ * limitations under the License. */ -package spark.streaming.api.java - -import spark.streaming._ -import receivers.{ActorReceiver, ReceiverSupervisorStrategy} -import spark.streaming.dstream._ -import spark.storage.StorageLevel -import spark.api.java.function.{Function => JFunction, Function2 => JFunction2} -import spark.api.java.{JavaSparkContext, JavaRDD} +package org.apache.spark.streaming.api.java + +import java.lang.{Long => JLong, Integer => JInt} +import java.io.InputStream +import java.util.{Map => JMap} + +import scala.collection.JavaConversions._ + import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import twitter4j.Status import akka.actor.Props import akka.actor.SupervisorStrategy import akka.zeromq.Subscribe -import scala.collection.JavaConversions._ -import java.lang.{Long => JLong, Integer => JInt} -import java.io.InputStream -import java.util.{Map => JMap} import twitter4j.auth.Authorization +import org.apache.spark.rdd.RDD +import org.apache.spark.storage.StorageLevel +import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2} +import org.apache.spark.api.java.{JavaSparkContext, JavaRDD} +import org.apache.spark.streaming._ +import org.apache.spark.streaming.dstream._ +import org.apache.spark.streaming.receivers.{ActorReceiver, ReceiverSupervisorStrategy} + /** * A StreamingContext is the main entry point for Spark Streaming functionality. 
Besides the basic * information (such as, cluster URL and job name) to internally create a SparkContext, it provides @@ -537,7 +541,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { def queueStream[T](queue: java.util.Queue[JavaRDD[T]]): JavaDStream[T] = { implicit val cm: ClassManifest[T] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] - val sQueue = new scala.collection.mutable.Queue[spark.RDD[T]] + val sQueue = new scala.collection.mutable.Queue[RDD[T]] sQueue.enqueue(queue.map(_.rdd).toSeq: _*) ssc.queueStream(sQueue) } @@ -554,7 +558,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { def queueStream[T](queue: java.util.Queue[JavaRDD[T]], oneAtATime: Boolean): JavaDStream[T] = { implicit val cm: ClassManifest[T] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] - val sQueue = new scala.collection.mutable.Queue[spark.RDD[T]] + val sQueue = new scala.collection.mutable.Queue[RDD[T]] sQueue.enqueue(queue.map(_.rdd).toSeq: _*) ssc.queueStream(sQueue, oneAtATime) } @@ -575,7 +579,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { defaultRDD: JavaRDD[T]): JavaDStream[T] = { implicit val cm: ClassManifest[T] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] - val sQueue = new scala.collection.mutable.Queue[spark.RDD[T]] + val sQueue = new scala.collection.mutable.Queue[RDD[T]] sQueue.enqueue(queue.map(_.rdd).toSeq: _*) ssc.queueStream(sQueue, oneAtATime, defaultRDD.rdd) } diff --git a/streaming/src/main/scala/spark/streaming/dstream/CoGroupedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/CoGroupedDStream.scala index 99553d295d..4eddc755b9 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/CoGroupedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/CoGroupedDStream.scala @@ -15,11 +15,12 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.{RDD, Partitioner} -import spark.rdd.CoGroupedRDD -import spark.streaming.{Time, DStream, Duration} +import org.apache.spark.Partitioner +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.CoGroupedRDD +import org.apache.spark.streaming.{Time, DStream, Duration} private[streaming] class CoGroupedDStream[K : ClassManifest]( diff --git a/streaming/src/main/scala/spark/streaming/dstream/ConstantInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala index 095137092a..a9a05c9981 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/ConstantInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.RDD -import spark.streaming.{Time, StreamingContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming.{Time, StreamingContext} /** * An input stream that always returns the same RDD on each timestep. Useful for testing. diff --git a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala index de0536125d..fea0573b77 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/FileInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala @@ -15,11 +15,11 @@ * limitations under the License. 
*/ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.RDD -import spark.rdd.UnionRDD -import spark.streaming.{DStreamCheckpointData, StreamingContext, Time} +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.UnionRDD +import org.apache.spark.streaming.{DStreamCheckpointData, StreamingContext, Time} import org.apache.hadoop.fs.{FileSystem, Path, PathFilter} import org.apache.hadoop.conf.Configuration diff --git a/streaming/src/main/scala/spark/streaming/dstream/FilteredDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala index 9d8c5c3175..91ee2c1a36 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/FilteredDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.{Duration, DStream, Time} -import spark.RDD +import org.apache.spark.streaming.{Duration, DStream, Time} +import org.apache.spark.rdd.RDD private[streaming] class FilteredDStream[T: ClassManifest]( diff --git a/streaming/src/main/scala/spark/streaming/dstream/FlatMapValuedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala index 78d7117f0f..ca7d7ca49e 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/FlatMapValuedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.{Duration, DStream, Time} -import spark.RDD -import spark.SparkContext._ +import org.apache.spark.streaming.{Duration, DStream, Time} +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ private[streaming] class FlatMapValuedDStream[K: ClassManifest, V: ClassManifest, U: ClassManifest]( diff --git a/streaming/src/main/scala/spark/streaming/dstream/FlatMappedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala index d13bebb10f..b37966f9a7 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/FlatMappedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.{Duration, DStream, Time} -import spark.RDD +import org.apache.spark.streaming.{Duration, DStream, Time} +import org.apache.spark.rdd.RDD private[streaming] class FlatMappedDStream[T: ClassManifest, U: ClassManifest]( diff --git a/streaming/src/main/scala/spark/streaming/dstream/FlumeInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlumeInputDStream.scala index 4906f503c2..18de772946 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/FlumeInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlumeInputDStream.scala @@ -15,12 +15,13 @@ * limitations under the License. 
*/ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.StreamingContext +import java.net.InetSocketAddress +import java.io.{ObjectInput, ObjectOutput, Externalizable} +import java.nio.ByteBuffer -import spark.Utils -import spark.storage.StorageLevel +import scala.collection.JavaConversions._ import org.apache.flume.source.avro.AvroSourceProtocol import org.apache.flume.source.avro.AvroFlumeEvent @@ -28,11 +29,9 @@ import org.apache.flume.source.avro.Status import org.apache.avro.ipc.specific.SpecificResponder import org.apache.avro.ipc.NettyServer -import scala.collection.JavaConversions._ - -import java.net.InetSocketAddress -import java.io.{ObjectInput, ObjectOutput, Externalizable} -import java.nio.ByteBuffer +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.util.Utils +import org.apache.spark.storage.StorageLevel private[streaming] class FlumeInputDStream[T: ClassManifest]( diff --git a/streaming/src/main/scala/spark/streaming/dstream/ForEachDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala index 7df537eb56..e21bac4602 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/ForEachDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.RDD -import spark.streaming.{Duration, DStream, Job, Time} +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming.{Duration, DStream, Job, Time} private[streaming] class ForEachDStream[T: ClassManifest] ( diff --git a/streaming/src/main/scala/spark/streaming/dstream/GlommedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala index 06fda6fe8e..4294b07d91 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/GlommedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.{Duration, DStream, Time} -import spark.RDD +import org.apache.spark.streaming.{Duration, DStream, Time} +import org.apache.spark.rdd.RDD private[streaming] class GlommedDStream[T: ClassManifest](parent: DStream[T]) diff --git a/streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala index 4dbdec459d..674b27118c 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/InputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.{Time, Duration, StreamingContext, DStream} +import org.apache.spark.streaming.{Time, Duration, StreamingContext, DStream} /** * This is the abstract base class for all input streams. 
This class provides two methods
diff --git a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/KafkaInputDStream.scala
index 6ee588af15..51e913675d 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/KafkaInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/KafkaInputDStream.scala
@@ -15,11 +15,11 @@
  * limitations under the License.
  */
 
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
 
-import spark.Logging
-import spark.storage.StorageLevel
-import spark.streaming.{Time, DStreamCheckpointData, StreamingContext}
+import org.apache.spark.Logging
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{Time, DStreamCheckpointData, StreamingContext}
 
 import java.util.Properties
 import java.util.concurrent.Executors
diff --git a/streaming/src/main/scala/spark/streaming/dstream/MapPartitionedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala
index af41a1b9ac..5329601a6f 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/MapPartitionedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala
@@ -15,10 +15,10 @@
  * limitations under the License.
  */
 
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
 
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
 
 private[streaming]
 class MapPartitionedDStream[T: ClassManifest, U: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/MapValuedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala
index 8d8a6161c6..8290df90a2 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/MapValuedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala
@@ -15,11 +15,11 @@
  * limitations under the License.
  */
 
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
 
-import spark.streaming.{Duration, DStream, Time}
-import spark.RDD
-import spark.SparkContext._
+import org.apache.spark.streaming.{Duration, DStream, Time}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext._
 
 private[streaming]
 class MapValuedDStream[K: ClassManifest, V: ClassManifest, U: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/MappedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala
index 3fda84a38a..b1682afea3 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/MappedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala
@@ -15,10 +15,10 @@
 * limitations under the License.
*/ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.{Duration, DStream, Time} -import spark.RDD +import org.apache.spark.streaming.{Duration, DStream, Time} +import org.apache.spark.rdd.RDD private[streaming] class MappedDStream[T: ClassManifest, U: ClassManifest] ( diff --git a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala index 344b41c4d0..31f9891560 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala @@ -15,30 +15,29 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.{Time, StreamingContext, AddBlocks, RegisterReceiver, DeregisterReceiver} - -import spark.{Logging, SparkEnv, RDD} -import spark.rdd.BlockRDD -import spark.storage.StorageLevel +import java.util.concurrent.ArrayBlockingQueue +import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer -import java.nio.ByteBuffer - import akka.actor.{Props, Actor} import akka.pattern.ask import akka.dispatch.Await import akka.util.duration._ -import spark.streaming.util.{RecurringTimer, SystemClock} -import java.util.concurrent.ArrayBlockingQueue + +import org.apache.spark.streaming.util.{RecurringTimer, SystemClock} +import org.apache.spark.streaming._ +import org.apache.spark.{Logging, SparkEnv} +import org.apache.spark.rdd.{RDD, BlockRDD} +import org.apache.spark.storage.StorageLevel /** * Abstract class for defining any InputDStream that has to start a receiver on worker * nodes to receive external data. Specific implementations of NetworkInputDStream must * define the getReceiver() function that gets the receiver object of type - * [[spark.streaming.dstream.NetworkReceiver]] that will be sent to the workers to receive + * [[org.apache.spark.streaming.dstream.NetworkReceiver]] that will be sent to the workers to receive * data. * @param ssc_ Streaming context that will execute this input stream * @tparam T Class type of the object of this stream @@ -83,7 +82,7 @@ private[streaming] case class ReportError(msg: String) extends NetworkReceiverMe /** * Abstract class of a receiver that can be run on worker nodes to receive external data. See - * [[spark.streaming.dstream.NetworkInputDStream]] for an explanation. + * [[org.apache.spark.streaming.dstream.NetworkInputDStream]] for an explanation. */ abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Logging { @@ -145,8 +144,8 @@ abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Log } /** - * Stops the receiver and reports to exception to the tracker. - * This should be called whenever an exception has happened on any thread + * Stops the receiver and reports exception to the tracker. + * This should be called whenever an exception is to be handled on any thread * of the receiver. */ protected def stopOnError(e: Exception) { @@ -202,7 +201,7 @@ abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Log } /** - * Batches objects created by a [[spark.streaming.NetworkReceiver]] and puts them into + * Batches objects created by a [[org.apache.spark.streaming.dstream.NetworkReceiver]] and puts them into * appropriately named blocks at regular intervals. 
This class starts two threads,
 * one to periodically start a new batch and prepare the previous batch as a block,
 * the other to push the blocks into the block manager.
diff --git a/streaming/src/main/scala/spark/streaming/dstream/PluggableInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala
index 33f7cd063f..15782f5c11 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/PluggableInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala
@@ -15,9 +15,9 @@
  * limitations under the License.
  */
 
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
 
-import spark.streaming.StreamingContext
+import org.apache.spark.streaming.StreamingContext
 
 private[streaming]
 class PluggableInputDStream[T: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/QueueInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala
index b269061b73..7d9f3521b1 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/QueueInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala
@@ -15,14 +15,14 @@
  * limitations under the License.
  */
 
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
 
-import spark.RDD
-import spark.rdd.UnionRDD
+import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.UnionRDD
 
 import scala.collection.mutable.Queue
 import scala.collection.mutable.ArrayBuffer
 
-import spark.streaming.{Time, StreamingContext}
+import org.apache.spark.streaming.{Time, StreamingContext}
 
 private[streaming]
 class QueueInputDStream[T: ClassManifest](
diff --git a/streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala
index 236f74f575..c91f12ecd7 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/RawInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala
@@ -15,11 +15,11 @@
  * limitations under the License.
  */
 
-package spark.streaming.dstream
+package org.apache.spark.streaming.dstream
 
-import spark.Logging
-import spark.storage.StorageLevel
-import spark.streaming.StreamingContext
+import org.apache.spark.Logging
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.StreamingContext
 
 import java.net.InetSocketAddress
 import java.nio.ByteBuffer
diff --git a/streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala
index 96260501ab..b88a4db959 100644
--- a/streaming/src/main/scala/spark/streaming/dstream/ReducedWindowedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala
@@ -15,18 +15,18 @@
 * limitations under the License.
*/ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.StreamingContext._ +import org.apache.spark.streaming.StreamingContext._ -import spark.RDD -import spark.rdd.{CoGroupedRDD, MapPartitionsRDD} -import spark.Partitioner -import spark.SparkContext._ -import spark.storage.StorageLevel +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{CoGroupedRDD, MapPartitionsRDD} +import org.apache.spark.Partitioner +import org.apache.spark.SparkContext._ +import org.apache.spark.storage.StorageLevel import scala.collection.mutable.ArrayBuffer -import spark.streaming.{Duration, Interval, Time, DStream} +import org.apache.spark.streaming.{Duration, Interval, Time, DStream} private[streaming] class ReducedWindowedDStream[K: ClassManifest, V: ClassManifest]( diff --git a/streaming/src/main/scala/spark/streaming/dstream/ShuffledDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala index 83b57b27f7..a95e66d761 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/ShuffledDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala @@ -15,11 +15,12 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.{RDD, Partitioner} -import spark.SparkContext._ -import spark.streaming.{Duration, DStream, Time} +import org.apache.spark.Partitioner +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ +import org.apache.spark.streaming.{Duration, DStream, Time} private[streaming] class ShuffledDStream[K: ClassManifest, V: ClassManifest, C: ClassManifest]( diff --git a/streaming/src/main/scala/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala index 5877b10e0e..e2539c7396 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/SocketInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.StreamingContext -import spark.storage.StorageLevel -import spark.util.NextIterator +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.NextIterator import java.io._ import java.net.Socket diff --git a/streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala index 4b46613d5e..362a6bf4cc 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/StateDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala @@ -15,13 +15,13 @@ * limitations under the License. 
*/ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.RDD -import spark.Partitioner -import spark.SparkContext._ -import spark.storage.StorageLevel -import spark.streaming.{Duration, Time, DStream} +import org.apache.spark.rdd.RDD +import org.apache.spark.Partitioner +import org.apache.spark.SparkContext._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.{Duration, Time, DStream} private[streaming] class StateDStream[K: ClassManifest, V: ClassManifest, S: ClassManifest]( diff --git a/streaming/src/main/scala/spark/streaming/dstream/TransformedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala index e7fbc5bbcf..60485adef9 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/TransformedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.RDD -import spark.streaming.{Duration, DStream, Time} +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming.{Duration, DStream, Time} private[streaming] class TransformedDStream[T: ClassManifest, U: ClassManifest] ( diff --git a/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TwitterInputDStream.scala index f09a8b9f90..387e15b0e6 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/TwitterInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TwitterInputDStream.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark._ -import spark.streaming._ +import org.apache.spark._ +import org.apache.spark.streaming._ import storage.StorageLevel import twitter4j._ import twitter4j.auth.Authorization diff --git a/streaming/src/main/scala/spark/streaming/dstream/UnionDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala index 3eaa9a7e7f..c696bb70a8 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/UnionDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.streaming.{Duration, DStream, Time} -import spark.RDD +import org.apache.spark.streaming.{Duration, DStream, Time} +import org.apache.spark.rdd.RDD import collection.mutable.ArrayBuffer -import spark.rdd.UnionRDD +import org.apache.spark.rdd.UnionRDD private[streaming] class UnionDStream[T: ClassManifest](parents: Array[DStream[T]]) diff --git a/streaming/src/main/scala/spark/streaming/dstream/WindowedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala index fd24d61730..3c57294269 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/WindowedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala @@ -15,12 +15,12 @@ * limitations under the License. 
*/ -package spark.streaming.dstream +package org.apache.spark.streaming.dstream -import spark.RDD -import spark.rdd.UnionRDD -import spark.storage.StorageLevel -import spark.streaming.{Duration, Interval, Time, DStream} +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.UnionRDD +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.{Duration, Interval, Time, DStream} private[streaming] class WindowedDStream[T: ClassManifest]( diff --git a/streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala index 2d9937eab8..4b5d8c467e 100644 --- a/streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark.streaming.receivers +package org.apache.spark.streaming.receivers import akka.actor.{ Actor, PoisonPill, Props, SupervisorStrategy } import akka.actor.{ actorRef2Scala, ActorRef } import akka.actor.{ PossiblyHarmful, OneForOneStrategy } -import spark.storage.StorageLevel -import spark.streaming.dstream.NetworkReceiver +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.dstream.NetworkReceiver import java.util.concurrent.atomic.AtomicInteger @@ -45,6 +45,8 @@ object ReceiverSupervisorStrategy { * A receiver trait to be mixed in with your Actor to gain access to * pushBlock API. * + * Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html + * * @example {{{ * class MyActor extends Actor with Receiver{ * def receive { diff --git a/streaming/src/main/scala/spark/streaming/receivers/ZeroMQReceiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala index 22d554e7e4..043bb8c8bf 100644 --- a/streaming/src/main/scala/spark/streaming/receivers/ZeroMQReceiver.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.streaming.receivers +package org.apache.spark.streaming.receivers import akka.actor.Actor import akka.zeromq._ -import spark.Logging +import org.apache.spark.Logging /** * A receiver to subscribe to ZeroMQ stream. diff --git a/streaming/src/main/scala/spark/streaming/util/Clock.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/Clock.scala index d9ac722df5..f67bb2f6ac 100644 --- a/streaming/src/main/scala/spark/streaming/util/Clock.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/Clock.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.streaming.util +package org.apache.spark.streaming.util private[streaming] trait Clock { diff --git a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/MasterFailureTest.scala index 8ce5d8daf5..6977957126 100644 --- a/streaming/src/main/scala/spark/streaming/util/MasterFailureTest.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/MasterFailureTest.scala @@ -15,11 +15,12 @@ * limitations under the License. 
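The ActorReceiver hunk adds a documentation link above the Receiver trait, whose scaladoc @example is only partially visible in this context. Following that example, a custom receiver would look roughly like the sketch below; pushBlock is the trait method the scaladoc names, and the actorStream registration line is an assumption about the surrounding API, shown commented out:

    import akka.actor.{Actor, Props}
    import org.apache.spark.streaming.receivers.Receiver

    class MyActor extends Actor with Receiver {
      def receive = {
        case s: String => pushBlock(s)   // hand each received item to Spark Streaming
      }
    }
    // Hypothetical registration against an existing StreamingContext `ssc`:
    // ssc.actorStream[String](Props(new MyActor), "MyActorReceiver")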
*/ -package spark.streaming.util +package org.apache.spark.streaming.util -import spark.{Logging, RDD} -import spark.streaming._ -import spark.streaming.dstream.ForEachDStream +import org.apache.spark.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming._ +import org.apache.spark.streaming.dstream.ForEachDStream import StreamingContext._ import scala.util.Random diff --git a/streaming/src/main/scala/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala index bf04120293..4e6ce6eabd 100644 --- a/streaming/src/main/scala/spark/streaming/util/RawTextHelper.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming.util +package org.apache.spark.streaming.util -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap} import scala.collection.JavaConversions.mapAsScalaMap diff --git a/streaming/src/main/scala/spark/streaming/util/RawTextSender.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala index 5cc6ad9dee..fc8655a083 100644 --- a/streaming/src/main/scala/spark/streaming/util/RawTextSender.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala @@ -15,15 +15,16 @@ * limitations under the License. */ -package spark.streaming.util +package org.apache.spark.streaming.util import java.nio.ByteBuffer -import spark.util.{RateLimitedOutputStream, IntParam} +import org.apache.spark.util.{RateLimitedOutputStream, IntParam} import java.net.ServerSocket -import spark.{Logging, KryoSerializer} +import org.apache.spark.{Logging} import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream import scala.io.Source import java.io.IOException +import org.apache.spark.serializer.KryoSerializer /** * A helper program that sends blocks of Kryo-serialized text strings out on a socket at a diff --git a/streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala index 7ecc44236d..d644240405 100644 --- a/streaming/src/main/scala/spark/streaming/util/RecurringTimer.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.streaming.util +package org.apache.spark.streaming.util private[streaming] class RecurringTimer(val clock: Clock, val period: Long, val callback: (Long) => Unit) { diff --git a/streaming/src/test/java/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java index 3b93790baa..c0d729ff87 100644 --- a/streaming/src/test/java/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java @@ -15,7 +15,7 @@ * limitations under the License. 
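The RawTextSender hunk contains the one move in this batch that is not a pure prefix swap: KryoSerializer relocates to org.apache.spark.serializer, so the old combined import is split in two (leaving behind a single-element brace import, {Logging}, which is legal but could be simplified). The resulting pair:

    import org.apache.spark.Logging                      // braces no longer needed once KryoSerializer is gone
    import org.apache.spark.serializer.KryoSerializer    // serializers now live in their own subpackage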
*/ -package spark.streaming; +package org.apache.spark.streaming; import com.google.common.base.Optional; import com.google.common.collect.Lists; @@ -28,20 +28,20 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; import scala.Tuple2; -import spark.HashPartitioner; -import spark.api.java.JavaPairRDD; -import spark.api.java.JavaRDD; -import spark.api.java.JavaRDDLike; -import spark.api.java.JavaPairRDD; -import spark.api.java.JavaSparkContext; -import spark.api.java.function.*; -import spark.storage.StorageLevel; -import spark.streaming.api.java.JavaDStream; -import spark.streaming.api.java.JavaPairDStream; -import spark.streaming.api.java.JavaStreamingContext; -import spark.streaming.JavaTestUtils; -import spark.streaming.JavaCheckpointTestUtils; -import spark.streaming.InputStreamsSuite; +import org.apache.spark.HashPartitioner; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaRDDLike; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.*; +import org.apache.spark.storage.StorageLevel; +import org.apache.spark.streaming.api.java.JavaDStream; +import org.apache.spark.streaming.api.java.JavaPairDStream; +import org.apache.spark.streaming.api.java.JavaStreamingContext; +import org.apache.spark.streaming.JavaTestUtils; +import org.apache.spark.streaming.JavaCheckpointTestUtils; +import org.apache.spark.streaming.InputStreamsSuite; import java.io.*; import java.util.*; @@ -59,7 +59,7 @@ public class JavaAPISuite implements Serializable { @Before public void setUp() { - System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock"); + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock"); ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000)); ssc.checkpoint("checkpoint"); } diff --git a/streaming/src/test/java/spark/streaming/JavaTestUtils.scala b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala index f9d25db8da..8a6604904d 100644 --- a/streaming/src/test/java/spark/streaming/JavaTestUtils.scala +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala @@ -15,20 +15,21 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming import collection.mutable.{SynchronizedBuffer, ArrayBuffer} import java.util.{List => JList} -import spark.streaming.api.java.{JavaPairDStream, JavaDStreamLike, JavaDStream, JavaStreamingContext} -import spark.streaming._ +import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStreamLike, JavaDStream, JavaStreamingContext} +import org.apache.spark.streaming._ import java.util.ArrayList import collection.JavaConversions._ +import org.apache.spark.api.java.JavaRDDLike /** Exposes streaming test functionality in a Java-friendly way. */ trait JavaTestBase extends TestSuiteBase { /** - * Create a [[spark.streaming.TestInputStream]] and attach it to the supplied context. + * Create a [[org.apache.spark.streaming.TestInputStream]] and attach it to the supplied context. * The stream will be derived from the supplied lists of Java objects. **/ def attachTestInputStream[T]( @@ -46,11 +47,11 @@ trait JavaTestBase extends TestSuiteBase { /** * Attach a provided stream to it's associated StreamingContext as a - * [[spark.streaming.TestOutputStream]]. + * [[org.apache.spark.streaming.TestOutputStream]]. 
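Note why the JavaAPISuite hunk has to touch a string literal as well as imports: the streaming clock is injected through a system property whose value is a fully qualified class name, so a package rename silently breaks any test that still spells out the old class. A Scala sketch of the equivalent setup, assuming the streaming API's StreamingContext constructor and Milliseconds helper:

    import org.apache.spark.streaming.{Milliseconds, StreamingContext}

    System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
    val ssc = new StreamingContext("local[2]", "test", Milliseconds(500))
    ssc.checkpoint("checkpoint")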
**/ - def attachTestOutputStream[T, This <: spark.streaming.api.java.JavaDStreamLike[T, This, R], - R <: spark.api.java.JavaRDDLike[T, R]]( - dstream: JavaDStreamLike[T, This, R]) = { + def attachTestOutputStream[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T, R]]( + dstream: JavaDStreamLike[T, This, R]) = + { implicit val cm: ClassManifest[T] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] val ostream = new TestOutputStream(dstream.dstream, diff --git a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala index 67e3e0cd30..11586f72b6 100644 --- a/streaming/src/test/scala/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming -import spark.streaming.StreamingContext._ +import org.apache.spark.streaming.StreamingContext._ import scala.runtime.RichInt import util.ManualClock @@ -26,7 +26,7 @@ class BasicOperationsSuite extends TestSuiteBase { override def framework() = "BasicOperationsSuite" before { - System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") } after { diff --git a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala index 8c639648f0..a327de80b3 100644 --- a/streaming/src/test/scala/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming import dstream.FileInputDStream -import spark.streaming.StreamingContext._ +import org.apache.spark.streaming.StreamingContext._ import java.io.File import runtime.RichInt import org.scalatest.BeforeAndAfter @@ -36,7 +36,7 @@ import com.google.common.io.Files */ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { - System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") before { FileUtils.deleteDirectory(new File(checkpointDir)) @@ -63,7 +63,7 @@ class CheckpointSuite extends TestSuiteBase with BeforeAndAfter { assert(batchDuration === Milliseconds(500), "batchDuration for this test must be 1 second") - System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") val stateStreamCheckpointInterval = Seconds(1) diff --git a/streaming/src/test/scala/spark/streaming/FailureSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala index 7fc649fe27..6337c5359c 100644 --- a/streaming/src/test/scala/spark/streaming/FailureSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala @@ -15,10 +15,10 @@ * limitations under the License. 
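The JavaTestUtils change just above is a readability refactor: with JavaRDDLike imported, the type bounds on attachTestOutputStream can be written with simple names instead of fully qualified ones. The ClassManifest line beneath it is the standard trick for Java callers, who cannot supply a manifest themselves; since the JVM erases T anyway, a manifest for AnyRef is conjured and cast:

    // Inside a method with type parameter T:
    implicit val cm: ClassManifest[T] =
      implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]]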
*/ -package spark.streaming +package org.apache.spark.streaming -import spark.Logging -import spark.streaming.util.MasterFailureTest +import org.apache.spark.Logging +import org.apache.spark.streaming.util.MasterFailureTest import StreamingContext._ import org.scalatest.{FunSuite, BeforeAndAfter} diff --git a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala index 1c5419b16d..42e3e51e3f 100644 --- a/streaming/src/test/scala/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming import akka.actor.Actor import akka.actor.IO @@ -29,9 +29,9 @@ import java.io.{File, BufferedWriter, OutputStreamWriter} import java.util.concurrent.{TimeUnit, ArrayBlockingQueue} import collection.mutable.{SynchronizedBuffer, ArrayBuffer} import util.ManualClock -import spark.storage.StorageLevel -import spark.streaming.receivers.Receiver -import spark.Logging +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.receivers.Receiver +import org.apache.spark.Logging import scala.util.Random import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter @@ -52,7 +52,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { override def checkpointDir = "checkpoint" before { - System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") } after { @@ -207,7 +207,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { FileUtils.deleteDirectory(testDir) // Enable manual clock back again for other tests - System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") } diff --git a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala index cb34b5a7cc..37dd9c4cc6 100644 --- a/streaming/src/test/scala/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -15,12 +15,10 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming -import spark.streaming.dstream.{InputDStream, ForEachDStream} -import spark.streaming.util.ManualClock - -import spark.{RDD, Logging} +import org.apache.spark.streaming.dstream.{InputDStream, ForEachDStream} +import org.apache.spark.streaming.util.ManualClock import collection.mutable.ArrayBuffer import collection.mutable.SynchronizedBuffer @@ -29,6 +27,9 @@ import java.io.{ObjectInputStream, IOException} import org.scalatest.{BeforeAndAfter, FunSuite} +import org.apache.spark.Logging +import org.apache.spark.rdd.RDD + /** * This is a input stream just for the testsuites. This is equivalent to a checkpointable, * replayable, reliable message queue like Kafka. 
It requires a sequence as input, and diff --git a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala index 894b765fc6..f50e05c0d8 100644 --- a/streaming/src/test/scala/spark/streaming/WindowOperationsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark.streaming +package org.apache.spark.streaming -import spark.streaming.StreamingContext._ +import org.apache.spark.streaming.StreamingContext._ import collection.mutable.ArrayBuffer class WindowOperationsSuite extends TestSuiteBase { - System.setProperty("spark.streaming.clock", "spark.streaming.util.ManualClock") + System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") override def framework = "WindowOperationsSuite" diff --git a/tools/pom.xml b/tools/pom.xml new file mode 100644 index 0000000000..77646a6816 --- /dev/null +++ b/tools/pom.xml @@ -0,0 +1,69 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. 
+ --> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.spark</groupId> + <artifactId>spark-parent</artifactId> + <version>0.8.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.spark</groupId> + <artifactId>spark-tools</artifactId> + <packaging>jar</packaging> + <name>Spark Project Tools</name> + <url>http://spark.incubator.apache.org/</url> + + <dependencies> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-streaming</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_${scala.version}</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> + <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-source-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.scalatest</groupId> + <artifactId>scalatest-maven-plugin</artifactId> + </plugin> + </plugins> + </build> +</project> diff --git a/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala b/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala new file mode 100644 index 0000000000..f824c472ae --- /dev/null +++ b/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
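The tools/pom.xml above introduces a new spark-tools module; its only source file is the completeness checker that follows. Since the checker is a plain object with a main method, it can presumably be driven directly once the module and its spark-core and spark-streaming dependencies are on the classpath (a hypothetical one-liner, not a documented entry point):

    org.apache.spark.tools.JavaAPICompletenessChecker.main(Array.empty[String])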
+ */ + +package org.apache.spark.tools + +import java.lang.reflect.Method + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark._ +import org.apache.spark.api.java._ +import org.apache.spark.rdd.{RDD, DoubleRDDFunctions, PairRDDFunctions, OrderedRDDFunctions} +import org.apache.spark.streaming.{PairDStreamFunctions, DStream, StreamingContext} +import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStream, JavaStreamingContext} + + +private[spark] abstract class SparkType(val name: String) + +private[spark] case class BaseType(override val name: String) extends SparkType(name) { + override def toString: String = { + name + } +} + +private[spark] +case class ParameterizedType(override val name: String, + parameters: Seq[SparkType], + typebounds: String = "") extends SparkType(name) { + override def toString: String = { + if (typebounds != "") { + typebounds + " " + name + "<" + parameters.mkString(", ") + ">" + } else { + name + "<" + parameters.mkString(", ") + ">" + } + } +} + +private[spark] +case class SparkMethod(name: String, returnType: SparkType, parameters: Seq[SparkType]) { + override def toString: String = { + returnType + " " + name + "(" + parameters.mkString(", ") + ")" + } +} + +/** + * A tool for identifying methods that need to be ported from Scala to the Java API. + * + * It uses reflection to find methods in the Scala API and rewrites those methods' signatures + * into appropriate Java equivalents. If those equivalent methods have not been implemented in + * the Java API, they are printed. + */ +object JavaAPICompletenessChecker { + + private def parseType(typeStr: String): SparkType = { + if (!typeStr.contains("<")) { + // Base types might begin with "class" or "interface", so we have to strip that off: + BaseType(typeStr.trim.split(" ").last) + } else if (typeStr.endsWith("[]")) { + ParameterizedType("Array", Seq(parseType(typeStr.stripSuffix("[]")))) + } else { + val parts = typeStr.split("<", 2) + val name = parts(0).trim + assert (parts(1).last == '>') + val parameters = parts(1).dropRight(1) + ParameterizedType(name, parseTypeList(parameters)) + } + } + + private def parseTypeList(typeStr: String): Seq[SparkType] = { + val types: ArrayBuffer[SparkType] = new ArrayBuffer[SparkType] + var stack = 0 + var token: StringBuffer = new StringBuffer() + for (c <- typeStr.trim) { + if (c == ',' && stack == 0) { + types += parseType(token.toString) + token = new StringBuffer() + } else if (c == ' ' && stack != 0) { + // continue + } else { + if (c == '<') { + stack += 1 + } else if (c == '>') { + stack -= 1 + } + token.append(c) + } + } + assert (stack == 0) + if (token.toString != "") { + types += parseType(token.toString) + } + types.toSeq + } + + private def parseReturnType(typeStr: String): SparkType = { + if (typeStr(0) == '<') { + val parts = typeStr.drop(0).split(">", 2) + val parsed = parseType(parts(1)).asInstanceOf[ParameterizedType] + ParameterizedType(parsed.name, parsed.parameters, parts(0)) + } else { + parseType(typeStr) + } + } + + private def toSparkMethod(method: Method): SparkMethod = { + val returnType = parseReturnType(method.getGenericReturnType.toString) + val name = method.getName + val parameters = method.getGenericParameterTypes.map(t => parseType(t.toString)) + SparkMethod(name, returnType, parameters) + } + + private def toJavaType(scalaType: SparkType, isReturnType: Boolean): SparkType = { + val renameSubstitutions = Map( + "scala.collection.Map" -> "java.util.Map", + // TODO: the JavaStreamingContext API accepts Array 
arguments + // instead of Lists, so this isn't a trivial translation / sub: + "scala.collection.Seq" -> "java.util.List", + "scala.Function2" -> "org.apache.spark.api.java.function.Function2", + "scala.collection.Iterator" -> "java.util.Iterator", + "scala.collection.mutable.Queue" -> "java.util.Queue", + "double" -> "java.lang.Double" + ) + // Keep applying the substitutions until we've reached a fixedpoint. + def applySubs(scalaType: SparkType): SparkType = { + scalaType match { + case ParameterizedType(name, parameters, typebounds) => + name match { + case "org.apache.spark.rdd.RDD" => + if (parameters(0).name == classOf[Tuple2[_, _]].getName) { + val tupleParams = + parameters(0).asInstanceOf[ParameterizedType].parameters.map(applySubs) + ParameterizedType(classOf[JavaPairRDD[_, _]].getName, tupleParams) + } else { + ParameterizedType(classOf[JavaRDD[_]].getName, parameters.map(applySubs)) + } + case "org.apache.spark.streaming.DStream" => + if (parameters(0).name == classOf[Tuple2[_, _]].getName) { + val tupleParams = + parameters(0).asInstanceOf[ParameterizedType].parameters.map(applySubs) + ParameterizedType("org.apache.spark.streaming.api.java.JavaPairDStream", tupleParams) + } else { + ParameterizedType("org.apache.spark.streaming.api.java.JavaDStream", + parameters.map(applySubs)) + } + case "scala.Option" => { + if (isReturnType) { + ParameterizedType("com.google.common.base.Optional", parameters.map(applySubs)) + } else { + applySubs(parameters(0)) + } + } + case "scala.Function1" => + val firstParamName = parameters.last.name + if (firstParamName.startsWith("scala.collection.Traversable") || + firstParamName.startsWith("scala.collection.Iterator")) { + ParameterizedType("org.apache.spark.api.java.function.FlatMapFunction", + Seq(parameters(0), + parameters.last.asInstanceOf[ParameterizedType].parameters(0)).map(applySubs)) + } else if (firstParamName == "scala.runtime.BoxedUnit") { + ParameterizedType("org.apache.spark.api.java.function.VoidFunction", + parameters.dropRight(1).map(applySubs)) + } else { + ParameterizedType("org.apache.spark.api.java.function.Function", parameters.map(applySubs)) + } + case _ => + ParameterizedType(renameSubstitutions.getOrElse(name, name), + parameters.map(applySubs)) + } + case BaseType(name) => + if (renameSubstitutions.contains(name)) { + BaseType(renameSubstitutions(name)) + } else { + scalaType + } + } + } + var oldType = scalaType + var newType = applySubs(scalaType) + while (oldType != newType) { + oldType = newType + newType = applySubs(scalaType) + } + newType + } + + private def toJavaMethod(method: SparkMethod): SparkMethod = { + val params = method.parameters + .filterNot(_.name == "scala.reflect.ClassManifest") + .map(toJavaType(_, isReturnType = false)) + SparkMethod(method.name, toJavaType(method.returnType, isReturnType = true), params) + } + + private def isExcludedByName(method: Method): Boolean = { + val name = method.getDeclaringClass.getName + "." + method.getName + // Scala methods that are declared as private[mypackage] become public in the resulting + // Java bytecode. As a result, we need to manually exclude those methods here. + // This list also includes a few methods that are only used by the web UI or other + // internal Spark components. 
+ val excludedNames = Seq( + "org.apache.spark.rdd.RDD.origin", + "org.apache.spark.rdd.RDD.elementClassManifest", + "org.apache.spark.rdd.RDD.checkpointData", + "org.apache.spark.rdd.RDD.partitioner", + "org.apache.spark.rdd.RDD.partitions", + "org.apache.spark.rdd.RDD.firstParent", + "org.apache.spark.rdd.RDD.doCheckpoint", + "org.apache.spark.rdd.RDD.markCheckpointed", + "org.apache.spark.rdd.RDD.clearDependencies", + "org.apache.spark.rdd.RDD.getDependencies", + "org.apache.spark.rdd.RDD.getPartitions", + "org.apache.spark.rdd.RDD.dependencies", + "org.apache.spark.rdd.RDD.getPreferredLocations", + "org.apache.spark.rdd.RDD.collectPartitions", + "org.apache.spark.rdd.RDD.computeOrReadCheckpoint", + "org.apache.spark.rdd.PairRDDFunctions.getKeyClass", + "org.apache.spark.rdd.PairRDDFunctions.getValueClass", + "org.apache.spark.SparkContext.stringToText", + "org.apache.spark.SparkContext.makeRDD", + "org.apache.spark.SparkContext.runJob", + "org.apache.spark.SparkContext.runApproximateJob", + "org.apache.spark.SparkContext.clean", + "org.apache.spark.SparkContext.metadataCleaner", + "org.apache.spark.SparkContext.ui", + "org.apache.spark.SparkContext.newShuffleId", + "org.apache.spark.SparkContext.newRddId", + "org.apache.spark.SparkContext.cleanup", + "org.apache.spark.SparkContext.receiverJobThread", + "org.apache.spark.SparkContext.getRDDStorageInfo", + "org.apache.spark.SparkContext.addedFiles", + "org.apache.spark.SparkContext.addedJars", + "org.apache.spark.SparkContext.persistentRdds", + "org.apache.spark.SparkContext.executorEnvs", + "org.apache.spark.SparkContext.checkpointDir", + "org.apache.spark.SparkContext.getSparkHome", + "org.apache.spark.SparkContext.executorMemoryRequested", + "org.apache.spark.SparkContext.getExecutorStorageStatus", + "org.apache.spark.streaming.DStream.generatedRDDs", + "org.apache.spark.streaming.DStream.zeroTime", + "org.apache.spark.streaming.DStream.rememberDuration", + "org.apache.spark.streaming.DStream.storageLevel", + "org.apache.spark.streaming.DStream.mustCheckpoint", + "org.apache.spark.streaming.DStream.checkpointDuration", + "org.apache.spark.streaming.DStream.checkpointData", + "org.apache.spark.streaming.DStream.graph", + "org.apache.spark.streaming.DStream.isInitialized", + "org.apache.spark.streaming.DStream.parentRememberDuration", + "org.apache.spark.streaming.DStream.initialize", + "org.apache.spark.streaming.DStream.validate", + "org.apache.spark.streaming.DStream.setContext", + "org.apache.spark.streaming.DStream.setGraph", + "org.apache.spark.streaming.DStream.remember", + "org.apache.spark.streaming.DStream.getOrCompute", + "org.apache.spark.streaming.DStream.generateJob", + "org.apache.spark.streaming.DStream.clearOldMetadata", + "org.apache.spark.streaming.DStream.addMetadata", + "org.apache.spark.streaming.DStream.updateCheckpointData", + "org.apache.spark.streaming.DStream.restoreCheckpointData", + "org.apache.spark.streaming.DStream.isTimeValid", + "org.apache.spark.streaming.StreamingContext.nextNetworkInputStreamId", + "org.apache.spark.streaming.StreamingContext.networkInputTracker", + "org.apache.spark.streaming.StreamingContext.checkpointDir", + "org.apache.spark.streaming.StreamingContext.checkpointDuration", + "org.apache.spark.streaming.StreamingContext.receiverJobThread", + "org.apache.spark.streaming.StreamingContext.scheduler", + "org.apache.spark.streaming.StreamingContext.initialCheckpoint", + "org.apache.spark.streaming.StreamingContext.getNewNetworkStreamId", + 
"org.apache.spark.streaming.StreamingContext.validate", + "org.apache.spark.streaming.StreamingContext.createNewSparkContext", + "org.apache.spark.streaming.StreamingContext.rddToFileName", + "org.apache.spark.streaming.StreamingContext.getSparkCheckpointDir", + "org.apache.spark.streaming.StreamingContext.env", + "org.apache.spark.streaming.StreamingContext.graph", + "org.apache.spark.streaming.StreamingContext.isCheckpointPresent" + ) + val excludedPatterns = Seq( + """^org\.apache\.spark\.SparkContext\..*To.*Functions""", + """^org\.apache\.spark\.SparkContext\..*WritableConverter""", + """^org\.apache\.spark\.SparkContext\..*To.*Writable""" + ).map(_.r) + lazy val excludedByPattern = + !excludedPatterns.map(_.findFirstIn(name)).filter(_.isDefined).isEmpty + name.contains("$") || excludedNames.contains(name) || excludedByPattern + } + + private def isExcludedByInterface(method: Method): Boolean = { + val excludedInterfaces = + Set("org.apache.spark.Logging", "org.apache.hadoop.mapreduce.HadoopMapReduceUtil") + def toComparisionKey(method: Method) = + (method.getReturnType, method.getName, method.getGenericReturnType) + val interfaces = method.getDeclaringClass.getInterfaces.filter { i => + excludedInterfaces.contains(i.getName) + } + val excludedMethods = interfaces.flatMap(_.getMethods.map(toComparisionKey)) + excludedMethods.contains(toComparisionKey(method)) + } + + private def printMissingMethods(scalaClass: Class[_], javaClass: Class[_]) { + val methods = scalaClass.getMethods + .filterNot(_.isAccessible) + .filterNot(isExcludedByName) + .filterNot(isExcludedByInterface) + val javaEquivalents = methods.map(m => toJavaMethod(toSparkMethod(m))).toSet + + val javaMethods = javaClass.getMethods.map(toSparkMethod).toSet + + val missingMethods = javaEquivalents -- javaMethods + + for (method <- missingMethods) { + println(method) + } + } + + def main(args: Array[String]) { + println("Missing RDD methods") + printMissingMethods(classOf[RDD[_]], classOf[JavaRDD[_]]) + println() + + println("Missing PairRDD methods") + printMissingMethods(classOf[PairRDDFunctions[_, _]], classOf[JavaPairRDD[_, _]]) + println() + + println("Missing DoubleRDD methods") + printMissingMethods(classOf[DoubleRDDFunctions], classOf[JavaDoubleRDD]) + println() + + println("Missing OrderedRDD methods") + printMissingMethods(classOf[OrderedRDDFunctions[_, _, _]], classOf[JavaPairRDD[_, _]]) + println() + + println("Missing SparkContext methods") + printMissingMethods(classOf[SparkContext], classOf[JavaSparkContext]) + println() + + println("Missing StreamingContext methods") + printMissingMethods(classOf[StreamingContext], classOf[JavaStreamingContext]) + println() + + println("Missing DStream methods") + printMissingMethods(classOf[DStream[_]], classOf[JavaDStream[_]]) + println() + + println("Missing PairDStream methods") + printMissingMethods(classOf[PairDStreamFunctions[_, _]], classOf[JavaPairDStream[_, _]]) + println() + } +} diff --git a/yarn/pom.xml b/yarn/pom.xml new file mode 100644 index 0000000000..fcacdac9eb --- /dev/null +++ b/yarn/pom.xml @@ -0,0 +1,111 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. 
You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. + --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.spark</groupId> + <artifactId>spark-parent</artifactId> + <version>0.8.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.spark</groupId> + <artifactId>spark-yarn</artifactId> + <packaging>jar</packaging> + <name>Spark Project YARN Support</name> + <url>http://spark.incubator.apache.org/</url> + + <build> + <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> + <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <configuration> + <shadedArtifactAttached>false</shadedArtifactAttached> + <outputFile>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</outputFile> + <artifactSet> + <includes> + <include>*:*</include> + </includes> + </artifactSet> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> + </configuration> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> + <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> + <resource>reference.conf</resource> + </transformer> + </transformers> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + + <profiles> + <profile> + <id>hadoop2-yarn</id> + <dependencies> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-common</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-client</artifactId> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro-ipc</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> +</project> diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 1b06169739..139a977a03 100644 --- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.deploy.yarn +package org.apache.spark.deploy.yarn import java.net.Socket import java.util.concurrent.CopyOnWriteArrayList @@ -29,7 +29,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.ipc.YarnRPC import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import scala.collection.JavaConversions._ -import spark.{SparkContext, Logging, Utils} +import org.apache.spark.{SparkContext, Logging, Utils} import org.apache.hadoop.security.UserGroupInformation import java.security.PrivilegedExceptionAction @@ -47,6 +47,9 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e private var isFinished:Boolean = false def run() { + // setup the directories so things go to yarn approved directories rather + // then user specified and /tmp + System.setProperty("spark.local.dir", getLocalDirs()) appAttemptId = getApplicationAttemptId() resourceManager = registerWithResourceManager() @@ -89,6 +92,21 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e System.exit(0) } + + /** Get the Yarn approved local directories. */ + private def getLocalDirs(): String = { + // Hadoop 0.23 and 2.x have different Environment variable names for the + // local dirs, so lets check both. We assume one of the 2 is set. + // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X + val localDirs = Option(System.getenv("YARN_LOCAL_DIRS")) + .getOrElse(Option(System.getenv("LOCAL_DIRS")) + .getOrElse("")) + + if (localDirs.isEmpty()) { + throw new Exception("Yarn Local dirs can't be empty") + } + return localDirs + } private def getApplicationAttemptId(): ApplicationAttemptId = { val envs = System.getenv() @@ -124,18 +142,20 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e private def waitForSparkMaster() { logInfo("Waiting for spark driver to be reachable.") var driverUp = false - while(!driverUp) { + var tries = 0 + while(!driverUp && tries < 10) { val driverHost = System.getProperty("spark.driver.host") val driverPort = System.getProperty("spark.driver.port") try { val socket = new Socket(driverHost, driverPort.toInt) socket.close() - logInfo("Master now available: " + driverHost + ":" + driverPort) + logInfo("Driver now available: " + driverHost + ":" + driverPort) driverUp = true } catch { case e: Exception => - logError("Failed to connect to driver at " + driverHost + ":" + driverPort) + logWarning("Failed to connect to driver at " + driverHost + ":" + driverPort + ", retrying") Thread.sleep(100) + tries = tries + 1 } } } @@ -176,7 +196,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e var sparkContext: SparkContext = null ApplicationMaster.sparkContextRef.synchronized { var count = 0 - while (ApplicationMaster.sparkContextRef.get() == null) { + while (ApplicationMaster.sparkContextRef.get() == null && count < 10) { logInfo("Waiting for spark context initialization ... " + count) count = count + 1 ApplicationMaster.sparkContextRef.wait(10000L) diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMasterArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala index 8de44b1f66..f47e23b63f 100644 --- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMasterArguments.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala @@ -15,9 +15,9 @@ * limitations under the License. 
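Beyond the rename, the ApplicationMaster hunks above bound two previously unbounded retry loops (waiting for the driver and for SparkContext initialization) and point spark.local.dir at the YARN-approved directories. The env-var fallback in getLocalDirs can be written a little more directly with orElse; an equivalent sketch, not the committed code:

    // Hadoop 2.x exports LOCAL_DIRS; Hadoop 0.23.x exports YARN_LOCAL_DIRS. One of the two is assumed set.
    val localDirs = Option(System.getenv("YARN_LOCAL_DIRS"))
      .orElse(Option(System.getenv("LOCAL_DIRS")))
      .getOrElse(sys.error("Yarn Local dirs can't be empty"))
    System.setProperty("spark.local.dir", localDirs)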
*/ -package spark.deploy.yarn +package org.apache.spark.deploy.yarn -import spark.util.IntParam +import org.apache.spark.util.IntParam import collection.mutable.ArrayBuffer class ApplicationMasterArguments(val args: Array[String]) { diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 8bcbfc2735..48e737ed79 100644 --- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.yarn +package org.apache.spark.deploy.yarn import java.net.{InetSocketAddress, URI} import java.nio.ByteBuffer @@ -33,10 +33,10 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.ipc.YarnRPC import scala.collection.mutable.HashMap import scala.collection.JavaConversions._ -import spark.{Logging, Utils} +import org.apache.spark.{Logging, Utils} import org.apache.hadoop.yarn.util.{Apps, Records, ConverterUtils} import org.apache.hadoop.yarn.api.ApplicationConstants.Environment -import spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.SparkHadoopUtil class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl with Logging { @@ -165,7 +165,7 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl Apps.addToEnvironment(env, Environment.CLASSPATH.name, "./*") Apps.addToEnvironment(env, Environment.CLASSPATH.name, "$CLASSPATH") Client.populateHadoopClasspath(yarnConf, env) - SparkHadoopUtil.setYarnMode(env) + env("SPARK_YARN_MODE") = "true" env("SPARK_YARN_JAR_PATH") = localResources("spark.jar").getResource().getScheme.toString() + "://" + localResources("spark.jar").getResource().getFile().toString() @@ -185,6 +185,8 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl env("SPARK_YARN_LOG4J_SIZE") = log4jConfLocalRes.getSize().toString() } + // allow users to specify some environment variables + Apps.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV")) // Add each SPARK-* key to the environment System.getenv().filterKeys(_.startsWith("SPARK")).foreach { case (k,v) => env(k) = v } @@ -221,6 +223,10 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl // Add Xmx for am memory JAVA_OPTS += "-Xmx" + amMemory + "m " + JAVA_OPTS += " -Djava.io.tmpdir=" + new Path(Environment.PWD.$(), + YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR) + + // Commenting it out for now - so that people can refer to the properties if required. Remove it once cpuset version is pushed out. // The context is, default gc for server class machines end up using all cores to do gc - hence if there are multiple containers in same // node, spark gc effects all other containers performance (which can also be other spark containers) @@ -241,7 +247,7 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl // Command for the ApplicationMaster var javaCommand = "java"; val javaHome = System.getenv("JAVA_HOME") - if (javaHome != null && !javaHome.isEmpty()) { + if ((javaHome != null && !javaHome.isEmpty()) || env.isDefinedAt("JAVA_HOME")) { javaCommand = Environment.JAVA_HOME.$() + "/bin/java" } @@ -313,8 +319,11 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl object Client { def main(argStrings: Array[String]) { + // Set an env variable indicating we are running in YARN mode. 
+ // Note that anything with SPARK prefix gets propagated to all (remote) processes + System.setProperty("SPARK_YARN_MODE", "true") + val args = new ClientArguments(argStrings) - SparkHadoopUtil.setYarnMode() new Client(args).run } diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ClientArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala index 67aff03781..6cbfadc23b 100644 --- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ClientArguments.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.deploy.yarn +package org.apache.spark.deploy.yarn -import spark.util.MemoryParam -import spark.util.IntParam +import org.apache.spark.util.MemoryParam +import org.apache.spark.util.IntParam import collection.mutable.{ArrayBuffer, HashMap} -import spark.scheduler.{InputFormatInfo, SplitInfo} +import org.apache.spark.scheduler.{InputFormatInfo, SplitInfo} // TODO: Add code and support for ensuring that yarn resource 'asks' are location aware ! class ClientArguments(val args: Array[String]) { diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/WorkerRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala index f458f2f6a1..72dcf7178e 100644 --- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/WorkerRunnable.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.yarn +package org.apache.spark.deploy.yarn import java.net.URI import java.nio.ByteBuffer @@ -37,7 +37,7 @@ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import scala.collection.JavaConversions._ import scala.collection.mutable.HashMap -import spark.{Logging, Utils} +import org.apache.spark.{Logging, Utils} class WorkerRunnable(container: Container, conf: Configuration, masterAddress: String, slaveId: String, hostname: String, workerMemory: Int, workerCores: Int) @@ -75,6 +75,10 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S if (env.isDefinedAt("SPARK_JAVA_OPTS")) { JAVA_OPTS += env("SPARK_JAVA_OPTS") + " " } + + JAVA_OPTS += " -Djava.io.tmpdir=" + new Path(Environment.PWD.$(), + YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR) + // Commenting it out for now - so that people can refer to the properties if required. Remove it once cpuset version is pushed out. // The context is, default gc for server class machines end up using all cores to do gc - hence if there are multiple containers in same // node, spark gc effects all other containers performance (which can also be other spark containers) @@ -104,7 +108,7 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S var javaCommand = "java"; val javaHome = System.getenv("JAVA_HOME") - if (javaHome != null && !javaHome.isEmpty()) { + if ((javaHome != null && !javaHome.isEmpty()) || env.isDefinedAt("JAVA_HOME")) { javaCommand = Environment.JAVA_HOME.$() + "/bin/java" } @@ -115,7 +119,7 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S // TODO: If the OOM is not recoverable by rescheduling it on different node, then do 'something' to fail job ... akin to blacklisting trackers in mapred ? 
" -XX:OnOutOfMemoryError='kill %p' " + JAVA_OPTS + - " spark.executor.StandaloneExecutorBackend " + + " org.apache.spark.executor.StandaloneExecutorBackend " + masterAddress + " " + slaveId + " " + hostname + " " + @@ -187,6 +191,9 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S Apps.addToEnvironment(env, Environment.CLASSPATH.name, "$CLASSPATH") Client.populateHadoopClasspath(yarnConf, env) + // allow users to specify some environment variables + Apps.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV")) + System.getenv().filterKeys(_.startsWith("SPARK")).foreach { case (k,v) => env(k) = v } return env } diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala index b0af8baf08..26ff214e12 100644 --- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/YarnAllocationHandler.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.deploy.yarn +package org.apache.spark.deploy.yarn -import spark.{Logging, Utils} -import spark.scheduler.SplitInfo +import org.apache.spark.{Logging, Utils} +import org.apache.spark.scheduler.SplitInfo import scala.collection import org.apache.hadoop.yarn.api.records.{AMResponse, ApplicationAttemptId, ContainerId, Priority, Resource, ResourceRequest, ContainerStatus, Container} -import spark.scheduler.cluster.{ClusterScheduler, StandaloneSchedulerBackend} +import org.apache.spark.scheduler.cluster.{ClusterScheduler, StandaloneSchedulerBackend} import org.apache.hadoop.yarn.api.protocolrecords.{AllocateRequest, AllocateResponse} import org.apache.hadoop.yarn.util.{RackResolver, Records} import java.util.concurrent.{CopyOnWriteArrayList, ConcurrentHashMap} diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 6122fdced0..ca2f1e2565 100644 --- a/core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -15,8 +15,9 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy.yarn +import org.apache.spark.deploy.SparkHadoopUtil import collection.mutable.HashMap import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.security.UserGroupInformation @@ -28,48 +29,17 @@ import java.security.PrivilegedExceptionAction /** * Contains util methods to interact with Hadoop from spark. */ -object SparkHadoopUtil { - - val yarnConf = newConfiguration() - - def getUserNameFromEnvironment(): String = { - // defaulting to env if -D is not present ... - val retval = System.getProperty(Environment.USER.name, System.getenv(Environment.USER.name)) - - // If nothing found, default to user we are running as - if (retval == null) System.getProperty("user.name") else retval - } - - def runAsUser(func: (Product) => Unit, args: Product) { - runAsUser(func, args, getUserNameFromEnvironment()) - } - - def runAsUser(func: (Product) => Unit, args: Product, user: String) { - func(args) - } +class YarnSparkHadoopUtil extends SparkHadoopUtil { // Note that all params which start with SPARK are propagated all the way through, so if in yarn mode, this MUST be set to true. 
- def isYarnMode(): Boolean = { - val yarnMode = System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE")) - java.lang.Boolean.valueOf(yarnMode) - } - - // Set an env variable indicating we are running in YARN mode. - // Note that anything with SPARK prefix gets propagated to all (remote) processes - def setYarnMode() { - System.setProperty("SPARK_YARN_MODE", "true") - } - - def setYarnMode(env: HashMap[String, String]) { - env("SPARK_YARN_MODE") = "true" - } + override def isYarnMode(): Boolean = { true } // Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems // Always create a new config, dont reuse yarnConf. - def newConfiguration(): Configuration = new YarnConfiguration(new Configuration()) + override def newConfiguration(): Configuration = new YarnConfiguration(new Configuration()) // add any user credentials to the job conf which are necessary for running on a secure Hadoop cluster - def addCredentials(conf: JobConf) { + override def addCredentials(conf: JobConf) { val jobCreds = conf.getCredentials(); jobCreds.mergeAll(UserGroupInformation.getCurrentUser().getCredentials()) } diff --git a/core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala index 307d96111c..3828ddfc4f 100644 --- a/core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import spark._ -import spark.deploy.yarn.{ApplicationMaster, YarnAllocationHandler} +import org.apache.spark._ +import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnAllocationHandler} import org.apache.hadoop.conf.Configuration /** @@ -41,13 +41,6 @@ private[spark] class YarnClusterScheduler(sc: SparkContext, conf: Configuration) if (retval != null) Some(retval) else None } - // By default, if rack is unknown, return nothing - override def getCachedHostsForRack(rack: String): Option[Set[String]] = { - if (rack == None || rack == null) return None - - YarnAllocationHandler.fetchCachedHostsForRack(rack) - } - override def postStartHook() { val sparkContextInitialized = ApplicationMaster.sparkContextInitialized(sc) if (sparkContextInitialized){ |