author     Patrick Wendell <pwendell@gmail.com>    2013-07-31 21:35:12 -0700
committer  Patrick Wendell <pwendell@gmail.com>    2013-07-31 21:35:12 -0700
commit     5cc725a0e3ef523affae8ff54dd74707e49d64e3 (patch)
tree       ebd1698333d2df4194f17a9ea93a2f2eac2c7acd /conf
parent     b7b627d5bb1a1331ea580950834533f84735df4c (diff)
parent     f3cf09491a2b63e19a15e98cf815da503e4fb69b (diff)
Merge branch 'master' into ec2-updates
Conflicts: ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh
Diffstat (limited to 'conf')
-rw-r--r--   conf/fairscheduler.xml.template    15
-rw-r--r--   conf/metrics.properties.template   87
-rwxr-xr-x   conf/spark-env.sh.template         18
3 files changed, 108 insertions, 12 deletions
diff --git a/conf/fairscheduler.xml.template b/conf/fairscheduler.xml.template
new file mode 100644
index 0000000000..04a6b418dc
--- /dev/null
+++ b/conf/fairscheduler.xml.template
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<allocations>
+<pool name="production">
+ <minShare>2</minShare>
+ <weight>1</weight>
+ <schedulingMode>FAIR</schedulingMode>
+</pool>
+<pool name="test">
+ <minShare>3</minShare>
+ <weight>2</weight>
+ <schedulingMode>FIFO</schedulingMode>
+</pool>
+<pool name="data">
+</pool>
+</allocations>
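
The template above only declares the pools; to actually use it, Spark has to be pointed at the file and each job has to pick a pool. A minimal sketch in shell, assuming the spark.scheduler.allocation.file property name documented for the fair scheduler (verify it against the job-scheduling docs for your Spark version) and placeholder paths:

    # Copy the template and point the scheduler at it (property name assumed,
    # it may differ between Spark versions).
    cp $SPARK_HOME/conf/fairscheduler.xml.template $SPARK_HOME/conf/fairscheduler.xml
    export SPARK_JAVA_OPTS="-Dspark.scheduler.allocation.file=$SPARK_HOME/conf/fairscheduler.xml"
    # A job then selects one of the pools defined above from the driver, e.g.:
    #   sc.setLocalProperty("spark.scheduler.pool", "production")

Pools that set no options, like "data" above, simply fall back to the scheduler's default settings.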
diff --git a/conf/metrics.properties.template b/conf/metrics.properties.template
new file mode 100644
index 0000000000..0486ca4c79
--- /dev/null
+++ b/conf/metrics.properties.template
@@ -0,0 +1,87 @@
+# syntax: [instance].[sink|source].[name].[options]
+
+# "instance" specify "who" (the role) use metrics system. In spark there are
+# several roles like master, worker, executor, driver, these roles will
+# create metrics system for monitoring. So instance represents these roles.
+# Currently in Spark, several instances have already implemented: master,
+# worker, executor, driver.
+#
+# [instance] field can be "master", "worker", "executor", "driver", which means
+# only the specified instance has this property.
+# a wild card "*" can be used to represent instance name, which means all the
+# instances will have this property.
+#
+# "source" specify "where" (source) to collect metrics data. In metrics system,
+# there exists two kinds of source:
+# 1. Spark internal source, like MasterSource, WorkerSource, etc, which will
+# collect Spark component's internal state, these sources are related to
+# instance and will be added after specific metrics system is created.
+# 2. Common source, like JvmSource, which will collect low level state, is
+# configured by configuration and loaded through reflection.
+#
+# "sink" specify "where" (destination) to output metrics data to. Several sinks
+# can be coexisted and flush metrics to all these sinks.
+#
+# [sink|source] field specify this property is source related or sink, this
+# field can only be source or sink.
+#
+# [name] field specify the name of source or sink, this is custom defined.
+#
+# [options] field is the specific property of this source or sink, this source
+# or sink is responsible for parsing this property.
+#
+# Notes:
+# 1. Sinks should be added through configuration, like console sink, class
+# full name should be specified by class property.
+# 2. Some sinks can specify polling period, like console sink, which is 10 seconds,
+# it should be attention minimal polling period is 1 seconds, any period
+# below than 1s is illegal.
+# 3. Wild card property can be overlapped by specific instance property, for
+# example, *.sink.console.period can be overlapped by master.sink.console.period.
+# 4. A metrics specific configuration
+# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
+# added to Java property using -Dspark.metrics.conf=xxx if you want to
+# customize metrics system, or you can put it in ${SPARK_HOME}/conf,
+# metrics system will search and load it automatically.
+
+# Enable JmxSink for all instances by class name
+#*.sink.jmx.class=spark.metrics.sink.JmxSink
+
+# Enable ConsoleSink for all instances by class name
+#*.sink.console.class=spark.metrics.sink.ConsoleSink
+
+# Polling period for ConsoleSink
+#*.sink.console.period=10
+
+#*.sink.console.unit=seconds
+
+# Master instance overrides the polling period
+#master.sink.console.period=15
+
+#master.sink.console.unit=seconds
+
+# Enable CsvSink for all instances
+#*.sink.csv.class=spark.metrics.sink.CsvSink
+
+# Polling period for CsvSink
+#*.sink.csv.period=1
+
+#*.sink.csv.unit=minutes
+
+# Polling directory for CsvSink
+#*.sink.csv.directory=/tmp/
+
+# Worker instance overrides the polling period
+#worker.sink.csv.period=10
+
+#worker.sink.csv.unit=minutes
+
+# Enable the JVM source for the master, worker, driver, and executor instances
+#master.source.jvm.class=spark.metrics.source.JvmSource
+
+#worker.source.jvm.class=spark.metrics.source.JvmSource
+
+#driver.source.jvm.class=spark.metrics.source.JvmSource
+
+#executor.source.jvm.class=spark.metrics.source.JvmSource
+
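
As the notes in this template say, the metrics system loads the file automatically once it lives in ${SPARK_HOME}/conf, or it can be pointed at an explicit location through the spark.metrics.conf system property. A minimal sketch, with the JVM-option route going through SPARK_JAVA_OPTS and all paths as placeholders:

    # Copy the template into place; Spark searches ${SPARK_HOME}/conf for it.
    cp $SPARK_HOME/conf/metrics.properties.template $SPARK_HOME/conf/metrics.properties
    # In the copy, uncomment the two ConsoleSink lines shown above so every
    # instance prints its metrics every 10 seconds:
    #   *.sink.console.class=spark.metrics.sink.ConsoleSink
    #   *.sink.console.period=10
    # Or point Spark at a file kept elsewhere via a JVM system property:
    export SPARK_JAVA_OPTS="-Dspark.metrics.conf=/path/to/metrics.properties"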
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index 37565ca827..b8936314ec 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -3,8 +3,10 @@
# This file contains environment variables required to run Spark. Copy it as
# spark-env.sh and edit that to configure Spark for your site. At a minimum,
# the following two variables should be set:
-# - MESOS_NATIVE_LIBRARY, to point to your Mesos native library (libmesos.so)
-# - SCALA_HOME, to point to your Scala installation
+# - SCALA_HOME, to point to your Scala installation, or SCALA_LIBRARY_PATH to
+# point to the directory for Scala library JARs (if you install Scala as a
+# Debian or RPM package, these are in a separate path, often /usr/share/java)
+# - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos
#
# If using the standalone deploy mode, you can also set variables for it:
# - SPARK_MASTER_IP, to bind the master to a different IP address
@@ -12,14 +14,6 @@
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
-# - SPARK_WORKER_INSTANCES, to set the number of worker instances/processes to be spawned on every slave machine
-#
-# Finally, Spark also relies on the following variables, but these can be set
-# on just the *master* (i.e. in your driver program), and will automatically
-# be propagated to workers:
-# - SPARK_MEM, to change the amount of memory used per node (this should
-# be in the same format as the JVM's -Xmx option, e.g. 300m or 1g)
-# - SPARK_CLASSPATH, to add elements to Spark's classpath
-# - SPARK_JAVA_OPTS, to add JVM options
-# - SPARK_LIBRARY_PATH, to add extra search paths for native libraries.
+# - SPARK_WORKER_INSTANCES, to set the number of worker instances/processes
+# to be spawned on every slave machine
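
Taken together, first-time configuration of this file is mostly a matter of copying the template and filling in the variables listed above. A minimal sketch, where every path and size is a placeholder for your own installation:

    # spark-env.sh is sourced by Spark's startup scripts.
    cp $SPARK_HOME/conf/spark-env.sh.template $SPARK_HOME/conf/spark-env.sh
    # Then edit the copy, for example:
    export SCALA_HOME=/usr/local/scala                      # or SCALA_LIBRARY_PATH=/usr/share/java
    export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so  # only if running on Mesos
    export SPARK_WORKER_CORES=4                             # standalone mode: cores per worker
    export SPARK_WORKER_MEMORY=4g                           # standalone mode: memory per worker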