author     Patrick Wendell <pwendell@gmail.com>    2013-07-31 21:35:12 -0700
committer  Patrick Wendell <pwendell@gmail.com>    2013-07-31 21:35:12 -0700
commit     5cc725a0e3ef523affae8ff54dd74707e49d64e3 (patch)
tree       ebd1698333d2df4194f17a9ea93a2f2eac2c7acd /conf
parent     b7b627d5bb1a1331ea580950834533f84735df4c (diff)
parent     f3cf09491a2b63e19a15e98cf815da503e4fb69b (diff)
Merge branch 'master' into ec2-updates
Conflicts: ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh
Diffstat (limited to 'conf')
-rw-r--r--   conf/fairscheduler.xml.template    15
-rw-r--r--   conf/metrics.properties.template   87
-rwxr-xr-x   conf/spark-env.sh.template         18
3 files changed, 108 insertions, 12 deletions
diff --git a/conf/fairscheduler.xml.template b/conf/fairscheduler.xml.template
new file mode 100644
index 0000000000..04a6b418dc
--- /dev/null
+++ b/conf/fairscheduler.xml.template
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<allocations>
+<pool name="production">
+ <minShare>2</minShare>
+ <weight>1</weight>
+ <schedulingMode>FAIR</schedulingMode>
+</pool>
+<pool name="test">
+ <minShare>3</minShare>
+ <weight>2</weight>
+ <schedulingMode>FIFO</schedulingMode>
+</pool>
+<pool name="data">
+</pool>
+</allocations>
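
The template above only declares the pools; to actually use it, Spark has to be pointed at the file and each job has to pick a pool. A minimal sketch in shell, assuming the spark.scheduler.allocation.file property name documented for the fair scheduler (verify it against the job-scheduling docs for your Spark version) and placeholder paths:

    # Copy the template and point the scheduler at it (property name assumed,
    # it may differ between Spark versions).
    cp $SPARK_HOME/conf/fairscheduler.xml.template $SPARK_HOME/conf/fairscheduler.xml
    export SPARK_JAVA_OPTS="-Dspark.scheduler.allocation.file=$SPARK_HOME/conf/fairscheduler.xml"
    # A job then selects one of the pools defined above from the driver, e.g.:
    #   sc.setLocalProperty("spark.scheduler.pool", "production")

Pools that set no options, like "data" above, simply fall back to the scheduler's default settings.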
diff --git a/conf/metrics.properties.template b/conf/metrics.properties.template
new file mode 100644
index 0000000000..0486ca4c79
--- /dev/null
+++ b/conf/metrics.properties.template
@@ -0,0 +1,87 @@
+# syntax: [instance].[sink|source].[name].[options]
+
+# "instance" specify "who" (the role) use metrics system. In spark there are
+# several roles like master, worker, executor, driver, these roles will
+# create metrics system for monitoring. So instance represents these roles.
+# Currently in Spark, several instances have already implemented: master,
+# worker, executor, driver.
+#
+# [instance] field can be "master", "worker", "executor", "driver", which means
+# only the specified instance has this property.
+# a wild card "*" can be used to represent instance name, which means all the
+# instances will have this property.
+#
+# "source" specify "where" (source) to collect metrics data. In metrics system,
+# there exists two kinds of source:
+# 1. Spark internal source, like MasterSource, WorkerSource, etc, which will
+# collect Spark component's internal state, these sources are related to
+# instance and will be added after specific metrics system is created.
+# 2. Common source, like JvmSource, which will collect low level state, is
+# configured by configuration and loaded through reflection.
+#
+# "sink" specify "where" (destination) to output metrics data to. Several sinks
+# can be coexisted and flush metrics to all these sinks.
+#
+# [sink|source] field specify this property is source related or sink, this
+# field can only be source or sink.
+#
+# [name] field specify the name of source or sink, this is custom defined.
+#
+# [options] field is the specific property of this source or sink, this source
+# or sink is responsible for parsing this property.
+#
+# Notes:
+# 1. Sinks should be added through configuration, like console sink, class
+# full name should be specified by class property.
+# 2. Some sinks can specify polling period, like console sink, which is 10 seconds,
+# it should be attention minimal polling period is 1 seconds, any period
+# below than 1s is illegal.
+# 3. Wild card property can be overlapped by specific instance property, for
+# example, *.sink.console.period can be overlapped by master.sink.console.period.
+# 4. A metrics specific configuration
+# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
+# added to Java property using -Dspark.metrics.conf=xxx if you want to
+# customize metrics system, or you can put it in ${SPARK_HOME}/conf,
+# metrics system will search and load it automatically.
+
+# Enable JmxSink for all instances by class name
+#*.sink.jmx.class=spark.metrics.sink.JmxSink
+
+# Enable ConsoleSink for all instances by class name
+#*.sink.console.class=spark.metrics.sink.ConsoleSink
+
+# Polling period for ConsoleSink
+#*.sink.console.period=10
+
+#*.sink.console.unit=seconds
+
+# Master instance overrides the polling period
+#master.sink.console.period=15
+
+#master.sink.console.unit=seconds
+
+# Enable CsvSink for all instances
+#*.sink.csv.class=spark.metrics.sink.CsvSink
+
+# Polling period for CsvSink
+#*.sink.csv.period=1
+
+#*.sink.csv.unit=minutes
+
+# Polling directory for CsvSink
+#*.sink.csv.directory=/tmp/
+
+# Worker instance overrides the polling period
+#worker.sink.csv.period=10
+
+#worker.sink.csv.unit=minutes
+
+# Enable the JVM source for the master, worker, driver, and executor instances
+#master.source.jvm.class=spark.metrics.source.JvmSource
+
+#worker.source.jvm.class=spark.metrics.source.JvmSource
+
+#driver.source.jvm.class=spark.metrics.source.JvmSource
+
+#executor.source.jvm.class=spark.metrics.source.JvmSource
+
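
As the notes in this template say, the metrics system loads the file automatically once it lives in ${SPARK_HOME}/conf, or it can be pointed at an explicit location through the spark.metrics.conf system property. A minimal sketch, with the JVM-option route going through SPARK_JAVA_OPTS and all paths as placeholders:

    # Copy the template into place; Spark searches ${SPARK_HOME}/conf for it.
    cp $SPARK_HOME/conf/metrics.properties.template $SPARK_HOME/conf/metrics.properties
    # In the copy, uncomment the two ConsoleSink lines shown above so every
    # instance prints its metrics every 10 seconds:
    #   *.sink.console.class=spark.metrics.sink.ConsoleSink
    #   *.sink.console.period=10
    # Or point Spark at a file kept elsewhere via a JVM system property:
    export SPARK_JAVA_OPTS="-Dspark.metrics.conf=/path/to/metrics.properties"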
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index 37565ca827..b8936314ec 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -3,8 +3,10 @@
# This file contains environment variables required to run Spark. Copy it as
# spark-env.sh and edit that to configure Spark for your site. At a minimum,
# the following two variables should be set:
-# - MESOS_NATIVE_LIBRARY, to point to your Mesos native library (libmesos.so)
-# - SCALA_HOME, to point to your Scala installation
+# - SCALA_HOME, to point to your Scala installation, or SCALA_LIBRARY_PATH to
+# point to the directory for Scala library JARs (if you install Scala as a
+# Debian or RPM package, these are in a separate path, often /usr/share/java)
+# - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos
#
# If using the standalone deploy mode, you can also set variables for it:
# - SPARK_MASTER_IP, to bind the master to a different IP address
@@ -12,14 +14,6 @@
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
-# - SPARK_WORKER_INSTANCES, to set the number of worker instances/processes to be spawned on every slave machine
-#
-# Finally, Spark also relies on the following variables, but these can be set
-# on just the *master* (i.e. in your driver program), and will automatically
-# be propagated to workers:
-# - SPARK_MEM, to change the amount of memory used per node (this should
-# be in the same format as the JVM's -Xmx option, e.g. 300m or 1g)
-# - SPARK_CLASSPATH, to add elements to Spark's classpath
-# - SPARK_JAVA_OPTS, to add JVM options
-# - SPARK_LIBRARY_PATH, to add extra search paths for native libraries.
+# - SPARK_WORKER_INSTANCES, to set the number of worker instances/processes
+# to be spawned on every slave machine
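
Taken together, first-time configuration of this file is mostly a matter of copying the template and filling in the variables listed above. A minimal sketch, where every path and size is a placeholder for your own installation:

    # spark-env.sh is sourced by Spark's startup scripts.
    cp $SPARK_HOME/conf/spark-env.sh.template $SPARK_HOME/conf/spark-env.sh
    # Then edit the copy, for example:
    export SCALA_HOME=/usr/local/scala                      # or SCALA_LIBRARY_PATH=/usr/share/java
    export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so  # only if running on Mesos
    export SPARK_WORKER_CORES=4                             # standalone mode: cores per worker
    export SPARK_WORKER_MEMORY=4g                           # standalone mode: memory per worker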