Diffstat (limited to 'conf')
-rw-r--r--  conf/metrics.properties.template   90
-rw-r--r--  conf/slaves                         2
-rwxr-xr-x  conf/spark-env.sh.template         20
3 files changed, 102 insertions, 10 deletions
diff --git a/conf/metrics.properties.template b/conf/metrics.properties.template
new file mode 100644
index 0000000000..6c36f3cca4
--- /dev/null
+++ b/conf/metrics.properties.template
@@ -0,0 +1,90 @@
+# syntax: [instance].sink|source.[name].[options]=[value]
+
+# This file configures Spark's internal metrics system. The metrics system is
+# divided into instances which correspond to internal components.
+# Each instance can be configured to report its metrics to one or more sinks.
+# Accepted values for [instance] are "master", "worker", "executor", "driver",
+# and "applications". A wild card "*" can be used as an instance name, in
+# which case all instances will inherit the supplied property.
+#
+# Within an instance, a "source" specifies a particular set of grouped metrics.
+# There are two kinds of sources:
+# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
+# collect a Spark component's internal state. Each instance is paired with a
+# Spark source that is added automatically.
+# 2. Common sources, like JvmSource, which will collect low level state.
+# These can be added through configuration options and are then loaded
+# using reflection.
+#
+# A "sink" specifies where metrics are delivered to. Each instance can be
+# assigned one or more sinks.
+#
+# The sink|source field specifies whether the property relates to a sink or
+# source.
+#
+# The [name] field specifies the name of source or sink.
+#
+# The [options] field is a specific property of this source or sink. The
+# source or sink is responsible for parsing this property.
+#
+# Notes:
+# 1. To add a new sink, set the "class" option to a fully qualified class
+# name (see examples below).
+# 2. Some sinks involve a polling period. The minimum allowed polling period
+# is 1 second.
+# 3. Wild card properties can be overridden by more specific properties.
+# For example, master.sink.console.period takes precedence over
+# *.sink.console.period.
+# 4. A metrics-specific configuration file such as
+# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
+# passed as a Java system property using -Dspark.metrics.conf=xxx if you
+# want to customize the metrics system. You can also put the file in
+# ${SPARK_HOME}/conf and it will be loaded automatically.
+# 5. MetricsServlet is added by default as a sink in the master, worker, and
+# client driver; you can send an HTTP request to "/metrics/json" to get a snapshot
+# of all registered metrics in JSON format. For the master, "/metrics/master/json"
+# and "/metrics/applications/json" can be requested separately to get snapshots of
+# the master and applications instances. MetricsServlet does not need to be configured explicitly.
+#
+
+# Enable JmxSink for all instances by class name
+#*.sink.jmx.class=spark.metrics.sink.JmxSink
+
+# Enable ConsoleSink for all instances by class name
+#*.sink.console.class=spark.metrics.sink.ConsoleSink
+
+# Polling period for ConsoleSink
+#*.sink.console.period=10
+
+#*.sink.console.unit=seconds
+
+# Master instance overrides the wildcard polling period
+#master.sink.console.period=15
+
+#master.sink.console.unit=seconds
+
+# Enable CsvSink for all instances
+#*.sink.csv.class=spark.metrics.sink.CsvSink
+
+# Polling period for CsvSink
+#*.sink.csv.period=1
+
+#*.sink.csv.unit=minutes
+
+# Output directory for CsvSink
+#*.sink.csv.directory=/tmp/
+
+# Worker instance overrides the wildcard polling period
+#worker.sink.csv.period=10
+
+#worker.sink.csv.unit=minutes
+
+# Enable JvmSource for the master, worker, driver, and executor instances
+#master.source.jvm.class=spark.metrics.source.JvmSource
+
+#worker.source.jvm.class=spark.metrics.source.JvmSource
+
+#driver.source.jvm.class=spark.metrics.source.JvmSource
+
+#executor.source.jvm.class=spark.metrics.source.JvmSource
+
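The syntax line and notes in the template above boil down to a short recipe. The sketch below (shell) enables ConsoleSink for every instance and lets a master-specific property override the wildcard period; the 20/10-second periods, the /etc/spark path, and the use of SPARK_JAVA_OPTS to carry the -Dspark.metrics.conf flag are illustrative assumptions, not values prescribed by this change.

    # Start from the template added in this commit (paths assume a standard
    # ${SPARK_HOME} layout).
    cp "${SPARK_HOME}/conf/metrics.properties.template" \
       "${SPARK_HOME}/conf/metrics.properties"

    # Append an active configuration: every instance reports to the console
    # every 20 seconds, while the more specific master.sink.console.period
    # overrides the wildcard and reports every 10 seconds.
    {
      echo '*.sink.console.class=spark.metrics.sink.ConsoleSink'
      echo '*.sink.console.period=20'
      echo '*.sink.console.unit=seconds'
      echo 'master.sink.console.period=10'
      echo 'master.sink.console.unit=seconds'
    } >> "${SPARK_HOME}/conf/metrics.properties"

    # If the file lives outside ${SPARK_HOME}/conf, point the metrics system
    # at it with a Java system property; passing it through SPARK_JAVA_OPTS is
    # one plausible route for a node-specific JVM option.
    export SPARK_JAVA_OPTS="-Dspark.metrics.conf=/etc/spark/metrics.properties"
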
diff --git a/conf/slaves b/conf/slaves
index 6e315a8540..da0a01343d 100644
--- a/conf/slaves
+++ b/conf/slaves
@@ -1,2 +1,2 @@
-# A Spark Worker will be started on each of the machines listes below.
+# A Spark Worker will be started on each of the machines listed below.
 localhost
\ No newline at end of file
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index b8936314ec..0a35ee7c79 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -1,19 +1,21 @@
#!/usr/bin/env bash
# This file contains environment variables required to run Spark. Copy it as
-# spark-env.sh and edit that to configure Spark for your site. At a minimum,
-# the following two variables should be set:
-# - SCALA_HOME, to point to your Scala installation, or SCALA_LIBRARY_PATH to
-# point to the directory for Scala library JARs (if you install Scala as a
-# Debian or RPM package, these are in a separate path, often /usr/share/java)
+# spark-env.sh and edit that to configure Spark for your site.
+#
+# The following variables can be set in this file:
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos
+# - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that
+# we recommend setting app-wide options in the application's driver program.
+# Examples of node-specific options: -Dspark.local.dir, GC options
+# Examples of app-wide options: -Dspark.serializer
#
-# If using the standalone deploy mode, you can also set variables for it:
-# - SPARK_MASTER_IP, to bind the master to a different IP address
+# If using the standalone deploy mode, you can also set variables for it here:
+# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
-# - SPARK_WORKER_INSTANCES, to set the number of worker instances/processes
-# to be spawned on every slave machine
+# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
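For concreteness, a filled-in spark-env.sh for a node in a standalone cluster might look like the sketch below; the hostname, IP address, ports, core and memory figures, worker count, and scratch directory are illustrative placeholders rather than values this change recommends.

    #!/usr/bin/env bash
    # Bind this node's Spark daemons to a specific local address.
    export SPARK_LOCAL_IP=192.168.1.23

    # Standalone deploy mode: where the master binds and which ports it uses.
    export SPARK_MASTER_IP=spark-master.example.com
    export SPARK_MASTER_PORT=7077
    export SPARK_MASTER_WEBUI_PORT=8080

    # Resources offered by this machine, split across two worker processes.
    export SPARK_WORKER_CORES=4
    export SPARK_WORKER_MEMORY=4g
    export SPARK_WORKER_INSTANCES=2

    # Node-specific JVM options; app-wide options (e.g. -Dspark.serializer)
    # belong in the application's driver program instead.
    export SPARK_JAVA_OPTS="-Dspark.local.dir=/mnt/spark-scratch"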