#  syntax: [instance].sink|source.[name].[options]=[value]

#  This file configures Spark's internal metrics system. The metrics system is
#  divided into instances which correspond to internal components.
#  Each instance can be configured to report its metrics to one or more sinks.
#  Accepted values for [instance] are "master", "worker", "executor", "driver",
#  and "applications". A wild card "*" can be used as an instance name, in
#  which case all instances will inherit the supplied property.
#
#  Within an instance, a "source" specifies a particular set of grouped metrics.
#  There are two kinds of sources:
#    1. Spark internal sources, like MasterSource, WorkerSource, etc., which
#    collect a Spark component's internal state. Each instance is paired with a
#    Spark source that is added automatically.
#    2. Common sources, like JvmSource, which collect low-level state. These
#    can be added through configuration options and are then loaded using
#    reflection.
#
#  A "sink" specifies where metrics are delivered. Each instance can be
#  assigned one or more sinks.
#
#  The sink|source field specifies whether the property relates to a sink or
#  a source.
#
#  The [name] field specifies the name of the source or sink.
#
#  The [options] field is a specific property of the source or sink. The
#  source or sink is responsible for parsing this property.
#
#  Notes:
#    1. To add a new sink, set the "class" option to a fully qualified class
#    name (see examples below).
#    2. Some sinks involve a polling period. The minimum allowed polling period
#    is 1 second.
#    3. Wild card properties can be overridden by more specific properties.
#    For example, master.sink.console.period takes precedence over
#    *.sink.console.period.
#    4. To customize the metrics system, a metrics-specific configuration
#    "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
#    added to the Java properties using -Dspark.metrics.conf=xxx. You can also
#    put the file in ${SPARK_HOME}/conf, where it will be loaded automatically
#    (see the example command after these notes).
#    5. MetricsServlet is added by default as a sink in the master, worker,
#    and client driver. You can send an HTTP request to "/metrics/json" to get
#    a snapshot of all registered metrics in JSON format. For the master,
#    requests to "/metrics/master/json" and "/metrics/applications/json" can
#    be sent separately to get metrics snapshots of the master and
#    applications instances (see the example requests after these notes).
#    MetricsServlet cannot be configured by itself.
#
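# For illustration only (not part of the standard template): the commands
# below sketch one way to exercise notes 4 and 5. The configuration file
# path and trailing application arguments are placeholders, and the master
# web UI is assumed to be listening on its default port, 8080.
#
#   # Point Spark at a custom metrics configuration file (note 4):
#   spark-submit --conf spark.metrics.conf=/path/to/metrics.properties ...
#
#   # Fetch JSON snapshots from the master's MetricsServlet (note 5):
#   curl http://localhost:8080/metrics/master/json
#   curl http://localhost:8080/metrics/applications/json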
## List of available sinks and their properties.

# org.apache.spark.metrics.sink.ConsoleSink
#   Name:    Default:   Description:
#   period   10         Poll period
#   unit     seconds    Unit of the poll period

# org.apache.spark.metrics.sink.CSVSink
#   Name:      Default:   Description:
#   period     10         Poll period
#   unit       seconds    Unit of the poll period
#   directory  /tmp       Where to store CSV files

# org.apache.spark.metrics.sink.GangliaSink
#   Name:    Default:   Description:
#   host     NONE       Hostname or multicast group of the Ganglia server
#   port     NONE       Port of the Ganglia server(s)
#   period   10         Poll period
#   unit     seconds    Unit of the poll period
#   ttl      1          TTL of messages sent by Ganglia
#   mode     multicast  Ganglia network mode ('unicast' or 'multicast')

# org.apache.spark.metrics.sink.JmxSink

# org.apache.spark.metrics.sink.MetricsServlet
#   Name:    Default:   Description:
#   path     VARIES*    Path prefix from the web server root
#   sample   false      Whether to show the entire set of samples for
#                       histograms ('false' or 'true')
#
# * The default path is /metrics/json for all instances except the master.
#   The master has two paths:
#     /metrics/applications/json  # App information
#     /metrics/master/json        # Master information

# org.apache.spark.metrics.sink.GraphiteSink
#   Name:    Default:      Description:
#   host     NONE          Hostname of the Graphite server
#   port     NONE          Port of the Graphite server
#   period   10            Poll period
#   unit     seconds       Unit of the poll period
#   prefix   EMPTY STRING  Prefix to prepend to metric names

## Examples
# Enable JmxSink for all instances by class name
#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink

# Enable ConsoleSink for all instances by class name
#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink

# Polling period for ConsoleSink
#*.sink.console.period=10
#*.sink.console.unit=seconds

# Master instance overrides the polling period
#master.sink.console.period=15
#master.sink.console.unit=seconds

# Enable CsvSink for all instances
#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink

# Polling period for CsvSink
#*.sink.csv.period=1
#*.sink.csv.unit=minutes

# Polling directory for CsvSink
#*.sink.csv.directory=/tmp/

# Worker instance overrides the polling period
#worker.sink.csv.period=10
#worker.sink.csv.unit=minutes

# Enable JvmSource for the master, worker, driver, and executor instances
#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource
#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource
#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource
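
# Enable GraphiteSink for all instances (illustrative sketch; the host,
# port, and prefix values below are placeholders, not defaults shipped
# with Spark)
#*.sink.graphite.class=org.apache.spark.metrics.sink.GraphiteSink
#*.sink.graphite.host=graphite.example.com
#*.sink.graphite.port=2003
#*.sink.graphite.period=10
#*.sink.graphite.unit=seconds
#*.sink.graphite.prefix=spark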
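
# Enable GangliaSink for all instances (illustrative sketch; the multicast
# group and port shown are common Ganglia defaults, not values set by Spark)
#*.sink.ganglia.class=org.apache.spark.metrics.sink.GangliaSink
#*.sink.ganglia.host=239.2.11.71
#*.sink.ganglia.port=8649
#*.sink.ganglia.mode=multicast
#*.sink.ganglia.period=10
#*.sink.ganglia.unit=seconds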