From 89fcd96702d6aa963192f0221922d2702820048f Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sat, 13 Nov 2010 22:07:08 -0800 Subject: Initial work to get Spark compiling with SBT 0.7.5 RC0 --- lib/hadoop-0.20.0/conf/capacity-scheduler.xml | 156 +++++++++++++++++++++++ lib/hadoop-0.20.0/conf/configuration.xsl | 24 ++++ lib/hadoop-0.20.0/conf/core-site.xml | 8 ++ lib/hadoop-0.20.0/conf/hadoop-env.sh | 54 ++++++++ lib/hadoop-0.20.0/conf/hadoop-metrics.properties | 40 ++++++ lib/hadoop-0.20.0/conf/hadoop-policy.xml | 97 ++++++++++++++ lib/hadoop-0.20.0/conf/hdfs-site.xml | 8 ++ lib/hadoop-0.20.0/conf/log4j.properties | 94 ++++++++++++++ lib/hadoop-0.20.0/conf/mapred-site.xml | 8 ++ lib/hadoop-0.20.0/conf/masters | 1 + lib/hadoop-0.20.0/conf/slaves | 1 + lib/hadoop-0.20.0/conf/ssl-client.xml.example | 57 +++++++++ lib/hadoop-0.20.0/conf/ssl-server.xml.example | 55 ++++++++ 13 files changed, 603 insertions(+) create mode 100644 lib/hadoop-0.20.0/conf/capacity-scheduler.xml create mode 100644 lib/hadoop-0.20.0/conf/configuration.xsl create mode 100644 lib/hadoop-0.20.0/conf/core-site.xml create mode 100644 lib/hadoop-0.20.0/conf/hadoop-env.sh create mode 100644 lib/hadoop-0.20.0/conf/hadoop-metrics.properties create mode 100644 lib/hadoop-0.20.0/conf/hadoop-policy.xml create mode 100644 lib/hadoop-0.20.0/conf/hdfs-site.xml create mode 100644 lib/hadoop-0.20.0/conf/log4j.properties create mode 100644 lib/hadoop-0.20.0/conf/mapred-site.xml create mode 100644 lib/hadoop-0.20.0/conf/masters create mode 100644 lib/hadoop-0.20.0/conf/slaves create mode 100644 lib/hadoop-0.20.0/conf/ssl-client.xml.example create mode 100644 lib/hadoop-0.20.0/conf/ssl-server.xml.example (limited to 'lib/hadoop-0.20.0/conf') diff --git a/lib/hadoop-0.20.0/conf/capacity-scheduler.xml b/lib/hadoop-0.20.0/conf/capacity-scheduler.xml new file mode 100644 index 0000000000..d22a3964b4 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/capacity-scheduler.xml @@ -0,0 +1,156 @@ + + + + + + + + + + + 
mapred.capacity-scheduler.queue.default.guaranteed-capacity + 100 + Percentage of the number of slots in the cluster that are + guaranteed to be available for jobs in this queue. + + + + + mapred.capacity-scheduler.queue.default.reclaim-time-limit + 300 + The amount of time, in seconds, before which + resources distributed to other queues will be reclaimed. + + + + + mapred.capacity-scheduler.queue.default.supports-priority + false + If true, priorities of jobs will be taken into + account in scheduling decisions. + + + + + mapred.capacity-scheduler.queue.default.minimum-user-limit-percent + 100 + Each queue enforces a limit on the percentage of resources + allocated to a user at any given time, if there is competition for them. + This user limit can vary between a minimum and maximum value. The former + depends on the number of users who have submitted jobs, and the latter is + set to this property value. For example, suppose the value of this + property is 25. If two users have submitted jobs to a queue, no single + user can use more than 50% of the queue resources. If a third user submits + a job, no single user can use more than 33% of the queue resources. With 4 + or more users, no user can use more than 25% of the queue's resources. A + value of 100 implies no user limits are imposed. + + + + mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user + 2 + The maximum number of jobs to be pre-initialized for a user + of the job queue. + + + + + + mapred.capacity-scheduler.reclaimCapacity.interval + 5 + The time interval, in seconds, between which the scheduler + periodically determines whether capacity needs to be reclaimed for + any queue. + + + + + + + + mapred.capacity-scheduler.default-reclaim-time-limit + 300 + The amount of time, in seconds, before which + resources distributed to other queues will be reclaimed by default + in a job queue. 
+ + + + + mapred.capacity-scheduler.default-supports-priority + false + If true, priorities of jobs will be taken into + account in scheduling decisions by default in a job queue. + + + + + mapred.capacity-scheduler.task.default-pmem-percentage-in-vmem + -1 + If mapred.task.maxpmem is set to -1, this configuration will + be used to calculate job's physical memory requirements as a percentage of + the job's virtual memory requirements set via mapred.task.maxvmem. This + property thus provides default value of physical memory for jobs that + don't explicitly specify physical memory requirements. + + If not explicitly set to a valid value, scheduler will not consider + physical memory for scheduling even if virtual memory based scheduling is + enabled (by setting valid values for both mapred.task.default.maxvmem and + mapred.task.limit.maxvmem). + + + + + mapred.capacity-scheduler.task.limit.maxpmem + -1 + Configuration that provides an upper limit on the maximum + physical memory that can be specified by a job. The job configuration + mapred.task.maxpmem should be less than this value. If not, the job will + be rejected by the scheduler. + + If it is set to -1, scheduler will not consider physical memory for + scheduling even if virtual memory based scheduling is enabled (by setting + valid values for both mapred.task.default.maxvmem and + mapred.task.limit.maxvmem). + + + + + mapred.capacity-scheduler.default-minimum-user-limit-percent + 100 + The percentage of the resources limited to a particular user + for the job queue at any given point of time by default. + + + + + mapred.capacity-scheduler.default-maximum-initialized-jobs-per-user + 2 + The maximum number of jobs to be pre-initialized for a user + of the job queue. + + + + + + + mapred.capacity-scheduler.init-poll-interval + 5000 + The amount of time in milliseconds which is used to poll + the job queues for jobs to initialize. 
+ + + + mapred.capacity-scheduler.init-worker-threads + 5 + Number of worker threads which would be used by + Initialization poller to initialize jobs in a set of queue. + If number mentioned in property is equal to number of job queues + then a single thread would initialize jobs in a queue. If lesser + then a thread would get a set of queues assigned. If the number + is greater then number of threads would be equal to number of + job queues. + + + + diff --git a/lib/hadoop-0.20.0/conf/configuration.xsl b/lib/hadoop-0.20.0/conf/configuration.xsl new file mode 100644 index 0000000000..377cdbeb93 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/configuration.xsl @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + +

name	value	description

+ + + + diff --git a/lib/hadoop-0.20.0/conf/core-site.xml b/lib/hadoop-0.20.0/conf/core-site.xml new file mode 100644 index 0000000000..970c8fe0e8 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/core-site.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/lib/hadoop-0.20.0/conf/hadoop-env.sh b/lib/hadoop-0.20.0/conf/hadoop-env.sh new file mode 100644 index 0000000000..ada5bef1c7 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/hadoop-env.sh @@ -0,0 +1,54 @@ +# Set Hadoop-specific environment variables here. + +# The only required environment variable is JAVA_HOME. All others are +# optional. When running a distributed configuration it is best to +# set JAVA_HOME in this file, so that it is correctly defined on +# remote nodes. + +# The java implementation to use. Required. +# export JAVA_HOME=/usr/lib/j2sdk1.5-sun + +# Extra Java CLASSPATH elements. Optional. +# export HADOOP_CLASSPATH= + +# The maximum amount of heap to use, in MB. Default is 1000. +# export HADOOP_HEAPSIZE=2000 + +# Extra Java runtime options. Empty by default. +# export HADOOP_OPTS=-server + +# Command specific options appended to HADOOP_OPTS when specified +export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS" +export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS" +export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS" +export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS" +export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS" +# export HADOOP_TASKTRACKER_OPTS= +# The following applies to multiple commands (fs, dfs, fsck, distcp etc) +# export HADOOP_CLIENT_OPTS + +# Extra ssh options. Empty by default. +# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR" + +# Where log files are stored. $HADOOP_HOME/logs by default. +# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs + +# File naming remote slave hosts. 
$HADOOP_HOME/conf/slaves by default. +# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves + +# host:path where hadoop code should be rsync'd from. Unset by default. +# export HADOOP_MASTER=master:/home/$USER/src/hadoop + +# Seconds to sleep between slave commands. Unset by default. This +# can be useful in large clusters, where, e.g., slave rsyncs can +# otherwise arrive faster than the master can service them. +# export HADOOP_SLAVE_SLEEP=0.1 + +# The directory where pid files are stored. /tmp by default. +# export HADOOP_PID_DIR=/var/hadoop/pids + +# A string representing this instance of hadoop. $USER by default. +# export HADOOP_IDENT_STRING=$USER + +# The scheduling priority for daemon processes. See 'man nice'. +# export HADOOP_NICENESS=10 diff --git a/lib/hadoop-0.20.0/conf/hadoop-metrics.properties b/lib/hadoop-0.20.0/conf/hadoop-metrics.properties new file mode 100644 index 0000000000..d04dffc438 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/hadoop-metrics.properties @@ -0,0 +1,40 @@ +# Configuration of the "dfs" context for null +dfs.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "dfs" context for file +#dfs.class=org.apache.hadoop.metrics.file.FileContext +#dfs.period=10 +#dfs.fileName=/tmp/dfsmetrics.log + +# Configuration of the "dfs" context for ganglia +# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# dfs.period=10 +# dfs.servers=localhost:8649 + + +# Configuration of the "mapred" context for null +mapred.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "mapred" context for file +#mapred.class=org.apache.hadoop.metrics.file.FileContext +#mapred.period=10 +#mapred.fileName=/tmp/mrmetrics.log + +# Configuration of the "mapred" context for ganglia +# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# mapred.period=10 +# mapred.servers=localhost:8649 + + +# Configuration of the "jvm" context for null +jvm.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the 
"jvm" context for file +#jvm.class=org.apache.hadoop.metrics.file.FileContext +#jvm.period=10 +#jvm.fileName=/tmp/jvmmetrics.log + +# Configuration of the "jvm" context for ganglia +# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# jvm.period=10 +# jvm.servers=localhost:8649 diff --git a/lib/hadoop-0.20.0/conf/hadoop-policy.xml b/lib/hadoop-0.20.0/conf/hadoop-policy.xml new file mode 100644 index 0000000000..ef48f2bbed --- /dev/null +++ b/lib/hadoop-0.20.0/conf/hadoop-policy.xml @@ -0,0 +1,97 @@ + + + + + + + + security.client.protocol.acl + * + ACL for ClientProtocol, which is used by user code + via the DistributedFileSystem. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.client.datanode.protocol.acl + * + ACL for ClientDatanodeProtocol, the client-to-datanode protocol + for block recovery. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.datanode.protocol.acl + * + ACL for DatanodeProtocol, which is used by datanodes to + communicate with the namenode. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.inter.datanode.protocol.acl + * + ACL for InterDatanodeProtocol, the inter-datanode protocol + for updating generation timestamp. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. 
+ + + + security.namenode.protocol.acl + * + ACL for NamenodeProtocol, the protocol used by the secondary + namenode to communicate with the namenode. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.inter.tracker.protocol.acl + * + ACL for InterTrackerProtocol, used by the tasktrackers to + communicate with the jobtracker. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.job.submission.protocol.acl + * + ACL for JobSubmissionProtocol, used by job clients to + communicate with the jobtracker for job submission, querying job status etc. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.task.umbilical.protocol.acl + * + ACL for TaskUmbilicalProtocol, used by the map and reduce + tasks to communicate with the parent tasktracker. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.refresh.policy.protocol.acl + * + ACL for RefreshAuthorizationPolicyProtocol, used by the + dfsadmin and mradmin commands to refresh the security policy in-effect. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. 
+ + + diff --git a/lib/hadoop-0.20.0/conf/hdfs-site.xml b/lib/hadoop-0.20.0/conf/hdfs-site.xml new file mode 100644 index 0000000000..970c8fe0e8 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/hdfs-site.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/lib/hadoop-0.20.0/conf/log4j.properties b/lib/hadoop-0.20.0/conf/log4j.properties new file mode 100644 index 0000000000..d797df6dab --- /dev/null +++ b/lib/hadoop-0.20.0/conf/log4j.properties @@ -0,0 +1,94 @@ +# Define some default values that can be overridden by system properties +hadoop.root.logger=INFO,console +hadoop.log.dir=. +hadoop.log.file=hadoop.log + +# Define the root logger to the system property "hadoop.root.logger". +log4j.rootLogger=${hadoop.root.logger}, EventCounter + +# Logging Threshold +log4j.threshhold=ALL + +# +# Daily Rolling File Appender +# + +log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} + +# Rollover at midnight +log4j.appender.DRFA.DatePattern=.yyyy-MM-dd + +# 30-day backup +#log4j.appender.DRFA.MaxBackupIndex=30 +log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +# Debugging Pattern format +#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# console +# Add "console" to rootlogger above if you want to use this +# + +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n + +# +# TaskLog Appender +# + +#Default values +hadoop.tasklog.taskid=null +hadoop.tasklog.noKeepSplits=4 +hadoop.tasklog.totalLogFileSize=100 +hadoop.tasklog.purgeLogSplits=true +hadoop.tasklog.logsRetainHours=12 + +log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender 
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} +log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} + +log4j.appender.TLA.layout=org.apache.log4j.PatternLayout +log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n + +# +# Rolling File Appender +# + +#log4j.appender.RFA=org.apache.log4j.RollingFileAppender +#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} + +# Logfile size and 30-day backups +#log4j.appender.RFA.MaxFileSize=1MB +#log4j.appender.RFA.MaxBackupIndex=30 + +#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout +#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n +#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + +# +# FSNamesystem Audit logging +# All audit events are logged at INFO level +# +log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN + +# Custom Logging levels + +#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG +#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG +#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG + +# Jets3t library +log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR + +# +# Event Counter Appender +# Sends counts of logging messages at different severity levels to Hadoop Metrics. 
+# +log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter diff --git a/lib/hadoop-0.20.0/conf/mapred-site.xml b/lib/hadoop-0.20.0/conf/mapred-site.xml new file mode 100644 index 0000000000..970c8fe0e8 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/mapred-site.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/lib/hadoop-0.20.0/conf/masters b/lib/hadoop-0.20.0/conf/masters new file mode 100644 index 0000000000..2fbb50c4a8 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/masters @@ -0,0 +1 @@ +localhost diff --git a/lib/hadoop-0.20.0/conf/slaves b/lib/hadoop-0.20.0/conf/slaves new file mode 100644 index 0000000000..2fbb50c4a8 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/slaves @@ -0,0 +1 @@ +localhost diff --git a/lib/hadoop-0.20.0/conf/ssl-client.xml.example b/lib/hadoop-0.20.0/conf/ssl-client.xml.example new file mode 100644 index 0000000000..ec3fd41fa8 --- /dev/null +++ b/lib/hadoop-0.20.0/conf/ssl-client.xml.example @@ -0,0 +1,57 @@ + + + + + + + ssl.client.truststore.location + + Truststore to be used by clients like distcp. Must be + specified. + + + + + ssl.client.truststore.password + + Optional. Default value is "". + + + + + ssl.client.truststore.type + jks + Optional. Default value is "jks". + + + + + ssl.client.keystore.location + + Keystore to be used by clients like distcp. Must be + specified. + + + + + ssl.client.keystore.password + + Optional. Default value is "". + + + + + ssl.client.keystore.keypassword + + Optional. Default value is "". + + + + + ssl.client.keystore.type + jks + Optional. Default value is "jks". + + + + diff --git a/lib/hadoop-0.20.0/conf/ssl-server.xml.example b/lib/hadoop-0.20.0/conf/ssl-server.xml.example new file mode 100644 index 0000000000..22e9cb0ebb --- /dev/null +++ b/lib/hadoop-0.20.0/conf/ssl-server.xml.example @@ -0,0 +1,55 @@ + + + + + + + ssl.server.truststore.location + + Truststore to be used by NN and DN. Must be specified. + + + + + ssl.server.truststore.password + + Optional. Default value is "". 
+ + + + + ssl.server.truststore.type + jks + Optional. Default value is "jks". + + + + + ssl.server.keystore.location + + Keystore to be used by NN and DN. Must be specified. + + + + + ssl.server.keystore.password + + Must be specified. + + + + + ssl.server.keystore.keypassword + + Must be specified. + + + + + ssl.server.keystore.type + jks + Optional. Default value is "jks". + + + + -- cgit v1.2.3