aboutsummaryrefslogtreecommitdiff
path: root/third_party/hadoop-0.20.0/conf/capacity-scheduler.xml
blob: d22a3964b42dc705c9ae53aac5693f5dd12facd0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
<?xml version="1.0"?>

<!-- This is the configuration file for the resource manager in Hadoop. -->
<!-- You can configure various scheduling parameters related to queues. -->
<!-- The properties for a queue follow a naming convention, such as -->
<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->

<configuration>

  <property>
    <name>mapred.capacity-scheduler.queue.default.guaranteed-capacity</name>
    <value>100</value>
    <description>Percentage of the number of slots in the cluster that are
      guaranteed to be available for jobs in this queue.
    </description>    
  </property>
  
  <property>
    <name>mapred.capacity-scheduler.queue.default.reclaim-time-limit</name>
    <value>300</value>
    <description>The amount of time, in seconds, before which 
      resources distributed to other queues will be reclaimed.
    </description>
  </property>

  <property>
    <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
    <value>false</value>
    <description>If true, priorities of jobs will be taken into 
      account in scheduling decisions.
    </description>
  </property>

  <property>
    <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
    <value>100</value>
    <description> Each queue enforces a limit on the percentage of resources 
    allocated to a user at any given time, if there is competition for them. 
    This user limit can vary between a minimum and maximum value. The former
    (the minimum) is set to this property value, and the latter (the maximum)
    depends on the number of users who have submitted jobs. For example,
    suppose the value of this property is 25. If two users have submitted jobs
    to a queue, no single user can use more than 50% of the queue resources.
    If a third user submits a job, no single user can use more than 33% of the
    queue resources. With 4 or more users, no user can use more than 25% of
    the queue's resources. A value of 100 implies no user limits are imposed.
    </description>
  </property>
  <property>
    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user</name>
    <value>2</value>
    <description>The maximum number of jobs to be pre-initialized for a user
    of the job queue.
    </description>
  </property>
  
  
  <property>
    <name>mapred.capacity-scheduler.reclaimCapacity.interval</name>
    <value>5</value>
    <description>The time interval, in seconds, at which the scheduler
     periodically determines whether capacity needs to be reclaimed for 
     any queue.
    </description>
  </property>
  
  <!-- The default configuration settings for the capacity task scheduler -->
  <!-- The default values would be applied to all the queues which don't have -->
  <!-- the appropriate property for the particular queue -->
  <property>
    <name>mapred.capacity-scheduler.default-reclaim-time-limit</name>
    <value>300</value>
    <description>The amount of time, in seconds, before which 
    resources distributed to other queues will be reclaimed by default
    in a job queue.
    </description>
  </property>
  
  <property>
    <name>mapred.capacity-scheduler.default-supports-priority</name>
    <value>false</value>
    <description>If true, priorities of jobs will be taken into 
      account in scheduling decisions by default in a job queue.
    </description>
  </property>

  <property>
    <name>mapred.capacity-scheduler.task.default-pmem-percentage-in-vmem</name>
    <value>-1</value>
    <description>If mapred.task.maxpmem is set to -1, this configuration will
      be used to calculate job's physical memory requirements as a percentage of
      the job's virtual memory requirements set via mapred.task.maxvmem. This
      property thus provides a default value of physical memory for jobs that
      don't explicitly specify physical memory requirements.

      If not explicitly set to a valid value, scheduler will not consider
      physical memory for scheduling even if virtual memory based scheduling is
      enabled (by setting valid values for both mapred.task.default.maxvmem and
      mapred.task.limit.maxvmem).
    </description>
  </property>

  <property>
    <name>mapred.capacity-scheduler.task.limit.maxpmem</name>
    <value>-1</value>
    <description>Configuration that provides an upper limit on the maximum
      physical memory that can be specified by a job. The job configuration
      mapred.task.maxpmem should be less than this value. If not, the job will
      be rejected by the scheduler.
      
      If it is set to -1, scheduler will not consider physical memory for
      scheduling even if virtual memory based scheduling is enabled (by setting
      valid values for both mapred.task.default.maxvmem and
      mapred.task.limit.maxvmem).
    </description>
  </property>
  
  <property>
    <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
    <value>100</value>
    <description>The percentage of the resources limited to a particular user
      for the job queue at any given point of time by default.
    </description>
  </property>

  <property>
    <name>mapred.capacity-scheduler.default-maximum-initialized-jobs-per-user</name>
    <value>2</value>
    <description>The maximum number of jobs to be pre-initialized for a user
    of the job queue by default.
    </description>
  </property>


  <!-- Capacity scheduler Job Initialization configuration parameters -->
  <property>
    <name>mapred.capacity-scheduler.init-poll-interval</name>
    <value>5000</value>
    <description>The interval, in milliseconds, at which the job queues are
    polled for jobs to initialize.
    </description>
  </property>
  <property>
    <name>mapred.capacity-scheduler.init-worker-threads</name>
    <value>5</value>
    <description>Number of worker threads used by the initialization poller
    to initialize jobs in a set of queues. If this number equals the number
    of job queues, each thread initializes the jobs of a single queue. If it
    is smaller, each thread is assigned a set of queues. If it is greater,
    the number of threads used equals the number of job queues.
    </description>
  </property>

</configuration>