diff options
author | Matei Zaharia <matei@eecs.berkeley.edu> | 2010-08-16 23:16:35 -0700 |
---|---|---|
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2010-08-16 23:16:35 -0700 |
commit | 75b2ca10c3d5a4e97536c133ad0ef8e22b95ceba (patch) | |
tree | bdac90c4f019530ee2b14d3919e5a97907d8f002 /third_party | |
parent | 1cbffaae6f8a8511a4e239bcdbd1785cda1ac6cc (diff) | |
download | spark-75b2ca10c3d5a4e97536c133ad0ef8e22b95ceba.tar.gz spark-75b2ca10c3d5a4e97536c133ad0ef8e22b95ceba.tar.bz2 spark-75b2ca10c3d5a4e97536c133ad0ef8e22b95ceba.zip |
Removed HOD from included Hadoop because it was making the project count
as Python on GitHub :|.
Diffstat (limited to 'third_party')
70 files changed, 0 insertions, 15196 deletions
diff --git a/third_party/hadoop-0.20.0/contrib/hod/CHANGES.txt b/third_party/hadoop-0.20.0/contrib/hod/CHANGES.txt deleted file mode 100644 index 95cf0710fe..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/CHANGES.txt +++ /dev/null @@ -1,248 +0,0 @@ -HOD Change Log - -Release 0.20.0 - (unreleased changes) - - INCOMPATIBLE CHANGES - - NEW FEATURES - - IMPROVEMENTS - - HADOOP-4705. Grant read permissions for files/directories - created by HOD. (Peeyush Bishnoi via yhemanth) - - HADOOP-4937. Include ringmaster RPC port in the notes - attribute. (Peeyush Bishnoi via yhemanth) - - OPTIMIZATIONS - - BUG FIXES - - HADOOP-4782. Revert umask changes in HADOOP-4705 so that - files are still securely created. (Peeyush Bishnoi via - yhemanth) - -Release 0.19.0 - 2008-11-18 - - INCOMPATIBLE CHANGES - - NEW FEATURES - - HADOOP-3695. Provide an ability to start multiple workers per node. - (Vinod Kumar Vavilapalli via yhemanth) - - IMPROVEMENTS - - OPTIMIZATIONS - - BUG FIXES - - HADOOP-3959. Pass options specified in resource_manager.options to - job submission. - (Craig Macdonald and Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3814. Remove generation of dfs.client.buffer.dir for the generated - hadoop-site.xml. (Vinod Kumar Vavilapalli via acmurthy) - -Release 0.18.2 - Unreleased - - BUG FIXES - - HADOOP-3786. Use HDFS instead of DFS in all docs and hyperlink to Torque. - (Vinod Kumar Vavilapalli via acmurthy) - -Release 0.18.1 - 2008-09-17 - - INCOMPATIBLE CHANGES - - HADOOP-4060. Modified HOD to rotate log files on the client side. - (Vinod Kumar Vavilapalli via yhemanth) - - IMPROVEMENTS - - HADOOP-4145. Add an accounting plugin (script) for HOD. - (Hemanth Yamijala via nigel) - - BUG FIXES - - HADOOP-4161. Fixed bug in HOD cleanup that had the potential to - hang clients. (Vinod Kumar Vavilapalli via nigel) - -Release 0.18.0 - 2008-08-19 - - INCOMPATIBLE CHANGES - - HADOOP-3483. 
Modified HOD to create a cluster directory if one does not - exist and to auto-deallocate a cluster while reallocating it, if it is - already dead. (Hemanth Yamijala via mukund) - - HADOOP-3184. Modified HOD to handle master failures on bad nodes by trying - to bring them up on another node in the ring. (Hemanth Yamijala via ddas) - - HADOOP-3610. Modified HOD to create cluster directory if one does not - exist when using the script option. (Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3808. Modified HOD to include RPC port of the JobTracker - into the notes attribute of the resource manager. (yhemanth) - - NEW FEATURES - - IMPROVEMENTS - - HADOOP-3376: Provide a mechanism to detect and handle violations to - resource manager limits. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-3151. Improves error messages when reporting failures due to - incorrect parameters passed to HOD. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-3464. Implemented a mechanism to transfer HOD errors that occur on - compute nodes to the submit node running the HOD client, so users have good - feedback on why an allocation failed. (Vinod Kumar Vavilapalli via mukund) - - HADOOP-3505. Updated HOD documentation with changes made for Hadoop - 0.18. (Vinod Kumar Vavilapalli via yhemanth) - - BUG FIXES - - HADOOP-2961. Avoids unnecessary checks for some configuration parameters - related to service configuration. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-3523. Fixes auto-deallocation of cluster if job id is not found in - Torque's job list (Hemanth Yamijala via ddas) - - HADOOP-3531. Fixes a bug related to handling JobTracker failures because of - timing issues on slow nodes. (Hemanth Yamijala via ddas) - - HADOOP-3564. HOD generates values for the parameter dfs.datanode.ipc.address - in the hadoop-site.xml created on datanodes. - (Vinod Kumar Vavilapalli via ddas) - - HADOOP-3076. 
Fixes a bug related to a spurious message about the - script.exitcode file when a cluster directory is specified as a relative - path. (Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3668. Makes editorial changes to HOD documentation. - (Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3703. Fixes logcondense.py to use the new format of hadoop dfs -lsr - command line output format. (Vinod Kumar Vavilapalli via yhemanth) - -Release 0.17.3 - Unreleased - - BUG FIXES - - HADOOP-3217. Decrease the rate at which the hod queries the resource - manager for job status. (Hemanth Yamijala via acmurthy) - -Release 0.17.0 - 2008-05-18 - - INCOMPATIBLE CHANGES - - HADOOP-3137. Modified build script to pick up version automatically - from Hadoop build. (yhemanth) - - IMPROVEMENTS - - HADOOP-2775. Adds unit test framework for HOD. - (Vinod Kumar Vavilapalli via ddas). - - HADOOP-2848. [HOD]hod -o list and deallocate works even after deleting - the cluster directory. (Hemanth Yamijala via ddas) - - HADOOP-2899. [HOD] Cleans up hdfs:///mapredsystem directory after - deallocation. (Hemanth Yamijala via ddas) - - HADOOP-2796. Enables distinguishing exit codes from user code vis-a-vis - HOD's exit code. (Hemanth Yamijala via ddas) - - HADOOP-2947. HOD redirects stdout and stderr of daemons to assist - getting stack traces. (Vinod Kumar Vavilapalli via yhemanth) - - BUG FIXES - - HADOOP-2924. Fixes an address problem to do with TaskTracker binding - to an address. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2970. Fixes a problem to do with Wrong class definition for - hodlib/Hod/hod.py for Python < 2.5.1. - (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2783. Fixes a problem to do with import in - hod/hodlib/Common/xmlrpc.py. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2936. Fixes HOD in a way that it generates hdfs://host:port on the - client side configs. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2983. 
[HOD] Fixes the problem - local_fqdn() returns None when - gethostbyname_ex doesnt return any FQDNs. (Craig Macdonald via ddas) - - HADOOP-2982. Fixes a problem in the way HOD looks for free nodes. - (Hemanth Yamijala via ddas) - - HADOOP-2855. Fixes the way HOD handles relative paths for cluster - directory, script file and other options. - (Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3153. Fixes the way HOD handles allocation if the user has no - permissions to update the clusters state file. - (Vinod Kumar Vavilapalli via yhemanth) - -Release 0.16.4 - 2008-05-05 - - BUG FIXES - - HADOOP-3304. [HOD] Fixes the way the logcondense.py utility searches - for log files that need to be deleted. (yhemanth via mukund) - -Release 0.16.2 - 2008-04-02 - - BUG FIXES - - HADOOP-3103. [HOD] Hadoop.tmp.dir should not be set to cluster - directory. (Vinod Kumar Vavilapalli via ddas). - -Release 0.16.1 - 2008-03-13 - - INCOMPATIBLE CHANGES - - HADOOP-2861. Improve the user interface for the HOD commands. - Command line structure has changed. (Hemanth Yamijala via nigel) - - IMPROVEMENTS - - HADOOP-2730. HOD documentation update. - (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2911. Make the information printed by the HOD allocate and - info commands less verbose and clearer. (Vinod Kumar via nigel) - - BUG FIXES - - HADOOP-2766. Enables setting of HADOOP_OPTS env variable for the hadoop - daemons through HOD. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2809. Fix HOD syslog config syslog-address so that it works. - (Hemanth Yamijala via nigel) - - HADOOP-2847. Ensure idle cluster cleanup works even if the JobTracker - becomes unresponsive to RPC calls. (Hemanth Yamijala via nigel) - - HADOOP-2925. Fix HOD to create the mapred system directory using a - naming convention that will avoid clashes in multi-user shared - cluster scenario. (Hemanth Yamijala via nigel) - -Release 0.16.0 - 2008-02-07 - - NEW FEATURES - - HADOOP-1301. 
Hadoop-On-Demand (HOD): resource management - provisioning for Hadoop. (Hemanth Yamijala via nigel) - - BUG FIXES - - HADOOP-2720. Jumbo bug fix patch to HOD. Final sync of Apache SVN with - internal Yahoo SVN. (Hemanth Yamijala via nigel) - - HADOOP-2740. Fix HOD to work with the configuration variables changed in - HADOOP-2404. (Hemanth Yamijala via omalley) - diff --git a/third_party/hadoop-0.20.0/contrib/hod/README b/third_party/hadoop-0.20.0/contrib/hod/README deleted file mode 100644 index aaa7d35c3e..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/README +++ /dev/null @@ -1,104 +0,0 @@ - Hadoop On Demand - ================ - -1. Introduction: -================ - -The Hadoop On Demand (HOD) project is a system for provisioning and -managing independent Hadoop MapReduce instances on a shared cluster -of nodes. HOD uses a resource manager for allocation. At present it -supports Torque (http://www.clusterresources.com/pages/products/torque-resource-manager.php) -out of the box. - -2. Feature List: -================ - -The following are the features provided by HOD: - -2.1 Simplified interface for managing MapReduce clusters: - -The MapReduce user interacts with the cluster through a simple -command line interface, the HOD client. HOD brings up a virtual -MapReduce cluster with the required number of nodes, which the -user can use for running Hadoop jobs. When done, HOD will -automatically clean up the resources and make the nodes available -again. - -2.2 Automatic installation of Hadoop: - -With HOD, Hadoop does not need to be even installed on the cluster. -The user can provide a Hadoop tarball that HOD will automatically -distribute to all the nodes in the cluster. - -2.3 Configuring Hadoop: - -Dynamic parameters of Hadoop configuration, such as the NameNode and -JobTracker addresses and ports, and file system temporary directories -are generated and distributed by HOD automatically to all nodes in -the cluster. 
- -In addition, HOD allows the user to configure Hadoop parameters -at both the server (for e.g. JobTracker) and client (for e.g. JobClient) -level, including 'final' parameters, that were introduced with -Hadoop 0.15. - -2.4 Auto-cleanup of unused clusters: - -HOD has an automatic timeout so that users cannot misuse resources they -aren't using. The timeout applies only when there is no MapReduce job -running. - -2.5 Log services: - -HOD can be used to collect all MapReduce logs to a central location -for archiving and inspection after the job is completed. - -3. HOD Components -================= - -This is a brief overview of the various components of HOD and how they -interact to provision Hadoop. - -HOD Client: The HOD client is a Unix command that users use to allocate -Hadoop MapReduce clusters. The command provides other options to list -allocated clusters and deallocate them. The HOD client generates the -hadoop-site.xml in a user specified directory. The user can point to -this configuration file while running Map/Reduce jobs on the allocated -cluster. - -RingMaster: The RingMaster is a HOD process that is started on one node -per every allocated cluster. It is submitted as a 'job' to the resource -manager by the HOD client. It controls which Hadoop daemons start on -which nodes. It provides this information to other HOD processes, -such as the HOD client, so users can also determine this information. -The RingMaster is responsible for hosting and distributing the -Hadoop tarball to all nodes in the cluster. It also automatically -cleans up unused clusters. - -HodRing: The HodRing is a HOD process that runs on every allocated node -in the cluster. These processes are run by the RingMaster through the -resource manager, using a facility of parallel execution. The HodRings -are responsible for launching Hadoop commands on the nodes to bring up -the Hadoop daemons. They get the command to launch from the RingMaster. 
- -Hodrc / HOD configuration file: An INI style configuration file where -the users configure various options for the HOD system, including -install locations of different software, resource manager parameters, -log and temp file directories, parameters for their MapReduce jobs, -etc. - -Submit Nodes: Nodes where the HOD Client is run, from where jobs are -submitted to the resource manager system for allocating and running -clusters. - -Compute Nodes: Nodes which get allocated by a resource manager, -and on which the Hadoop daemons are provisioned and started. - -4. Next Steps: -============== - -- Read getting_started.txt to get an idea of how to get started with -installing, configuring and running HOD. - -- Read config.txt to get more details on configuration options for HOD. - diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/VERSION b/third_party/hadoop-0.20.0/contrib/hod/bin/VERSION deleted file mode 100755 index 5a03fb737b..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.20.0 diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/checknodes b/third_party/hadoop-0.20.0/contrib/hod/bin/checknodes deleted file mode 100755 index 5f9f92f166..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/checknodes +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -PBS_NODES_PATH=`which pbsnodes 2>/dev/null` -if [ -z $PBS_NODES_PATH ] -then - echo Could not find pbsnodes in path. Cannot check available number of nodes. >&2 - exit 1 -fi -if [ -z $1 ] -then - echo Usage: checknodes queue-name >&2 - exit 2 -fi -# the number of nodes marked 'free', and which do not contain a jobs attribute from the server or from the moms. -$PBS_NODES_PATH :$1 | awk 'BEGIN {c=0} /state = free/ {getline;getline;getline;getline; if ($0 !~ /jobs =/ && $0 !~ /jobs=[0-9].*/) c++ ; } END {print c}' diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/hod b/third_party/hadoop-0.20.0/contrib/hod/bin/hod deleted file mode 100755 index e87b2764db..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/hod +++ /dev/null @@ -1,577 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""":" -work_dir=$(dirname $0) -base_name=$(basename $0) -original_dir=$PWD -cd $work_dir - -if [ $HOD_PYTHON_HOME ]; then - exec $HOD_PYTHON_HOME -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir -elif [ -e /usr/bin/python ]; then - exec /usr/bin/python -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir -elif [ -e /usr/local/bin/python ]; then - exec /usr/local/bin/python -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir -else - exec python -u -OO $base_name ${1+"$@"} --hod.original-dir $work_dir -fi -":""" - -"""The executable to be used by the user""" -import sys, os, re, pwd, threading, sys - -myName = os.path.basename(sys.argv[0]) -myName = re.sub(".*/", "", myName) -binDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/bin/.*", "", binDirectory) -libDirectory = rootDirectory - -sys.path.append(libDirectory) - -from hodlib.Hod.hod import hodRunner -from hodlib.Common.setup import * -from hodlib.Common.descGenerator import * -from hodlib.Common.util import local_fqdn, need_to_allocate, filter_warnings,\ - get_exception_error_string, hodInterrupt, \ - HOD_INTERRUPTED_MESG, HOD_INTERRUPTED_CODE,\ - TORQUE_USER_LIMITS_COMMENT_FIELD -from hodlib.Common.tcp import tcpError, tcpSocket -from hodlib.Hod.hod import hodHelp - -filter_warnings() - -reVersion = re.compile(".*(\d+_\d+).*") - -VERSION = None -if os.path.exists("./VERSION"): - vFile = open("./VERSION", 'r') - VERSION = vFile.readline() - vFile.close() - -# Always look for hodrc file here unless otherwise specified with -c: -DEFAULT_LOC = os.path.join(rootDirectory, 'conf') -DEFAULT_HOD_DIR = os.path.join(os.environ['HOME'], ".hod") - -if not os.path.isdir(DEFAULT_HOD_DIR): - os.mkdir(DEFAULT_HOD_DIR, 0777) - -DEFAULT_CONFIG = os.path.join(DEFAULT_HOD_DIR, 'hodrc') -if not os.path.exists(DEFAULT_CONFIG): - if os.environ.has_key('HOD_CONF_DIR') and os.environ['HOD_CONF_DIR'] is not None: - DEFAULT_CONFIG = os.path.join(os.environ['HOD_CONF_DIR'], 'hodrc') - 
-# Definition tuple is of the form: -# (name, type, description, help?, default value, required?, validate?, -# short option) -# -defList = { 'hod' : ( - ('original-dir', 'directory', 'hod original start directory', - False, None, True, True, 'r'), - - ('clusterdir', 'directory', - 'Directory where cluster state information and hadoop-site.xml' + - ' will be stored.', - True, None, False, False, 'd'), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True, 'y'), - - ('java-home', 'directory', 'Java home directory.', - True, None, True, True, 'j'), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - True, 3, True, True, 'b'), - - ('stream', 'bool', 'Output to stderr.', - False, True, False, True), - - ('nodecount', 'pos_int', - 'Number of nodes to allocate at startup. ', - True, None, False, True, 'n'), - - ('script', 'file', 'Hadoop script to execute.', - True, None, False, False, 's'), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, pwd.getpwuid(os.getuid())[0], False, True, 'u'), - - ('allocate-wait-time', 'pos_int', - 'Time to wait for cluster allocation.', - False, 300, True, True, 'e'), - - ('operation', 'string', - 'Initiate a hod operation. 
(help, allocate, deallocate ...)', - False, None, False, True, 'o'), - - ('cluster-factor', 'pos_float', - 'The number of grid slots per machines', False, 1.9, False, True, - 'x'), - - ('cluster', 'string', 'Name of cluster being used.', - False, None, True, True, 'w'), - - ('proxy-xrs-address', 'address', - 'Address to Allocation Manager XML RPC proxy.', - False, None, False, True, 'p'), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('client-params', 'keyval', 'Hadoop client xml key/value list', - True, None, False, True, 'C'), - - ('hadoop-ui-log-dir', 'directory', 'Directory to store Web UI Logs of Hadoop', - True, None, False, True), - - ('temp-dir', 'directory', 'HOD temporary directories.', - False, None, True, False), - - ('update-worker-info', 'bool', 'Specifies whether to update Worker Info after allocation', - False, False, False, True), - - ('job-feasibility-attr', 'string', 'Specifies whether to check job feasibility - resource manager and/or scheduler limits, also gives the attribute value', - False, None, False, True), - - ('title', 'string', 'Title for the current HOD allocation.', - True, "HOD", False, True, 'N'), - - ('walltime', 'pos_int', 'Walltime in seconds for the current HOD allocation', - True, None, False, True, 'l'), - - ('script-wait-time', 'pos_int', 'Specifies the time to wait before running the script. Used with the hod.script option.', - True, 10, False, True, 'W'), - - ('log-rollover-count', 'pos_int', 'Specifies the number of rolled-over log files of HOD client. 
A zero value disables rollover.', - True, 5, False, True, 'L'), - - ('job-status-query-interval', 'pos_int', 'Specifies the time between checking for job status', - False, 30, False, True), - - ('job-command-failure-interval', 'pos_int', 'Specifies the time between checking for failed job status or submission commands', - False, 10, False, True), - - ('job-status-query-failure-retries', 'pos_int', 'Specifies the number of times job status failure queries are retried', - False, 3, False, True), - - ('job-submission-failure-retries', 'pos_int', 'Specifies the number of times job submission failure queries are retried', - False, 3, False, True)), - - 'resource_manager' : ( - ('id', 'string', 'Batch scheduler ID: torque|condor.', - False, None, True, True), - - ('pbs-user', 'user_account', 'User ID jobs are submitted under.', - False, None, False, True), - - ('pbs-account', 'string', 'User Account jobs are submitted under.', - True, None, False, False, 'A'), - - ('queue', 'string', 'Queue of the batch scheduler to query.', - True, 'batch', False, True, 'Q'), - - ('batch-home', 'directory', 'Scheduler installation directory.', - False, None, True, True), - - ('options', 'keyval', 'Options to pass to the scheduler.', - False, None, False, True), - - ('env-vars', 'keyval', 'Environment variables to pass to the submitted jobs.', - False, None, False, True)), - - 'ringmaster' : ( - ('work-dirs', 'list', 'hod work directories', - False, None, True, False), - - ('temp-dir', 'directory', 'Ringmaster temporary directory.', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, False), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 4, 
True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, pwd.getpwuid(os.getuid())[0], False, True), - - ('svcrgy-addr', 'address', 'Download HTTP address.', - False, None, False, False), - - ('hadoop-tar-ball', 'uri', 'hadoop program tar ball.', - True, None, False, False, 't'), - - ('max-connect','pos_int','max connections allowed for a single tarball server', - False, 30, False, True), - - ('jt-poll-interval', 'pos_int', 'How often to poll the Job tracker for idleness', - False, 120, False, True), - - ('idleness-limit', 'pos_int', 'Limit after which to deallocate the cluster', - False, 3600, False, True), - - ('max-master-failures', 'pos_int', - 'Defines how many times a master can fail before' \ - ' failing cluster allocation', False, 5, True, True), - - ('workers_per_ring', 'pos_int', 'Defines number of workers per service per hodring', - False, 1, False, True)), - - 'gridservice-mapred' : ( - ('external', 'bool', "Connect to an already running MapRed?", - False, False, True, True), - - ('host', 'hostname', 'Mapred hostname.', - False, 'localhost', False, False), - - ('info_port', 'pos_int', 'Mapred info port.', - False, None, False, False), - - ('tracker_port', 'pos_int', 'Mapred job tracker port.', - False, None, False, False), - - ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.', - False, None, False, False), - - ('server-params', 'keyval', 'Hadoop xml key/value list', - True, None, False, True, 'M'), - - ('envs', 'keyval', 'environment to run this package in', - False, None, False, True), - - ('final-server-params', 'keyval', 'Hadoop final xml key/val list', - False, None, False, True, 'F'), - - ('pkgs', 'directory', "directory where the package is installed", - False, None, False, False)), - - - 'gridservice-hdfs' : ( - ('external', 'bool', 
"Connect to an already running HDFS?", - False, False, True, True), - - ('host', 'hostname', 'HDFS hostname.', - False, 'localhost', False, False), - - ('fs_port', 'pos_int', 'HDFS port.', - False, None, False, False), - - ('info_port', 'pos_int', 'HDFS info port.', - False, None, False, False), - - ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.', - False, None, False, False), - - ('server-params', 'keyval', 'Hadoop xml key/value list', - False, None, False, True, 'H'), - - ('final-server-params', 'keyval', 'Hadoop final xml key/value list', - False, None, False, True, 'S'), - - ('envs', 'keyval', 'Environment in which to run this package.', - False, None, False, True), - - ('pkgs', 'directory', "directory where the package is installed", - False, None, False, False)), - - - 'hodring' : ( - ('temp-dir', 'list', 'hodring temporary directory.', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, False), - - ('log-destination-uri', 'string', - 'URI to store logs to, local://some_path or ' - + 'hdfs://host:port/some_path', - False, None, False, True), - - ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS', - False, None, False, False), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('java-home', 'directory', 'Java home directory.', - False, None, True, False), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 3, True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, pwd.getpwuid(os.getuid())[0], False, True), - - ('command', 'string', 'Command for hodring to run.', - False, None, False, True), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - 
('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('service-id', 'string', 'Service ID.', - False, None, False, True), - - ('download-addr', 'string', 'Download HTTP address.', - False, None, False, True), - - ('svcrgy-addr', 'address', 'Download HTTP address.', - False, None, False, True), - - ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.', - False, None, False, True), - - ('tarball-retry-initial-time', 'pos_float','Initial Retry time for tarball download', - False, 1, False, True), - - ('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download', - False, 3, False, True), - - ('cmd-retry-initial-time', 'pos_float','Initial retry time for getting commands', - False, 2, False, True), - - ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands', - False, 2, False, True), - - ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.', - False, '/mapredsystem', False, False)) - } - -defOrder = [ 'hod', 'ringmaster', 'hodring', 'resource_manager', - 'gridservice-mapred', 'gridservice-hdfs' ] - -def printErrors(msgs): - for msg in msgs: - print msg - -def op_requires_pkgs(config): - if config['hod'].has_key('operation'): - return config['hod']['operation'].startswith('allocate') - else: - return config['hod'].has_key('script') - -if __name__ == '__main__': - try: - confDef = definition() - confDef.add_defs(defList, defOrder) - hodhelp = hodHelp() - usage = hodhelp.help() - - hodOptions = options(confDef, usage, - VERSION, withConfig=True, defaultConfig=DEFAULT_CONFIG, - name=myName ) - # hodConfig is a dict like object, hodConfig[section][name] - try: - hodConfig = config(hodOptions['config'], configDef=confDef, - originalDir=hodOptions['hod']['original-dir'], - options=hodOptions) - except IOError, e: - print >>sys.stderr,"error: %s not found. 
Specify the path to the HOD configuration file, or define the environment variable %s under which a file named hodrc can be found." % (hodOptions['config'], 'HOD_CONF_DIR') - sys.exit(1) - - # Conditional validation - statusMsgs = [] - - if hodConfig.normalizeValue('gridservice-hdfs', 'external'): - # For external HDFS - statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs', - 'fs_port')) - statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs', - 'info_port')) - statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs', - 'host')) - else: - hodConfig['gridservice-hdfs']['fs_port'] = 0 # Dummy - hodConfig['gridservice-hdfs']['info_port'] = 0 # Not used at all - - if hodConfig.normalizeValue('gridservice-mapred', 'external'): - statusMsgs.extend(hodConfig.validateValue('gridservice-mapred', - 'tracker_port')) - statusMsgs.extend(hodConfig.validateValue('gridservice-mapred', - 'info_port')) - statusMsgs.extend(hodConfig.validateValue('gridservice-mapred', - 'host')) - else: - hodConfig['gridservice-mapred']['tracker_port'] = 0 # Dummy - hodConfig['gridservice-mapred']['info_port'] = 0 # Not used at all - - if len(statusMsgs) != 0: - for msg in statusMsgs: - print >>sys.stderr, msg - sys.exit(1) - # End of conditional validation - - status = True - statusMsgs = [] - - (status,statusMsgs) = hodConfig.verify() - if not status: - print >>sys.stderr,"error: bin/hod failed to start." - for msg in statusMsgs: - print >>sys.stderr,"%s" % (msg) - sys.exit(1) - - ## TODO : should move the dependency verification to hodConfig.verify - if hodConfig['hod'].has_key('operation') and \ - hodConfig['hod'].has_key('script'): - print "Script operation is mutually exclusive with other HOD operations" - hodOptions.print_help(sys.stderr) - sys.exit(1) - - if 'operation' not in hodConfig['hod'] and 'script' not in hodConfig['hod']: - print "HOD requires at least a script or operation be specified." 
- hodOptions.print_help(sys.stderr) - sys.exit(1) - - if hodConfig['gridservice-hdfs']['external']: - hdfsAddress = "%s:%s" % (hodConfig['gridservice-hdfs']['host'], - hodConfig['gridservice-hdfs']['fs_port']) - - hdfsSocket = tcpSocket(hdfsAddress) - - try: - hdfsSocket.open() - hdfsSocket.close() - except tcpError: - printErrors(hodConfig.var_error('hod', 'gridservice-hdfs', - "Failed to open a connection to external hdfs address: %s." % - hdfsAddress)) - sys.exit(1) - else: - hodConfig['gridservice-hdfs']['host'] = 'localhost' - - if hodConfig['gridservice-mapred']['external']: - mapredAddress = "%s:%s" % (hodConfig['gridservice-mapred']['host'], - hodConfig['gridservice-mapred']['tracker_port']) - - mapredSocket = tcpSocket(mapredAddress) - - try: - mapredSocket.open() - mapredSocket.close() - except tcpError: - printErrors(hodConfig.var_error('hod', 'gridservice-mapred', - "Failed to open a connection to external mapred address: %s." % - mapredAddress)) - sys.exit(1) - else: - hodConfig['gridservice-mapred']['host'] = 'localhost' - - if not hodConfig['ringmaster'].has_key('hadoop-tar-ball') and \ - not hodConfig['gridservice-hdfs'].has_key('pkgs') and \ - op_requires_pkgs(hodConfig): - printErrors(hodConfig.var_error('gridservice-hdfs', 'pkgs', - "gridservice-hdfs.pkgs must be defined if ringmaster.hadoop-tar-ball " - + "is not defined.")) - sys.exit(1) - - if not hodConfig['ringmaster'].has_key('hadoop-tar-ball') and \ - not hodConfig['gridservice-mapred'].has_key('pkgs') and \ - op_requires_pkgs(hodConfig): - printErrors(hodConfig.var_error('gridservice-mapred', 'pkgs', - "gridservice-mapred.pkgs must be defined if ringmaster.hadoop-tar-ball " - + "is not defined.")) - sys.exit(1) - - if hodConfig['hodring'].has_key('log-destination-uri'): - if hodConfig['hodring']['log-destination-uri'].startswith('file://'): - pass - elif hodConfig['hodring']['log-destination-uri'].startswith('hdfs://'): - hostPort = 
hodConfig['hodring']['log-destination-uri'][7:].split("/") - hostPort = hostPort[0] - socket = tcpSocket(hostPort) - try: - socket.open() - socket.close() - except: - printErrors(hodConfig.var_error('hodring', 'log-destination-uri', - "Unable to contact host/port specified in log destination uri: %s" % - hodConfig['hodring']['log-destination-uri'])) - sys.exit(1) - else: - printErrors(hodConfig.var_error('hodring', 'log-destination-uri', - "The log destiniation uri must be of type local:// or hdfs://.")) - sys.exit(1) - - if hodConfig['ringmaster']['workers_per_ring'] < 1: - printErrors(hodConfig.var_error('ringmaster', 'workers_per_ring', - "ringmaster.workers_per_ring must be a positive integer " + - "greater than or equal to 1")) - sys.exit(1) - - ## TODO : end of should move the dependency verification to hodConfig.verif - - hodConfig['hod']['base-dir'] = rootDirectory - hodConfig['hod']['user_state'] = DEFAULT_HOD_DIR - - dGen = DescGenerator(hodConfig) - hodConfig = dGen.initializeDesc() - - os.environ['JAVA_HOME'] = hodConfig['hod']['java-home'] - - if hodConfig['hod']['debug'] == 4: - print "" - print "Using Python: %s" % sys.version - print "" - - hod = hodRunner(hodConfig) - - # Initiate signal handling - hodInterrupt.set_log(hod.get_logger()) - hodInterrupt.init_signals() - # Interrupts set up. Now on we handle signals only when we wish to. 
- except KeyboardInterrupt: - print HOD_INTERRUPTED_MESG - sys.exit(HOD_INTERRUPTED_CODE) - - opCode = 0 - try: - if hodConfig['hod'].has_key('script'): - opCode = hod.script() - else: - opCode = hod.operation() - except Exception, e: - print "Uncaught Exception : %s" % e - finally: - sys.exit(opCode) diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/hodcleanup b/third_party/hadoop-0.20.0/contrib/hod/bin/hodcleanup deleted file mode 100755 index 51613eae0a..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/hodcleanup +++ /dev/null @@ -1,183 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -""":" -work_dir=$(dirname $0) -base_name=$(basename $0) -original_dir=$PWD -cd $work_dir - -if [ $HOD_PYTHON_HOME ]; then - exec $HOD_PYTHON_HOME -u -OO $base_name ${1+"$@"} -elif [ -e /usr/bin/python ]; then - exec /usr/bin/python -u -OO $base_name ${1+"$@"} -elif [ -e /usr/local/bin/python ]; then - exec /usr/local/bin/python -u -OO $base_name ${1+"$@"} -else - exec python -u -OO $base_name ${1+"$@"} -fi -":""" - -"""The executable to be used by the user""" -import sys, os, re, pwd, threading, sys, random, time, pprint, shutil, time, re -from pprint import pformat -from optparse import OptionParser - -myName = os.path.basename(sys.argv[0]) -myName = re.sub(".*/", "", myName) -binDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/bin/.*", "", binDirectory) -libDirectory = rootDirectory - -sys.path.append(libDirectory) - -from hodlib.Common.threads import simpleCommand -from hodlib.Common.util import local_fqdn, tar, filter_warnings,\ - get_exception_string, get_exception_error_string -from hodlib.Common.logger import hodLog -from hodlib.Common.logger import getLogger -from hodlib.HodRing.hodRing import createMRSystemDirectoryManager - -filter_warnings() - -reVersion = re.compile(".*(\d+_\d+).*") -reHdfsURI = re.compile("(hdfs://.*?:\d+)(.*)") - -VERSION = None -if os.path.exists("./VERSION"): - vFile = open("./VERSION", 'r') - VERSION = vFile.readline() - vFile.close() - -def __archive_logs(conf, log): - # need log-destination-uri, __hadoopLogDirs, temp-dir - status = True - logUri = conf['log-destination-uri'] - hadoopLogDirs = conf['hadoop-log-dirs'] - if logUri: - try: - if hadoopLogDirs: - date = time.localtime() - for logDir in hadoopLogDirs: - (head, tail) = os.path.split(logDir) - (head, logType) = os.path.split(head) - tarBallFile = "%s-%s-%04d%02d%02d%02d%02d%02d-%s.tar.gz" % ( - logType, local_fqdn(), date[0], date[1], date[2], date[3], - date[4], date[5], random.randint(0,1000)) - - if logUri.startswith('file://'): - tarBallFile = 
os.path.join(logUri[7:], - tarBallFile) - else: - tarBallFile = os.path.join(conf['temp-dir'], tarBallFile) - - log.debug('archiving log files to: %s' % tarBallFile) - status = tar(tarBallFile, logDir, ['*',]) - log.info('archive %s status: %s' % (tarBallFile, status)) - if status and \ - logUri.startswith('hdfs://'): - __copy_archive_to_dfs(conf, tarBallFile) - log.info("copying archive to dfs finished") - dict = {} - except: - log.error(get_exception_string()) - status = False - return status - - -def __copy_archive_to_dfs(conf, archiveFile): - # need log-destination-uri, hadoopCommandstring and/or pkgs - hdfsURIMatch = reHdfsURI.match(conf['log-destination-uri']) - - (head, tail) = os.path.split(archiveFile) - destFile = os.path.join(hdfsURIMatch.group(2), conf['user-id'], 'hod-logs', conf['service-id'], tail) - - log.info("copying archive %s to DFS %s ..." % (archiveFile, destFile)) - - hadoopCmd = conf['hadoop-command-string'] - if conf['pkgs']: - hadoopCmd = os.path.join(conf['pkgs'], 'bin', 'hadoop') - - copyCommand = "%s dfs -fs %s -copyFromLocal %s %s" % (hadoopCmd, - hdfsURIMatch.group(1), archiveFile, destFile) - - log.debug(copyCommand) - - copyThread = simpleCommand('hadoop', copyCommand) - copyThread.start() - copyThread.wait() - copyThread.join() - log.debug(pprint.pformat(copyThread.output())) - - os.unlink(archiveFile) - -def unpack(): - parser = OptionParser() - option_list=["--log-destination-uri", "--hadoop-log-dirs", \ - "--temp-dir", "--hadoop-command-string", "--pkgs", "--user-id", \ - "--service-id", "--hodring-debug", "--hodring-log-dir", \ - "--hodring-syslog-address", "--hodring-cleanup-list", \ - "--jt-pid", "--mr-sys-dir", "--fs-name", "--hadoop-path"] - regexp = re.compile("^--") - for opt in option_list: - parser.add_option(opt,dest=regexp.sub("",opt),action="store") - option_list.append("--hodring-stream") - parser.add_option("--hodring-stream",dest="hodring-stream",metavar="bool",\ - action="store_true") - (options, args) = 
parser.parse_args() - _options= {} - _options['hodring'] = {} - for opt in dir(options): - if "--"+opt in option_list: - _options[opt] = getattr(options,opt) - if _options.has_key('hadoop-log-dirs') and _options['hadoop-log-dirs']: - _options['hadoop-log-dirs'] = _options['hadoop-log-dirs'].split(",") - if _options.has_key('hodring-syslog-address') and _options['hodring-syslog-address']: - _options['hodring']['syslog-address'] = \ - _options['hodring-syslog-address'].split(':') - _options['hodring']['debug'] = int(_options['hodring-debug']) - _options['hodring']['log-dir'] = _options['hodring-log-dir'] - _options['hodring']['stream'] = _options['hodring-stream'] - _options['hodring']['userid'] = _options['user-id'] - os.putenv('PBS_JOBID', _options['service-id'] ) - return _options - -if __name__ == '__main__': - log = None - try: - conf = unpack() - # Use the same log as hodring - log = getLogger(conf['hodring'],'hodring') - log.debug("Logger initialised successfully") - mrSysDirManager = createMRSystemDirectoryManager(conf, log) - if mrSysDirManager is not None: - mrSysDirManager.removeMRSystemDirectory() - - status = __archive_logs(conf,log) - log.info("Archive status : %s" % status) - list = conf['hodring-cleanup-list'].split(',') - log.info("now removing %s" % list) - for dir in list: - if os.path.exists(dir): - log.debug('removing %s' % (dir)) - shutil.rmtree(dir, True) - log.debug("done") - log.info("Cleanup successfully completed") - except Exception, e: - if log: - log.info("Stack trace:\n%s\n%s" %(get_exception_error_string(),get_exception_string())) diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/hodring b/third_party/hadoop-0.20.0/contrib/hod/bin/hodring deleted file mode 100755 index 1bb891c540..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/hodring +++ /dev/null @@ -1,287 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -""":" -work_dir=$(dirname $0) -base_name=$(basename $0) -cd $work_dir - -if [ $HOD_PYTHON_HOME ]; then - exec $HOD_PYTHON_HOME -OO $base_name ${1+"$@"} -elif [ -e /usr/bin/python ]; then - exec /usr/bin/python -OO $base_name ${1+"$@"} -elif [ -e /usr/local/bin/python ]; then - exec /usr/local/bin/python -OO $base_name ${1+"$@"} -else - exec python -OO $base_name ${1+"$@"} -fi -":""" - -"""The executable to be used by the user""" -import sys, os, re - - -myName = os.path.basename(sys.argv[0]) -myName = re.sub(".*/", "", myName) -binDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/bin/.*", "", binDirectory) -libDirectory = rootDirectory - -sys.path.append(libDirectory) - -from hodlib.HodRing.hodRing import HodRing -from hodlib.Common.setup import * -from hodlib.Common.util import filter_warnings, get_exception_string, \ - get_exception_error_string, getMapredSystemDirectory, \ - to_http_url, local_fqdn -from hodlib.Common.logger import getLogger, ensureLogDir -from hodlib.Common.xmlrpc import hodXRClient - -filter_warnings() - -reVersion = re.compile(".*(\d+_\d+).*") - -VERSION = '$HeadURL$' - -reMatch = reVersion.match(VERSION) -if reMatch: - VERSION = reMatch.group(1) - VERSION = re.sub("_", ".", VERSION) -else: - VERSION = 'DEV' - -# Definition tuple is of the form: -# 
(name, type, description, default value, required?, validate?) -# -defList = { 'hodring' : ( - ('temp-dir', 'directory', 'hod work directories', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, True), - - ('log-destination-uri', 'string', - 'URI to store logs to, local://some_path or ' - + 'hdfs://host:port/some_path', - False, None, False, True), - - ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS', - False, None, False, True), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('java-home', 'directory', 'Java home directory.', - False, None, True, True), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 3, True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, None, True, False), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('command', 'string', 'Command for hodring to run.', - False, None, False, True), - - ('service-id', 'string', 'Service ID.', - False, None, False, True), - - ('download-addr', 'string', 'Download HTTP address.', - False, None, False, True), - - ('svcrgy-addr', 'address', 'Service registry XMLRPC address.', - False, None, True, True), - - ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.', - False, None, False, True), - - ('tarball-retry-initial-time', 'pos_float','initial retry time for tarball download', - False, 1, False, True), - - ('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download', - False, 3, False, True), - - ('cmd-retry-initial-time', 'pos_float','initial retry time for getting commands', - False, 2, 
False, True), - - ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands', - False, 2, False, True), - - ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.', - False, '/mapredsystem', False, False)) - } - -if __name__ == '__main__': - - confDef = definition() - confDef.add_defs(defList) - hodRingOptions = options(confDef, "./%s [OPTIONS]" % myName, VERSION) - ensureLogDir(hodRingOptions['hodring']['log-dir']) - service = None - try: - (status, statusMsgs) = hodRingOptions.verify() - if not status: - raise Exception("%s" % statusMsgs) - hodRingOptions['hodring']['base-dir'] = rootDirectory - service = HodRing(hodRingOptions) - service.start() - service.wait() - - if service.log: - log = service.log - else: - log = getLogger(hodRingOptions['hodring'],'hodring') - - list = [] - - runningHadoops = service.getRunningValues() - - mrSysDirManager = None - for cmd in runningHadoops: - if cmd.name == 'jobtracker': - mrSysDirManager = cmd.getMRSystemDirectoryManager() - log.debug("addding %s to cleanup list..." 
% cmd) - cmd.addCleanup(list) - - list.append(service.getTempDir()) - log.debug(list) - - # archive_logs now - cmdString = os.path.join(rootDirectory, "bin", "hodcleanup") # same python - - if (len(runningHadoops) == 0): - log.info("len(runningHadoops) == 0, No running cluster?") - log.info("Skipping __copy_archive_to_dfs") - hadoopString = "" - else: hadoopString=runningHadoops[0].path - - #construct the arguments - if hodRingOptions['hodring'].has_key('log-destination-uri'): - cmdString = cmdString + " --log-destination-uri " \ - + hodRingOptions['hodring']['log-destination-uri'] - - hadoopLogDirs = service.getHadoopLogDirs() - if hadoopLogDirs: - cmdString = cmdString \ - + " --hadoop-log-dirs " \ - + ",".join(hadoopLogDirs) - - cmdString = cmdString \ - + " --temp-dir " \ - + service._cfg['temp-dir'] \ - + " --hadoop-command-string " \ - + hadoopString \ - + " --user-id " \ - + service._cfg['userid'] \ - + " --service-id " \ - + service._cfg['service-id'] \ - + " --hodring-debug " \ - + str(hodRingOptions['hodring']['debug']) \ - + " --hodring-log-dir " \ - + hodRingOptions['hodring']['log-dir'] \ - + " --hodring-cleanup-list " \ - + ",".join(list) - - if hodRingOptions['hodring'].has_key('syslog-address'): - syslogAddr = hodRingOptions['hodring']['syslog-address'][0] + \ - ':' + str(hodRingOptions['hodring']['syslog-address'][1]) - cmdString = cmdString + " --hodring-syslog-address " + syslogAddr - if service._cfg.has_key('pkgs'): - cmdString = cmdString + " --pkgs " + service._cfg['pkgs'] - - if mrSysDirManager is not None: - cmdString = "%s %s" % (cmdString, mrSysDirManager.toCleanupArgs()) - - log.info("cleanup commandstring : ") - log.info(cmdString) - - # clean up - cmd = ['/bin/sh', '-c', cmdString] - - mswindows = (sys.platform == "win32") - originalcwd = os.getcwd() - - if not mswindows: - try: - pid = os.fork() - if pid > 0: - # exit first parent - log.info("child(pid: %s) is now doing cleanup" % pid) - sys.exit(0) - except OSError, e: - 
log.error("fork failed: %d (%s)" % (e.errno, e.strerror)) - sys.exit(1) - - # decouple from parent environment - os.chdir("/") - os.setsid() - os.umask(0) - - MAXFD = 128 # more than enough file descriptors to close. Just in case. - for i in xrange(0, MAXFD): - try: - os.close(i) - except OSError: - pass - - try: - os.execvp(cmd[0], cmd) - finally: - log.critical("exec failed") - os._exit(1) - - except Exception, e: - if service: - if service.log: - log = service.log - else: - log = getLogger(hodRingOptions['hodring'], 'hodring') - log.error("Error in bin/hodring %s. \nStack trace:\n%s" %(get_exception_error_string(),get_exception_string())) - - log.info("now trying informing to ringmaster") - log.info(hodRingOptions['hodring']['ringmaster-xrs-addr']) - log.info(hodRingOptions.normalizeValue('hodring', 'ringmaster-xrs-addr')) - log.info(to_http_url(hodRingOptions.normalizeValue( \ - 'hodring', 'ringmaster-xrs-addr'))) - # Report errors to the Ringmaster if possible - try: - ringXRAddress = to_http_url(hodRingOptions.normalizeValue( \ - 'hodring', 'ringmaster-xrs-addr')) - log.debug("Creating ringmaster XML-RPC client.") - ringClient = hodXRClient(ringXRAddress) - if ringClient is not None: - addr = local_fqdn() + "_" + str(os.getpid()) - ringClient.setHodRingErrors(addr, str(e)) - log.info("Reported errors to ringmaster at %s" % ringXRAddress) - except Exception, e: - log.error("Failed to report errors to ringmaster at %s" % ringXRAddress) - log.error("Reason : %s" % get_exception_string()) - # End of reporting errors to the client diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/ringmaster b/third_party/hadoop-0.20.0/contrib/hod/bin/ringmaster deleted file mode 100755 index fc194f6d27..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/ringmaster +++ /dev/null @@ -1,349 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -""":" -work_dir=$(dirname $0) -base_name=$(basename $0) -cd $work_dir - -if [ $HOD_PYTHON_HOME ]; then - exec $HOD_PYTHON_HOME -OO $base_name ${1+"$@"} -elif [ -e /usr/bin/python ]; then - exec /usr/bin/python -OO $base_name ${1+"$@"} -elif [ -e /usr/local/bin/python ]; then - exec /usr/local/bin/python -OO $base_name ${1+"$@"} -else - exec python -OO $base_name ${1+"$@"} -fi -":""" - -"""The executable to be used by the user""" -import sys, os, re, getpass - -myName = os.path.basename(sys.argv[0]) -myName = re.sub(".*/", "", myName) -binDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/bin/.*", "", binDirectory) -libDirectory = rootDirectory - -sys.path.append(libDirectory) - -from hodlib.RingMaster.ringMaster import main -from hodlib.Common.setup import * -from hodlib.Common.descGenerator import * -from hodlib.Common.util import local_fqdn, filter_warnings, to_http_url, \ - get_exception_string, get_exception_error_string -from hodlib.Common.logger import getLogger, ensureLogDir -from hodlib.Common.xmlrpc import hodXRClient -import logging - -filter_warnings() - -reVersion = re.compile(".*(\d+_\d+).*") - -VERSION = '$HeadURL$' - -reMatch = reVersion.match(VERSION) -if reMatch: - VERSION = reMatch.group(1) - VERSION = re.sub("_", ".", VERSION) -else: - VERSION = 'DEV' - 
-# Definition tuple is of the form: -# (name, type, description, default value, required?, validate?) -# -defList = { 'ringmaster' : ( - ('work-dirs', 'list', 'hod work directories', - False, None, True, False), - - ('temp-dir', 'directory', 'Ringmaster temporary directory.', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, True), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 3, True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, None, True, False), - - ('svcrgy-addr', 'address', 'Download HTTP address.', - False, None, False, True), - - ('hadoop-tar-ball', 'uri', 'hadoop program tar ball.', - False, None, False, False), - - ('max-connect','pos_int','max connections allowed for a single tarball server', - False, 30, False, True), - - ('jt-poll-interval', 'pos_int', 'How often to poll the Job tracker for idleness', - False, 120, False, True), - - ('idleness-limit', 'pos_int', 'Limit after which to deallocate the cluster', - False, 3600, False, True), - - ('max-master-failures', 'pos_int', - 'Defines how many times a master can fail before' \ - ' failing cluster allocation', False, 5, True, True), - - ('workers_per_ring', 'pos_int', 'Defines number of workers per service per hodring', - False, 1, False, True)), - - 'resource_manager' : ( - ('id', 'string', 'Batch scheduler ID: torque|condor.', - False, None, True, True), - - ('pbs-user', 'user_account', 'User ID jobs are submitted under.', - False, None, False, 
True), - - ('pbs-server', 'hostname', 'Hostname of PBS server.', - False, None, False, True), - - ('pbs-account', 'string', 'User Account jobs are submitted under.', - False, None, False, False), - - ('queue', 'string', 'Queue of the batch scheduler to query.', - False, None, False, False), - - ('batch-home', 'directory', 'Scheduler installation directory.', - False, None, True, True), - - ('options', 'keyval', 'Options to pass to the scheduler.', - False, None, False, True), - - ('env-vars', 'keyval', 'Environment variables to pass to the submitted jobs.', - False, None, False, True)), - - 'gridservice-mapred' : ( - ('external', 'bool', "Connect to an already running MapRed?", - False, False, True, True), - - ('host', 'hostname', 'Mapred hostname.', - False, 'localhost', False, True), - - ('info_port', 'pos_int', 'Mapred info port.', - False, None, True, True), - - ('tracker_port', 'pos_int', 'Mapred job tracker port.', - False, None, True, True), - - ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.', - False, None, False, False), - - ('server-params', 'keyval', 'Hadoop xml key/value list', - False, None, False, False), - - ('final-server-params', 'keyval', 'Hadoop final xml params', - False, None, False, False), - - ('envs', 'keyval', 'environment to run this package in', - False, None, False, False), - - ('pkgs', 'directory', "directory where the package is installed", - False, None, False, False)), - - - 'gridservice-hdfs' : ( - ('external', 'bool', "Connect to an already running HDFS?", - False, False, True, True), - - ('host', 'hostname', 'HDFS hostname.', - False, 'localhost', True, True), - - ('fs_port', 'pos_int', 'HDFS port range.', - False, None, True, True), - - ('info_port', 'pos_int', 'HDFS info port.', - False, None, True, True), - - ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.', - False, None, False, False), - - ('server-params', 'keyval', 'Hadoop xml key/value list', - False, None, False, False), - - 
('final-server-params', 'keyval', 'Hadoop final xml params', - False, None, False, False), - - ('envs', 'keyval', 'Environment in which to run this package.', - False, None, False, False), - - ('pkgs', 'directory', "directory where the package is installed", - False, None, False, False)), - - - 'hodring' : ( - ('temp-dir', 'directory', 'hod work directories', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, False), - - ('log-destination-uri', 'string', - 'URI to store logs to, local://some_path or ' - + 'hdfs://host:port/some_path', - False, None, False, True), - - ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS', - False, None, False, True), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('java-home', 'directory', 'Java home directory.', - False, None, True, False), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 3, True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, None, True, False), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('command', 'string', 'Command for hodring to run.', - False, None, False, True), - - ('service-id', 'string', 'Service ID.', - False, None, False, True), - - ('download-addr', 'address', 'Download HTTP address.', - False, None, False, True), - - ('svcrgy-addr', 'address', 'Download HTTP address.', - False, None, False, True), - - ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.', - False, None, False, True), - - ('tarball-retry-initial-time', 'pos_float','initial retry time for tarball download', - False, 1, False, True), - - 
('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download', - False, 3, False, True), - - ('cmd-retry-initial-time', 'pos_float','initial retry time for getting commands', - False, 2, False, True), - - ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands', - False, 2, False, True), - - ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.', - False, '/mapredsystem', False, False)) - } - - -defOrder = [ 'ringmaster', 'hodring', 'resource_manager', - 'gridservice-mapred', 'gridservice-hdfs' ] - -if __name__ == '__main__': - confDef = definition() - confDef.add_defs(defList, defOrder) - ringMasterOptions = options(confDef, "./%s [OPTIONS]" % myName, VERSION) - log = logging.getLogger() - - try: - - # Set up logging before anything else. - ensureLogDir(ringMasterOptions.normalizeValue('ringmaster', 'log-dir')) - log = getLogger(ringMasterOptions['ringmaster'],'ringmaster') - # End of setting up logging - - # Verify and process options - statusMsgs = [] - # Conditional validation - if not ringMasterOptions['ringmaster'].has_key('hadoop-tar-ball') or \ - not ringMasterOptions['ringmaster']['hadoop-tar-ball']: - # If tarball is not used - if not ringMasterOptions.normalizeValue('gridservice-hdfs', 'external'): - # And if hdfs is not external, validate gridservice-hdfs.pkgs - statusMsgs.extend(ringMasterOptions.validateValue( - 'gridservice-hdfs', 'pkgs')) - statusMsgs.extend(ringMasterOptions.validateValue( - 'gridservice-mapred', 'pkgs')) - - if len(statusMsgs) != 0: - # format status messages into a single string - errStr = '' - for msg in statusMsgs: - errStr = "%s%s\n" % (errStr, msg) - raise Exception("%s" % errStr) - # End of conditional validation - - (status, statusMsgs) = ringMasterOptions.verify() - if not status: - # format status messages into a single string - errStr = '' - for msg in statusMsgs: - errStr = "%s%s\n" % (errStr, msg) - raise 
Exception("%s" % errStr) - - ringMasterOptions.replace_escape_seqs() - ringMasterOptions['ringmaster']['base-dir'] = rootDirectory - # End of option processing - - ret = main(ringMasterOptions,log) - sys.exit(ret) - except Exception, e: - log.error("bin/ringmaster failed to start.%s. Stack trace follows:\n%s" % (get_exception_error_string(),get_exception_string())) - - # Report errors to the client if possible - try: - serviceAddr = to_http_url(ringMasterOptions.normalizeValue( \ - 'ringmaster', 'svcrgy-addr')) - serviceClient = hodXRClient(serviceAddr) - if serviceClient is not None: - serviceClient.setRMError([local_fqdn(), str(e), \ - get_exception_string()]) - log.info("Reported errors to service registry at %s" % serviceAddr) - except Exception, e: - log.error("Failed to report errors to service registry.") - log.error("Reason : %s" % get_exception_string()) - # End of reporting errors to the client - - # Ringmaster failing to start is a ringmaster error. Exit with the appropriate exit code. - sys.exit(6) diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/verify-account b/third_party/hadoop-0.20.0/contrib/hod/bin/verify-account deleted file mode 100755 index 65aa79ab44..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/verify-account +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# This script file is a stub for systems that might want to include -# checks for the account name that is passed to HOD. It will be -# launched by HOD with the account name as an argument. The script -# should return a zero exit code if the account is valid, and a -# non zero exit code otherwise. Any output that the script generates -# would be reported to the user by HOD, in case of a non-zero exit -# code. -# -# By default, the script does nothing and returns a zero exit code. 
-exit 0 diff --git a/third_party/hadoop-0.20.0/contrib/hod/build.xml b/third_party/hadoop-0.20.0/contrib/hod/build.xml deleted file mode 100644 index e16b36dacf..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/build.xml +++ /dev/null @@ -1,81 +0,0 @@ -<?xml version="1.0"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> - -<project name="hod" default="compile"> - <import file="../build-contrib.xml"/> - <target name="compile"> - <mkdir dir="${build.dir}"/> - <copy todir="${build.dir}"> - <fileset dir="${basedir}"> - <exclude name="**/VERSION"/> - </fileset> - </copy> - <exec executable="echo" output="${build.dir}/bin/VERSION"> - <arg line="${version}" /> - </exec> - </target> - <target name="package" depends="compile"> - <mkdir dir="${dist.dir}/contrib/${name}"/> - <copy todir="${dist.dir}/contrib/${name}"> - <fileset dir="${build.dir}"/> - </copy> - <chmod dir="${dist.dir}/contrib/${name}/bin" perm="a+x" includes="*"/> - </target> - - <target name="test" depends="compile" description="Run HOD unit tests"> - <antcall target="python.pathcheck"/> - <antcall target="checkAndRunTests"/> - </target> - - <target name="checkAndRunTests" if="python.home"> - <!-- Check python version now --> - <exec executable="/bin/sh" outputproperty="hodtest.pythonVersion"> - <arg value="-c" /> - <arg value="${python.home}/python -V" /> - </exec> - <condition property="python.versionmatched"> - <!--- Currently check for only 2.5.1 --> - <equals arg1="${hodtest.pythonVersion}" arg2="Python 2.5.1" /> - </condition> - <antcall target="python.versioncheck"/> - <antcall target="runtests"/> - </target> - - <target name="python.pathcheck" unless="python.home"> - <echo message="'python.home' is not defined. Please pass -Dpython.home=<Path to Python> to Ant on the command-line."/> - </target> - - <target name="runtests" if="python.versionmatched"> - <echo message="Using Python at : ${python.home}" /> - <echo message="Version : ${hodtest.pythonVersion}"/> - <exec executable="/bin/sh" resultproperty="hodtest.failedTests"> - <arg value="-c" /> - <arg value="${python.home}/python ${build.dir}/testing/main.py" /> - </exec> - <condition property="hodtest.success"> - <equals arg1="${hodtest.failedTests}" arg2="0"/> - </condition> - <fail message="TestCases failed. ${hodtest.failedTests} failed to run successfully." 
unless="hodtest.success"/> - </target> - - <target name="python.versioncheck" unless="python.versionmatched"> - <echo message="Need Python version 2.5.1. You specified ${hodtest.pythonVersion}"/> - </target> - -</project> diff --git a/third_party/hadoop-0.20.0/contrib/hod/conf/hodrc b/third_party/hadoop-0.20.0/contrib/hod/conf/hodrc deleted file mode 100644 index bc2866d4c8..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/conf/hodrc +++ /dev/null @@ -1,46 +0,0 @@ -[hod]
-stream = True
-java-home = ${JAVA_HOME}
-cluster = ${CLUSTER_NAME}
-cluster-factor = 1.8
-xrs-port-range = 32768-65536
-debug = 3
-allocate-wait-time = 3600
-temp-dir = /tmp/hod
-
-[ringmaster]
-register = True
-stream = False
-temp-dir = /tmp/hod
-http-port-range = 8000-9000
-work-dirs = /tmp/hod/1,/tmp/hod/2
-xrs-port-range = 32768-65536
-debug = 3
-
-[hodring]
-stream = False
-temp-dir = /tmp/hod
-register = True
-java-home = ${JAVA_HOME}
-http-port-range = 8000-9000
-xrs-port-range = 32768-65536
-debug = 3
-
-[resource_manager]
-queue = ${RM_QUEUE}
-batch-home = ${RM_HOME}
-id = torque
-#env-vars = HOD_PYTHON_HOME=/foo/bar/python-2.5.1/bin/python
-
-[gridservice-mapred]
-external = False
-pkgs = ${HADOOP_HOME}
-tracker_port = 8030
-info_port = 50080
-
-[gridservice-hdfs]
-external = False
-pkgs = ${HADOOP_HOME}
-fs_port = 8020
-info_port = 50070
-
diff --git a/third_party/hadoop-0.20.0/contrib/hod/config.txt b/third_party/hadoop-0.20.0/contrib/hod/config.txt deleted file mode 100644 index ca894a702a..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/config.txt +++ /dev/null @@ -1,172 +0,0 @@ - HOD Configuration - ================= - -1. Introduction: -================ - -Configuration options for HOD are organized as sections and options -within them. They can be specified in two ways: a configuration file -in the INI format, and as command line options to the HOD shell, -specified in the format --section.option[=value]. If the same option is -specified in both places, the value specified on the command line -overrides the value in the configuration file. - -To get a simple description of all configuration options, you can type - hod --verbose-help - -This document explains some of the most important or commonly used -configuration options in some more detail. - -2. Sections: -============ - -The following are the various sections in the HOD configuration: - - * hod: Options for the HOD client - * resource_manager: Options for specifying which resource - manager to use, and other parameters for - using that resource manager - * ringmaster: Options for the RingMaster process, - * hodring: Options for the HodRing processes - * gridservice-mapred: Options for the MapReduce daemons - * gridservice-hdfs: Options for the HDFS daemons. - -The following are some of the important options in the HOD -configuration: - -3. Important / Commonly Used Configuration Options: -=================================================== - -3.1. Common configuration options: ----------------------------------- - -Certain configuration options are defined in most of the sections of -the HOD configuration. Options defined in a section, are used by the -process for which that section applies. These options have the same -meaning, but can have different values in each section. 
- -* temp-dir: Temporary directory for usage by the HOD processes. Make - sure that the users who will run hod have rights to create - directories under the directory specified here. - -* debug: A numeric value from 1-4. 4 produces the most log information, - and 1 the least. - -* log-dir: Directory where log files are stored. By default, this is - <install-location>/logs/. The restrictions and notes for the - temp-dir variable apply here too. - -* xrs-port-range: A range of ports, among which an available port shall - be picked for use to run an XML-RPC server. - -* http-port-range: A range of ports, among which an available port shall - be picked for use to run an HTTP server. - -* java-home: Location of Java to be used by Hadoop. - -3.2 hod options: ----------------- - -* cluster: A descriptive name given to the cluster. For Torque, this is - specified as a 'Node property' for every node in the cluster. - HOD uses this value to compute the number of available nodes. - -* client-params: A comma-separated list of hadoop config parameters - specified as key-value pairs. These will be used to - generate a hadoop-site.xml on the submit node that - should be used for running MapReduce jobs. - -3.3 resource_manager options: ------------------------------ - -* queue: Name of the queue configured in the resource manager to which - jobs are to be submitted. - -* batch-home: Install directory to which 'bin' is appended and under - which the executables of the resource manager can be - found. - -* env-vars: This is a comma separated list of key-value pairs, - expressed as key=value, which would be passed to the jobs - launched on the compute nodes. - For example, if the python installation is - in a non-standard location, one can set the environment - variable 'HOD_PYTHON_HOME' to the path to the python - executable. The HOD processes launched on the compute nodes - can then use this variable. 
- -3.4 ringmaster options: ------------------------ - -* work-dirs: These are a list of comma separated paths that will serve - as the root for directories that HOD generates and passes - to Hadoop for use to store DFS / MapReduce data. For e.g. - this is where DFS data blocks will be stored. Typically, - as many paths are specified as there are disks available - to ensure all disks are being utilized. The restrictions - and notes for the temp-dir variable apply here too. - -3.5 gridservice-hdfs options: ------------------------------ - -* external: If false, this indicates that a HDFS cluster must be - bought up by the HOD system, on the nodes which it - allocates via the allocate command. Note that in that case, - when the cluster is de-allocated, it will bring down the - HDFS cluster, and all the data will be lost. - If true, it will try and connect to an externally configured - HDFS system. - Typically, because input for jobs are placed into HDFS - before jobs are run, and also the output from jobs in HDFS - is required to be persistent, an internal HDFS cluster is - of little value in a production system. However, it allows - for quick testing. - -* host: Hostname of the externally configured NameNode, if any - -* fs_port: Port to which NameNode RPC server is bound. - -* info_port: Port to which the NameNode web UI server is bound. - -* pkgs: Installation directory, under which bin/hadoop executable is - located. This can be used to use a pre-installed version of - Hadoop on the cluster. - -* server-params: A comma-separated list of hadoop config parameters - specified key-value pairs. These will be used to - generate a hadoop-site.xml that will be used by the - NameNode and DataNodes. - -* final-server-params: Same as above, except they will be marked final. 
- - -3.6 gridservice-mapred options: -------------------------------- - -* external: If false, this indicates that a MapReduce cluster must be - bought up by the HOD system on the nodes which it allocates - via the allocate command. - If true, if will try and connect to an externally - configured MapReduce system. - -* host: Hostname of the externally configured JobTracker, if any - -* tracker_port: Port to which the JobTracker RPC server is bound - -* info_port: Port to which the JobTracker web UI server is bound. - -* pkgs: Installation directory, under which bin/hadoop executable is - located - -* server-params: A comma-separated list of hadoop config parameters - specified key-value pairs. These will be used to - generate a hadoop-site.xml that will be used by the - JobTracker and TaskTrackers - -* final-server-params: Same as above, except they will be marked final. - -4. Known Issues: -================ - -HOD does not currently handle special characters such as space, comma -and equals in configuration values. diff --git a/third_party/hadoop-0.20.0/contrib/hod/getting_started.txt b/third_party/hadoop-0.20.0/contrib/hod/getting_started.txt deleted file mode 100644 index ae2b0738f9..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/getting_started.txt +++ /dev/null @@ -1,233 +0,0 @@ - Getting Started With Hadoop On Demand (HOD) - =========================================== - -1. Pre-requisites: -================== - -Hardware: -HOD requires a minimum of 3 nodes configured through a resource manager. - -Software: -The following components are assumed to be installed before using HOD: -* Torque: - (http://www.clusterresources.com/pages/products/torque-resource-manager.php) - Currently HOD supports Torque out of the box. We assume that you are - familiar with configuring Torque. 
You can get information about this - from the following link: - http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki -* Python (http://www.python.org/) - We require version 2.5.1 of Python. - -The following components can be optionally installed for getting better -functionality from HOD: -* Twisted Python: This can be used for improving the scalability of HOD - (http://twistedmatrix.com/trac/) -* Hadoop: HOD can automatically distribute Hadoop to all nodes in the - cluster. However, it can also use a pre-installed version of Hadoop, - if it is available on all nodes in the cluster. - (http://hadoop.apache.org/core) - HOD currently supports Hadoop 0.15 and above. - -NOTE: HOD configuration requires the location of installs of these -components to be the same on all nodes in the cluster. It will also -make the configuration simpler to have the same location on the submit -nodes. - -2. Resource Manager Configuration Pre-requisites: -================================================= - -For using HOD with Torque: -* Install Torque components: pbs_server on a head node, pbs_moms on all - compute nodes, and PBS client tools on all compute nodes and submit - nodes. -* Create a queue for submitting jobs on the pbs_server. -* Specify a name for all nodes in the cluster, by setting a 'node - property' to all the nodes. - This can be done by using the 'qmgr' command. For example: - qmgr -c "set node node properties=cluster-name" -* Ensure that jobs can be submitted to the nodes. This can be done by - using the 'qsub' command. For example: - echo "sleep 30" | qsub -l nodes=3 -* More information about setting up Torque can be found by referring - to the documentation under: -http://www.clusterresources.com/pages/products/torque-resource-manager.php - -3. Setting up HOD: -================== - -* HOD is available under the 'contrib' section of Hadoop under the root - directory 'hod'. -* Distribute the files under this directory to all the nodes in the - cluster. 
Note that the location where the files are copied should be - the same on all the nodes. -* On the node from where you want to run hod, edit the file hodrc - which can be found in the <install dir>/conf directory. This file - contains the minimal set of values required for running hod. -* Specify values suitable to your environment for the following - variables defined in the configuration file. Note that some of these - variables are defined at more than one place in the file. - - * ${JAVA_HOME}: Location of Java for Hadoop. Hadoop supports Sun JDK - 1.5.x - * ${CLUSTER_NAME}: Name of the cluster which is specified in the - 'node property' as mentioned in resource manager configuration. - * ${HADOOP_HOME}: Location of Hadoop installation on the compute and - submit nodes. - * ${RM_QUEUE}: Queue configured for submiting jobs in the resource - manager configuration. - * ${RM_HOME}: Location of the resource manager installation on the - compute and submit nodes. - -* The following environment variables *may* need to be set depending on - your environment. These variables must be defined where you run the - HOD client, and also be specified in the HOD configuration file as the - value of the key resource_manager.env-vars. Multiple variables can be - specified as a comma separated list of key=value pairs. - - * HOD_PYTHON_HOME: If you install python to a non-default location - of the compute nodes, or submit nodes, then, this variable must be - defined to point to the python executable in the non-standard - location. - - -NOTE: - -You can also review other configuration options in the file and -modify them to suit your needs. Refer to the file config.txt for -information about the HOD configuration. - - -4. Running HOD: -=============== - -4.1 Overview: -------------- - -A typical session of HOD will involve atleast three steps: allocate, -run hadoop jobs, deallocate. 
- -4.1.1 Operation allocate ------------------------- - -The allocate operation is used to allocate a set of nodes and install and -provision Hadoop on them. It has the following syntax: - - hod -c config_file -t hadoop_tarball_location -o "allocate \ - cluster_dir number_of_nodes" - -The hadoop_tarball_location must be a location on a shared file system -accesible from all nodes in the cluster. Note, the cluster_dir must exist -before running the command. If the command completes successfully then -cluster_dir/hadoop-site.xml will be generated and will contain information -about the allocated cluster's JobTracker and NameNode. - -For example, the following command uses a hodrc file in ~/hod-config/hodrc and -allocates Hadoop (provided by the tarball ~/share/hadoop.tar.gz) on 10 nodes, -storing the generated Hadoop configuration in a directory named -~/hadoop-cluster: - - $ hod -c ~/hod-config/hodrc -t ~/share/hadoop.tar.gz -o "allocate \ - ~/hadoop-cluster 10" - -HOD also supports an environment variable called HOD_CONF_DIR. If this is -defined, HOD will look for a default hodrc file at $HOD_CONF_DIR/hodrc. -Defining this allows the above command to also be run as follows: - - $ export HOD_CONF_DIR=~/hod-config - $ hod -t ~/share/hadoop.tar.gz -o "allocate ~/hadoop-cluster 10" - -4.1.2 Running Hadoop jobs using the allocated cluster ------------------------------------------------------ - -Now, one can run Hadoop jobs using the allocated cluster in the usual manner: - - hadoop --config cluster_dir hadoop_command hadoop_command_args - -Continuing our example, the following command will run a wordcount example on -the allocated cluster: - - $ hadoop --config ~/hadoop-cluster jar \ - /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output - -4.1.3 Operation deallocate --------------------------- - -The deallocate operation is used to release an allocated cluster. 
When -finished with a cluster, deallocate must be run so that the nodes become free -for others to use. The deallocate operation has the following syntax: - - hod -o "deallocate cluster_dir" - -Continuing our example, the following command will deallocate the cluster: - - $ hod -o "deallocate ~/hadoop-cluster" - -4.2 Command Line Options ------------------------- - -This section covers the major command line options available via the hod -command: - ---help -Prints out the help message to see the basic options. - ---verbose-help -All configuration options provided in the hodrc file can be passed on the -command line, using the syntax --section_name.option_name[=value]. When -provided this way, the value provided on command line overrides the option -provided in hodrc. The verbose-help command lists all the available options in -the hodrc file. This is also a nice way to see the meaning of the -configuration options. - --c config_file -Provides the configuration file to use. Can be used with all other options of -HOD. Alternatively, the HOD_CONF_DIR environment variable can be defined to -specify a directory that contains a file named hodrc, alleviating the need to -specify the configuration file in each HOD command. - --b 1|2|3|4 -Enables the given debug level. Can be used with all other options of HOD. 4 is -most verbose. - --o "help" -Lists the operations available in the operation mode. - --o "allocate cluster_dir number_of_nodes" -Allocates a cluster on the given number of cluster nodes, and store the -allocation information in cluster_dir for use with subsequent hadoop commands. -Note that the cluster_dir must exist before running the command. - --o "list" -Lists the clusters allocated by this user. Information provided includes the -Torque job id corresponding to the cluster, the cluster directory where the -allocation information is stored, and whether the Map/Reduce daemon is still -active or not. 
- --o "info cluster_dir" -Lists information about the cluster whose allocation information is stored in -the specified cluster directory. - --o "deallocate cluster_dir" -Deallocates the cluster whose allocation information is stored in the -specified cluster directory. - --t hadoop_tarball -Provisions Hadoop from the given tar.gz file. This option is only applicable -to the allocate operation. For better distribution performance it is -recommended that the Hadoop tarball contain only the libraries and binaries, -and not the source or documentation. - --Mkey1=value1 -Mkey2=value2 -Provides configuration parameters for the provisioned Map/Reduce daemons -(JobTracker and TaskTrackers). A hadoop-site.xml is generated with these -values on the cluster nodes - --Hkey1=value1 -Hkey2=value2 -Provides configuration parameters for the provisioned HDFS daemons (NameNode -and DataNodes). A hadoop-site.xml is generated with these values on the -cluster nodes - --Ckey1=value1 -Ckey2=value2 -Provides configuration parameters for the client from where jobs can be -submitted. A hadoop-site.xml is generated with these values on the submit -node. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/__init__.py deleted file mode 100644 index 56759d7963..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/goldAllocationManager.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/goldAllocationManager.py deleted file mode 100644 index 2794c50354..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/goldAllocationManager.py +++ /dev/null @@ -1,104 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-"""Gold Allocation Manager Implementation""" -# -*- python -*- - -import sys, httplib -import sha, base64, hmac -import xml.dom.minidom - -from hodlib.Common.util import * - -class goldAllocationManager: - def __init__(self, cfg, log): - self.__GOLD_SECRET_KEY_FILE = cfg['auth-file'] - (self.__goldHost, self.__goldPort) = (cfg['allocation-manager-address'][0], - cfg['allocation-manager-address'][1]) - self.cfg = cfg - self.log = log - - def getQuote(self, user, project, ignoreErrors=True): - # Get Secret Key from File - secret = '' - try: - secretFile = open(self.__GOLD_SECRET_KEY_FILE) - secret = secretFile.readline() - except Exception, e: - self.log.error("Unable to open file %s" % self.__GOLD_SECRET_KEY_FILE) - self.log.debug(get_exception_string()) - return (ignoreErrors or False) - secretFile.close() - secret = secret.rstrip() - - # construct the SSRMAP request body - body = '<Body><Request action="Quote" actor="hod"><Object>Job</Object><Data><Job><ProjectId>%s</ProjectId><UserId>%s</UserId><WallDuration>10</WallDuration></Job></Data></Request></Body>' % (project, user) - - # compute digest - message = sha.new() - message.update(body) - digest = message.digest() - digestStr = base64.b64encode(digest) - - # compute signature - message = hmac.new(secret, digest, sha) - signatureStr = base64.b64encode(message.digest()) - - # construct the SSSRMAP Message - sssrmapRequest = '<?xml version="1.0" encoding="UTF-8"?>\ -<Envelope>%s<Signature><DigestValue>%s</DigestValue><SignatureValue>%s</SignatureValue><SecurityToken type="Symmetric"></SecurityToken></Signature></Envelope>' % (body, digestStr, signatureStr) - self.log.info('sssrmapRequest: %s' % sssrmapRequest) - - try: - # post message to GOLD server - webservice = httplib.HTTP(self.__goldHost, self.__goldPort) - webservice.putrequest("POST", "/SSSRMAP3 HTTP/1.1") - webservice.putheader("Content-Type", "text/xml; charset=\"utf-8\"") - webservice.putheader("Transfer-Encoding", "chunked") - webservice.endheaders() 
- webservice.send("%X" % len(sssrmapRequest) + "\r\n" + sssrmapRequest + '0\r\n') - - # handle the response - statusCode, statusmessage, header = webservice.getreply() - responseStr = webservice.getfile().read() - self.log.debug("httpStatusCode: %d" % statusCode) - self.log.info('responseStr: %s' % responseStr) - - # parse XML response - if (statusCode == 200): - responseArr = responseStr.split("\n") - responseBody = responseArr[2] - try: - doc = xml.dom.minidom.parseString(responseBody) - responseVal = doc.getElementsByTagName("Value")[0].firstChild.nodeValue - self.log.info("responseVal: %s" % responseVal) - if (responseVal == 'Success'): - return True - else: - return False - except Exception, e: - self.log.error("Unable to parse GOLD responseBody XML \"(%s)\" to get responseVal" % (responseBody)) - self.log.debug(get_exception_string()) - return (ignoreErrors or False) - else: - self.log.error("Invalid HTTP statusCode %d" % statusCode) - except Exception, e: - self.log.error("Unable to POST message to GOLD server (%s, %d)" % - (self.__goldHost, self.__goldPort)) - self.log.debug(get_exception_string()) - return (ignoreErrors or False) - - return True - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/allocationManagerUtil.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/allocationManagerUtil.py deleted file mode 100644 index 515e875070..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/allocationManagerUtil.py +++ /dev/null @@ -1,27 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-"""defines Allocation Manager Utilities""" - -# -*- python -*- -from hodlib.allocationManagers.goldAllocationManager import goldAllocationManager - -class allocationManagerUtil: - def getAllocationManager(name, cfg, log): - """returns a concrete instance of the specified AllocationManager""" - if name == 'gold': - return goldAllocationManager(cfg, log) - - getAllocationManager = staticmethod(getAllocationManager) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/desc.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/desc.py deleted file mode 100644 index 013e3bde02..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/desc.py +++ /dev/null @@ -1,298 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-"""manage component descriptors""" -# -*- python -*- - -import random - -from sets import Set -from pprint import pformat -from hodlib.Common.util import local_fqdn -from hodlib.Common.tcp import tcpSocket, tcpError - -class Schema: - """the primary class for describing - schema's """ - STRING, LIST, MAP = range(3) - - def __init__(self, name, type = STRING, delim=','): - self.name = name - self.type = type - self.delim = delim - - def getName(self): - return self.name - - def getType(self): - return self.type - - def getDelim(self): - return self.delim - -class _Merger: - """A class to merge lists and add key/value - pairs to a dictionary""" - def mergeList(x, y, uniq=True): - l = [] - l.extend(x) - l.extend(y) - if not uniq: - return l - - s = Set(l) - l = list(s) - return l - - mergeList = staticmethod(mergeList) - - def mergeMap(to, add): - - for k in add: - to.setdefault(k, add[k]) - - return to - - mergeMap = staticmethod(mergeMap) - -class NodePoolDesc: - """a schema for describing - Nodepools""" - def __init__(self, dict): - self.dict = dict.copy() - - self.dict.setdefault('attrs', {}) - - self._checkRequired() - - if 'options' in dict: self.dict['attrs'] = dict['options'] - - def _checkRequired(self): - - if not 'id' in self.dict: - raise ValueError, "nodepool needs 'id'" - - if self.getPkgDir() == None: - raise ValueError, "nodepool %s needs 'pkgs'" % (self.getName()) - - def getName(self): - return self.dict['id'] - - def getPkgDir(self): - return self.dict['batch-home'] - - def getAttrs(self): - return self.dict['attrs'] - - def getSchema(): - schema = {} - - s = Schema('id') - schema[s.getName()] = s - - s = Schema('batch-home', Schema.LIST, ':') - schema[s.getName()] = s - - s = Schema('attrs', Schema.MAP) - schema[s.getName()] = s - - return schema - - getSchema = staticmethod(getSchema) - -class ServiceDesc: - """A schema for describing services""" - def __init__(self, dict): - self.dict = dict.copy() - - self.dict.setdefault('external', False) - 
self.dict.setdefault('attrs', {}) - self.dict.setdefault('envs', {}) - self.dict.setdefault('host',None) - self.dict.setdefault('port',None) - self.dict.setdefault('tar', None) - self.dict.setdefault('pkgs', '') - self.dict.setdefault('final-attrs', {}) - self._checkRequired() - if self.dict.has_key('hadoop-tar-ball'): - self.dict['tar'] = self.dict['hadoop-tar-ball'] - - def _checkRequired(self): - - if not 'id' in self.dict: - raise ValueError, "service description needs 'id'" - -# if len(self.getPkgDirs()) <= 0: -# raise ValueError, "service description %s needs 'pkgs'" % (self.getName()) - - def getName(self): - return self.dict['id'] - - def isExternal(self): - """True if the service is outside hod. - e.g. connect to existing HDFS""" - - return self.dict['external'] - - def getPkgDirs(self): - return self.dict['pkgs'] - - def getTar(self): - return self.dict['tar'] - - def getAttrs(self): - return self.dict['attrs'] - - def getfinalAttrs(self): - return self.dict['final-attrs'] - - def getEnvs(self): - return self.dict['envs'] - - def getSchema(): - schema = {} - - s = Schema('id') - schema[s.getName()] = s - - s = Schema('external') - schema[s.getName()] = s - - s = Schema('pkgs', Schema.LIST, ':') - schema[s.getName()] = s - - s = Schema('tar', Schema.LIST, ":") - schema[s.getName()] = s - - s = Schema('attrs', Schema.MAP) - schema[s.getName()] = s - - s = Schema('final-attrs', Schema.MAP) - schema[s.getName()] = s - - s = Schema('envs', Schema.MAP) - schema[s.getName()] = s - - return schema - - getSchema = staticmethod(getSchema) - -class CommandDesc: - - def __init__(self, dict): - """a class for how a command is described""" - self.dict = dict - - def __repr__(self): - return pformat(self.dict) - - def _getName(self): - """return the name of the command to be run""" - return self.dict['name'] - - def _getProgram(self): - """return where the program is """ - return self.dict['program'] - - def _getArgv(self): - """return the arguments for the command to 
be run""" - return self.dict['argv'] - - def _getEnvs(self): - """return the environment in which the command is to be run""" - return self.dict['envs'] - - def _getPkgDirs(self): - """return the packages for this command""" - return self.dict['pkgdirs'] - - def _getWorkDirs(self): - """return the working directories for this command""" - return self.dict['workdirs'] - - def _getAttrs(self): - """return the list of attributes for this command""" - return self.dict['attrs'] - - def _getfinalAttrs(self): - """return the final xml params list for this command""" - return self.dict['final-attrs'] - - def _getForeground(self): - """return if the command is to be run in foreground or not""" - return self.dict['fg'] - - def _getStdin(self): - return self.dict['stdin'] - - def toString(cmdDesc): - """return a string representation of this command""" - row = [] - row.append('name=%s' % (cmdDesc._getName())) - row.append('program=%s' % (cmdDesc._getProgram())) - row.append('pkgdirs=%s' % CommandDesc._csv(cmdDesc._getPkgDirs(), ':')) - - if 'argv' in cmdDesc.dict: - row.append('argv=%s' % CommandDesc._csv(cmdDesc._getArgv())) - - if 'envs' in cmdDesc.dict: - envs = cmdDesc._getEnvs() - list = [] - for k in envs: - v = envs[k] - list.append('%s=%s' % (k, v)) - row.append('envs=%s' % CommandDesc._csv(list)) - - if 'workdirs' in cmdDesc.dict: - row.append('workdirs=%s' % CommandDesc._csv(cmdDesc._getWorkDirs(), ':')) - - if 'attrs' in cmdDesc.dict: - attrs = cmdDesc._getAttrs() - list = [] - for k in attrs: - v = attrs[k] - list.append('%s=%s' % (k, v)) - row.append('attrs=%s' % CommandDesc._csv(list)) - - if 'final-attrs' in cmdDesc.dict: - fattrs = cmdDesc._getAttrs() - list = [] - for k in fattrs: - v = fattrs[k] - list.append('%s=%s' % (k, v)) - row.append('final-attrs=%s' % CommandDesc._cvs(list)) - - if 'fg' in cmdDesc.dict: - row.append('fg=%s' % (cmdDesc._getForeground())) - - if 'stdin' in cmdDesc.dict: - row.append('stdin=%s' % (cmdDesc._getStdin())) - - return 
CommandDesc._csv(row) - - toString = staticmethod(toString) - - def _csv(row, delim=','): - """return a string in csv format""" - import cStringIO - import csv - - queue = cStringIO.StringIO() - writer = csv.writer(queue, delimiter=delim, escapechar='\\', quoting=csv.QUOTE_NONE, - doublequote=False, lineterminator='\n') - writer.writerow(row) - return queue.getvalue().rstrip('\n') - - _csv = staticmethod(_csv) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/descGenerator.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/descGenerator.py deleted file mode 100644 index 03852cca64..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/descGenerator.py +++ /dev/null @@ -1,72 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-"""manage hod configuration""" -# -*- python -*- - -import sys, csv, os -from optparse import Option, OptionParser -from xml.dom import minidom -from sets import Set -from select import select, poll, POLLIN - -from hodlib.Common.desc import * - -class DescGenerator: - """Contains the conversion to descriptors and other method calls - to config""" - def __init__(self, hodConfig): - """parse all the descriptors""" - - self.hodConfig = hodConfig - - def initializeDesc(self): - self.hodConfig['nodepooldesc'] = self.createNodePoolDesc() - self.hodConfig['servicedesc'] = self.createServiceDescDict() - - return self.hodConfig - - def getServices(self): - """get all the services from the config""" - - sdd = {} - for keys in self.hodConfig: - if keys.startswith('gridservice-'): - str = keys.split('-') - dict = self.hodConfig[keys] - if 'server-params' in dict: dict['attrs'] = dict['server-params'] - if 'final-server-params' in dict: dict['final-attrs'] = dict['final-server-params'] - dict['id'] = str[1] - desc = ServiceDesc(dict) - sdd[desc.getName()] = desc - - return sdd - - def createNodePoolDesc(self): - """ create a node pool descriptor and store - it in hodconfig""" - - desc = NodePoolDesc(self.hodConfig['resource_manager']) - return desc - - def createServiceDescDict(self): - """create a service descriptor for - all the services and store it in the - hodconfig""" - - sdd = self.getServices() - return sdd - - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/hodsvc.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/hodsvc.py deleted file mode 100644 index e042fe13b7..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/hodsvc.py +++ /dev/null @@ -1,228 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. 
The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# $Id:setup.py 5158 2007-04-09 00:14:35Z zim $ -# -#------------------------------------------------------------------------------ -import os, time, shutil, xmlrpclib, socket, pprint - -from signal import * - -from hodlib.Common.logger import hodLog, hodDummyLogger -from hodlib.Common.socketServers import hodXMLRPCServer -from hodlib.Common.util import local_fqdn -from hodlib.Common.xmlrpc import hodXRClient - -class hodBaseService: - """hodBaseService class - This class provides service registration, logging, - and configuration access methods. It also provides an XML-RPC server. - This class should be extended to create hod services. Methods beginning - with _xr_method will automatically be added to instances of this class. - """ - def __init__(self, name, config, xrtype='threaded'): - """ Initialization requires a name string and a config object of type - hodlib.Common.setup.options or hodlib.Common.setup.config.""" - - self.name = name - self.hostname = local_fqdn() - self._cfg = config - self._xrc = None - self.logs = {} - self._baseLogger = None - self._serviceID = os.getenv('PBS_JOBID') - - self.__logDir = None - self.__svcrgy = None - self.__stop = False - self.__xrtype = xrtype - - self._init_logging() - - if name != 'serviceRegistry': self._init_signals() - self._init_xrc_server() - - def __set_logging_level(self, level): - self.logs['main'].info("Setting log level to %s." 
% level) - for loggerName in self.loggers.keys(): - self.logs['main'].set_logger_level(loggerName, level) - - def __get_logging_level(self): - if self._cfg.has_key('stream'): - return self.loggers['main'].get_level('stream', 'main') - elif self._cfg.has_key('log-dir'): - return self.loggers['main'].get_level('file', 'main') - else: - return 0 - - def _xr_method_stop(self, *args): - """XML-RPC method, calls stop() on ourselves.""" - - return self.stop() - - def _xr_method_status(self, *args): - """XML-RPC method, calls status() on ourselves.""" - - return self.status() - - def _init_logging(self): - if self._cfg.has_key('debug'): - if self._cfg['debug'] > 0: - self._baseLogger = hodLog(self.name) - self.logs['main'] = self._baseLogger.add_logger('main') - - if self._cfg.has_key('stream'): - if self._cfg['stream']: - self._baseLogger.add_stream(level=self._cfg['debug'], - addToLoggerNames=('main',)) - - if self._cfg.has_key('log-dir'): - if self._serviceID: - self.__logDir = os.path.join(self._cfg['log-dir'], "%s.%s" % ( - self._cfg['userid'], self._serviceID)) - else: - self.__logDir = os.path.join(self._cfg['log-dir'], - self._cfg['userid']) - if not os.path.exists(self.__logDir): - os.mkdir(self.__logDir) - - self._baseLogger.add_file(logDirectory=self.__logDir, - level=self._cfg['debug'], addToLoggerNames=('main',)) - - if self._cfg.has_key('syslog-address'): - self._baseLogger.add_syslog(self._cfg['syslog-address'], - level=self._cfg['debug'], addToLoggerNames=('main',)) - - if not self.logs.has_key('main'): - self.logs['main'] = hodDummyLogger() - else: - self.logs['main'] = hodDummyLogger() - else: - self.logs['main'] = hodDummyLogger() - - def _init_signals(self): - def sigStop(sigNum, handler): - self.sig_wrapper(sigNum, self.stop) - - def toggleLevel(): - currentLevel = self.__get_logging_level() - if currentLevel == 4: - self.__set_logging_level(1) - else: - self.__set_logging_level(currentLevel + 1) - - def sigStop(sigNum, handler): - 
self._sig_wrapper(sigNum, self.stop) - - def sigDebug(sigNum, handler): - self.sig_wrapper(sigNum, toggleLevel) - - signal(SIGTERM, sigStop) - signal(SIGQUIT, sigStop) - signal(SIGINT, sigStop) - signal(SIGUSR2, sigDebug) - - def _sig_wrapper(self, sigNum, handler, *args): - self.logs['main'].info("Caught signal %s." % sigNum) - - if args: - handler(args) - else: - handler() - - def _init_xrc_server(self): - host = None - ports = None - if self._cfg.has_key('xrs-address'): - (host, port) = (self._cfg['xrs-address'][0], self._cfg['xrs-address'][1]) - ports = (port,) - elif self._cfg.has_key('xrs-port-range'): - host = '' - ports = self._cfg['xrs-port-range'] - - if host != None: - if self.__xrtype == 'threaded': - self._xrc = hodXMLRPCServer(host, ports) - elif self.__xrtype == 'twisted': - try: - from socketServers import twistedXMLRPCServer - self._xrc = twistedXMLRPCServer(host, ports, self.logs['main']) - except ImportError: - self.logs['main'].error("Twisted XML-RPC server not available, " - + "falling back on threaded server.") - self._xrc = hodXMLRPCServer(host, ports) - for attr in dir(self): - if attr.startswith('_xr_method_'): - self._xrc.register_function(getattr(self, attr), - attr[11:]) - - self._xrc.register_introspection_functions() - - def _register_service(self, port=None, installSignalHandlers=1): - if self.__svcrgy: - self.logs['main'].info( - "Registering service with service registery %s... 
" % self.__svcrgy) - svcrgy = hodXRClient(self.__svcrgy, None, None, 0, 0, installSignalHandlers) - - if self._xrc and self._http: - svcrgy.registerService(self._cfg['userid'], self._serviceID, - self.hostname, self.name, 'hod', { - 'xrs' : "http://%s:%s" % ( - self._xrc.server_address[0], - self._xrc.server_address[1]),'http' : - "http://%s:%s" % (self._http.server_address[0], - self._http.server_address[1])}) - elif self._xrc: - svcrgy.registerService(self._cfg['userid'], self._serviceID, - self.hostname, self.name, 'hod', { - 'xrs' : "http://%s:%s" % ( - self._xrc.server_address[0], - self._xrc.server_address[1]),}) - elif self._http: - svcrgy.registerService(self._cfg['userid'], self._serviceID, - self.hostname, self.name, 'hod', {'http' : - "http://%s:%s" % (self._http.server_address[0], - self._http.server_address[1]),}) - else: - svcrgy.registerService(self._cfg['userid'], self._serviceID, - self.hostname, name, 'hod', {} ) - - def start(self): - """ Start XML-RPC server and register service.""" - - self.logs['main'].info("Starting HOD service: %s ..." % self.name) - - if self._xrc: self._xrc.serve_forever() - if self._cfg.has_key('register') and self._cfg['register']: - self._register_service() - - def stop(self): - """ Stop XML-RPC server, unregister service and set stop flag. 
""" - - self.logs['main'].info("Stopping service...") - if self._xrc: self._xrc.stop() - self.__stop = True - - return True - - def status(self): - """Returns true, should be overriden.""" - - return True - - def wait(self): - """Wait until stop method is called.""" - - while not self.__stop: - time.sleep(.1) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/logger.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/logger.py deleted file mode 100644 index 3101ab2cde..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/logger.py +++ /dev/null @@ -1,788 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -"""hodLogger provides a customized interface to Python's core logging package. 
-""" - -import sys, os, re, logging, logging.handlers, inspect, pprint, types -from tcp import get_address_tuple - -fileFormatString = '[%(asctime)s] %(levelname)s/%(levelno)s \ -%(module)s:%(lineno)s - %(message)s' - -streamFormatString = '%(levelname)s - %(message)s' - -debugStreamFormatString = '[%(asctime)s] %(levelname)s/%(levelno)s \ -%(module)s:%(lineno)s - %(message)s' - -syslogFormatString = '(%(process)d) %(levelname)s/%(levelno)s \ -%(module)s:%(lineno)s - %(message)s' - -smtpFormatString = '[%(asctime)s] %(levelname)s/%(levelno)s \ -%(module)s:%(lineno)s\n\n%(message)s' - -fileFormater = logging.Formatter(fileFormatString) -streamFormater = logging.Formatter(streamFormatString) -debugStreamFormater = logging.Formatter(debugStreamFormatString) -syslogFormater = logging.Formatter(syslogFormatString) -smtpFormater = logging.Formatter(smtpFormatString) - -defaultFileLevel = 3 -defaultStreamLevel = 4 -defaultSyslogLevel = 3 -defaultSmtpLevel = 0 - -hodLogLevelMap = { 0 : logging.CRITICAL, - 1 : logging.ERROR, - 2 : logging.WARNING, - 3 : logging.INFO, - 4 : logging.DEBUG } - -hodStreamFormatMap = { 0 : streamFormater, - 1 : streamFormater, - 2 : streamFormater, - 3 : streamFormater, - 4 : debugStreamFormater } - -rehodLogLevelMap = {} -for key in hodLogLevelMap.keys(): - rehodLogLevelMap[hodLogLevelMap[key]] = key - - -reModule = re.compile("^(.*)\..*$") - -hodLogs = {} - -class hodRotatingFileHandler(logging.handlers.RotatingFileHandler): - """ This class needs to be used in place of RotatingFileHandler when - the 2.4.0 Python interpreter is used.""" - - def emit(self, record): - """ - Emit a record. - - If a formatter is specified, it is used to format the record. - The record is then written to the stream with a trailing newline - [N.B. this may be removed depending on feedback]. If exception - information is present, it is formatted using - traceback.print_exception and appended to the stream. 
- - ***** - - THIS IS A HACK, when instances of hodLogger get passed to the child of - a child thread for some reason self.stream gets closed. This version - of emit re-opens self.stream if it is closed. After testing it appears - that self.stream is only closed once after the second thread is - initialized so there is not performance penalty to this hack. This - problem only exists in python 2.4. - - ***** - """ - try: - if self.shouldRollover(record): - self.doRollover() - try: - msg = self.format(record) - fs = "%s\n" - if not hasattr(types, "UnicodeType"): #if no unicode support... - self.stream.write(fs % msg) - else: - try: - self.stream.write(fs % msg) - except UnicodeError: - self.stream.write(fs % msg.encode("UTF-8")) - except ValueError: - self.stream = open(self.baseFilename, self.mode) - self.stream.write(fs % msg) - - self.flush() - except: - self.handleError(record) - except: - self.handleError(record) - - def shouldRollover(self, record): - """ - Determine if rollover should occur. - - Basically, see if the supplied record would cause the file to exceed - the size limit we have. - - ***** - - THIS IS A HACK, when instances of hodLogger get passed to the child of - a child thread for some reason self.stream gets closed. This version - of emit re-opens self.stream if it is closed. After testing it appears - that self.stream is only closed once after the second thread is - initialized so there is not performance penalty to this hack. This - problem only exists in python 2.4. - - ***** - """ - if self.maxBytes > 0: # are we rolling over? - msg = "%s\n" % self.format(record) - - try: - #due to non-posix-compliant Windows feature - self.stream.seek(0, 2) - except ValueError: - self.stream = open(self.baseFilename, self.mode) - self.stream.seek(0, 2) - - if self.stream.tell() + len(msg) >= self.maxBytes: - return 1 - return 0 - -class hodCustomLoggingLogger(logging.Logger): - """ Slight extension of the logging.Logger class used by the hodLog class. 
- """ - def findCaller(self): - """ findCaller() is supposed to return the callers file name and line - number of the caller. This was broken when the logging package was - wrapped by hodLog. We should return much more relevant info now. - """ - - callerModule = '' - callerLine = 0 - - currentModule = os.path.basename(__file__) - currentModule = reModule.sub("\g<1>", currentModule) - - frames = inspect.stack() - for i in range(len(frames)): - frameModule = os.path.basename(frames[i][1]) - frameModule = reModule.sub("\g<1>", frameModule) - if frameModule == currentModule: - previousFrameModule = os.path.basename(frames[i+1][1]) - previousFrameModule = reModule.sub("\g<1>", - previousFrameModule) - callerFile = frames[i+1][1] - callerLine = frames[i+1][2] - continue - - returnValues = (callerFile, callerLine) - if sys.version.startswith('2.4.4') or sys.version.startswith('2.5'): - returnValues = (callerFile, callerLine, None) - - return returnValues - -class hodLog: - """ Cluster management logging class. - - logging levels: 0 - log only critical messages - 1 - log critical and error messages - 2 - log critical, error, and warning messages - 3 - log critical, error, warning, and info messages - 4 - log critical, error, warning, info, and debug - messages""" - - def __init__(self, appName): - """Constructs a hodLogger object. 
- - appName - name of logging application, log filenames will be - prepended with this name""" - - self.__appName = appName - - # initialize a dictionary to hold loggerNames - self.__loggerNames = {} - - # initialize a dictionary to track log handlers and handler classes - self.__logObjs = { 'file' : {}, 'smtp' : {}, - 'syslog' : {}, 'strm' : {} } - - # use a custom logging.Logger class - logging.setLoggerClass(hodCustomLoggingLogger) - - # get the root app logger - self.__logger = logging.getLogger(appName) - self.__logger.setLevel(logging.DEBUG) - - hodLogs[self.__appName] = self - - def __attr__(self, attrname): - """loggerNames - list of defined logger names""" - - if attrname == "loggerNames": return self.__loggerNames.keys() - else: raise AttributeError, attrname - - def __repr__(self): - """Returns a string representation of a hodLog object of the form: - - LOG_NAME - file: FILENAME (level LEVEL) - smtp: SMTP_SERVER from FROM_ADDRESS (level LEVEL) - strm: STRM_OBJECT (level LEVEL) - ... 
""" - - hodLogString = "hodLog: %s\n\n" % self.__appName - for loggerName in self.__loggerNames.keys(): - hodLogString = "%s logger: %s\n" % (hodLogString, loggerName) - handlerClasses = self.__logObjs.keys() - handlerClasses.sort() - for handlerClass in handlerClasses: - try: - loggerLevelName = logging.getLevelName( - self.__logObjs[handlerClass][loggerName]['level']) - hodLogString = "%s %s: %s (level %s)\n" % ( - hodLogString, handlerClass, - self.__logObjs[handlerClass][loggerName]['data'], - loggerLevelName) - except: - hodLogString = "%s %s: none\n" % ( - hodLogString, handlerClass) - hodLogString = "%s\n" % hodLogString - - return hodLogString - - # 'private' method which adds handlers to self.__logObjs - def __add_to_handlers(self, handlerClass, loggerName, handler, data, - level): - self.__logObjs[handlerClass][loggerName] = {} - self.__logObjs[handlerClass][loggerName]['handler'] = handler - self.__logObjs[handlerClass][loggerName]['data'] = data - self.__logObjs[handlerClass][loggerName]['level'] = level - - # 'private' method which determines whether a hod log level is valid and - # returns a valid logging.Logger level - def __get_logging_level(self, level, defaultLevel): - loggingLevel = '' - try: - loggingLevel = hodLogLevelMap[int(level)] - except: - loggingLevel = hodLogLevelMap[defaultLevel] - - return loggingLevel - - # make a logging.logger name rootLogger.childLogger in our case the - # appName.componentName - def __get_logging_logger_name(self, loggerName): - return "%s.%s" % (self.__appName, loggerName) - - def add_logger(self, loggerName): - """Adds a logger of name loggerName. 
- - loggerName - name of component of a given application doing the - logging - - Returns a hodLogger object for the just added logger.""" - - try: - self.__loggerNames[loggerName] - except: - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logging.getLogger(loggingLoggerName) - - self.__loggerNames[loggerName] = 1 - - return hodLogger(self.__appName, loggingLoggerName) - - def add_file(self, logDirectory, maxBytes=0, backupCount=0, - level=defaultFileLevel, addToLoggerNames=None): - """Adds a file handler to all defined loggers or a specified set of - loggers. Each log file will be located in logDirectory and have a - name of the form appName-loggerName.log. - - logDirectory - logging directory - maxBytes - maximum log size to write in bytes before rotate - backupCount - number of rotated logs to keep - level - cluster management log level - addToLoggerNames - list of logger names to which stream handling - will be added""" - - def add_file_handler(loggerName): - if not self.__logObjs['file'].has_key(loggerName): - loggingLevel = self.__get_logging_level(level, - defaultFileLevel) - - logFile = os.path.join(logDirectory, "%s-%s.log" % ( - self.__appName, loggerName)) - - logFilePresent = False - if(os.path.exists(logFile)): - logFilePresent = True - - if sys.version.startswith('2.4'): - fileHandler = hodRotatingFileHandler(logFile, - maxBytes=maxBytes, backupCount=backupCount) - else: - fileHandler = logging.handlers.RotatingFileHandler(logFile, - maxBytes=maxBytes, backupCount=backupCount) - if logFilePresent and backupCount: - fileHandler.doRollover() - - fileHandler.setLevel(loggingLevel) - fileHandler.setFormatter(fileFormater) - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - aLogger = logging.getLogger(loggingLoggerName) - aLogger.addHandler(fileHandler) - - fileData = "%s" % logFile - self.__add_to_handlers('file', loggerName, fileHandler, - fileData, loggingLevel) - - if addToLoggerNames: - for loggerName in 
addToLoggerNames: - add_file_handler(loggerName) - else: - for loggerName in self.__loggerNames: - add_file_handler(loggerName) - - def add_stream(self, stream=sys.stderr, level=defaultStreamLevel, - addToLoggerNames=None): - """Adds a stream handler to all defined loggers or a specified set of - loggers. - - stream - a stream such as sys.stderr or sys.stdout - level - cluster management log level - addToLoggerNames - tupple of logger names to which stream handling - will be added""" - - def add_stream_handler(loggerName): - if not self.__logObjs['strm'].has_key(loggerName): - loggingLevel = self.__get_logging_level(level, - defaultStreamLevel) - - streamHandler = logging.StreamHandler(stream) - - streamHandler.setLevel(loggingLevel) - - streamHandler.setFormatter(hodStreamFormatMap[int(level)]) - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - aLogger = logging.getLogger(loggingLoggerName) - aLogger.addHandler(streamHandler) - - streamData = "%s" % stream - self.__add_to_handlers('strm', loggerName, streamHandler, - streamData, loggingLevel) - - if addToLoggerNames: - for loggerName in addToLoggerNames: - add_stream_handler(loggerName) - else: - for loggerName in self.__loggerNames: - add_stream_handler(loggerName) - - def add_syslog(self, address, level=defaultSyslogLevel, - addToLoggerNames=None): - def add_syslog_handler(loggerName): - if not self.__logObjs['syslog'].has_key(loggerName): - loggingLevel = self.__get_logging_level(level, - defaultSyslogLevel) - - address[1] = int(address[1]) - syslogHandler = logging.handlers.SysLogHandler(tuple(address), - 9) - - syslogHandler.setLevel(loggingLevel) - - syslogHandler.setFormatter(syslogFormater) - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - aLogger = logging.getLogger(loggingLoggerName) - aLogger.addHandler(syslogHandler) - - syslogData = "%s:%s" % (address[0], address[1]) - self.__add_to_handlers('syslog', loggerName, syslogHandler, - syslogData, loggingLevel) - - if 
addToLoggerNames: - for loggerName in addToLoggerNames: - add_syslog_handler(loggerName) - else: - for loggerName in self.__loggerNames: - add_syslog_handler(loggerName) - - - def add_smtp(self, mailHost, fromAddress, toAddresses, - level=defaultSmtpLevel, addToLoggerNames=None): - """Adds an SMTP handler to all defined loggers or a specified set of - loggers. - - mailHost - SMTP server to used when sending mail - fromAddress - email address to use as the from address when - sending mail - toAdresses - comma seperated list of email address to which - mail will be sent - level - cluster management log level - addToLoggerNames - tupple of logger names to which smtp handling - will be added""" - - def add_email_handler(loggerName): - if not self.__logObjs['smtp'].has_key(loggerName): - loggingLevel = self.__get_logging_level(level, - defaultSmtpLevel) - - subject = loggerName - if loggingLevel == 50: - subject = "%s - a critical error has occured." % subject - elif loggingLevel == 40: - subject = "%s - an error has occured." % subject - elif loggingLevel == 30: - subject = "%s - warning message." % subject - elif loggingLevel == 20: - subject = "%s - information message." % subject - elif loggingLevel == 10: - subject = "%s - debugging message." 
% subject - - mailHostTuple = get_address_tuple(mailHost) - emailHandler = logging.handlers.SMTPHandler(mailHostTuple, - fromAddress, toAddresses, subject) - - emailHandler.setFormatter(smtpFormater) - emailHandler.setLevel(loggingLevel) - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - aLogger = logging.getLogger(loggingLoggerName) - aLogger.addHandler(emailHandler) - - emailData = "%s from %s" % (mailHost, fromAddress) - self.__add_to_handlers('smtp', loggerName, emailHandler, - emailData, loggingLevel) - - if addToLoggerNames: - for loggerName in addToLoggerNames: - add_email_handler(loggerName) - else: - for loggerName in self.__loggerNames: - add_email_handler(loggerName) - - def status(self): - statusStruct = {} - for loggerName in self.__loggerNames.keys(): - statusStruct[loggerName] = [] - for handlerClass in self.__logObjs.keys(): - loggerDict = {} - try: - level = self.__logObjs[handlerClass][loggerName]['level'] - level = rehodLogLevelMap[level] - - loggerDict['handler'] = handlerClass - loggerDict['level'] = level - loggerDict['data'] = \ - self.__logObjs[handlerClass][loggerName]['data'] - except: - pass - else: - statusStruct[loggerName].append(loggerDict) - - return statusStruct - - def lock_handlers(self): - for handlerClass in self.__logObjs.keys(): - for loggerName in self.__logObjs[handlerClass].keys(): - self.__logObjs[handlerClass][loggerName]['handler'].acquire() - - def release_handlers(self): - for handlerClass in self.__logObjs.keys(): - for loggerName in self.__logObjs[handlerClass].keys(): - self.__logObjs[handlerClass][loggerName]['handler'].release() - - def get_level(self, handler, loggerName): - return rehodLogLevelMap[self.__logObjs[handler][loggerName]['level']] - - def set_level(self, handler, loggerName, level): - """Sets the logging level of a particular logger and logger handler. 
- - handler - handler (smtp, file, or stream) - loggerName - logger to set level on - level - level to set logger - """ - - level = self.__get_logging_level(level, defaultFileLevel) - self.__logObjs[handler][loggerName]['handler'].setLevel(level) - self.__logObjs[handler][loggerName]['level'] = level - - if handler == 'stream': - self.__logObjs[handler][loggerName]['handler'].setFormatter( - hodStreamFormatMap[int(level)]) - - def set_logger_level(self, loggerName, level): - status = 0 - for handlerClass in self.__logObjs.keys(): - if self.__logObjs[handlerClass].has_key(loggerName): - self.set_level(handlerClass, loggerName, level) - else: - status = 1 - - return status - - def rollover(self, loggerName): - status = 0 - if self.__logObjs['file'].has_key(loggerName): - if self.__logObjs['file'][loggerName]['handler'].shouldRollover(): - self.__logObjs['file'][loggerName]['handler'].doRollover() - else: - status = 1 - - return status - - def set_max_bytes(self, maxBytes): - status = 0 - if self.__logObjs.has_key('file'): - for loggerName in self.__logObjs['file'].keys(): - self.__logObjs['file'][loggerName]['handler'].maxBytes = 0 - else: - status = 1 - - return status - - def get_logger(self, loggerName): - """ Returns a hodLogger object for a logger by name. """ - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - return hodLogger(self.__appName, loggingLoggerName) - - def critical(self, loggerName, msg): - """Logs a critical message and flushes log buffers. This method really - should only be called upon a catastrophic failure. - - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.critical(msg) - self.flush() - - def error(self, loggerName, msg): - """Logs an error message and flushes log buffers. 
- - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.error(msg) - self.flush() - - def warn(self, loggerName, msg): - """Logs a warning message. - - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.warn(msg) - - def info(self, loggerName, msg): - """Logs an information message. - - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.info(msg) - - def debug(self, loggerName, msg): - """Logs a debugging message. - - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.debug(msg) - - def flush(self): - """Flush all log handlers.""" - - for handlerClass in self.__logObjs.keys(): - for loggerName in self.__logObjs[handlerClass].keys(): - self.__logObjs[handlerClass][loggerName]['handler'].flush() - - def shutdown(self): - """Shutdown all logging, flushing all buffers.""" - - for handlerClass in self.__logObjs.keys(): - for loggerName in self.__logObjs[handlerClass].keys(): - self.__logObjs[handlerClass][loggerName]['handler'].flush() - # Causes famous 'ValueError: I/O operation on closed file' - # self.__logObjs[handlerClass][loggerName]['handler'].close() - -class hodLogger: - """ Encapsulates a particular logger from a hodLog object. """ - def __init__(self, appName, loggingLoggerName): - """Constructs a hodLogger object (a particular logger in a hodLog - object). 
- - loggingLoggerName - name of a logger in hodLog object""" - - self.__appName = appName - self.__loggerName = loggingLoggerName - self.__logger = logging.getLogger(self.__loggerName) - - def __repr__(self): - """Returns a string representation of a hodComponentLog object.""" - - return "%s hodLog" % self.__loggerName - - def __call__(self): - pass - - def set_logger_level(self, loggerName, level): - - return hodLogs[self.__appName].set_logger_level(loggerName, level) - - def set_max_bytes(self, maxBytes): - - return hodLogs[self.__appName].set_max_bytes(maxBytes) - - def rollover(self): - return hodLogs[self.__appName].rollover(self.__loggerName) - - def get_level(self, handler, loggerName): - - return hodLogs[self.__appName].get_level(handler, loggerName) - - def critical(self, msg): - """Logs a critical message and calls sys.exit(1). - - msg - message to be logged""" - - self.__logger.critical(msg) - - def error(self, msg): - """Logs an error message. - - msg - message to be logged""" - - self.__logger.error(msg) - - def warn(self, msg): - """Logs a warning message. - - msg - message to be logged""" - - self.__logger.warn(msg) - - def info(self, msg): - """Logs an information message. - - msg - message to be logged""" - - self.__logger.info(msg) - - def debug(self, msg): - """Logs a debugging message. - - msg - message to be logged""" - - self.__logger.debug(msg) - -class hodDummyLogger: - """ Dummy hodLogger class. 
Other hod classes requiring a hodLogger default - to this hodLogger if no logger is passed.""" - - def __init__(self): - """pass""" - - pass - - def __repr__(self): - return "dummy hodLogger" - - def __call__(self): - """pass""" - - pass - - def set_logger_level(self, loggerName, level): - - return 0 - - def set_max_bytes(self, loggerName, maxBytes): - - return 0 - - def get_level(self, handler, loggerName): - - return 4 - - def rollover(self): - - return 0 - - def critical(self, msg): - """pass""" - - pass - - def error(self, msg): - """pass""" - - pass - - def warn(self, msg): - """pass""" - - pass - - def info(self, msg): - """pass""" - - pass - - def debug(self, msg): - """pass""" - - pass - -def ensureLogDir(logDir): - """Verify that the passed in log directory exists, and if it doesn't - create it.""" - if not os.path.exists(logDir): - try: - old_mask = os.umask(0) - os.makedirs(logDir, 01777) - os.umask(old_mask) - except Exception, e: - print >>sys.stderr, "Could not create log directories %s. Exception: %s. 
Stack Trace: %s" % (logDir, get_exception_error_string(), get_exception_string()) - raise e - -def getLogger(cfg, logName): - if cfg['debug'] > 0: - user = cfg['userid'] - baseLogger = hodLog(logName) - log = baseLogger.add_logger('main') - - if cfg.has_key('log-dir'): - serviceId = os.getenv('PBS_JOBID') - if serviceId: - logDir = os.path.join(cfg['log-dir'], "%s.%s" % (user, serviceId)) - else: - logDir = os.path.join(cfg['log-dir'], user) - if not os.path.exists(logDir): - os.mkdir(logDir) - - baseLogger.add_file(logDirectory=logDir, level=cfg['debug'], - addToLoggerNames=('main',)) - - try: - if cfg.has_key('stream') and cfg['stream']: - baseLogger.add_stream(level=cfg['debug'], addToLoggerNames=('main',)) - - if cfg.has_key('syslog-address'): - baseLogger.add_syslog(cfg['syslog-address'], - level=cfg['debug'], addToLoggerNames=('main',)) - except Exception,e: - # Caught an exception while initialising logger - log.critical("%s Logger failed to initialise. Reason : %s" % (logName, e)) - pass - return log diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/miniHTMLParser.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/miniHTMLParser.py deleted file mode 100644 index 34a0fd0124..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/miniHTMLParser.py +++ /dev/null @@ -1,45 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import urllib, urlparse, re - -from HTMLParser import HTMLParser - -class miniHTMLParser( HTMLParser ): - - viewedQueue = [] - instQueue = [] - - def setBaseUrl(self, url): - self.baseUrl = url - - def getNextLink( self ): - if self.instQueue == []: - return None - else: - return self.instQueue.pop(0) - - def handle_starttag( self, tag, attrs ): - if tag == 'a': - newstr = urlparse.urljoin(self.baseUrl, str(attrs[0][1])) - if re.search('mailto', newstr) != None: - return - - if (newstr in self.viewedQueue) == False: - self.instQueue.append( newstr ) - self.viewedQueue.append( newstr ) - - - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/nodepoolutil.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/nodepoolutil.py deleted file mode 100644 index d733780ec1..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/nodepoolutil.py +++ /dev/null @@ -1,26 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-#See the License for the specific language governing permissions and -#limitations under the License. -from hodlib.NodePools.torque import TorquePool - -class NodePoolUtil: - def getNodePool(nodePoolDesc, cfg, log): - """returns a concrete instance of NodePool as configured by 'cfg'""" - npd = nodePoolDesc - name = npd.getName() - if name == 'torque': - return TorquePool(npd, cfg, log) - - getNodePool = staticmethod(getNodePool) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/setup.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/setup.py deleted file mode 100644 index 791b095c9b..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/setup.py +++ /dev/null @@ -1,1058 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-# $Id:setup.py 5158 2007-04-09 00:14:35Z zim $ -# $Id:setup.py 5158 2007-04-09 00:14:35Z zim $ -# -#------------------------------------------------------------------------------ - -"""'setup' provides for reading and verifing configuration files based on - Python's SafeConfigParser class.""" - -import sys, os, re, pprint - -from ConfigParser import SafeConfigParser -from optparse import OptionParser, IndentedHelpFormatter, OptionGroup -from util import get_perms, replace_escapes -from types import typeValidator, typeValidatorInstance, is_valid_type, \ - typeToString -from hodlib.Hod.hod import hodHelp - -reEmailAddress = re.compile("^.*@.*$") -reEmailDelimit = re.compile("@") -reComma = re.compile("\s*,\s*") -reDot = re.compile("\.") -reCommentHack = re.compile("^.*?\s+#|;.*", flags=re.S) -reCommentNewline = re.compile("\n|\r$") -reKeyVal = r"(?<!\\)=" -reKeyVal = re.compile(reKeyVal) -reKeyValList = r"(?<!\\)," -reKeyValList = re.compile(reKeyValList) - -errorPrefix = 'error' -requiredPerms = '0660' - -class definition: - def __init__(self): - """Generates a configuration definition object.""" - self.__def = {} - self.__defOrder = [] - - def __repr__(self): - return pprint.pformat(self.__def) - - def __getitem__(self, section): - return self.__def[section] - - def __iter__(self): - return iter(self.__def) - - def sections(self): - """Returns a list of sections/groups.""" - - if len(self.__defOrder): - return self.__defOrder - else: - return self.__def.keys() - - def add_section(self, section): - """Add a configuration section / option group.""" - - if self.__def.has_key(section): - raise Exception("Section already exists: '%s'" % section) - else: - self.__def[section] = {} - - def add_def(self, section, var, type, desc, help = True, default = None, - req = True, validate = True, short = None): - """ Add a variable definition. 
- - section - section name - var - variable name - type - valid hodlib.types - desc - description of variable - help - display help for this variable - default - default value - req - bool, requried? - validate - bool, validate type value? - short - short symbol (1 character), - help - bool, display help?""" - - if self.__def.has_key(section): - if not is_valid_type(type): - raise Exception("Type (type) is invalid: %s.%s - '%s'" % (section, var, - type)) - if not isinstance(desc, str): - raise Exception("Description (desc) must be a string: %s.%s - '%s'" % ( - section, var, desc)) - if not isinstance(req, bool): - raise Exception("Required (req) must be a bool: %s.%s - '%s'" % (section, - var, - req)) - if not isinstance(validate, bool): - raise Exception("Validate (validate) must be a bool: %s.%s - '%s'" % ( - section, var, validate)) - - if self.__def[section].has_key(var): - raise Exception("Variable name already defined: '%s'" % var) - else: - self.__def[section][var] = { 'type' : type, - 'desc' : desc, - 'help' : help, - 'default' : default, - 'req' : req, - 'validate' : validate, - 'short' : short } - else: - raise Exception("Section does not exist: '%s'" % section) - - def add_defs(self, defList, defOrder=None): - """ Add a series of definitions. - - defList = { section0 : ((name0, - type0, - desc0, - help0, - default0, - req0, - validate0, - short0), - .... - (nameN, - typeN, - descN, - helpN, - defaultN, - reqN, - validateN, - shortN)), - .... - - sectionN : ... 
} - - Where the short synmbol is optional and can only be one char.""" - - for section in defList.keys(): - self.add_section(section) - for defTuple in defList[section]: - if isinstance(defTuple, tuple): - if len(defTuple) < 7: - raise Exception( - "section %s is missing an element: %s" % ( - section, pprint.pformat(defTuple))) - else: - raise Exception("section %s of defList is not a tuple" % - section) - - if len(defTuple) == 7: - self.add_def(section, defTuple[0], defTuple[1], - defTuple[2], defTuple[3], defTuple[4], - defTuple[5], defTuple[6]) - else: - self.add_def(section, defTuple[0], defTuple[1], - defTuple[2], defTuple[3], defTuple[4], - defTuple[5], defTuple[6], defTuple[7]) - if defOrder: - for section in defOrder: - if section in self.__def: - self.__defOrder.append(section) - - for section in self.__def: - if not section in defOrder: - raise Exception( - "section %s is missing from specified defOrder." % - section) - -class baseConfig: - def __init__(self, configDef, originalDir=None): - self.__toString = typeToString() - self.__validated = False - self._configDef = configDef - self._options = None - self._mySections = [] - self._dict = {} - self.configFile = None - self.__originalDir = originalDir - - if self._configDef: - self._mySections = configDef.sections() - - def __repr__(self): - """Returns a string representation of a config object including all - normalizations.""" - - print_string = ''; - for section in self._mySections: - print_string = "%s[%s]\n" % (print_string, section) - options = self._dict[section].keys() - for option in options: - print_string = "%s%s = %s\n" % (print_string, option, - self._dict[section][option]) - - print_string = "%s\n" % (print_string) - - print_string = re.sub("\n\n$", "", print_string) - - return print_string - - def __getitem__(self, section): - """ Returns a dictionary of configuration name and values by section. 
- """ - return self._dict[section] - - def __setitem__(self, section, value): - self._dict[section] = value - - def __iter__(self): - return iter(self._dict) - - def has_key(self, section): - status = False - if section in self._dict: - status = True - - return status - - # Prints configuration error messages - def var_error(self, section, option, *addData): - errorStrings = [] - if not self._dict[section].has_key(option): - self._dict[section][option] = None - errorStrings.append("%s: invalid '%s' specified in section %s (--%s.%s): %s" % ( - errorPrefix, option, section, section, option, self._dict[section][option])) - - if addData: - errorStrings.append("%s: additional info: %s\n" % (errorPrefix, - addData[0])) - return errorStrings - - def var_error_suggest(self, errorStrings): - if self.configFile: - errorStrings.append("Check your command line options and/or " + \ - "your configuration file %s" % self.configFile) - - def __get_args(self, section): - def __dummyToString(type, value): - return value - - toString = __dummyToString - if self.__validated: - toString = self.__toString - - args = [] - if isinstance(self._dict[section], dict): - for option in self._dict[section]: - if section in self._configDef and \ - option in self._configDef[section]: - if self._configDef[section][option]['type'] == 'bool': - if self._dict[section][option] == 'True' or \ - self._dict[section][option] == True: - args.append("--%s.%s" % (section, option)) - else: - args.append("--%s.%s" % (section, option)) - args.append(toString( - self._configDef[section][option]['type'], - self._dict[section][option])) - else: - if section in self._configDef: - if self._configDef[section][option]['type'] == 'bool': - if self._dict[section] == 'True' or \ - self._dict[section] == True: - args.append("--%s" % section) - else: - if self._dict[section] != 'config': - args.append("--%s" % section) - args.append(toString(self._configDef[section]['type'], - self._dict[section])) - - return args - - def 
values(self): - return self._dict.values() - - def keys(self): - return self._dict.keys() - - def get_args(self, exclude=None, section=None): - """Retrieve a tuple of config arguments.""" - - args = [] - if section: - args = self.__get_args(section) - else: - for section in self._dict: - if exclude: - if not section in exclude: - args.extend(self.__get_args(section)) - else: - args.extend(self.__get_args(section)) - - return tuple(args) - - def verify(self): - """Verifies each configuration variable, using the configValidator - class, based on its type as defined by the dictionary configDef. - Upon encountering a problem an error is printed to STDERR and - false is returned.""" - - oldDir = os.getcwd() - if self.__originalDir: - os.chdir(self.__originalDir) - - status = True - statusMsgs = [] - - if self._configDef: - errorCount = 0 - configValidator = typeValidator(self.__originalDir) - - # foreach section and option by type string as defined in configDef - # add value to be validated to validator - for section in self._mySections: - for option in self._configDef[section].keys(): - configVarName = "%s.%s" % (section, option) - - if self._dict[section].has_key(option): - if self._configDef[section][option].has_key('validate'): - if self._configDef[section][option]['validate']: - # is the section.option needed to be validated? 
- configValidator.add(configVarName, - self._configDef[section][option]['type'], - self._dict[section][option]) - else: - # If asked not to validate, just normalize - self[section][option] = \ - configValidator.normalize( - self._configDef[section][option]['type'], - self._dict[section][option]) - if self._configDef[section][option]['default'] != \ - None: - self._configDef[section][option]['default'] = \ - configValidator.normalize( - self._configDef[section][option]['type'], - self._configDef[section][option]['default'] - ) - self._configDef[section][option]['default'] = \ - self.__toString( - self._configDef[section][option]['type'], - self._configDef[section][option]['default'] - ) - else: - # This should not happen. Just in case, take this as 'to be validated' case. - configValidator.add(configVarName, - self._configDef[section][option]['type'], - self._dict[section][option]) - elif self._configDef[section][option]['req']: - statusMsgs.append("%s: %s.%s is not defined." - % (errorPrefix, section, option)) - errorCount = errorCount + 1 - - configValidator.validate() - - for valueInfo in configValidator.validatedInfo: - sectionsOptions = reDot.split(valueInfo['name']) - - if valueInfo['isValid'] == 1: - self._dict[sectionsOptions[0]][sectionsOptions[1]] = \ - valueInfo['normalized'] - else: - if valueInfo['errorData']: - statusMsgs.extend(self.var_error(sectionsOptions[0], - sectionsOptions[1], valueInfo['errorData'])) - else: - statusMsgs.extend(self.var_error(sectionsOptions[0], - sectionsOptions[1])) - errorCount = errorCount + 1 - - if errorCount > 1: - statusMsgs.append( "%s: %s problems found." % ( - errorPrefix, errorCount)) - self.var_error_suggest(statusMsgs) - status = False - elif errorCount > 0: - statusMsgs.append( "%s: %s problem found." 
% ( - errorPrefix, errorCount)) - self.var_error_suggest(statusMsgs) - status = False - - self.__validated = True - - if self.__originalDir: - os.chdir(oldDir) - - return status,statusMsgs - - def normalizeValue(self, section, option) : - return typeValidatorInstance.normalize( - self._configDef[section][option]['type'], - self[section][option]) - - def validateValue(self, section, option): - # Validates a section.option and exits on error - valueInfo = typeValidatorInstance.verify( - self._configDef[section][option]['type'], - self[section][option]) - if valueInfo['isValid'] == 1: - return [] - else: - if valueInfo['errorData']: - return self.var_error(section, option, valueInfo['errorData']) - else: - return self.var_error(section, option) - -class config(SafeConfigParser, baseConfig): - def __init__(self, configFile, configDef=None, originalDir=None, - options=None, checkPerms=False): - """Constructs config object. - - configFile - configuration file to read - configDef - definition object - options - options object - checkPerms - check file permission on config file, 0660 - - sample configuration file: - - [snis] - modules_dir = modules/ ; location of infoModules - md5_defs_dir = etc/md5_defs ; location of infoTree md5 defs - info_store = var/info ; location of nodeInfo store - cam_daemon = localhost:8200 ; cam daemon address""" - - - SafeConfigParser.__init__(self) - baseConfig.__init__(self, configDef, originalDir) - - if(os.path.exists(configFile)): - self.configFile = configFile - else: - raise IOError - - self._options = options - - ## UNUSED CODE : checkPerms is never True - ## zim: this code is used if one instantiates config() with checkPerms set to - ## True. 
- if checkPerms: self.__check_perms() - - self.read(configFile) - - self._configDef = configDef - if not self._configDef: - self._mySections = self.sections() - - self.__initialize_config_dict() - - def __initialize_config_dict(self): - """ build a dictionary of config vars keyed by section name defined in - configDef, if options defined override config""" - - for section in self._mySections: - items = self.items(section) - self._dict[section] = {} - - # First fill self._dict with whatever is given in hodrc. - # Going by this, options given at the command line either override - # options in hodrc, or get appended to the list, like for - # hod.client-params. Note that after this dict has _only_ hodrc - # params - for keyValuePair in items: - # stupid commenting bug in ConfigParser class, lines without an - # option value pair or section required that ; or # are at the - # beginning of the line, :( - newValue = reCommentHack.sub("", keyValuePair[1]) - newValue = reCommentNewline.sub("", newValue) - self._dict[section][keyValuePair[0]] = newValue - # end of filling with options given in hodrc - # now start filling in command line options - if self._options: - for option in self._configDef[section].keys(): - if self._options[section].has_key(option): - # the user has given an option - compoundOpt = "%s.%s" %(section,option) - if ( compoundOpt == \ - 'gridservice-mapred.final-server-params' \ - or compoundOpt == \ - 'gridservice-hdfs.final-server-params' \ - or compoundOpt == \ - 'gridservice-mapred.server-params' \ - or compoundOpt == \ - 'gridservice-hdfs.server-params' \ - or compoundOpt == \ - 'hod.client-params' ): - - if ( compoundOpt == \ - 'gridservice-mapred.final-server-params' \ - or compoundOpt == \ - 'gridservice-hdfs.final-server-params' ): - overwrite = False - else: overwrite = True - - # Append to the current list of values in self._dict - if not self._dict[section].has_key(option): - self._dict[section][option] = "" - dictOpts = 
reKeyValList.split(self._dict[section][option]) - dictOptsKeyVals = {} - for opt in dictOpts: - if opt != '': - # when dict _has_ params from hodrc - if reKeyVal.search(opt): - (key, val) = reKeyVal.split(opt,1) - # we only consider the first '=' for splitting - # we do this to support passing params like - # mapred.child.java.opts=-Djava.library.path=some_dir - # Even in case of an invalid error like unescaped '=', - # we don't want to fail here itself. We leave such errors - # to be caught during validation which happens after this - dictOptsKeyVals[key] = val - else: - # this means an invalid option. Leaving it - #for config.verify to catch - dictOptsKeyVals[opt] = None - - cmdLineOpts = reKeyValList.split(self._options[section][option]) - - for opt in cmdLineOpts: - if reKeyVal.search(opt): - # Same as for hodrc options. only consider - # the first = - ( key, val ) = reKeyVal.split(opt,1) - else: - key = opt - val = None - # whatever is given at cmdline overrides - # what is given in hodrc only for non-final params - if dictOptsKeyVals.has_key(key): - if overwrite: - dictOptsKeyVals[key] = val - else: dictOptsKeyVals[key] = val - - self._dict[section][option] = "" - for key in dictOptsKeyVals: - if self._dict[section][option] == "": - if dictOptsKeyVals[key]: - self._dict[section][option] = key + "=" + \ - dictOptsKeyVals[key] - else: #invalid option. let config.verify catch - self._dict[section][option] = key - else: - if dictOptsKeyVals[key]: - self._dict[section][option] = \ - self._dict[section][option] + "," + key + \ - "=" + dictOptsKeyVals[key] - else: #invalid option. let config.verify catch - self._dict[section][option] = \ - self._dict[section][option] + "," + key - - else: - # for rest of the options, that don't need - # appending business. 
- # options = cmdline opts + defaults - # dict = hodrc opts only - # only non default opts can overwrite any opt - # currently in dict - if not self._dict[section].has_key(option): - # options not mentioned in hodrc - self._dict[section][option] = \ - self._options[section][option] - elif self._configDef[section][option]['default'] != \ - self._options[section][option]: - # option mentioned in hodrc but user has given a - # non-default option - self._dict[section][option] = \ - self._options[section][option] - - ## UNUSED METHOD - ## zim: is too :) - def __check_perms(self): - perms = None - if self._options: - try: - perms = get_perms(self.configFile) - except OSError, data: - self._options.print_help() - raise Exception("*** could not find config file: %s" % data) - sys.exit(1) - else: - perms = get_perms(self.configFile) - - if perms != requiredPerms: - error = "*** '%s' has invalid permission: %s should be %s\n" % \ - (self.configFile, perms, requiredPerms) - raise Exception( error) - sys.exit(1) - - def replace_escape_seqs(self): - """ replace any escaped characters """ - replace_escapes(self) - -class formatter(IndentedHelpFormatter): - def format_option_strings(self, option): - """Return a comma-separated list of option strings & metavariables.""" - if option.takes_value(): - metavar = option.metavar or option.dest.upper() - short_opts = [sopt - for sopt in option._short_opts] - long_opts = [self._long_opt_fmt % (lopt, metavar) - for lopt in option._long_opts] - else: - short_opts = option._short_opts - long_opts = option._long_opts - - if self.short_first: - opts = short_opts + long_opts - else: - opts = long_opts + short_opts - - return ", ".join(opts) - -class options(OptionParser, baseConfig): - - def __init__(self, optionDef, usage, version, originalDir=None, - withConfig=False, defaultConfig=None, defaultLocation=None, - name=None): - """Constructs and options object. 
- - optionDef - definition object - usage - usage statement - version - version string - withConfig - used in conjunction with a configuration file - defaultConfig - default configuration file - - """ - OptionParser.__init__(self, usage=usage) - baseConfig.__init__(self, optionDef, originalDir) - - self.formatter = formatter(4, max_help_position=100, width=180, - short_first=1) - - self.__name = name - self.__version = version - self.__withConfig = withConfig - self.__defaultConfig = defaultConfig - self.__defaultLoc = defaultLocation - self.args = [] - self.__optionList = [] - self.__compoundOpts = [] - self.__shortMap = {} - self.__alphaString = 'abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVXYZ1234567890' - self.__alpha = [] - self.__parsedOptions = {} - self.__reserved = [ 'h' ] - - self.__orig_grps = [] - self.__orig_grp_lists = {} - self.__orig_option_list = [] - - self.__display_grps = [] - self.__display_grp_lists = {} - self.__display_option_list = [] - - self.config = None - - if self.__withConfig: - self.__reserved.append('c') - self.__reserved.append('v') - - self.__gen_alpha() - - # build self.__optionList, so it contains all the options that are - # possible. the list elements are of the form section.option - for section in self._mySections: - if self.__withConfig and section == 'config': - raise Exception( - "withConfig set 'config' cannot be used as a section name") - for option in self._configDef[section].keys(): - if '.' 
in option: - raise Exception("Options cannot contain: '.'") - elif self.__withConfig and option == 'config': - raise Exception( - "With config set, option config is not allowed.") - elif self.__withConfig and option == 'verbose-help': - raise Exception( - "With config set, option verbose-help is not allowed.") - self.__optionList.append(self.__splice_compound(section, - option)) - self.__build_short_map() - self.__add_options() - self.__init_display_options() - - (self.__parsedOptions, self.args) = self.parse_args() - - # Now process the positional arguments only for the client side - if self.__name == 'hod': - - hodhelp = hodHelp() - - _operation = getattr(self.__parsedOptions,'hod.operation') - _script = getattr(self.__parsedOptions, 'hod.script') - nArgs = self.args.__len__() - if _operation: - # -o option is given - if nArgs != 0: - self.error('invalid syntax : command and operation(-o) cannot coexist') - elif nArgs == 0 and _script: - # for a script option, without subcommand: hod -s script ... - pass - elif nArgs == 0: - print "Usage: ",hodhelp.help() - sys.exit(0) - else: - # subcommand is given - cmdstr = self.args[0] # the subcommand itself - cmdlist = hodhelp.ops - if cmdstr not in cmdlist: - print "Usage: ", hodhelp.help() - sys.exit(2) - - numNodes = None - clusterDir = None - # Check which subcommand. cmdstr = subcommand itself now. 
- if cmdstr == "allocate": - clusterDir = getattr(self.__parsedOptions, 'hod.clusterdir') - numNodes = getattr(self.__parsedOptions, 'hod.nodecount') - - if not clusterDir or not numNodes: - print hodhelp.usage(cmdstr) - sys.exit(3) - - cmdstr = cmdstr + ' ' + clusterDir + ' ' + numNodes - - setattr(self.__parsedOptions,'hod.operation', cmdstr) - - elif cmdstr == "deallocate" or cmdstr == "info": - clusterDir = getattr(self.__parsedOptions, 'hod.clusterdir') - - if not clusterDir: - print hodhelp.usage(cmdstr) - sys.exit(3) - - cmdstr = cmdstr + ' ' + clusterDir - setattr(self.__parsedOptions,'hod.operation', cmdstr) - - elif cmdstr == "list": - setattr(self.__parsedOptions,'hod.operation', cmdstr) - pass - - elif cmdstr == "script": - clusterDir = getattr(self.__parsedOptions, 'hod.clusterdir') - numNodes = getattr(self.__parsedOptions, 'hod.nodecount') - originalDir = getattr(self.__parsedOptions, 'hod.original-dir') - - if originalDir and clusterDir: - self.remove_exit_code_file(originalDir, clusterDir) - - if not _script or not clusterDir or not numNodes: - print hodhelp.usage(cmdstr) - sys.exit(3) - pass - - elif cmdstr == "help": - if nArgs == 1: - self.print_help() - sys.exit(0) - elif nArgs != 2: - self.print_help() - sys.exit(3) - elif self.args[1] == 'options': - self.print_options() - sys.exit(0) - cmdstr = cmdstr + ' ' + self.args[1] - setattr(self.__parsedOptions,'hod.operation', cmdstr) - - # end of processing for arguments on the client side - - if self.__withConfig: - self.config = self.__parsedOptions.config - if not self.config: - self.error("configuration file must be specified") - if not os.path.isabs(self.config): - # A relative path. 
Append the original directory which would be the - # current directory at the time of launch - try: - origDir = getattr(self.__parsedOptions, 'hod.original-dir') - if origDir is not None: - self.config = os.path.join(origDir, self.config) - self.__parsedOptions.config = self.config - except AttributeError, e: - self.error("hod.original-dir is not defined.\ - Cannot get current directory") - if not os.path.exists(self.config): - if self.__defaultLoc and not re.search("/", self.config): - self.__parsedOptions.config = os.path.join( - self.__defaultLoc, self.config) - self.__build_dict() - - def norm_cluster_dir(self, orig_dir, directory): - directory = os.path.expanduser(directory) - if not os.path.isabs(directory): - directory = os.path.join(orig_dir, directory) - directory = os.path.abspath(directory) - - return directory - - def remove_exit_code_file(self, orig_dir, dir): - try: - dir = self.norm_cluster_dir(orig_dir, dir) - if os.path.exists(dir): - exit_code_file = os.path.join(dir, "script.exitcode") - if os.path.exists(exit_code_file): - os.remove(exit_code_file) - except: - print >>sys.stderr, "Could not remove the script.exitcode file." 
- - def __init_display_options(self): - self.__orig_option_list = self.option_list[:] - optionListTitleMap = {} - for option in self.option_list: - optionListTitleMap[option._long_opts[0]] = option - - self.__orig_grps = self.option_groups[:] - for group in self.option_groups: - self.__orig_grp_lists[group.title] = group.option_list[:] - - groupTitleMap = {} - optionTitleMap = {} - for group in self.option_groups: - groupTitleMap[group.title] = group - optionTitleMap[group.title] = {} - for option in group.option_list: - (sectionName, optionName) = \ - self.__split_compound(option._long_opts[0]) - optionTitleMap[group.title][optionName] = option - - for section in self._mySections: - for option in self._configDef[section]: - if self._configDef[section][option]['help']: - if groupTitleMap.has_key(section): - if not self.__display_grp_lists.has_key(section): - self.__display_grp_lists[section] = [] - self.__display_grp_lists[section].append( - optionTitleMap[section][option]) - - try: - self.__display_option_list.append( - optionListTitleMap["--" + self.__splice_compound( - section, option)]) - except KeyError: - pass - try: - self.__display_option_list.append(optionListTitleMap['--config']) - except KeyError: - pass - - self.__display_option_list.append(optionListTitleMap['--help']) - self.__display_option_list.append(optionListTitleMap['--verbose-help']) - self.__display_option_list.append(optionListTitleMap['--version']) - - self.__display_grps = self.option_groups[:] - for section in self._mySections: - if self.__display_grp_lists.has_key(section): - self.__orig_grp_lists[section] = \ - groupTitleMap[section].option_list - else: - try: - self.__display_grps.remove(groupTitleMap[section]) - except KeyError: - pass - - def __gen_alpha(self): - assignedOptions = [] - for section in self._configDef: - for option in self._configDef[section]: - if self._configDef[section][option]['short']: - assignedOptions.append( - self._configDef[section][option]['short']) - - for 
symbol in self.__alphaString: - if not symbol in assignedOptions: - self.__alpha.append(symbol) - - def __splice_compound(self, section, option): - return "%s.%s" % (section, option) - - def __split_compound(self, compound): - return compound.split('.') - - def __build_short_map(self): - """ build a short_map of parametername : short_option. This is done - only for those parameters that don't have short options already - defined in configDef. - If possible, the first letter in the option that is not already - used/reserved as a short option is allotted. Otherwise the first - letter in __alpha that isn't still used is allotted. - e.g. { 'hodring.java-home': 'T', 'resource_manager.batch-home': 'B' } - """ - - optionsKey = {} - for compound in self.__optionList: - (section, option) = self.__split_compound(compound) - if not optionsKey.has_key(section): - optionsKey[section] = [] - optionsKey[section].append(option) - - for section in self._configDef.sections(): - options = optionsKey[section] - options.sort() - for option in options: - if not self._configDef[section][option]['short']: - compound = self.__splice_compound(section, option) - shortOptions = self.__shortMap.values() - for i in range(0, len(option)): - letter = option[i] - letter = letter.lower() - if letter in self.__alpha: - if not letter in shortOptions and \ - not letter in self.__reserved: - self.__shortMap[compound] = letter - break - if not self.__shortMap.has_key(compound): - for i in range(0, len(self.__alpha)): - letter = self.__alpha[i] - if not letter in shortOptions and \ - not letter in self.__reserved: - self.__shortMap[compound] = letter - - def __add_option(self, config, compoundOpt, section, option, group=None): - addMethod = self.add_option - if group: addMethod=group.add_option - - self.__compoundOpts.append(compoundOpt) - - if compoundOpt == 'gridservice-mapred.final-server-params' or \ - compoundOpt == 'gridservice-hdfs.final-server-params' or \ - compoundOpt == 
'gridservice-mapred.server-params' or \ - compoundOpt == 'gridservice-hdfs.server-params' or \ - compoundOpt == 'hod.client-params': - _action = 'append' - elif config[section][option]['type'] == 'bool': - _action = 'store_true' - else: - _action = 'store' - - if self.__shortMap.has_key(compoundOpt): - addMethod("-" + self.__shortMap[compoundOpt], - "--" + compoundOpt, dest=compoundOpt, - action= _action, - metavar=config[section][option]['type'], - default=config[section][option]['default'], - help=config[section][option]['desc']) - else: - if config[section][option]['short']: - addMethod("-" + config[section][option]['short'], - "--" + compoundOpt, dest=compoundOpt, - action= _action, - metavar=config[section][option]['type'], - default=config[section][option]['default'], - help=config[section][option]['desc']) - else: - addMethod('', "--" + compoundOpt, dest=compoundOpt, - action= _action, - metavar=config[section][option]['type'], - default=config[section][option]['default'], - help=config[section][option]['desc']) - - def __add_options(self): - if self.__withConfig: - self.add_option("-c", "--config", dest='config', - action='store', default=self.__defaultConfig, - metavar='config_file', - help="Full path to configuration file.") - - self.add_option("", "--verbose-help", - action='help', default=None, - metavar='flag', - help="Display verbose help information.") - - self.add_option("-v", "--version", - action='version', default=None, - metavar='flag', - help="Display version information.") - - self.version = self.__version - - if len(self._mySections) > 1: - for section in self._mySections: - group = OptionGroup(self, section) - for option in self._configDef[section]: - compoundOpt = self.__splice_compound(section, option) - self.__add_option(self._configDef, compoundOpt, section, - option, group) - self.add_option_group(group) - else: - for section in self._mySections: - for option in self._configDef[section]: - compoundOpt = self.__splice_compound(section, 
option) - self.__add_option(self._configDef, compoundOpt, section, - option) - - def __build_dict(self): - if self.__withConfig: - self._dict['config'] = str(getattr(self.__parsedOptions, 'config')) - for compoundOption in dir(self.__parsedOptions): - if compoundOption in self.__compoundOpts: - (section, option) = self.__split_compound(compoundOption) - if not self._dict.has_key(section): - self._dict[section] = {} - - if getattr(self.__parsedOptions, compoundOption): - _attr = getattr(self.__parsedOptions, compoundOption) - # when we have multi-valued parameters passed separately - # from command line, python optparser pushes them into a - # list. So converting all such lists to strings - if type(_attr) == type([]): - import string - _attr = string.join(_attr,',') - self._dict[section][option] = _attr - - for section in self._configDef: - for option in self._configDef[section]: - if self._configDef[section][option]['type'] == 'bool': - compoundOption = self.__splice_compound(section, option) - if not self._dict.has_key(section): - self._dict[section] = {} - - if option not in self._dict[section]: - self._dict[section][option] = False - - def __set_display_groups(self): - if not '--verbose-help' in sys.argv: - self.option_groups = self.__display_grps - self.option_list = self.__display_option_list - for group in self.option_groups: - group.option_list = self.__display_grp_lists[group.title] - - def __unset_display_groups(self): - if not '--verbose-help' in sys.argv: - self.option_groups = self.__orig_grps - self.option_list = self.__orig_option_list - for group in self.option_groups: - group.option_list = self.__orig_grp_lists[group.title] - - def print_help(self, file=None): - self.__set_display_groups() - OptionParser.print_help(self, file) - self.__unset_display_groups() - - def print_options(self): - _usage = self.usage - self.set_usage('') - self.print_help() - self.set_usage(_usage) - - def verify(self): - return baseConfig.verify(self) - - def 
replace_escape_seqs(self): - replace_escapes(self) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/socketServers.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/socketServers.py deleted file mode 100644 index 72dbd69569..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/socketServers.py +++ /dev/null @@ -1,621 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# Various socket server and helper classes. 
-# -# -import os, sys, socket, threading, pprint, re, xmlrpclib, time - -from select import select -from SocketServer import ThreadingMixIn, ForkingMixIn -from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -from SimpleXMLRPCServer import SimpleXMLRPCRequestHandler, SimpleXMLRPCServer -from SimpleHTTPServer import SimpleHTTPRequestHandler -from random import Random -from urlparse import urlparse - -Fault = xmlrpclib.Fault - -from hodlib.Common.util import local_fqdn -from hodlib.Common.logger import hodDummyLogger - -class hodHTTPHandler(BaseHTTPRequestHandler): - port = -1 - - def __init__(self, request, client_address, server, registerService): - self.registerService = registerService - BaseHTTPRequestHandler.__init__(self, request, client_address, server) - - def log_message(self, *args): - """Forget logging for now.""" - - pass - - def do_GET(self): - self.fullUrl = "http://%s:%s%s" % (self.server.server_address[0], - self.server.server_address[1], - self.path) - - parsedUrl = urlparse(self.fullUrl) - self.writeHeaders() - self.writeData(parsedUrl) - - def w(self, string): - self.wfile.write("%s\n" % string) - - def writeHeaders(self): - self.send_response(200, 'OK') - self.send_header('Content-type', 'text/html') - self.end_headers() - - def sendWrongPage(self, userJob): - self.w('<font class="alert">') - if userJob == False: - self.w('invalid URL specified') - elif re.match("^\d+$", userJob): - self.w('invalid URL specified, job <b>%s</b> does not exist' % userJob) - elif re.match("^\w+$", userJob): - self.w('invalid URL specified, user <b>%s</b> does not exist' % userJob) - self.w('</font>') - - def getServiceHosts(self, serviceInfo): - hostInfo = { 'long' : {}, 'short' : {} } - for user in serviceInfo: - for job in serviceInfo[user]: - for host in serviceInfo[user][job]: - for serviceItem in serviceInfo[user][job][host]: - serviceName = serviceItem.keys() - serviceName = serviceName[0] - if isinstance(serviceItem[serviceName], str): - 
hostInfo['short'][self.getJobKey(user, job, host)] = True - hostInfo['long'][self.getJobKey(user, job, host)] = True - - return hostInfo - - def getJobInfo(self, job, serviceInfo): - jobInfo = {} - - for user in serviceInfo.keys(): - for someJob in serviceInfo[user].keys(): - if job == someJob: - jobInfo[user] = { job : serviceInfo[user][job] } - - return jobInfo - - def getJobKey(self, user, job, host): - return "%s-%s-%s" % (user, job, host) - - def writeData(self, parsedUrl): - options = parsedUrl[4] - serviceInfo = self.server.service.getServiceInfo() - users = serviceInfo.keys() - users.sort() - - self.w("<html>") - self.w("<body>") - self.w("<head>") - self.writeCSS() - self.w("</head>") - self.w('<font class="header2">HOD Service Registry Information</font>') - if serviceInfo == {}: - self.w('<br><br><font class="header"> No HOD clusters configured.</font>') - else: - if parsedUrl[2] == '/': - self.w(' <table class="main">') - count = 0 - for user in users: - self.writeUserData(user, options, serviceInfo, count) - count = count + 1 - elif parsedUrl[2][1:] in serviceInfo: - self.w(' <table class="main">') - self.writeUserData(parsedUrl[2][1:], options, serviceInfo, 0) - elif re.match("^\d+$", parsedUrl[2][1:]): - jobInfo = self.getJobInfo(parsedUrl[2][1:], serviceInfo) - if jobInfo.keys(): - self.w(' <table class="main">') - for user in jobInfo.keys(): - self.writeUserData(user, options, jobInfo, 0) - else: - self.sendWrongPage(parsedUrl[2][1:]) - self.w(' <table class="main">') - count = 0 - for user in users: - self.writeUserData(user, options, serviceInfo, count) - count = count + 1 - elif re.match("^\w+$", parsedUrl[2][1:]): - self.sendWrongPage(parsedUrl[2][1:]) - self.w(' <table class="main">') - count = 0 - for user in users: - self.writeUserData(user, options, serviceInfo, count) - count = count + 1 - else: - self.sendWrongPage(False) - self.w(' <table class="main">') - count = 0 - for user in users: - self.writeUserData(user, options, serviceInfo, 
count) - count = count + 1 - - self.w('</table>') - self.w("</pre>") - self.w("</body>") - self.w("</html>") - - def writeCSS(self): - self.w('<style type="text/css">') - - self.w('table.main { border: 0px; padding: 1; background-color: #E1ECE0; width: 70%; margin: 10; }') - self.w('table.sub1 { background-color: #F1F1F1; padding: 0; }') - self.w('table.sub2 { background-color: #FFFFFF; padding: 0; }') - self.w('table.sub3 { border: 1px solid #EEEEEE; background-color: #FFFFFF; padding: 0; }') - self.w('td.header { border-bottom: 1px solid #CCCCCC; padding: 2;}') - self.w('td.service1 { border: 0px; background-color: #FFFFFF; padding: 2; width: 10%}') - self.w('td.service2 { border: 0px; background-color: #FFFFFF; padding: 2; width: 90%}') - self.w('td { vertical-align: top; padding: 0; }') - self.w('td.noborder { border-style: none; border-collapse: collapse; }') - self.w('tr.colored { background-color: #F1F1F1; }') - self.w('font { font-family: Helvetica, Arial, sans-serif; font-size: 10pt; color: #666666; }') - self.w('font.header { font-family: Helvetica, Arial, sans-serif; font-size: 10pt; color: #333333; font-style: bold }') - self.w('font.header2 { font-family: Helvetica, Arial, sans-serif; font-size: 16pt; color: #333333; }') - self.w('font.sml { font-family: Helvetica, Arial, sans-serif; font-size: 8pt; color: #666666; }') - self.w('font.alert { font-family: Helvetica, Arial, sans-serif; font-size: 9pt; color: #FF7A22; }') - self.w('a { font-family: Helvetica, Arial, sans-serif; text-decoration:none; font-size: 10pt; color: #111111; }') - self.w('a:visited { font-family: Helvetica, Arial, sans-serif; color:#2D4628; text-decoration:none; font-size: 10pt; }') - self.w('a:hover { font-family: Helvetica, Arial, sans-serif; color:#00A033; text-decoration:none; font-size: 10pt; }') - self.w('a.small { font-family: Helvetica, Arial, sans-serif; text-decoration:none; font-size: 8pt }') - self.w('a.small:hover { color:#822499; text-decoration:none; font-size: 8pt 
}') - - self.w("</style>") - - def writeUserData(self, user, options, serviceInfo, count): - hostInfo = self.getServiceHosts(serviceInfo) - hostKey = 'short' - if options == 'display=long': - hostKey = 'long' - - if count == 0: - self.w('<tr>') - self.w('<td class="header" colspan="2">') - self.w('<font class="header">Active Users</font>') - self.w('</td>') - self.w('</tr>') - self.w('<tr>') - self.w('<td><font>%s</font></td>' % user) - self.w('<td>') - jobIDs = serviceInfo[user].keys() - jobIDs.sort() - for jobID in jobIDs: - self.w('<table class="sub1" width="100%">') - if count == 0: - self.w('<tr>') - self.w('<td class="header" colspan="2">') - self.w('<font class="header">PBS Job Identifiers</font>') - self.w('</td>') - self.w('</tr>') - self.w('<tr>') - self.w('<td><font>%s</font></td>' % jobID) - self.w('<td>') - hosts = serviceInfo[user][jobID].keys() - hosts.sort() - for host in hosts: - if hostInfo[hostKey].has_key(self.getJobKey(user, jobID, host)): - self.w('<table class="sub2" width="100%">') - if count == 0: - self.w('<tr>') - self.w('<td class="header" colspan="2">') - self.w('<font class="header">Hosts Running Services</font>') - self.w('</td>') - self.w('</tr>') - self.w('<tr>') - self.w('<td><font>%s</font></td>' % host) - self.w('<td>') - self.w('<table class="sub3" width="100%">') - self.w('<tr>') - self.w('<td colspan="2">') - self.w('<font class="header">Service Information</font>') - self.w('</td>') - self.w('</tr>') - for serviceItem in serviceInfo[user][jobID][host]: - serviceName = serviceItem.keys() - serviceName = serviceName[0] - if isinstance(serviceItem[serviceName], dict) and \ - options == 'display=long': - self.w('<tr class="colored">') - self.w('<td><font>%s</font></td>' % serviceName) - self.w('<td>') - self.w('<table width="100%">') - for key in serviceItem[serviceName]: - self.w('<tr>') - self.w('<td class="service1"><font>%s</font></td>' % key) - self.w('<td class="service2"><font>%s</font></td>' % 
serviceItem[serviceName][key]) - self.w('</tr>') - self.w('</table>') - self.w('</td>') - self.w('</tr>') - elif isinstance(serviceItem[serviceName], str): - self.w('<tr class="colored">') - self.w('<td><font class="service1">%s</font></td>' % serviceName) - self.w('<td>') - (host, port) = serviceItem[serviceName].split(':') - hostnameInfo = socket.gethostbyname_ex(host) - if serviceName.startswith('mapred'): - self.w('<a href="http://%s:%s">Hadoop Job Tracker</a>' % (hostnameInfo[0], port)) - elif serviceName.startswith('hdfs'): - self.w('<a href="http://%s:%s">HDFS Name Node</a> ' % (hostnameInfo[0], port)) - else: - self.w('<font class="service2">%s</font>' % serviceItem[serviceName]) - self.w('</td>') - self.w('</tr>') - self.w('</table>') - self.w('</td>') - self.w('</tr>') - self.w('</table>') - count = count + 1 - self.w('</td>') - self.w('</tr>') - self.w('</table>') - count = count + 1 - self.w('</td>') - self.w('</tr>') -# self.w("<pre>") -# self.w(pprint.pformat(serviceInfo)) -# self.w("</pre>") - -class baseSocketServer: - def __init__(self, host, ports): - self.host = host - self.ports = ports - self.__stopForever = threading.Event() - self.__stopForever.clear() - self.__run = threading.Event() - self.__run.set() - self.server_address = () - self.mThread = None - - def server_bind(self): - """server_bind() method binds to a random range of ports.""" - - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - - if len(self.ports) > 1: - randomPort = Random(os.getpid()) - portSequence = range(self.ports[0], self.ports[1]) - - maxTryCount = abs(self.ports[0] - self.ports[1]) - tryCount = 0 - while True: - somePort = randomPort.choice(portSequence) - self.server_address = (self.host, somePort) - try: - self.socket.bind(self.server_address) - except socket.gaierror, errData: - raise socket.gaierror, errData - except: - tryCount = tryCount + 1 - if tryCount > maxTryCount: - bindError = "bind failure for port range %s:%d" % ( - self.ports) - - 
raise socket.error, bindError - else: - break - else: - self.server_address = (self.host, int(self.ports[0])) - self.socket.bind(self.server_address) - - if self.host == '': - self.server_address = (local_fqdn(), self.server_address[1]) - - def _serve_forever(self): - """Replacement for serve_forever loop. - - All baseSocketServers run within a master thread; that thread - imitates serve_forever, but checks an event (self.__stopForever) - before processing new connections. - """ - - while not self.__stopForever.isSet(): - (rlist, wlist, xlist) = select([self.socket], [], [], - 1) - - if (len(rlist) > 0 and self.socket == rlist[0]): - self.handle_request() - - while not self.__run.isSet(): - if self.__stopForever.isSet(): - break - time.sleep(1) - - self.server_close() - - return True - - def serve_forever(self): - """Handle requests until stopForever event flag indicates stop.""" - - self.mThread = threading.Thread(name="baseSocketServer", - target=self._serve_forever) - self.mThread.start() - - return self.mThread - - def pause(self): - """Temporarily stop servicing requests.""" - - self.__run.clear() - - def cont(self): - """Resume servicing requests.""" - - self.__run.set() - - def stop(self): - """Set the stopForever flag to tell serve_forever() to exit.""" - - self.__stopForever.set() - if self.mThread: self.mThread.join() - return True - - def is_alive(self): - if self.mThread != None: - return self.mThread.isAlive() - else: - return False - -class threadedHTTPServer(baseSocketServer, ThreadingMixIn, HTTPServer): - def __init__(self, host, ports): - baseSocketServer.__init__(self, host, ports) - HTTPServer.__init__(self, self.server_address, SimpleHTTPRequestHandler) - -class forkingHTTPServer(baseSocketServer, ForkingMixIn, HTTPServer): - def __init__(self, host, ports): - baseSocketServer.__init__(self, host, ports) - HTTPServer.__init__(self, self.server_address, SimpleHTTPRequestHandler) - -class hodHTTPServer(baseSocketServer, ThreadingMixIn, 
HTTPServer): - service = None - def __init__(self, host, ports, serviceobj = None): - self.service = serviceobj - baseSocketServer.__init__(self, host, ports) - HTTPServer.__init__(self, self.server_address, hodHTTPHandler) - - def finish_request(self, request, client_address): - self.RequestHandlerClass(request, client_address, self, self.service) - -class hodXMLRPCServer(baseSocketServer, ThreadingMixIn, SimpleXMLRPCServer): - def __init__(self, host, ports, - requestHandler=SimpleXMLRPCRequestHandler, - logRequests=False, allow_none=False, encoding=None): - baseSocketServer.__init__(self, host, ports) - SimpleXMLRPCServer.__init__(self, self.server_address, requestHandler, - logRequests) - - self.register_function(self.stop, 'stop') - -try: - from twisted.web import server, xmlrpc - from twisted.internet import reactor, defer - from twisted.internet.threads import deferToThread - from twisted.python import log - - class twistedXMLRPC(xmlrpc.XMLRPC): - def __init__(self, logger): - xmlrpc.XMLRPC.__init__(self) - - self.__XRMethods = {} - self.__numRequests = 0 - self.__logger = logger - self.__pause = False - - def render(self, request): - request.content.seek(0, 0) - args, functionPath = xmlrpclib.loads(request.content.read()) - try: - function = self._getFunction(functionPath) - except Fault, f: - self._cbRender(f, request) - else: - request.setHeader("content-type", "text/xml") - defer.maybeDeferred(function, *args).addErrback( - self._ebRender).addCallback(self._cbRender, request) - - return server.NOT_DONE_YET - - def _cbRender(self, result, request): - if isinstance(result, xmlrpc.Handler): - result = result.result - if not isinstance(result, Fault): - result = (result,) - try: - s = xmlrpclib.dumps(result, methodresponse=1) - except: - f = Fault(self.FAILURE, "can't serialize output") - s = xmlrpclib.dumps(f, methodresponse=1) - request.setHeader("content-length", str(len(s))) - request.write(s) - request.finish() - - def _ebRender(self, failure): - if 
isinstance(failure.value, Fault): - return failure.value - log.err(failure) - return Fault(self.FAILURE, "error") - - def _getFunction(self, methodName): - while self.__pause: - time.sleep(1) - - self.__numRequests = self.__numRequests + 1 - function = None - try: - def defer_function(*args): - return deferToThread(self.__XRMethods[methodName], - *args) - function = defer_function - self.__logger.info( - "[%s] processing defered XML-RPC call to: %s ..." % - (self.__numRequests, methodName)) - except KeyError: - self.__logger.warn( - "[%s] fault %s on XML-RPC call to %s, method not found." % ( - self.__numRequests, self.NOT_FOUND, methodName)) - raise xmlrpc.NoSuchFunction(self.NOT_FOUND, - "method %s not found" % methodName) - - return function - - def register_function(self, functionRef, methodName): - self.__XRMethods[methodName] = functionRef - - def list_methods(self): - return self.__XRMethods.keys() - - def num_requests(self): - return self.__numRequests - - def pause(self): - self.__pause = True - - def cont(self): - self.__pause = False - - class twistedXMLRPCServer: - def __init__(self, host, ports, logger=None, threadPoolSize=100): - self.__host = host - self.__ports = ports - - if logger == None: - logger = hodDummyLogger() - - self.__logger = logger - - self.server_address = ['', ''] - reactor.suggestThreadPoolSize(threadPoolSize) - - self.__stopForever = threading.Event() - self.__stopForever.clear() - self.__mThread = None - - self.__xmlrpc = twistedXMLRPC(self.__logger) - - def _serve_forever(self): - if len(self.__ports) > 1: - randomPort = Random(os.getpid()) - portSequence = range(self.__ports[0], self.__ports[1]) - - maxTryCount = abs(self.__ports[0] - self.__ports[1]) - tryCount = 0 - while True: - somePort = randomPort.choice(portSequence) - self.server_address = (self.__host, int(somePort)) - if self.__host == '': - self.server_address = (local_fqdn(), self.server_address[1]) - try: - reactor.listenTCP(int(somePort), server.Site( - 
self.__xmlrpc), interface=self.__host) - reactor.run(installSignalHandlers=0) - except: - self.__logger.debug("Failed to bind to: %s:%s." % ( - self.__host, somePort)) - tryCount = tryCount + 1 - if tryCount > maxTryCount: - self.__logger.warn("Failed to bind to: %s:%s" % ( - self.__host, self.__ports)) - sys.exit(1) - else: - break - else: - try: - self.server_address = (self.__host, int(self.__ports[0])) - if self.__host == '': - self.server_address = (local_fqdn(), self.server_address[1]) - reactor.listenTCP(int(self.__ports[0]), server.Site(self.__xmlrpc), - interface=self.__host) - reactor.run(installSignalHandlers=0) - except: - self.__logger.warn("Failed to bind to: %s:%s."% ( - self.__host, self.__ports[0])) - sys.exit(1) - - def serve_forever(self): - """Handle requests until stopForever event flag indicates stop.""" - - self.__mThread = threading.Thread(name="XRServer", - target=self._serve_forever) - self.__mThread.start() - - if not self.__mThread.isAlive(): - raise Exception("Twisted XMLRPC server thread dead.") - - def register_function(self, functionRef, methodName): - self.__xmlrpc.register_function(functionRef, methodName) - - def register_introspection_functions(self): - pass - - def register_instance(self, instance): - for method in dir(instance): - if not method.startswith('_'): - self.register_function(getattr(instance, method), method) - - def pause(self): - self.__xmlrpc.pause() - - def cont(self): - self.__xmlrpc.cont() - - def stop(self): - def stop_thread(): - time.sleep(2) - reactor.stop() - - self.__stopForever.set() - - stopThread = threading.Thread(name='XRStop', target=stop_thread) - stopThread.start() - - return True - - def is_alive(self): - status = False - if reactor.running == 1: - status = True - - return status - - def status(self): - """Return status information on running XMLRPC Server.""" - stat = { 'XR server address' : self.server_address, - 'XR methods' : self.system_listMethods(), - 'XR server alive' : self.is_alive(), - 
'XR requests processed' : self.__xmlrpc.num_requests(), - 'XR server stop flag' : self.__stopForever.isSet()} - return(stat) - - def system_listMethods(self): - return self.__xmlrpc.list_methods() - - def get_server_address(self): - waitCount = 0 - while self.server_address == '': - if waitCount == 9: - break - time.sleep(1) - waitCount = waitCount + 1 - - return self.server_address -except ImportError: - pass diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/tcp.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/tcp.py deleted file mode 100644 index a118a67f9c..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/tcp.py +++ /dev/null @@ -1,176 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# $Id:tcp.py 6172 2007-05-22 20:26:54Z zim $ -# -#------------------------------------------------------------------------------ - -""" TCP related classes. 
""" - -import socket, re, string -reAddress = re.compile(":") -reMayBeIp = re.compile("^\d+\.\d+\.\d+\.\d+$") -reValidPort = re.compile("^\d+$") - -class Error(Exception): - def __init__(self, msg=''): - self.message = msg - Exception.__init__(self, msg) - - def __repr__(self): - return self.message - -class tcpError(Error): - def __init__(self, message): - Error.__init__(self, message) - -class tcpSocket: - def __init__(self, address, timeout=30, autoflush=0): - """Constructs a tcpSocket object. - - address - standard tcp address (HOST:PORT) - timeout - socket timeout""" - - self.address = address - self.__autoFlush = autoflush - self.__remoteSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.__remoteSock.settimeout(timeout) - self.host = None - self.port = None - splitAddress = address - if isinstance(address, (tuple, list)): - self.host = address[0] - self.port = int(address[1]) - else: - splitAddress = get_address_tuple(address) - if not splitAddress[0]: - self.host = 'localhost' - else: - self.host = splitAddress[0] - - self.port = int(splitAddress[1]) - - self.__fileObjectOut = '' - self.__fileObjectIn = '' - - def __repr__(self): - return self.address - - def __iter__(self): - return self - - def next(self): - sockLine = self.read() - if not sockLine: - raise StopIteration - - return sockLine - - def open(self): - """Attempts to open a socket to the specified address.""" - - socketAddress = (self.host, self.port) - - try: - self.__remoteSock.connect(socketAddress) - if self.__autoFlush: - self.__fileObjectOut = self.__remoteSock.makefile('wb', 0) - else: - self.__fileObjectOut = self.__remoteSock.makefile('wb') - - self.__fileObjectIn = self.__remoteSock.makefile('rb', 0) - except: - raise tcpError, "connection failure: %s" % self.address - - def flush(self): - """Flushes write buffer.""" - self.__fileObjectOut.flush() - - def close(self): - """Attempts to close and open socket connection""" - - try: - self.__remoteSock.close() - 
self.__fileObjectOut.close() - self.__fileObjectIn.close() - except socket.error, exceptionObject: - exceptionMessage = "close failure %s %s" % (self.address, - exceptionObject.__str__()) - raise tcpError, exceptionMessage - - def verify(self): - """Verifies that a given IP address/host and port are valid. This - method will not attempt to open a socket to the specified address. - """ - - isValidAddress = False - if reMayBeIp.match(self.host): - if check_ip_address(self.host): - if reValidPort.match(str(self.port)): - isValidAddress = True - else: - if reValidPort.match(str(self.port)): - isValidAddress = True - - return(isValidAddress) - - def read(self): - """Reads a line off of the active socket.""" - - return self.__fileObjectIn.readline() - - def write(self, string): - """Writes a string to the active socket.""" - - print >> self.__fileObjectOut, string - -def check_net_address(address): - valid = True - pieces = string.split(address, '.') - if len(pieces) != 4: - valid = False - else: - for piece in pieces: - if int(piece) < 0 or int(piece) > 255: - valid = False - - return valid - -def check_ip_address(address): - valid = True - pieces = string.split(address, '.') - if len(pieces) != 4: - valid = False - else: - if int(pieces[0]) < 1 or int(pieces[0]) > 254: - valid = False - for i in range(1,4): - if int(pieces[i]) < 0 or int(pieces[i]) > 255: - valid = False - - return valid - -def get_address_tuple(address): - """ Returns an address tuple for TCP address. 
- - address - TCP address of the form host:port - - returns address tuple (host, port) - """ - - addressList = reAddress.split(address) - addressTuple = (addressList[0], int(addressList[1])) - - return addressTuple diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/threads.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/threads.py deleted file mode 100644 index 0d19042074..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/threads.py +++ /dev/null @@ -1,389 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import threading, time, os, sys, pprint - -from popen2 import Popen4, Popen3, MAXFD -from signal import SIGTERM, SIGKILL - -class baseThread(threading.Thread): - """Base CAM threading class. 
The run method should be overridden.""" - - def __init__(self, name): - threading.Thread.__init__(self, name=name) - self.stopFlag = threading.Event() - self.stopFlag.clear() - self.running = threading.Event() - self.running.set() - self.isFinished = threading.Event() - self.isFinished.clear() - - def join(self, timeout=None): - self.stopFlag.set() - threading.Thread.join(self, timeout) - - def pause(self): - """Pause thread.""" - - self.running.clear() - - def cont(self): - """Resume thread operation.""" - - self.running.set() - -class simpleCommand(baseThread): - """Command execution object. Command output and exit status are captured. - - Public class attributes: - - cmdString - command to be executed - outputBuffer - command output, stdout + stderr - status - exit status, as returned by wait - - stdin - standard input for command - stdout - standard output of command when buffer == False - stderr - standard error of command when mode == 3 and buffer == False - - """ - - def __init__(self, name, cmdString, env=os.environ, mode=4, buffer=True, - wait=True, chdir=None): - """Class initialization. - - name - thread name to use when running the command - cmdString - command string to execute - inputString - string to print to command's stdin - env - shell environment dictionary - mode - 3 for popen3 and 4 for popen4 - buffer - out put to be retrieved with output() method - wait - return immediately after start() is called and output - command results as they come to stdout""" - - baseThread.__init__(self, name=name) - - self.cmdString = cmdString - self.__mode = mode - self.__buffer = buffer - self.__wait = wait - self.__chdir = chdir - self.__outputBuffer = [] - self.__status = None - self.__pid = None - self.__isFinished = threading.Event() - self.__isFinished.clear() - - self.stdin = None - self.stdout = None - self.stderr = None - - self.__env = env - - def run(self): - """ Overridden run method. Most of the work happens here. 
start() - should be called in place of this method.""" - - oldDir = None - if self.__chdir: - if os.path.exists(self.__chdir): - oldDir = os.getcwd() - os.chdir(self.__chdir) - else: - raise Exception( - "simpleCommand: invalid chdir specified: %s" % - self.__chdir) - - cmd = None - if self.__mode == 3: - cmd = _Popen3Env(self.cmdString, env=self.__env) - else: - cmd = _Popen4Env(self.cmdString, env=self.__env) - self.__pid = cmd.pid - - self.stdin = cmd.tochild - - if self.__mode == 3: - self.stderr = cmd.childerr - - while cmd.fromchild == None: - time.sleep(1) - - if self.__buffer == True: - output = cmd.fromchild.readline() - while output != '': - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - self.__outputBuffer.append(output) - output = cmd.fromchild.readline() - - elif self.__wait == False: - output = cmd.fromchild.readline() - while output != '': - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - print output, - if self.stopFlag.isSet(): - break - output = cmd.fromchild.readline() - else: - self.stdout = cmd.fromchild - - self.__status = cmd.poll() - while self.__status == -1: - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - - self.__status = cmd.poll() - time.sleep(1) - - if oldDir: - os.chdir(oldDir) - - self.__isFinished.set() - - sys.exit(0) - - def getPid(self): - """return pid of the launches process""" - return self.__pid - - def output(self): - return self.__outputBuffer[:] - - def wait(self): - """Wait blocking until command execution completes.""" - - self.__isFinished.wait() - - return os.WEXITSTATUS(self.__status) - - def is_running(self): - """Returns boolean, are we running?""" - - status = True - if self.__isFinished.isSet(): - status = False - - return status - - def exit_code(self): - """ Returns process exit code.""" - - if self.__status != None: - return os.WEXITSTATUS(self.__status) - else: - return None - - def 
exit_status_string(self): - """Return a string representation of the command's exit status.""" - - statusString = None - if self.__status: - exitStatus = os.WEXITSTATUS(self.__status) - exitSignal = os.WIFSIGNALED(self.__status) - coreDump = os.WCOREDUMP(self.__status) - - statusString = "exit code: %s | signal: %s | core %s" % \ - (exitStatus, exitSignal, coreDump) - - return(statusString) - - def stop(self): - """Stop the running command and join it's execution thread.""" - - self.join() - - def kill(self): - count = 0 - while self.is_running(): - try: - if count > 20: - os.kill(self.__pid, SIGKILL) - break - else: - os.kill(self.__pid, SIGTERM) - except: - break - - time.sleep(.1) - count = count + 1 - - self.stop() - -class _Popen3Env(Popen3): - def __init__(self, cmd, capturestderr=False, bufsize=-1, env=os.environ): - self._env = env - Popen3.__init__(self, cmd, capturestderr, bufsize) - - def _run_child(self, cmd): - if isinstance(cmd, basestring): - cmd = ['/bin/sh', '-c', cmd] - for i in xrange(3, MAXFD): - try: - os.close(i) - except OSError: - pass - - try: - os.execvpe(cmd[0], cmd, self._env) - finally: - os._exit(1) - -class _Popen4Env(_Popen3Env, Popen4): - childerr = None - - def __init__(self, cmd, bufsize=-1, env=os.environ): - self._env = env - Popen4.__init__(self, cmd, bufsize) - -class loop(baseThread): - """ A simple extension of the threading.Thread class which continuously - executes a block of code until join(). - """ - - def __init__(self, name, functionRef, functionArgs=None, sleep=1, wait=0, - offset=False): - """Initialize a loop object. 
- - name - thread name - functionRef - a function reference - functionArgs - function arguments in the form of a tuple, - sleep - time to wait between function execs - wait - time to wait before executing the first time - offset - set true to sleep as an offset of the start of the - last func exec instead of the end of the last func - exec - """ - - self.__functionRef = functionRef - self.__functionArgs = functionArgs - self.__sleep = sleep - self.__wait = wait - self.__offset = offset - - baseThread.__init__(self, name=name) - - def run(self): - """Do not call this directly. Call self.start().""" - - startTime = None - while not self.stopFlag.isSet(): - sleep = self.__sleep - if self.__wait > 0: - startWaitCount = 0 - while not self.stopFlag.isSet(): - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - time.sleep(0.5) - startWaitCount = startWaitCount + .5 - if startWaitCount >= self.__wait: - self.__wait = 0 - break - startTime = time.time() - - if not self.stopFlag.isSet(): - if self.running.isSet(): - if self.__functionArgs: - self.__functionRef(self.__functionArgs) - else: - self.__functionRef() - endTime = time.time() - - while not self.running.isSet(): - time.sleep(1) - - while not self.stopFlag.isSet(): - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - - currentTime = time.time() - if self.__offset: - elapsed = time.time() - startTime - else: - elapsed = time.time() - endTime - - if elapsed >= self.__sleep: - break - - time.sleep(0.5) - - self.isFinished.set() - - def set_sleep(self, sleep, wait=None, offset=None): - """Modify loop frequency paramaters. 
- - sleep - time to wait between function execs - wait - time to wait before executing the first time - offset - set true to sleep as an offset of the start of the - last func exec instead of the end of the last func - exec - """ - - self.__sleep = sleep - if wait != None: - self.__wait = wait - if offset != None: - self.__offset = offset - - def get_sleep(self): - """Get loop frequency paramaters. - Returns a dictionary with sleep, wait, offset. - """ - - return { - 'sleep' : self.__sleep, - 'wait' : self.__wait, - 'offset' : self.__offset, - } - -class func(baseThread): - """ A simple extension of the threading.Thread class which executes - a function in a separate thread. - """ - - def __init__(self, name, functionRef, functionArgs=None): - """Initialize a func object. - - name - thread name - functionRef - a function reference - functionArgs - function arguments in the form of a tuple, - """ - - self.__functionRef = functionRef - self.__functionArgs = functionArgs - - baseThread.__init__(self, name=name) - - def run(self): - """Do not call this directly. Call self.start().""" - - if not self.stopFlag.isSet(): - if self.running.isSet(): - if self.__functionArgs: - self.__functionRef(self.__functionArgs) - else: - self.__functionRef() - sys.exit(0) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/types.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/types.py deleted file mode 100644 index 9612ce4313..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/types.py +++ /dev/null @@ -1,1266 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# $Id:types.py 6172 2007-05-22 20:26:54Z zim $ -# -#------------------------------------------------------------------------------ - -""" Higher level data types and type related classes. - - Supported Types (Verification and Display): - - address - validates ip:port and host:port tcp addresses - ip_address - validates and IP address - net_address - validates an IP like address, ie netmask - hostname - validates a hostname with DNS - eaddress - validates a single email address or a comma - seperated list of email addresses - http_version - validates a value is a http version (1.0/1.1) - tcp_port - validates a value to be a valid tcp port (2-65535) - bool - validates value is (0, 1, true, false) / converts - true -> 1 and false -> 0 - directory - validates a values is a directory / resolves path to - absolute path - file - validates a value is a file / resolves path to absolute - path - float - validates a value is a float, converts string to float - pos_float - validates a value is a float and >= 0, converts string - to float - pos_num - same as pos_float - neg_float - validates a value is a float and < 0, converts string to - float - int - validates a value is an integer, converts string to - integer - pos_int - validates a value is an integer and >= 0, converts - string to integer - neg_int - validates a values is an integer and < 0, converts - striing to integer - freq - frequency, positive integer - size - validates a size in bytes, kb, mb, kb, and tb - (int > 0 post fixed with K, M, G, or T) also converts - value to integer bytes - range - numeric range, x-y 
normalized to a tuple, if a single - number is supplie a single element tuple is returned - timestamp - utc timestamp of the form YYYYMMDDHHMMSS - user_account - UNIX user account name - user_group - UNIX group name - string - arbitrarily long string - list - comma seperated list of strings of arbitrary length, - keyval - comma seperated list of key=value pairs, key does not - need to be unique. - uri - a uri """ - -import sys, os, socket, pwd, grp, stat, re, re, string, pprint, urlparse - -from tcp import tcpSocket, check_net_address, check_ip_address -from util import check_timestamp - -types = { 'directory' : { 'db' : 'string', - 'units' : None }, - - 'address' : { 'db' : 'string', - 'units' : None }, - - 'ip_address' : { 'db' : 'string', - 'units' : None }, - - 'net_address' : { 'db' : 'string', - 'units' : None }, - - 'bool' : { 'db' : 'bool', - 'units' : None }, - - 'int' : { 'db' : 'integer', - 'units' : None }, - - 'float' : { 'db' : 'float', - 'units' : None }, - - 'pos_int' : { 'db' : 'integer', - 'units' : None }, - - 'neg_int' : { 'db' : 'integer', - 'units' : None }, - - 'pos_num' : { 'db' : 'float', - 'units' : None }, - - 'pos_float' : { 'db' : 'float', - 'units' : None }, - - 'neg_float' : { 'db' : 'float', - 'units' : None }, - - 'string' : { 'db' : 'string', - 'units' : None }, - - 'list' : { 'db' : 'string', - 'units' : None }, - - 'file' : { 'db' : 'string', - 'units' : None }, - - 'size' : { 'db' : 'integer', - 'units' : 'bytes' }, - - 'freq' : { 'db' : 'integer', - 'units' : 'hz' }, - - 'eaddress' : { 'db' : 'string', - 'units' : None }, - - 'tcp_port' : { 'db' : 'integer', - 'units' : None }, - - 'http_version' : { 'db' : 'float', - 'units' : None }, - - 'range' : { 'db' : 'string', - 'units' : None }, - - 'hostname' : { 'db' : 'string', - 'units' : None }, - - 'user_account' : { 'db' : 'string', - 'units' : None }, - - 'user_group' : { 'db' : 'string', - 'units' : None }, - - 'timestamp' : { 'db' : 'timestamp', - 'units' : None }, - - 
'keyval' : { 'db' : 'string', - 'units' : None }, - - 'uri' : { 'db' : 'string', - 'units' : None }, - - '' : { 'db' : 'string', - 'units' : None }} - -dbTypes = { 'string' : { 'type' : 'varchar', - 'store' : 'type_strings_0', - 'table' : True }, - - 'integer' : { 'type' : 'bigint', - 'store' : 'integers', - 'table' : False }, - - 'float' : { 'type' : 'real', - 'store' : 'floats', - 'table' : False }, - - 'bool' : { 'type' : 'boolean', - 'store' : 'bools', - 'table' : False }, - - 'timestamp' : { 'type' : 'timestamp(0)', - 'store' : 'timestamps', - 'table' : False }} - -reSizeFormat = re.compile("^(\d+)(k|m|g|t|p|kb|mb|gb|tb|pb)$", flags=2) -reDash = re.compile("\s*-\s*") - -sizeFactors = { 'b' : 1, - 'bytes' : 1, - 'k' : 1024, - 'kb' : 1024, - 'm' : 1048576, - 'mb' : 1048576, - 'g' : 1073741824, - 'gb' : 1073741824, - 't' : 1099511627776, - 'tb' : 1099511627776, - 'p' : 1125899906842624, - 'pb' : 1125899906842624 } - -freqFactors = { 'hz' : 1, - 'khz' : 1000, - 'mhz' : 1000000, - 'ghz' : 1000000000, - 'thz' : 1000000000000, - 'phz' : 1000000000000000 } - -sizeMap = [ { 'factor' : sizeFactors['b'], - 'long' : 'byte', - 'short' : 'byte' }, - - { 'factor' : sizeFactors['k'], - 'long' : 'Kilobyte', - 'short' : 'KB' }, - - { 'factor' : sizeFactors['m'], - 'long' : 'Megabyte', - 'short' : 'MB' }, - - { 'factor' : sizeFactors['g'], - 'long' : 'Gigabyte', - 'short' : 'GB' }, - - { 'factor' : sizeFactors['t'], - 'long' : 'Terabyte', - 'short' : 'TB' }, - - { 'factor' : sizeFactors['p'], - 'long' : 'Petabyte', - 'short' : 'PB' } ] - -freqMap = [ { 'factor' : freqFactors['hz'], - 'long' : 'Hertz', - 'short' : 'Hz' }, - - { 'factor' : freqFactors['khz'], - 'long' : 'Kilohertz', - 'short' : 'KHz' }, - - { 'factor' : freqFactors['mhz'], - 'long' : 'Megahertz', - 'short' : 'MHz' }, - - { 'factor' : freqFactors['ghz'], - 'long' : 'Gigahertz', - 'short' : 'GHz' }, - - { 'factor' : freqFactors['thz'], - 'long' : 'Terahertz', - 'short' : 'THz' }, - - { 'factor' : freqFactors['phz'], 
- 'long' : 'Petahertz', - 'short' : 'PHz' } ] - -reListString = r"(?<!\\)," -reList = re.compile(reListString) - -reKeyVal = r"(?<!\\)=" -reKeyVal = re.compile(reKeyVal) - -class typeToString: - """Provides method for converting normalized types to strings.""" - def __init__(self): - self.toStringFunctions = {} - self.__build_to_string_functions() - - def __call__(self, type, value): - return self.toStringFunctions[type](value) - - def __build_to_string_functions(self): - functions = {} - for function in dir(self): - functions[function] = 1 - - for type in types.keys(): - # kinda bad, need to find out how to know the name of the class - # I'm in. But it works. - functionName = "_typeToString__tostring_%s" % type - if functions.has_key(functionName): - self.toStringFunctions[type] = getattr(self, functionName) - else: - if type == '': - self.toStringFunctions[type] = self.__tostring_nothing - else: - error = "To string function %s for type %s does not exist." \ - % (functionName, type) - raise Exception(error) - sys.exit(1) - - def __tostring(self, value): - return str(value) - - def __tostring_directory(self, value): - return self.__tostring(value) - - def __tostring_address(self, value): - return "%s:%s" % (value[0], value[1]) - - def __tostring_ip_address(self, value): - return self.__tostring(value) - - def __tostring_net_address(self, value): - return self.__tostring(value) - - def __tostring_bool(self, value): - if value == False: - return 'false' - elif value == True: - return 'true' - else: - return str(value) - - def __tostring_int(self, value): - return self.__tostring(value) - - def __tostring_float(self, value): - return self.__tostring(value) - - def __tostring_pos_int(self, value): - return self.__tostring(value) - - def __tostring_neg_int(self, value): - return self.__tostring(value) - - def __tostring_freq(self, value): - return self.__tostring(value) - - def __tostring_pos_float(self, value): - return self.__tostring(value) - - def 
__tostring_pos_num(self, value): - return self.__tostring(value) - - def __tostring_neg_float(self, value): - return self.__tostring(value) - - def __tostring_string(self, value): - return value - - def __tostring_keyval(self, value): - string = '"' # to protect from shell escapes - for key in value: - # for item in value[key]: - # string = "%s%s=%s," % (string, key, item) - # Quotes still cannot protect Double-slashes. - # Dealing with them separately - val = re.sub(r"\\\\",r"\\\\\\\\",value[key]) - - string = "%s%s=%s," % (string, key, val) - - return string[:-1] + '"' - - def __tostring_list(self, value): - string = '' - for item in value: - string = "%s%s," % (string, item) - - return string[:-1] - - def __tostring_file(self, value): - return self.__tostring(value) - - def __tostring_size(self, value): - return self.__tostring(value) - - def __tostring_eaddress(self, value): - return self.__tostring(value) - - def __tostring_tcp_port(self, value): - return self.__tostring(value) - - def __tostring_http_version(self, value): - return self.__tostring(value) - - def __tostring_range(self, value): - if len(value) < 2: - return value[0] - else: - return "%s-%s" % (value[0], value[1]) - - def __tostring_timestamp(self, value): - return self.__tostring(value) - - def __tostring_hostname(self, value): - return self.__tostring(value) - - def __tostring_user_account(self, value): - return self.__tostring(value) - - def __tostring_user_group(self, value): - return self.__tostring(value) - - def __tostring_uri(self, value): - return self.__tostring(value) - - def __tostring_nothing(self, value): - return value - -class typeValidator: - """Type validation class used to normalize values or validated - single/large sets of values by type.""" - - def __init__(self, originalDir=None): - self.verifyFunctions = {} - self.__build_verify_functions() - - self.validateList = [] - self.validatedInfo = [] - self.__originalDir = originalDir - - def __getattr__(self, attrname): - 
"""validateList = [ { 'func' : <bound method configValidator>, - 'name' : 'SA_COMMON.old_xml_dir', - 'value': 'var/data/old' }, - - { 'func' : <bound method configValidator>, - 'name' : 'SA_COMMON.log_level', - 'value': '4' } ] - - validatedInfo = [ { # name supplied to add() - 'name' : 'SA_COMMON.tmp_xml_dir', - - # is valid or not - 'isValid' : 1 - - # normalized value - 'normalized' : /var/data/tmp, - - # error string ? - 'errorData' : 0 }, - - { 'name' : 'SA_COMMON.new_xml_dir', - 'isValid' : 1 - 'normalized' : /var/data/new, - 'errorData' : 0 } ]""" - - if attrname == "validateList": - return self.validateList # list of items to be validated - elif attrname == "validatedInfo": - return self.validatedInfo # list of validation results - else: raise AttributeError, attrname - - def __build_verify_functions(self): - functions = {} - for function in dir(self): - functions[function] = 1 - - for type in types.keys(): - # kinda bad, need to find out how to know the name of the class - # I'm in. But it works. - functionName = "_typeValidator__verify_%s" % type - if functions.has_key(functionName): - self.verifyFunctions[type] = getattr(self, functionName) - else: - if type == '': - self.verifyFunctions[type] = self.__verify_nothing - else: - error = "Verify function %s for type %s does not exist." 
\ - % (functionName, type) - raise Exception(error) - sys.exit(1) - - def __get_value_info(self): - valueInfo = { 'isValid' : 0, 'normalized' : 0, 'errorData' : 0 } - - return valueInfo - - def __set_value_info(self, valueInfo, **valueData): - try: - valueInfo['normalized'] = valueData['normalized'] - valueInfo['isValid'] = 1 - except KeyError: - valueInfo['isValid'] = 0 - try: - valueInfo['errorData'] = valueData['errorData'] - except: - pass - - # start of 'private' verification methods, each one should correspond to a - # type string (see self.verify_config()) - def __verify_directory(self, type, value): - valueInfo = self.__get_value_info() - - if os.path.isdir(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_directory(self, value): - return self.__normalizedPath(value) - - def __verify_address(self, type, value): - valueInfo = self.__get_value_info() - - try: - socket = tcpSocket(value) - if socket.verify(): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - except: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_address(self, value): - return value.split(':') - - def __verify_ip_address(self, type, value): - valueInfo = self.__get_value_info() - - if check_ip_address(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __verify_net_address(self, type, value): - valueInfo = self.__get_value_info() - - if check_net_address(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __verify_bool(self, type, value): - valueInfo = self.__get_value_info() - - value = str(value) - if re.match("^false|0|f|no$", value, 2): - self.__set_value_info(valueInfo, 
normalized=False) - elif re.match("^true|1|t|yes$", value, 2): - self.__set_value_info(valueInfo, normalized=True) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_bool(self, value): - value = str(value) - norm = "" - if re.match("^false|0|f|no$", value, 2): - norm = False - elif re.match("^true|1|t|yes$", value, 2): - norm = True - else: - raise Exception("invalid bool specified: %s" % value) - - return norm - - def __verify_int(self, type, value): - valueInfo = self.__get_value_info() - - try: - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - except: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_int(self, value): - return int(value) - - def __verify_float(self, type, value): - valueInfo = self.__get_value_info() - - try: - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - except: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_float(self, value): - return float(value) - - def __verify_pos_int(self, type, value): - valueInfo = self.__get_value_info() - - try: - value = self.normalize(type, value) - except: - self.__set_value_info(valueInfo) - else: - self.__set_value_info(valueInfo, normalized=value) - - return valueInfo - - def __norm_pos_int(self, value): - value = int(value) - if value < 0: - raise Exception("value is not positive: %s" % value) - - return value - - def __verify_neg_int(self, type, value): - valueInfo = self.__get_value_info() - - try: - value = self.normalize(type, value) - except: - self.__set_value_info(valueInfo) - else: - self.__set_value_info(valueInfo, normalized=value) - - return valueInfo - - def __norm_neg_int(self, type, value): - value = int(value) - if value > 0: - raise Exception("value is not negative: %s" % value) - - return value - - def __verify_freq(self, type, value): - return self.__verify_pos_int(type, value) - - def __norm_freq(self, value): - return self.__norm_pos_int(value) - - def 
__verify_pos_float(self, type, value): - valueInfo = self.__get_value_info() - - try: - value = self.normalize(type, value) - except: - self.__set_value_info(valueInfo) - else: - self.__set_value_info(valueInfo, normalized=value) - - return valueInfo - - def __norm_pos_float(self, value): - value = float(value) - if value < 0: - raise Exception("value is not positive: %s" % value) - - return value - - def __verify_pos_num(self, type, value): - return self.__verify_pos_float(value) - - def __norm_pos_num(self, value): - return self.__norm_pos_float(value) - - def __verify_neg_float(self, type, value): - valueInfo = self.__get_value_info() - - try: - value = self.normalize(type, value) - except: - self.__set_value_info(valueInfo) - else: - self.__set_value_info(valueInfo, normalized=value) - - return valueInfo - - def __norm_neg_float(self, value): - value = float(value) - if value >= 0: - raise Exception("value is not negative: %s" % value) - - return value - - def __verify_string(self, type, value): - valueInfo = self.__get_value_info() - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - - return valueInfo - - def __norm_string(self, value): - return str(value) - - def __verify_keyval(self, type, value): - valueInfo = self.__get_value_info() - - if reKeyVal.search(value): - try: - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - except: - self.__set_value_info(valueInfo, errorData = \ - "invalid list of key-value pairs : [ %s ]" % value) - else: - msg = "No key value pairs found?" 
- self.__set_value_info(valueInfo, errorData=msg) - - return valueInfo - - def __norm_keyval(self, value): - list = self.__norm_list(value) - keyValue = {} - for item in list: - (key, value) = reKeyVal.split(item) - #if not keyValue.has_key(key): - # keyValue[key] = [] - #keyValue[key].append(value) - keyValue[key] = value - return keyValue - - def __verify_list(self, type, value): - valueInfo = self.__get_value_info() - - self.__set_value_info(valueInfo, normalized=self.normalize(type,value)) - - return valueInfo - - def __norm_list(self, value): - norm = [] - if reList.search(value): - norm = reList.split(value) - else: - norm = [value,] - - return norm - - def __verify_file(self, type, value): - valueInfo = self.__get_value_info() - - if os.path.isfile(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_file(self, value): - return self.__normalizedPath(value) - - def __verify_size(self, type, value): - valueInfo = self.__get_value_info() - - value = str(value) - if reSizeFormat.match(value): - numberPart = int(reSizeFormat.sub("\g<1>", value)) - factorPart = reSizeFormat.sub("\g<2>", value) - try: - normalized = normalize_size(numberPart, factorPart) - self.__set_value_info(valueInfo, - normalized=normalized) - except: - self.__set_value_info(valueInfo) - else: - try: - value = int(value) - except: - self.__set_value_info(valueInfo) - else: - if value >= 0: - self.__set_value_info(valueInfo, normalized=value) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_size(self, file): - norm = None - if reSizeFormat.match(value): - numberPart = int(reSizeFormat.sub("\g<1>", value)) - factorPart = reSizeFormat.sub("\g<2>", value) - norm = normalize_size(numberPart, factorPart) - else: - norm = int(value) - - return norm - - - def __verify_eaddress(self, type, value): - valueInfo = self.__get_value_info() - - emailList = 
reComma.split(value) - - for emailAddress in emailList: - if reEmailAddress.match(emailAddress): - emailParts = reEmailDelimit.split(emailAddress) - try: - socket.gethostbyname(emailParts[1]) - self.__set_value_info(valueInfo, normalized=self.normalize( - type, value)) - except: - errorString = "%s is invalid (domain lookup failed)" % \ - emailAddress - self.__set_value_info(valueInfo, errorData=errorString) - else: - errorString = "%s is invalid" % emailAddress - self.__set_value_info(valueInfo, errorData=errorString) - - return valueInfo - - def __verify_tcp_port(self, type, value): - valueInfo = self.__get_value_info() - - try: - value = self.__norm_tcp_port(value) - except: - self.__set_value_info(valueInfo) - else: - if value in range(2, 65536): - self.__set_value_info(valueInfo, normalized=value) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_tcp_port(self, value): - return int(value) - - def __verify_http_version(self, type, value): - valueInfo = self.__get_value_info() - - if value in ('1.0', '1.1'): - self.__set_value_info(valueInfo, normalized=float(value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __verify_range(self, type, value): - valueInfo = self.__get_value_info() - - range = reDash.split(value) - - try: - if len(range) > 1: - start = int(range[0]) - end = int(range[1]) - else: - start = int(range[0]) - end = None - except: - self.__set_value_info(valueInfo) - else: - if end: - if end - start != 0: - self.__set_value_info(valueInfo, normalized=(start, end)) - else: - self.__set_value_info(valueInfo) - else: - self.__set_value_info(valueInfo, normalized=(start,)) - - return valueInfo - - def __norm_range(self, value): - range = reDash.split(value) - if len(range) > 1: - start = int(range[0]) - end = int(range[1]) - else: - start = int(range[0]) - end = None - - return (start, end) - - def __verify_uri(self, type, value): - valueInfo = self.__get_value_info() - - _norm = None - try: - 
uriComponents = urlparse.urlparse(value) - if uriComponents[0] == '' or uriComponents[0] == 'file': - # if scheme is '' or 'file' - if not os.path.isfile(uriComponents[2]) and \ - not os.path.isdir(uriComponents[2]): - raise Exception("Invalid local URI") - else: - self.__set_value_info(valueInfo, normalized=self.normalize( - type,value)) - else: - # other schemes - # currently not checking anything. TODO - self.__set_value_info(valueInfo, normalized=self.normalize( - type,value)) - except: - errorString = "%s is an invalid uri" % value - self.__set_value_info(valueInfo, errorData=errorString) - - return valueInfo - - def __norm_uri(self, value): - uriComponents = list(urlparse.urlparse(value)) - if uriComponents[0] == '': - # if scheme is ''' - return self.__normalizedPath(uriComponents[2]) - elif uriComponents[0] == 'file': - # if scheme is 'file' - normalizedPath = self.__normalizedPath(uriComponents[2]) - return urlparse.urlunsplit(uriComponents[0:1] + [normalizedPath] + uriComponents[3:]) - - # Not dealing with any other case right now - return value - - def __verify_timestamp(self, type, value): - valueInfo = self.__get_value_info() - - if check_timestamp(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __verify_hostname(self, type, value): - valueInfo = self.__get_value_info() - - try: - socket.gethostbyname(value) - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - except: - errorString = "%s is invalid (domain lookup failed)" % value - self.__set_value_info(valueInfo, errorData=errorString) - - return valueInfo - - def __verify_user_account(self, type, value): - valueInfo = self.__get_value_info() - - try: - pwd.getpwnam(value) - except: - errorString = "'%s' user account does not exist" % value - self.__set_value_info(valueInfo, errorData=errorString) - else: - self.__set_value_info(valueInfo, 
normalized=self.normalize(type, - value)) - - return valueInfo - - def __verify_user_group(self, type, value): - valueInfo = self.__get_value_info() - - try: - grp.getgrnam(value) - except: - errorString = "'%s' group does not exist" % value - self.__set_value_info(valueInfo, errorData=errorString) - else: - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - - return valueInfo - - def __verify_nothing(self, type, value): - valueInfo = self.__get_value_info() - - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - - return valueInfo - - #-------------------------------------------------------------------------- - - def normalize(self, type, value): - try: - normFunc = getattr(self, "_typeValidator__norm_%s" % type) - return normFunc(value) - except AttributeError, A: - # this exception should occur only when we don't have corresponding normalize function - return value - - def verify(self, type, value, allowNone=False): - """Verifies a value based on its type. 
- - type - supported configValidator type - value - data to be validated - allowNone - don't freak out if None or '' is supplied - - returns a valueInfo dictionary: - - valueInfo = { 'isValid' : 1, 'normalized' : 5, 'errorData' : 0 } - - where: - - isValid - true or false (0/1) - normalized - the normalized value - errorData - if invalid an error string - - supported types: - - see top level""" - - result = None - if allowNone: - if value == '' or value == None: - result = self.__verify_nothing(None, None) - result['normalized'] = None - else: - result = self.verifyFunctions[type](type, value) - else: - result = self.verifyFunctions[type](type, value) - - return result - - def is_valid_type(self, type): - """Returns true if type is valid.""" - - return types.has_key(type) - - def type_info(self, type): - """Returns type info dictionary.""" - - dbInfo = dbTypes[types[type]['db']] - typeInfo = types[type].copy() - typeInfo['db'] = dbInfo - - return typeInfo - - def add(self, name, type, value): - """Adds a value and type by name to the configValidate object to be - verified using validate(). 
- - name - name used to key values and access the results of the - validation - type - configValidator type - value - data to be verified""" - - self.validateList.append({ 'name' : name, - 'type' : type, - 'value': value }) - - def validate(self, allowNone=False): - """Validates configValidate object populating validatedInfo with - valueInfo dictionaries for each value added to the object.""" - - for valItem in self.validateList: - valueInfo = self.verify(valItem['type'], valItem['value'], - allowNone) - if valueInfo: - valueInfo['name'] = valItem['name'] - self.validatedInfo.append(valueInfo) - else: - raise Exception("\nMissing a return value: valueInfo\n%s" % \ - self.verifyFunctions[valItem['type']](valItem['value'])) - - def __normalizedPath(self, value): - oldWd = os.getcwd() - if self.__originalDir: - os.chdir(self.__originalDir) - normPath = os.path.realpath(value) - os.chdir(oldWd) - return normPath - - -class display: - def __init__(self): - self.displayFunctions = {} - self.__build_dispaly_functions() - - def __build_dispaly_functions(self): - functions = {} - for function in dir(self): - functions[function] = 1 - - for type in types.keys(): - # kinda bad, need to find out how to know the name of the class - # I'm in. But it works. - functionName = "_cisplay__display_%s" % type - if functions.has_key(functionName): - self.displayFunctions[type] = getattr(self, functionName) - else: - if type == '': - self.displayFunctions[type] = self.__display_default - else: - error = "Display function %s for type %s does not exist." 
\ - % (functionName, type) - raise Exception(error) - sys.exit(1) - - def __display_default(self, value, style): - return value - - def __display_generic_number(self, value): - displayNumber = '' - splitNum = string.split(str(value), sep='.') - numList = list(str(splitNum[0])) - numList.reverse() - length = len(numList) - counter = 0 - for char in numList: - counter = counter + 1 - if counter % 3 or counter == length: - displayNumber = "%s%s" % (char, displayNumber) - else: - displayNumber = ",%s%s" % (char, displayNumber) - - if len(splitNum) > 1: - displayNumber = "%s.%s" % (displayNumber, splitNum[1]) - - return displayNumber - - def __display_generic_mappable(self, map, value, style, plural=True): - displayValue = '' - length = len(str(value)) - if length > 3: - for factorSet in map: - displayValue = float(value) / factorSet['factor'] - if len(str(int(displayValue))) <= 3 or \ - factorSet['factor'] == map[-1]['factor']: - displayValue = "%10.2f" % displayValue - if displayValue[-1] == '0': - if displayValue > 1 and style != 'short' and plural: - displayValue = "%s %ss" % (displayValue[:-1], - factorSet[style]) - else: - displayValue = "%s %s" % (displayValue[:-1], - factorSet[style]) - else: - if displayValue > 1 and style != 'short' and plural: - displayValue = "%s %ss" % (displayValue, - factorSet[style]) - else: - displayValue = "%s %s" % (displayValue, - factorSet[style]) - break - - return displayValue - - def __display_directory(self, value, style): - return self.__display_default(value, style) - - def __display_address(self, value, style): - return self.__display_default(value, style) - - def __display_ip_address(self, value, style): - return self.__display_default(value, style) - - def __display_net_address(self, value, style): - return self.__display_default(value, style) - - def __display_bool(self, value, style): - displayValue = value - - if not isinstance(displayValue, bool): - if re.match("^false|0|f|no$", value, 2): - displayValue=False - elif 
re.match("^true|1|t|yes$", value, 2): - displayValue=True - - return displayValue - - def __display_int(self, value, style): - return self.__display_generic_number(value) - - def __display_float(self, value, style): - return self.__display_generic_number(value) - - def __display_pos_int(self, value, style): - return self.__display_generic_number(value) - - def __display_neg_int(self, value, style): - return self.__display_generic_number(value) - - def __display_pos_num(self, value, style): - return self.__display_generic_number(value) - - def __display_pos_float(self, value, style): - return self.__display_generic_number(value) - - def __display_neg_float(self, value, style): - return self.__display_generic_number(value) - - def __display_string(self, value, style): - return self.__display_default(value, style) - - def __display_list(self, value, style): - value = value.rstrip() - return value.rstrip(',') - - def __display_keyval(self, value, style): - value = value.rstrip() - return value.rstrip(',') - - def __display_file(self, value, style): - return self.__display_default(value, style) - - def __display_size(self, value, style): - return self.__display_generic_mappable(sizeMap, value, style) - - def __display_freq(self, value, style): - return self.__display_generic_mappable(freqMap, value, style, False) - - def __display_eaddress(self, value, style): - return self.__display_default(value, style) - - def __display_tcp_port(self, value, style): - return self.__display_default(value, style) - - def __display_http_version(self, value, style): - return self.__display_default(value, style) - - def __display_range(self, value, style): - return self.__display_default(value, style) - - def __display_hostname(self, value, style): - return self.__display_default(value, style) - - def __display_user_account(self, value, style): - return self.__display_default(value, style) - - def __display_user_group(self, value, style): - return self.__display_default(value, style) - - 
def __display_timestamp(self, value, style): - return self.__display_default(value, style) - - def display(self, type, value, style='short'): - displayValue = value - if value != None: - displayValue = self.displayFunctions[type](value, style) - - return displayValue - -typeValidatorInstance = typeValidator() - -def is_valid_type(type): - """Returns true if type is valid.""" - - return typeValidatorInstance.is_valid_type(type) - -def type_info(type): - """Returns type info dictionary.""" - - return typeValidatorInstance.type_info(type) - -def verify(type, value, allowNone=False): - """Returns a normalized valueInfo dictionary.""" - - return typeValidatorInstance.verify(type, value, allowNone) - -def __normalize(map, val, factor): - normFactor = string.lower(factor) - normVal = float(val) - return int(normVal * map[normFactor]) - -def normalize_size(size, factor): - """ Normalize a size to bytes. - - size - number of B, KB, MB, GB, TB, or PB - factor - size factor (case insensitive): - b | bytes - bytes - k | kb - kilobytes - m | mb - megabytes - g | gb - gigabytes - t | tb - terabytes - p | pb - petabytes - """ - - return __normalize(sizeFactors, size, factor) - -def normalize_freq(freq, factor): - """ Normalize a frequency to hertz. - - freq - number of Hz, Khz, Mhz, Ghz, Thz, or Phz - factor - size factor (case insensitive): - Hz - Hertz - Mhz - Megahertz - Ghz - Gigahertz - Thz - Terahertz - Phz - Petahertz - """ - - return __normalize(freqFactors, freq, factor) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/util.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/util.py deleted file mode 100644 index 3d5cb6fade..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/util.py +++ /dev/null @@ -1,309 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. 
# The ASF licenses this file
#to you under the Apache License, Version 2.0 (the
#"License"); you may not use this file except in compliance
#with the License.  You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# `time` added: check_timestamp() below calls time.strptime, but `time` was
# never imported - the bare except there silently masked the NameError.
import errno, sys, os, time, traceback, stat, socket, re, warnings, signal

from hodlib.Common.tcp import tcpSocket, tcpError
from hodlib.Common.threads import simpleCommand

# Bit positions (within the leading octal digit) of the setuid/setgid/sticky
# permission bits; consumed by get_perms().
setUGV = { 'S_ISUID' : 2, 'S_ISGID' : 1, 'S_ISVTX' : 0 }

# Matches one backslash-escaped character; used by replace_escapes() to
# unescape values such as "\," and "\=".
reEscapeSeq = r"\\(.)?"
reEscapeSeq = re.compile(reEscapeSeq)

HOD_INTERRUPTED_CODE = 127
HOD_INTERRUPTED_MESG = "Hod interrupted. Cleaning up and exiting"
TORQUE_USER_LIMITS_COMMENT_FIELD = "User-limits exceeded. " + \
        "Requested:([0-9]*) Used:([0-9]*) MaxLimit:([0-9]*)"
TORQUE_USER_LIMITS_EXCEEDED_MSG = "Requested number of nodes exceeded " + \
        "maximum user limits. "

class AlarmException(Exception):
    """Raised by SIGALRM-based timeout handlers elsewhere in hod.

    Carries a human-readable message which repr() returns verbatim.
    """

    def __init__(self, msg=''):
        self.message = msg
        Exception.__init__(self, msg)

    def __repr__(self):
        return self.message
def isProcessRunning(pid):
    '''Check if a process is running, by sending it a 0 signal, and checking for errors'''
    # This method is documented in some email threads on the python mailing list.
    # For e.g.: http://mail.python.org/pipermail/python-list/2002-May/144522.html
    try:
        os.kill(pid, 0)
        return True
    except OSError as err:
        # EPERM means the process exists but belongs to someone else;
        # any other errno (notably ESRCH) means it is gone.
        return err.errno == errno.EPERM

def untar(file, targetDir):
    """Extract gzipped tarball *file* into *targetDir*; True on success."""
    status = False
    command = 'tar -C %s -zxf %s' % (targetDir, file)
    commandObj = simpleCommand('untar', command)
    commandObj.start()
    commandObj.wait()
    commandObj.join()
    if commandObj.exit_code() == 0:
        status = True

    return status

def tar(tarFile, tarDirectory, tarList):
    """Create gzipped tarball *tarFile* from *tarList* (paths relative to
    *tarDirectory*).  Returns True on success, or the command's exit status
    string on failure."""
    currentDir = os.getcwd()
    os.chdir(tarDirectory)
    try:
        command = 'tar -czf %s %s' % (tarFile, ' '.join(tarList))

        commandObj = simpleCommand('tar', command)
        commandObj.start()
        commandObj.wait()
        commandObj.join()
        if commandObj.exit_code() == 0:
            status = True
        else:
            status = commandObj.exit_status_string()
    finally:
        # Always restore the working directory, even if simpleCommand raises;
        # previously a failure here left the process chdir'ed away.
        os.chdir(currentDir)

    return status

def to_http_url(list):
    """convert [hostname, port] to a http url"""
    return "http://%s:%s" % (list[0], list[1])

def get_exception_string():
    """Return the current exception's full traceback as one string."""
    (type, value, tb) = sys.exc_info()
    exceptList = traceback.format_exception(type, value, tb)
    # join() instead of repeated concatenation (quadratic on long tracebacks)
    return ''.join(exceptList)

def get_exception_error_string():
    """Return a one-line 'type value' summary of the current exception."""
    (type, value, tb) = sys.exc_info()
    if value:
        exceptString = "%s %s" % (type, value)
    else:
        exceptString = type

    return exceptString

def check_timestamp(timeStamp):
    """ Checks the validity of a timeStamp.

        timeStamp - (YYYY-MM-DD HH:MM:SS in UTC)

        returns True or False
    """
    # Local import: the module's import line historically omitted `time`,
    # and the old bare `except:` silently turned the NameError into False.
    import time
    try:
        time.strptime(timeStamp, "%Y-%m-%d %H:%M:%S")
    except (ValueError, TypeError):
        # Only malformed input is invalid; other errors should propagate.
        return False

    return True

def sig_wrapper(sigNum, handler, *args):
    """Invoke *handler*, passing the extra args tuple through if present."""
    if args:
        handler(args)
    else:
        handler()

def get_perms(filename):
    """Return the 4-digit octal permission string (e.g. '0644') of a file."""
    mode = stat.S_IMODE(os.stat(filename)[stat.ST_MODE])
    permsString = ''
    permSet = 0
    place = 2
    for who in "USR", "GRP", "OTH":
        for what in "R", "W", "X":
            # Fold r/w/x bits into one octal digit per who-class.
            if mode & getattr(stat, "S_I" + what + who):
                permSet = permSet + 2 ** place
            place = place - 1

        permsString = "%s%s" % (permsString, permSet)
        permSet = 0
        place = 2

    # Leading digit encodes setuid/setgid/sticky.
    permSet = 0
    for permFlag in setUGV.keys():
        if mode & getattr(stat, permFlag):
            permSet = permSet + 2 ** setUGV[permFlag]

    permsString = "%s%s" % (permSet, permsString)

    return permsString

def local_fqdn():
    """Return a system's true FQDN rather than any aliases, which are
    occasionally returned by socket.gethostname."""

    fqdn = None
    me = os.uname()[1]
    nameInfo = socket.gethostbyname_ex(me)
    nameInfo[1].append(nameInfo[0])
    for name in nameInfo[1]:
        if name.count(".") and name.startswith(me):
            fqdn = name
    if fqdn is None:
        fqdn = me
    return fqdn

def need_to_allocate(allocated, config, command):
    """Decide whether *command* requires a fresh cluster allocation."""
    status = True

    if allocated.isSet():
        status = False
    elif re.search(r"\s*dfs.*$", command) and \
            config['gridservice-hdfs']['external']:
        # Pure-DFS commands can run against an external HDFS.
        status = False
    elif config['gridservice-mapred']['external']:
        status = False

    return status

def filter_warnings():
    """Suppress the 'with will become a reserved keyword' warning (py2.5)."""
    warnings.filterwarnings('ignore',
        message=".*?'with' will become a reserved keyword.*")

def args_to_string(list):
    """return a string argument space seperated"""
    # %s-format each item so non-string arguments keep working, then join.
    return ' '.join(['%s' % item for item in list])

def replace_escapes(object):
    """ replace any escaped character. e.g \, with , \= with = and so on """
    # here object is either a config object or a options object
    for section in object._mySections:
        for option in object._configDef[section].keys():
            if option in object[section]:
                if object._configDef[section][option]['type'] == 'keyval':
                    keyValDict = object[section][option]
                    object[section][option] = {}
                    for (key, value) in keyValDict.items():
                        match = reEscapeSeq.search(value)
                        if match:
                            value = reEscapeSeq.sub(r"\1", value)
                        object[section][option][key] = value

def hadoopVersion(hadoopDir, java_home, log):
    """Run `bin/hadoop version` and return {'major': .., 'minor': ..}
    (both None if the version could not be determined)."""
    hadoopVersion = { 'major' : None, 'minor' : None }
    hadoopPath = os.path.join(hadoopDir, 'bin', 'hadoop')
    cmd = "%s version" % hadoopPath
    log.debug('Executing command %s to find hadoop version' % cmd)
    # Copy the environment: assigning into os.environ directly would leak
    # JAVA_HOME into the parent process's environment permanently.
    env = os.environ.copy()
    env['JAVA_HOME'] = java_home
    hadoopVerCmd = simpleCommand('HadoopVersion', cmd, env)
    hadoopVerCmd.start()
    hadoopVerCmd.wait()
    hadoopVerCmd.join()
    if hadoopVerCmd.exit_code() == 0:
        verLine = hadoopVerCmd.output()[0]
        log.debug('Version from hadoop command: %s' % verLine)
        hadoopVerRegExp = re.compile(r"Hadoop ([0-9]+)\.([0-9]+).*")
        verMatch = hadoopVerRegExp.match(verLine)
        if verMatch is not None:
            hadoopVersion['major'] = verMatch.group(1)
            hadoopVersion['minor'] = verMatch.group(2)
    return hadoopVersion


def get_cluster_status(hdfsAddress, mapredAddress):
    """Determine the status of the cluster based on socket availability
    of HDFS and Map/Reduce."""
    # 0 = both up, 14 = mapred down, 13 = hdfs down, 10 = both down.
    status = 0

    mapredSocket = tcpSocket(mapredAddress)
    try:
        mapredSocket.open()
        mapredSocket.close()
    except tcpError:
        status = 14

    hdfsSocket = tcpSocket(hdfsAddress)
    try:
        hdfsSocket.open()
        hdfsSocket.close()
    except tcpError:
        if status > 0:
            status = 10
        else:
            status = 13

    return status

def parseEquals(list):
    # takes in a list of keyval pairs e.g ['a=b','c=d'] and returns a
    # dict e.g {'a'='b','c'='d'}. Used in GridService/{mapred.py/hdfs.py} and
    # HodRing/hodring.py. No need for specially treating escaped =. as in \=,
    # since all keys are generated by hod and don't contain such anomalies
    dict = {}
    for elems in list:
        # split once only, so a stray '=' in the value is kept intact
        key, value = elems.split('=', 1)
        dict[key] = value
    return dict

def getMapredSystemDirectory(mrSysDirRoot, userid, jobid):
    """Return the per-user, per-job mapred system directory path."""
    return os.path.join(mrSysDirRoot, userid, 'mapredsystem', jobid)

class HodInterrupt:
    """Process-wide flag, set from signal handlers, polled by long loops."""

    def __init__(self):
        self.HodInterruptFlag = False
        self.log = None

    def set_log(self, log):
        self.log = log

    def init_signals(self):

        def sigStop(sigNum, handler):
            sig_wrapper(sigNum, self.setFlag)

        signal.signal(signal.SIGTERM, sigStop)  # 15 : software termination signal
        signal.signal(signal.SIGQUIT, sigStop)  # 3  : Quit program
        signal.signal(signal.SIGINT, sigStop)   # 2 ^C : Interrupt program

        # NOTE(review): this local sig_wrapper intentionally(?) shadows the
        # module-level sig_wrapper for the sigStop closure above, adding a
        # critical-level log line - confirm before "simplifying" it away.
        def sig_wrapper(sigNum, handler, *args):
            self.log.critical("Caught signal %s." % sigNum)

            if args:
                handler(args)
            else:
                handler()

    def setFlag(self, val=True):
        self.HodInterruptFlag = val

    def isSet(self):
        return self.HodInterruptFlag

class HodInterruptException(Exception):
    """Raised when hodInterrupt's flag is observed set."""

    def __init__(self, value=""):
        self.value = value

    def __str__(self):
        return repr(self.value)

# Shared singleton used across hod to propagate Ctrl-C / TERM requests.
hodInterrupt = HodInterrupt()
# The ASF licenses this file
#to you under the Apache License, Version 2.0 (the
#"License"); you may not use this file except in compliance
#with the License.  You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import xmlrpclib, time, random, signal
from hodlib.Common.util import hodInterrupt, HodInterruptException

class hodXRClient(xmlrpclib.ServerProxy):
    """XML-RPC client proxy with SIGALRM-based call timeouts and retries.

    Each call is given `timeOut` seconds; on failure it is retried up to 30
    times with a fixed randomized wait, honouring hod's interrupt flag.
    """

    def __init__(self, uri, transport=None, encoding=None, verbose=0,
                 allow_none=0, installSignalHandlers=1, retryRequests=True,
                 timeOut=15):
        xmlrpclib.ServerProxy.__init__(self, uri, transport, encoding, verbose,
                                       allow_none)
        self.__retryRequests = retryRequests
        self.__timeOut = timeOut
        if installSignalHandlers != 0:
            self.__set_alarm()

    def __set_alarm(self):
        # SIGALRM interrupts a hung socket operation inside the RPC call.
        def alarm_handler(sigNum, sigHandler):
            raise Exception("XML-RPC socket timeout.")

        signal.signal(signal.SIGALRM, alarm_handler)

    def __request(self, methodname, params):
        response = None
        retryWaitTime = 5 + random.randint(0, 5)
        for i in range(0, 30):
            signal.alarm(self.__timeOut)
            try:
                response = self._ServerProxy__request(methodname, params)
                signal.alarm(0)
                break
            except Exception:
                # Fix: cancel the pending alarm *before* sleeping; previously
                # it was cancelled only on success, so it could fire during
                # time.sleep() below and escape as an unhandled exception.
                signal.alarm(0)
                if self.__retryRequests:
                    if hodInterrupt.isSet():
                        raise HodInterruptException()
                    time.sleep(retryWaitTime)
                else:
                    raise Exception("hodXRClientTimeout")

        # NOTE(review): if all 30 retries fail, None is returned silently -
        # callers appear to rely on this, so the behaviour is preserved.
        return response

    def __getattr__(self, name):
        # magic method dispatcher
        return xmlrpclib._Method(self.__request, name)
52138f2f8a..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from mapred import MapReduce, MapReduceExternal -from hdfs import Hdfs, HdfsExternal diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/hdfs.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/hdfs.py deleted file mode 100644 index 11efd116c3..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/hdfs.py +++ /dev/null @@ -1,310 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
#See the License for the specific language governing permissions and
#limitations under the License.
"""define Hdfs as subclass of Service"""

# -*- python -*-

import os

from service import *
from hodlib.Hod.nodePool import *
from hodlib.Common.desc import CommandDesc
from hodlib.Common.util import get_exception_string, parseEquals

class HdfsExternal(MasterSlave):
    """dummy proxy to external HDFS instance"""

    def __init__(self, serviceDesc, workDirs, version):
        MasterSlave.__init__(self, serviceDesc, workDirs, None)
        # The external HDFS is already up; mark it launched and initialized.
        self.launchedMaster = True
        self.masterInitialized = True
        self.version = version

    def getMasterRequest(self):
        # No nodes are requested for an externally managed HDFS.
        return None

    def getMasterCommands(self, serviceDict):
        return []

    def getAdminCommands(self, serviceDict):
        return []

    def getWorkerCommands(self, serviceDict):
        return []

    def getMasterAddrs(self):
        """Return [namenode host:port] taken from the service attributes."""
        attrs = self.serviceDesc.getfinalAttrs()
        addr = attrs['fs.default.name']
        return [addr]

    def setMasterParams(self, dict):
        """Record the external namenode's host/ports in the final attrs."""
        self.serviceDesc.dict['final-attrs']['fs.default.name'] = "%s:%s" % \
            (dict['host'], dict['fs_port'])

        if self.version < 16:
            # NOTE(review): reads info_port from serviceDesc.dict rather than
            # from the `dict` argument like the branch below - confirm intent.
            self.serviceDesc.dict['final-attrs']['dfs.info.port'] = \
                str(self.serviceDesc.dict['info_port'])
        else:
            # After Hadoop-2185
            self.serviceDesc.dict['final-attrs']['dfs.http.address'] = "%s:%s" % \
                (dict['host'], dict['info_port'])

    def getInfoAddrs(self):
        """Return [web-UI host:port]; the key depends on the Hadoop version."""
        attrs = self.serviceDesc.getfinalAttrs()
        if self.version < 16:
            addr = attrs['fs.default.name']
            k, v = addr.split(":")
            infoaddr = k + ':' + attrs['dfs.info.port']
        else:
            # After Hadoop-2185
            infoaddr = attrs['dfs.http.address']
        return [infoaddr]

class Hdfs(MasterSlave):
    """HOD-managed HDFS service: one namenode master, N datanode workers."""

    def __init__(self, serviceDesc, nodePool, required_node, version,
                 format=True, upgrade=False,
                 workers_per_ring=1):
        # NOTE(review): nodePool is passed to MasterSlave as its workDirs
        # argument - confirm this is the intended plumbing.
        MasterSlave.__init__(self, serviceDesc, nodePool, required_node)
        self.masterNode = None
        self.masterAddr = None
        self.runAdminCommands = True
        self.infoAddr = None
        self._isLost = False
        self.format = format
        self.upgrade = upgrade
        self.workers = []
        self.version = version
        self.workers_per_ring = workers_per_ring

    def getMasterRequest(self):
        """Request a single, non-preferred node for the namenode."""
        req = NodeRequest(1, [], False)
        return req

    def getMasterCommands(self, serviceDict):
        """Return namenode commands: optional -format, then start/-upgrade."""
        masterCommands = []
        if self.format:
            masterCommands.append(self._getNameNodeCommand(True))

        if self.upgrade:
            masterCommands.append(self._getNameNodeCommand(False, True))
        else:
            masterCommands.append(self._getNameNodeCommand(False))

        return masterCommands

    def getAdminCommands(self, serviceDict):
        """On upgrade, wait for safemode exit then finalize - run only once."""
        adminCommands = []
        if self.upgrade and self.runAdminCommands:
            adminCommands.append(self._getNameNodeAdminCommand('-safemode wait'))
            adminCommands.append(self._getNameNodeAdminCommand('-finalizeUpgrade',
                                                               True, True))

        self.runAdminCommands = False
        return adminCommands

    def getWorkerCommands(self, serviceDict):
        """One datanode command per worker slot on each hodring."""
        workerCmds = []
        for id in range(1, self.workers_per_ring + 1):
            workerCmds.append(self._getDataNodeCommand(str(id)))

        return workerCmds

    def setMasterNodes(self, list):
        node = list[0]
        self.masterNode = node

    def getMasterAddrs(self):
        return [self.masterAddr]

    def getInfoAddrs(self):
        return [self.infoAddr]

    def getWorkers(self):
        return self.workers

    def setMasterParams(self, list):
        """Parse key=value pairs reported by the master hodring."""
        dict = self._parseEquals(list)
        self.masterAddr = dict['fs.default.name']
        k, v = self.masterAddr.split(":")
        self.masterNode = k
        if self.version < 16:
            self.infoAddr = self.masterNode + ':' + dict['dfs.info.port']
        else:
            # After Hadoop-2185
            self.infoAddr = dict['dfs.http.address']

    def _parseEquals(self, list):
        return parseEquals(list)

    def _setWorkDirs(self, workDirs, envs, attrs, parentDirs, subDir):
        """Populate workDirs/attrs with name, data and tmp dirs under subDir."""
        namedir = None
        hadooptmpdir = None
        datadir = []

        for p in parentDirs:
            workDirs.append(p)
            workDirs.append(os.path.join(p, subDir))
            dir = os.path.join(p, subDir, 'dfs-data')
            datadir.append(dir)
            if not hadooptmpdir:
                # Not used currently, generating hadooptmpdir just in case
                hadooptmpdir = os.path.join(p, subDir, 'hadoop-tmp')

            if not namedir:
                namedir = os.path.join(p, subDir, 'dfs-name')

        workDirs.append(namedir)
        workDirs.extend(datadir)

        # FIXME!! use csv
        attrs['dfs.name.dir'] = namedir
        attrs['hadoop.tmp.dir'] = hadooptmpdir
        attrs['dfs.data.dir'] = ','.join(datadir)
        envs['HADOOP_ROOT_LOGGER'] = "INFO,DRFA"

    def _getNameNodeCommand(self, format=False, upgrade=False):
        """Build the CommandDesc that starts (or formats/upgrades) the namenode."""
        sd = self.serviceDesc

        parentDirs = self.workDirs
        workDirs = []
        attrs = sd.getfinalAttrs().copy()
        envs = sd.getEnvs().copy()

        if 'fs.default.name' not in attrs:
            attrs['fs.default.name'] = 'fillinhostport'

        if self.version < 16:
            if 'dfs.info.port' not in attrs:
                attrs['dfs.info.port'] = 'fillinport'
        else:
            # Addressing Hadoop-2185, added the following. Earlier versions
            # don't care about this
            if 'dfs.http.address' not in attrs:
                attrs['dfs.http.address'] = 'fillinhostport'

        self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'hdfs-nn')

        cmdSpec = { 'name' : 'namenode' }
        cmdSpec['program'] = os.path.join('bin', 'hadoop')
        argv = ['namenode']
        if format:
            argv.append('-format')
        elif upgrade:
            argv.append('-upgrade')
        cmdSpec['argv'] = argv
        cmdSpec['envs'] = envs
        cmdSpec['pkgdirs'] = sd.getPkgDirs()
        cmdSpec['workdirs'] = workDirs
        cmdSpec['final-attrs'] = attrs
        cmdSpec['attrs'] = sd.getAttrs()
        if format:
            # Formatting runs in the foreground and answers the Y/N prompt.
            cmdSpec['fg'] = 'true'
            cmdSpec['stdin'] = 'Y'
        cmd = CommandDesc(cmdSpec)
        return cmd

    def _getNameNodeAdminCommand(self, adminCommand, wait=True,
                                 ignoreFailures=False):
        """Build a `hadoop dfsadmin <adminCommand>` CommandDesc."""
        sd = self.serviceDesc

        parentDirs = self.workDirs
        workDirs = []
        attrs = sd.getfinalAttrs().copy()
        envs = sd.getEnvs().copy()
        nn = self.masterAddr

        if nn is None:
            # py3-compatible raise; was `raise ValueError, "..."`
            raise ValueError("Can't get namenode address")

        attrs['fs.default.name'] = nn

        self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'hdfs-nn')

        cmdSpec = { 'name' : 'dfsadmin' }
        cmdSpec['program'] = os.path.join('bin', 'hadoop')
        argv = ['dfsadmin']
        argv.append(adminCommand)
        cmdSpec['argv'] = argv
        cmdSpec['envs'] = envs
        cmdSpec['pkgdirs'] = sd.getPkgDirs()
        cmdSpec['workdirs'] = workDirs
        cmdSpec['final-attrs'] = attrs
        cmdSpec['attrs'] = sd.getAttrs()
        if wait:
            cmdSpec['fg'] = 'true'
            cmdSpec['stdin'] = 'Y'
        if ignoreFailures:
            cmdSpec['ignorefailures'] = 'Y'
        cmd = CommandDesc(cmdSpec)
        return cmd

    def _getDataNodeCommand(self, id):
        """Build the datanode CommandDesc for worker slot *id* (string)."""
        sd = self.serviceDesc

        parentDirs = self.workDirs
        workDirs = []
        attrs = sd.getfinalAttrs().copy()
        envs = sd.getEnvs().copy()
        nn = self.masterAddr

        if nn is None:
            raise ValueError("Can't get namenode address")

        attrs['fs.default.name'] = nn

        if self.version < 16:
            if 'dfs.datanode.port' not in attrs:
                attrs['dfs.datanode.port'] = 'fillinport'
            if 'dfs.datanode.info.port' not in attrs:
                attrs['dfs.datanode.info.port'] = 'fillinport'
        else:
            # Adding the following. Hadoop-2185
            if 'dfs.datanode.address' not in attrs:
                attrs['dfs.datanode.address'] = 'fillinhostport'
            if 'dfs.datanode.http.address' not in attrs:
                attrs['dfs.datanode.http.address'] = 'fillinhostport'

        if self.version >= 18:
            # After HADOOP-3283
            # TODO: check for major as well as minor versions
            attrs['dfs.datanode.ipc.address'] = 'fillinhostport'

        # unique workdirs in case of multiple datanodes per hodring
        pd = []
        for dir in parentDirs:
            dir = dir + "-" + id
            pd.append(dir)
        parentDirs = pd
        # end of unique workdirs

        self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'hdfs-dn')

        cmdSpec = { 'name' : 'datanode' }
        cmdSpec['program'] = os.path.join('bin', 'hadoop')
        cmdSpec['argv'] = ['datanode']
        cmdSpec['envs'] = envs
        cmdSpec['pkgdirs'] = sd.getPkgDirs()
        cmdSpec['workdirs'] = workDirs
        cmdSpec['final-attrs'] = attrs
        cmdSpec['attrs'] = sd.getAttrs()

        cmd = CommandDesc(cmdSpec)
        return cmd
b/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/mapred.py deleted file mode 100644 index 086f052fda..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/mapred.py +++ /dev/null @@ -1,272 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-"""define MapReduce as subclass of Service""" - -# -*- python -*- - -import os, copy, time - -from service import * -from hodlib.Hod.nodePool import * -from hodlib.Common.desc import CommandDesc -from hodlib.Common.util import get_exception_string, parseEquals - -class MapReduceExternal(MasterSlave): - """dummy proxy to external MapReduce instance""" - - def __init__(self, serviceDesc, workDirs, version): - MasterSlave.__init__(self, serviceDesc, workDirs,None) - self.launchedMaster = True - self.masterInitialized = True - self.version = version - - def getMasterRequest(self): - return None - - def getMasterCommands(self, serviceDict): - return [] - - def getAdminCommands(self, serviceDict): - return [] - - def getWorkerCommands(self, serviceDict): - return [] - - def getMasterAddrs(self): - attrs = self.serviceDesc.getfinalAttrs() - addr = attrs['mapred.job.tracker'] - return [addr] - - def needsMore(self): - return 0 - - def needsLess(self): - return 0 - - def setMasterParams(self, dict): - self.serviceDesc['final-attrs']['mapred.job.tracker'] = "%s:%s" % (dict['host'], - dict['tracker_port']) - - if self.version < 16: - self.serviceDesc.dict['final-attrs']['mapred.job.tracker.info.port'] = \ - str(self.serviceDesc.dict['info_port']) - else: - # After Hadoop-2185 - self.serviceDesc['final-attrs']['mapred.job.tracker.http.address'] = \ - "%s:%s" %(dict['host'], dict['info_port']) - - def getInfoAddrs(self): - attrs = self.serviceDesc.getfinalAttrs() - if self.version < 16: - addr = attrs['mapred.job.tracker'] - k,v = addr.split( ":") - infoaddr = k + ':' + attrs['mapred.job.tracker.info.port'] - else: - # After Hadoop-2185 - # Note: earlier,we never respected mapred.job.tracker.http.address - infoaddr = attrs['mapred.job.tracker.http.address'] - return [infoaddr] - -class MapReduce(MasterSlave): - - def __init__(self, serviceDesc, workDirs,required_node, version, - workers_per_ring = 1): - MasterSlave.__init__(self, serviceDesc, workDirs,required_node) - - 
self.masterNode = None - self.masterAddr = None - self.infoAddr = None - self.workers = [] - self.required_node = required_node - self.version = version - self.workers_per_ring = workers_per_ring - - def isLaunchable(self, serviceDict): - hdfs = serviceDict['hdfs'] - if (hdfs.isMasterInitialized()): - return True - return False - - def getMasterRequest(self): - req = NodeRequest(1, [], False) - return req - - def getMasterCommands(self, serviceDict): - - hdfs = serviceDict['hdfs'] - - cmdDesc = self._getJobTrackerCommand(hdfs) - return [cmdDesc] - - def getAdminCommands(self, serviceDict): - return [] - - def getWorkerCommands(self, serviceDict): - - hdfs = serviceDict['hdfs'] - - workerCmds = [] - for id in range(1, self.workers_per_ring + 1): - workerCmds.append(self._getTaskTrackerCommand(str(id), hdfs)) - - return workerCmds - - def setMasterNodes(self, list): - node = list[0] - self.masterNode = node - - def getMasterAddrs(self): - return [self.masterAddr] - - def getInfoAddrs(self): - return [self.infoAddr] - - def getWorkers(self): - return self.workers - - def requiredNode(self): - return self.required_host - - def setMasterParams(self, list): - dict = self._parseEquals(list) - self.masterAddr = dict['mapred.job.tracker'] - k,v = self.masterAddr.split(":") - self.masterNode = k - if self.version < 16: - self.infoAddr = self.masterNode + ':' + dict['mapred.job.tracker.info.port'] - else: - # After Hadoop-2185 - self.infoAddr = dict['mapred.job.tracker.http.address'] - - def _parseEquals(self, list): - return parseEquals(list) - - def _setWorkDirs(self, workDirs, envs, attrs, parentDirs, subDir): - local = [] - system = None - temp = None - hadooptmpdir = None - dfsclient = [] - - for p in parentDirs: - workDirs.append(p) - workDirs.append(os.path.join(p, subDir)) - dir = os.path.join(p, subDir, 'mapred-local') - local.append(dir) - if not system: - system = os.path.join(p, subDir, 'mapred-system') - if not temp: - temp = os.path.join(p, subDir, 
'mapred-temp') - if not hadooptmpdir: - # Not used currently, generating hadooptmpdir just in case - hadooptmpdir = os.path.join(p, subDir, 'hadoop-tmp') - dfsclientdir = os.path.join(p, subDir, 'dfs-client') - dfsclient.append(dfsclientdir) - workDirs.append(dfsclientdir) - # FIXME!! use csv - attrs['mapred.local.dir'] = ','.join(local) - attrs['mapred.system.dir'] = 'fillindir' - attrs['mapred.temp.dir'] = temp - attrs['hadoop.tmp.dir'] = hadooptmpdir - - - envs['HADOOP_ROOT_LOGGER'] = "INFO,DRFA" - - - def _getJobTrackerCommand(self, hdfs): - sd = self.serviceDesc - - parentDirs = self.workDirs - workDirs = [] - attrs = sd.getfinalAttrs().copy() - envs = sd.getEnvs().copy() - - if 'mapred.job.tracker' not in attrs: - attrs['mapred.job.tracker'] = 'fillinhostport' - - if self.version < 16: - if 'mapred.job.tracker.info.port' not in attrs: - attrs['mapred.job.tracker.info.port'] = 'fillinport' - else: - # Addressing Hadoop-2185, - if 'mapred.job.tracker.http.address' not in attrs: - attrs['mapred.job.tracker.http.address'] = 'fillinhostport' - - attrs['fs.default.name'] = hdfs.getMasterAddrs()[0] - - self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'mapred-jt') - - dict = { 'name' : 'jobtracker' } - dict['version'] = self.version - dict['program'] = os.path.join('bin', 'hadoop') - dict['argv'] = ['jobtracker'] - dict['envs'] = envs - dict['pkgdirs'] = sd.getPkgDirs() - dict['workdirs'] = workDirs - dict['final-attrs'] = attrs - dict['attrs'] = sd.getAttrs() - cmd = CommandDesc(dict) - return cmd - - def _getTaskTrackerCommand(self, id, hdfs): - - sd = self.serviceDesc - - parentDirs = self.workDirs - workDirs = [] - attrs = sd.getfinalAttrs().copy() - envs = sd.getEnvs().copy() - jt = self.masterAddr - - if jt == None: - raise ValueError, "Can't get job tracker address" - - attrs['mapred.job.tracker'] = jt - attrs['fs.default.name'] = hdfs.getMasterAddrs()[0] - - if self.version < 16: - if 'tasktracker.http.port' not in attrs: - attrs['tasktracker.http.port'] 
= 'fillinport' - # earlier to 16, tasktrackers always took ephemeral port 0 for - # tasktracker.report.bindAddress - else: - # Adding the following. Hadoop-2185 - if 'mapred.task.tracker.report.address' not in attrs: - attrs['mapred.task.tracker.report.address'] = 'fillinhostport' - if 'mapred.task.tracker.http.address' not in attrs: - attrs['mapred.task.tracker.http.address'] = 'fillinhostport' - - # unique parentDirs in case of multiple tasktrackers per hodring - pd = [] - for dir in parentDirs: - dir = dir + "-" + id - pd.append(dir) - parentDirs = pd - # end of unique workdirs - - self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'mapred-tt') - - dict = { 'name' : 'tasktracker' } - dict['program'] = os.path.join('bin', 'hadoop') - dict['argv'] = ['tasktracker'] - dict['envs'] = envs - dict['pkgdirs'] = sd.getPkgDirs() - dict['workdirs'] = workDirs - dict['final-attrs'] = attrs - dict['attrs'] = sd.getAttrs() - cmd = CommandDesc(dict) - return cmd - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/service.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/service.py deleted file mode 100644 index f0c7f5cbbf..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/service.py +++ /dev/null @@ -1,266 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
#See the License for the specific language governing permissions and
#limitations under the License.
"""defines Service as abstract interface"""

# -*- python -*-
import random, socket

class Service:
    """ the service base class that all the
    other services inherit from. """

    def __init__(self, serviceDesc, workDirs):
        self.serviceDesc = serviceDesc
        self.workDirs = workDirs

    def getName(self):
        return self.serviceDesc.getName()

    def getInfoAddrs(self):
        """Return a list of addresses that provide
        information about the servie"""
        return []

    def isLost(self):
        """True if the service is down"""
        raise NotImplementedError

    def addNodes(self, nodeList):
        """add nodeSet"""
        raise NotImplementedError

    def removeNodes(self, nodeList):
        """remove a nodeset"""
        raise NotImplementedError

    def getWorkers(self):
        raise NotImplementedError

    def needsMore(self):
        """return number of nodes the service wants to add"""
        raise NotImplementedError

    def needsLess(self):
        """return number of nodes the service wants to remove"""
        raise NotImplementedError

class MasterSlave(Service):
    """ the base class for a master slave
    service architecture. """

    def __init__(self, serviceDesc, workDirs, requiredNode):
        Service.__init__(self, serviceDesc, workDirs)
        self.launchedMaster = False
        self.masterInitialized = False
        self.masterAddress = 'none'
        self.requiredNode = requiredNode
        self.failedMsg = None
        self.masterFailureCount = 0

    def getRequiredNode(self):
        return self.requiredNode

    def getMasterRequest(self):
        """ the number of master you need
        to run for this service. """
        raise NotImplementedError

    def isLaunchable(self, serviceDict):
        """ if your service does not depend on
        other services. is set to true by default. """
        return True

    def getMasterCommands(self, serviceDict):
        """ a list of master commands you
        want to run for this service. """
        raise NotImplementedError

    def getAdminCommands(self, serviceDict):
        """ a list of admin commands you
        want to run for this service. """
        raise NotImplementedError

    def getWorkerCommands(self, serviceDict):
        """ a list of worker commands you want to
        run for this service. """
        raise NotImplementedError

    def setMasterNodes(self, list):
        """ set the status of master nodes
        after they start running on a node cluster. """
        raise NotImplementedError

    def addNodes(self, list):
        """ add nodes to a service. Not implemented
        currently. """
        raise NotImplementedError

    def getMasterAddrs(self):
        """ return the addresses of master. the
        hostname:port to which worker nodes should
        connect. """
        raise NotImplementedError

    def setMasterParams(self, list):
        """ set the various master params
        depending on what each hodring set
        the master params to. """
        raise NotImplementedError

    def setlaunchedMaster(self):
        """ set the status of master launched
        to true. """
        self.launchedMaster = True

    def isMasterLaunched(self):
        """ return if a master has been launched
        for the service or not. """
        return self.launchedMaster

    def isMasterInitialized(self):
        """ return if a master if launched
        has been initialized or not. """
        return self.masterInitialized

    def setMasterInitialized(self):
        """ set the master initialized to
        true. """
        self.masterInitialized = True
        # Reset failure related variables, as master is initialized
        # successfully.
        self.masterFailureCount = 0
        self.failedMsg = None

    def getMasterAddress(self):
        """ it needs to change to reflect
        more that one masters. Currently it
        keeps a knowledge of where the master
        was launched and to keep track if it was actually
        up or not. """
        return self.masterAddress

    def setMasterAddress(self, addr):
        self.masterAddress = addr

    def isExternal(self):
        return self.serviceDesc.isExternal()

    def setMasterFailed(self, err):
        """Sets variables related to Master failure"""
        self.masterFailureCount += 1
        self.failedMsg = err
        # When command is sent to HodRings, this would have been set to True.
        # Reset it to reflect the correct status.
        self.launchedMaster = False

    def getMasterFailed(self):
        return self.failedMsg

    def getMasterFailureCount(self):
        return self.masterFailureCount

class NodeRequest:
    """ A class to define
    a node request. """

    def __init__(self, n, required=None, preferred=None, isPreemptee=True):
        # Fix: the old [] defaults were mutable default arguments shared
        # across every NodeRequest instance; use None sentinels instead.
        self.numNodes = n
        self.preferred = [] if preferred is None else preferred
        self.isPreemptee = isPreemptee
        self.required = [] if required is None else required

    def setNumNodes(self, n):
        self.numNodes = n

    def setPreferredList(self, list):
        self.preferred = list

    def setIsPreemptee(self, flag):
        self.isPreemptee = flag
The code path - # couldn't be followd anytime, so the error remained uncaught. - # This time I stumbled upon the error - s.close() - - if avail: - ServiceUtil.localPortUsed[n] = True - return n - raise ValueError, "Can't find unique local port between %d and %d" % (low, high) - - getUniqRandomPort = staticmethod(getUniqRandomPort) - - def getUniqPort(h=None, low=40000, high=60000, retry=900, log=None): - """get unique port on a host that can be used by service - This and its consumer code should disappear when master - nodes get allocatet by nodepool""" - - # We use a default value of 900 retries, which takes an agreeable - # time limit of ~ 6.2 seconds to check 900 ports, in the worse case - # of no available port in those 900. - - n = low - while retry > 0: - n = n + 1 - if n in ServiceUtil.localPortUsed: - continue - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - if not h: - h = socket.gethostname() - avail = False - if log: log.debug("Trying to see if port %s is available"% n) - try: - s.bind((h, n)) - if log: log.debug("Yes, port %s is available" % n) - avail = True - except socket.error,e: - if log: log.debug("Could not bind to the port %s. Reason %s" % (n,e)) - retry -= 1 - pass - s.close() - - if avail: - ServiceUtil.localPortUsed[n] = True - return n - - raise ValueError, "Can't find unique local port between %d and %d" % (low, high) - - getUniqPort = staticmethod(getUniqPort) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. 
The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hadoop.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hadoop.py deleted file mode 100644 index 616d775803..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hadoop.py +++ /dev/null @@ -1,747 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-"""define WorkLoad as abstract interface for user job""" -# -*- python -*- - -import os, time, sys, shutil, exceptions, re, threading, signal, urllib, pprint, math - -from HTMLParser import HTMLParser - -import xml.dom.minidom -import xml.dom.pulldom -from xml.dom import getDOMImplementation - -from hodlib.Common.util import * -from hodlib.Common.xmlrpc import hodXRClient -from hodlib.Common.miniHTMLParser import miniHTMLParser -from hodlib.Common.nodepoolutil import NodePoolUtil -from hodlib.Common.tcp import tcpError, tcpSocket - -reCommandDelimeterString = r"(?<!\\);" -reCommandDelimeter = re.compile(reCommandDelimeterString) - -class hadoopConfig: - def __create_xml_element(self, doc, name, value, description, final = False): - prop = doc.createElement("property") - nameP = doc.createElement("name") - string = doc.createTextNode(name) - nameP.appendChild(string) - valueP = doc.createElement("value") - string = doc.createTextNode(value) - valueP.appendChild(string) - if final: - finalP = doc.createElement("final") - string = doc.createTextNode("true") - finalP.appendChild(string) - desc = doc.createElement("description") - string = doc.createTextNode(description) - desc.appendChild(string) - prop.appendChild(nameP) - prop.appendChild(valueP) - if final: - prop.appendChild(finalP) - prop.appendChild(desc) - - return prop - - def gen_site_conf(self, confDir, tempDir, numNodes, hdfsAddr, mrSysDir,\ - mapredAddr=None, clientParams=None, serverParams=None,\ - finalServerParams=None, clusterFactor=None): - if not mapredAddr: - mapredAddr = "dummy:8181" - - implementation = getDOMImplementation() - doc = implementation.createDocument('', 'configuration', None) - comment = doc.createComment( - "This is an auto generated hadoop-site.xml, do not modify") - topElement = doc.documentElement - topElement.appendChild(comment) - - description = {} - paramsDict = { 'mapred.job.tracker' : mapredAddr , \ - 'fs.default.name' : "hdfs://" + hdfsAddr, \ - 'hadoop.tmp.dir' : tempDir, 
\ - } - - paramsDict['mapred.system.dir'] = mrSysDir - - # mapred-default.xml is no longer used now. - numred = int(math.floor(clusterFactor * (int(numNodes) - 1))) - paramsDict['mapred.reduce.tasks'] = str(numred) - # end - - # for all the above vars generated, set the description - for k, v in paramsDict.iteritems(): - description[k] = 'Hod generated parameter' - - # finalservelParams - if finalServerParams: - for k, v in finalServerParams.iteritems(): - if not description.has_key(k): - description[k] = "final server parameter" - paramsDict[k] = v - - # servelParams - if serverParams: - for k, v in serverParams.iteritems(): - if not description.has_key(k): - # if no final value for same param is mentioned - description[k] = "server parameter" - paramsDict[k] = v - - # clientParams - if clientParams: - for k, v in clientParams.iteritems(): - if not description.has_key(k) or description[k] == "server parameter": - # Just add, if no final value for same param is mentioned. - # Replace even if server param is mentioned for same config variable - description[k] = "client-side parameter" - paramsDict[k] = v - - # generate the xml elements - for k,v in paramsDict.iteritems(): - if ( description[k] == "final server parameter" or \ - description[k] == "Hod generated parameter" ): - final = True - else: final = False - prop = self.__create_xml_element(doc, k, v, description[k], final) - topElement.appendChild(prop) - - siteName = os.path.join(confDir, "hadoop-site.xml") - sitefile = file(siteName, 'w') - print >> sitefile, topElement.toxml() - sitefile.close() - -class hadoopCluster: - def __init__(self, cfg, log): - self.__cfg = cfg - self.__log = log - self.__changedClusterParams = [] - - self.__hostname = local_fqdn() - self.__svcrgyClient = None - self.__nodePool = NodePoolUtil.getNodePool(self.__cfg['nodepooldesc'], - self.__cfg, self.__log) - self.__hadoopCfg = hadoopConfig() - self.jobId = None - self.mapredInfo = None - self.hdfsInfo = None - self.ringmasterXRS = 
None - - def __get_svcrgy_client(self): - svcrgyUrl = to_http_url(self.__cfg['hod']['xrs-address']) - return hodXRClient(svcrgyUrl) - - def __get_service_status(self): - serviceData = self.__get_service_data() - - status = True - hdfs = False - mapred = False - - for host in serviceData.keys(): - for item in serviceData[host]: - service = item.keys() - if service[0] == 'hdfs.grid' and \ - self.__cfg['gridservice-hdfs']['external'] == False: - hdfs = True - elif service[0] == 'mapred.grid': - mapred = True - - if not mapred: - status = "mapred" - - if not hdfs and self.__cfg['gridservice-hdfs']['external'] == False: - if status != True: - status = "mapred and hdfs" - else: - status = "hdfs" - - return status - - def __get_service_data(self): - registry = to_http_url(self.__cfg['hod']['xrs-address']) - serviceData = self.__svcrgyClient.getServiceInfo( - self.__cfg['hod']['userid'], self.__setup.np.getNodePoolId()) - - return serviceData - - def __check_job_status(self): - failureCount = 0 - status = False - state = 'Q' - userLimitsFirstFlag = True - - while (state=='Q') or (state==False): - if hodInterrupt.isSet(): - raise HodInterruptException() - - jobInfo = self.__nodePool.getJobInfo() - state = jobInfo['job_state'] - self.__log.debug('job state %s' % state) - if state == False: - failureCount += 1 - if (failureCount >= self.__cfg['hod']['job-status-query-failure-retries']): - self.__log.debug('Number of retries reached max limit while querying job status') - break - time.sleep(self.__cfg['hod']['job-command-failure-interval']) - elif state!='Q': - break - else: - self.__log.debug('querying for job status after job-status-query-interval') - time.sleep(self.__cfg['hod']['job-status-query-interval']) - - if self.__cfg['hod'].has_key('job-feasibility-attr') and \ - self.__cfg['hod']['job-feasibility-attr']: - (status, msg) = self.__isJobFeasible() - if status == "Never": - self.__log.critical(TORQUE_USER_LIMITS_EXCEEDED_MSG + msg + \ - "This cluster cannot be 
allocated now.") - return -1 - elif status == False: - if userLimitsFirstFlag: - self.__log.critical(TORQUE_USER_LIMITS_EXCEEDED_MSG + msg + \ - "This cluster allocation will succeed only after other " + \ - "clusters are deallocated.") - userLimitsFirstFlag = False - - if state and state != 'C': - status = True - - return status - - def __isJobFeasible(self): - return self.__nodePool.isJobFeasible() - - def __get_ringmaster_client(self): - ringmasterXRS = None - - ringList = self.__svcrgyClient.getServiceInfo( - self.__cfg['ringmaster']['userid'], self.__nodePool.getServiceId(), - 'ringmaster', 'hod') - - if ringList and len(ringList): - if isinstance(ringList, list): - ringmasterXRS = ringList[0]['xrs'] - else: - count = 0 - waitTime = self.__cfg['hod']['allocate-wait-time'] - - while count < waitTime: - if hodInterrupt.isSet(): - raise HodInterruptException() - - ringList = self.__svcrgyClient.getServiceInfo( - self.__cfg['ringmaster']['userid'], self.__nodePool.getServiceId(), - 'ringmaster', - 'hod') - - if ringList and len(ringList): - if isinstance(ringList, list): - ringmasterXRS = ringList[0]['xrs'] - - if ringmasterXRS is not None: - break - else: - time.sleep(1) - count = count + 1 - # check to see if the job exited by any chance in that time: - if (count % self.__cfg['hod']['job-status-query-interval'] == 0): - if not self.__check_job_status(): - break - return ringmasterXRS - - def __init_hadoop_service(self, serviceName, xmlrpcClient): - status = True - serviceAddress = None - serviceInfo = None - - for i in range(0, 250): - try: - if hodInterrupt.isSet(): - raise HodInterruptException() - - serviceAddress = xmlrpcClient.getServiceAddr(serviceName) - if serviceAddress: - if serviceAddress == 'not found': - time.sleep(1) - # check to see if the job exited by any chance in that time: - if ((i+1) % self.__cfg['hod']['job-status-query-interval'] == 0): - if not self.__check_job_status(): - break - else: - serviceInfo = xmlrpcClient.getURLs(serviceName) - 
break - except HodInterruptException,h : - raise h - except: - self.__log.critical("'%s': ringmaster xmlrpc error." % serviceName) - self.__log.debug(get_exception_string()) - status = False - break - - if serviceAddress == 'not found' or not serviceAddress: - self.__log.critical("Failed to retrieve '%s' service address." % - serviceName) - status = False - elif serviceAddress.startswith("Error: "): - errs = serviceAddress[len("Error: "):] - self.__log.critical("Cluster could not be allocated because of the following errors.\n%s" % \ - errs) - status = False - else: - try: - self.__svcrgyClient.registerService(self.__cfg['hodring']['userid'], - self.jobId, self.__hostname, - serviceName, 'grid', serviceInfo) - - except HodInterruptException, h: - raise h - except: - self.__log.critical("'%s': registry xmlrpc error." % serviceName) - self.__log.debug(get_exception_string()) - status = False - - return status, serviceAddress, serviceInfo - - def __collect_jobtracker_ui(self, dir): - - link = self.mapredInfo + "/jobtracker.jsp" - parser = miniHTMLParser() - parser.setBaseUrl(self.mapredInfo) - node_cache = {} - - self.__log.debug("collect_jobtracker_ui seeded with " + link) - - def alarm_handler(number, stack): - raise AlarmException("timeout") - - signal.signal(signal.SIGALRM, alarm_handler) - - input = None - while link: - self.__log.debug("link: %s" % link) - # taskstats.jsp,taskdetails.jsp not included since too many to collect - if re.search( - "jobfailures\.jsp|jobtracker\.jsp|jobdetails\.jsp|jobtasks\.jsp", - link): - - for i in range(1,5): - if hodInterrupt.isSet(): - raise HodInterruptException() - try: - input = urllib.urlopen(link) - break - except: - self.__log.debug(get_exception_string()) - time.sleep(1) - - if input: - out = None - - self.__log.debug("collecting " + link + "...") - filename = re.sub(self.mapredInfo, "", link) - filename = dir + "/" + filename - filename = re.sub("http://","", filename) - filename = re.sub("[\?\&=:]","_",filename) - 
filename = filename + ".html" - - try: - tempdir, tail = os.path.split(filename) - if not os.path.exists(tempdir): - os.makedirs(tempdir) - except: - self.__log.debug(get_exception_string()) - - out = open(filename, 'w') - - bufSz = 8192 - - signal.alarm(10) - - try: - self.__log.debug("Starting to grab: %s" % link) - buf = input.read(bufSz) - - while len(buf) > 0: - # Feed the file into the HTML parser - parser.feed(buf) - - # Re-write the hrefs in the file - p = re.compile("\?(.+?)=(.+?)") - buf = p.sub(r"_\1_\2",buf) - p= re.compile("&(.+?)=(.+?)") - buf = p.sub(r"_\1_\2",buf) - p = re.compile("http://(.+?):(\d+)?") - buf = p.sub(r"\1_\2/",buf) - buf = re.sub("href=\"/","href=\"",buf) - p = re.compile("href=\"(.+?)\"") - buf = p.sub(r"href=\1.html",buf) - - out.write(buf) - buf = input.read(bufSz) - - signal.alarm(0) - input.close() - if out: - out.close() - - self.__log.debug("Finished grabbing: %s" % link) - except AlarmException: - if hodInterrupt.isSet(): - raise HodInterruptException() - if out: out.close() - if input: input.close() - - self.__log.debug("Failed to retrieve: %s" % link) - else: - self.__log.debug("Failed to retrieve: %s" % link) - - # Get the next link in level traversal order - link = parser.getNextLink() - - parser.close() - - def check_cluster(self, clusterInfo): - status = 0 - - if 'mapred' in clusterInfo: - mapredAddress = clusterInfo['mapred'][7:] - hdfsAddress = clusterInfo['hdfs'][7:] - status = get_cluster_status(hdfsAddress, mapredAddress) - if status == 0: - status = 12 - else: - status = 15 - - return status - - def is_cluster_deallocated(self, jobId): - """Returns True if the JobId that represents this cluster - is in the Completed or exiting state.""" - jobInfo = self.__nodePool.getJobInfo(jobId) - state = None - if jobInfo is not None and jobInfo.has_key('job_state'): - state = jobInfo['job_state'] - return ((state == 'C') or (state == 'E')) - - def cleanup(self): - if self.__nodePool: self.__nodePool.finalize() - - def 
get_job_id(self): - return self.jobId - - def delete_job(self, jobId): - '''Delete a job given it's ID''' - ret = 0 - if self.__nodePool: - ret = self.__nodePool.deleteJob(jobId) - else: - raise Exception("Invalid state: Node pool is not initialized to delete the given job.") - return ret - - def is_valid_account(self): - """Verify if the account being used to submit the job is a valid account. - This code looks for a file <install-dir>/bin/verify-account. - If the file is present, it executes the file, passing as argument - the account name. It returns the exit code and output from the - script on non-zero exit code.""" - - accountValidationScript = os.path.abspath('./verify-account') - if not os.path.exists(accountValidationScript): - return (0, None) - - account = self.__nodePool.getAccountString() - exitCode = 0 - errMsg = None - try: - accountValidationCmd = simpleCommand('Account Validation Command',\ - '%s %s' % (accountValidationScript, - account)) - accountValidationCmd.start() - accountValidationCmd.wait() - accountValidationCmd.join() - exitCode = accountValidationCmd.exit_code() - self.__log.debug('account validation script is run %d' \ - % exitCode) - errMsg = None - if exitCode is not 0: - errMsg = accountValidationCmd.output() - except Exception, e: - exitCode = 0 - self.__log.warn('Error executing account script: %s ' \ - 'Accounting is disabled.' 
\ - % get_exception_error_string()) - self.__log.debug(get_exception_string()) - return (exitCode, errMsg) - - def allocate(self, clusterDir, min, max=None): - status = 0 - failureCount = 0 - self.__svcrgyClient = self.__get_svcrgy_client() - - self.__log.debug("allocate %s %s %s" % (clusterDir, min, max)) - - if min < 3: - self.__log.critical("Minimum nodes must be greater than 2.") - status = 2 - else: - nodeSet = self.__nodePool.newNodeSet(min) - walltime = None - if self.__cfg['hod'].has_key('walltime'): - walltime = self.__cfg['hod']['walltime'] - self.jobId, exitCode = self.__nodePool.submitNodeSet(nodeSet, walltime) - # if the job submission returned an error other than no resources - # retry a couple of times - while (self.jobId is False) and (exitCode != 188): - if hodInterrupt.isSet(): - raise HodInterruptException() - - failureCount += 1 - if (failureCount >= self.__cfg['hod']['job-status-query-failure-retries']): - self.__log.debug("failed submitting job more than the retries. exiting") - break - else: - # wait a bit before retrying - time.sleep(self.__cfg['hod']['job-command-failure-interval']) - if hodInterrupt.isSet(): - raise HodInterruptException() - self.jobId, exitCode = self.__nodePool.submitNodeSet(nodeSet, walltime) - - if self.jobId: - jobStatus = None - try: - jobStatus = self.__check_job_status() - except HodInterruptException, h: - self.__log.info(HOD_INTERRUPTED_MESG) - self.delete_job(self.jobId) - self.__log.info("Cluster %s removed from queue." 
% self.jobId) - raise h - else: - if jobStatus == -1: - self.delete_job(self.jobId); - status = 4 - return status - - if jobStatus: - self.__log.info("Cluster Id %s" \ - % self.jobId) - try: - self.ringmasterXRS = self.__get_ringmaster_client() - - self.__log.debug("Ringmaster at : %s" % self.ringmasterXRS ) - ringClient = None - if self.ringmasterXRS: - ringClient = hodXRClient(self.ringmasterXRS) - - hdfsStatus, hdfsAddr, self.hdfsInfo = \ - self.__init_hadoop_service('hdfs', ringClient) - - if hdfsStatus: - self.__log.info("HDFS UI at http://%s" % self.hdfsInfo) - - mapredStatus, mapredAddr, self.mapredInfo = \ - self.__init_hadoop_service('mapred', ringClient) - - if mapredStatus: - self.__log.info("Mapred UI at http://%s" % self.mapredInfo) - - if self.__cfg['hod'].has_key('update-worker-info') \ - and self.__cfg['hod']['update-worker-info']: - workerInfoMap = {} - workerInfoMap['HDFS UI'] = 'http://%s' % self.hdfsInfo - workerInfoMap['Mapred UI'] = 'http://%s' % self.mapredInfo - # Ringmaster URL sample format : http://hostname:port/ - workerInfoMap['RM RPC Port'] = '%s' % self.ringmasterXRS.split(":")[2].strip("/") - if mapredAddr.find(':') != -1: - workerInfoMap['Mapred RPC Port'] = mapredAddr.split(':')[1] - ret = self.__nodePool.updateWorkerInfo(workerInfoMap, self.jobId) - if ret != 0: - self.__log.warn('Could not update HDFS and Mapred information.' \ - 'User Portal may not show relevant information.' 
\ - 'Error code=%s' % ret) - - self.__cfg.replace_escape_seqs() - - # Go generate the client side hadoop-site.xml now - # adding final-params as well, just so that conf on - # client-side and server-side are (almost) the same - clientParams = None - serverParams = {} - finalServerParams = {} - - # client-params - if self.__cfg['hod'].has_key('client-params'): - clientParams = self.__cfg['hod']['client-params'] - - # server-params - if self.__cfg['gridservice-mapred'].has_key('server-params'): - serverParams.update(\ - self.__cfg['gridservice-mapred']['server-params']) - if self.__cfg['gridservice-hdfs'].has_key('server-params'): - # note that if there are params in both mapred and hdfs - # sections, the ones in hdfs overwirte the ones in mapred - serverParams.update(\ - self.__cfg['gridservice-hdfs']['server-params']) - - # final-server-params - if self.__cfg['gridservice-mapred'].has_key(\ - 'final-server-params'): - finalServerParams.update(\ - self.__cfg['gridservice-mapred']['final-server-params']) - if self.__cfg['gridservice-hdfs'].has_key( - 'final-server-params'): - finalServerParams.update(\ - self.__cfg['gridservice-hdfs']['final-server-params']) - - clusterFactor = self.__cfg['hod']['cluster-factor'] - tempDir = self.__cfg['hod']['temp-dir'] - if not os.path.exists(tempDir): - os.makedirs(tempDir) - tempDir = os.path.join( tempDir, self.__cfg['hod']['userid']\ - + "." + self.jobId ) - mrSysDir = getMapredSystemDirectory(self.__cfg['hodring']['mapred-system-dir-root'],\ - self.__cfg['hod']['userid'], self.jobId) - self.__hadoopCfg.gen_site_conf(clusterDir, tempDir, min,\ - hdfsAddr, mrSysDir, mapredAddr, clientParams,\ - serverParams, finalServerParams,\ - clusterFactor) - self.__log.info("hadoop-site.xml at %s" % clusterDir) - # end of hadoop-site.xml generation - else: - status = 8 - else: - status = 7 - else: - status = 6 - if status != 0: - self.__log.debug("Cleaning up cluster id %s, as cluster could not be allocated." 
% self.jobId) - if ringClient is None: - self.delete_job(self.jobId) - else: - self.__log.debug("Calling rm.stop()") - ringClient.stopRM() - self.__log.debug("Returning from rm.stop()") - except HodInterruptException, h: - self.__log.info(HOD_INTERRUPTED_MESG) - if self.ringmasterXRS: - if ringClient is None: - ringClient = hodXRClient(self.ringmasterXRS) - self.__log.debug("Calling rm.stop()") - ringClient.stopRM() - self.__log.debug("Returning from rm.stop()") - self.__log.info("Cluster Shutdown by informing ringmaster.") - else: - self.delete_job(self.jobId) - self.__log.info("Cluster %s removed from queue directly." % self.jobId) - raise h - else: - self.__log.critical("No cluster found, ringmaster failed to run.") - status = 5 - - elif self.jobId == False: - if exitCode == 188: - self.__log.critical("Request execeeded maximum resource allocation.") - else: - self.__log.critical("Job submission failed with exit code %s" % exitCode) - status = 4 - else: - self.__log.critical("Scheduler failure, allocation failed.\n\n") - status = 4 - - if status == 5 or status == 6: - ringMasterErrors = self.__svcrgyClient.getRMError() - if ringMasterErrors: - self.__log.critical("Cluster could not be allocated because" \ - " of the following errors on the "\ - "ringmaster host %s.\n%s" % \ - (ringMasterErrors[0], ringMasterErrors[1])) - self.__log.debug("Stack trace on ringmaster: %s" % ringMasterErrors[2]) - return status - - def __isRingMasterAlive(self, rmAddr): - ret = True - rmSocket = tcpSocket(rmAddr) - try: - rmSocket.open() - rmSocket.close() - except tcpError: - ret = False - - return ret - - def deallocate(self, clusterDir, clusterInfo): - status = 0 - - nodeSet = self.__nodePool.newNodeSet(clusterInfo['min'], - id=clusterInfo['jobid']) - self.mapredInfo = clusterInfo['mapred'] - self.hdfsInfo = clusterInfo['hdfs'] - - try: - if self.__cfg['hod'].has_key('hadoop-ui-log-dir'): - clusterStatus = self.check_cluster(clusterInfo) - if clusterStatus != 14 and clusterStatus 
!= 10: - # If JT is still alive - self.__collect_jobtracker_ui(self.__cfg['hod']['hadoop-ui-log-dir']) - else: - self.__log.debug('hadoop-ui-log-dir not specified. Skipping Hadoop UI log collection.') - except HodInterruptException, h: - # got an interrupt. just pass and proceed to qdel - pass - except: - self.__log.info("Exception in collecting Job tracker logs. Ignoring.") - - rmAddr = None - if clusterInfo.has_key('ring'): - # format is http://host:port/ We need host:port - rmAddr = clusterInfo['ring'][7:] - if rmAddr.endswith('/'): - rmAddr = rmAddr[:-1] - - if (rmAddr is None) or (not self.__isRingMasterAlive(rmAddr)): - # Cluster is already dead, don't try to contact ringmaster. - self.__nodePool.finalize() - status = 10 # As cluster is dead, we just set the status to 'cluster dead'. - else: - xrsAddr = clusterInfo['ring'] - rmClient = hodXRClient(xrsAddr) - self.__log.debug('calling rm.stop') - rmClient.stopRM() - self.__log.debug('completed rm.stop') - - # cleanup hod temp dirs - tempDir = os.path.join( self.__cfg['hod']['temp-dir'], \ - self.__cfg['hod']['userid'] + "." + clusterInfo['jobid'] ) - if os.path.exists(tempDir): - shutil.rmtree(tempDir) - - return status - -class hadoopScript: - def __init__(self, conf, execDir): - self.__environ = os.environ.copy() - self.__environ['HADOOP_CONF_DIR'] = conf - self.__execDir = execDir - - def run(self, script): - scriptThread = simpleCommand(script, script, self.__environ, 4, False, - False, self.__execDir) - scriptThread.start() - scriptThread.wait() - scriptThread.join() - - return scriptThread.exit_code() diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hod.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hod.py deleted file mode 100644 index b2587bb77a..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hod.py +++ /dev/null @@ -1,754 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. 
See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# -*- python -*- - -import sys, os, getpass, pprint, re, cPickle, random, shutil, time, errno - -import hodlib.Common.logger - -from hodlib.ServiceRegistry.serviceRegistry import svcrgy -from hodlib.Common.xmlrpc import hodXRClient -from hodlib.Common.util import to_http_url, get_exception_string -from hodlib.Common.util import get_exception_error_string -from hodlib.Common.util import hodInterrupt, HodInterruptException -from hodlib.Common.util import HOD_INTERRUPTED_CODE - -from hodlib.Common.nodepoolutil import NodePoolUtil -from hodlib.Hod.hadoop import hadoopCluster, hadoopScript - -CLUSTER_DATA_FILE = 'clusters' -INVALID_STATE_FILE_MSGS = \ - [ - - "Requested operation cannot be performed. Cannot read %s: " + \ - "Permission denied.", - - "Requested operation cannot be performed. " + \ - "Cannot write to %s: Permission denied.", - - "Requested operation cannot be performed. " + \ - "Cannot read/write to %s: Permission denied.", - - "Cannot update %s: Permission denied. 
" + \ - "Cluster is deallocated, but info and list " + \ - "operations might show incorrect information.", - - ] - -class hodState: - def __init__(self, store): - self.__store = store - self.__stateFile = None - self.__init_store() - self.__STORE_EXT = ".state" - - def __init_store(self): - if not os.path.exists(self.__store): - os.mkdir(self.__store) - - def __set_state_file(self, id=None): - if id: - self.__stateFile = os.path.join(self.__store, "%s%s" % (id, - self.__STORE_EXT)) - else: - for item in os.listdir(self.__store): - if item.endswith(self.__STORE_EXT): - self.__stateFile = os.path.join(self.__store, item) - - def get_state_file(self): - return self.__stateFile - - def checkStateFile(self, id=None, modes=(os.R_OK,)): - # is state file exists/readable/writable/both? - self.__set_state_file(id) - - # return true if file doesn't exist, because HOD CAN create - # state file and so WILL have permissions to read and/or write - try: - os.stat(self.__stateFile) - except OSError, err: - if err.errno == errno.ENOENT: # error 2 (no such file) - return True - - # file exists - ret = True - for mode in modes: - ret = ret and os.access(self.__stateFile, mode) - return ret - - def read(self, id=None): - info = {} - - self.__set_state_file(id) - - if self.__stateFile: - if os.path.isfile(self.__stateFile): - stateFile = open(self.__stateFile, 'r') - try: - info = cPickle.load(stateFile) - except EOFError: - pass - - stateFile.close() - - return info - - def write(self, id, info): - self.__set_state_file(id) - if not os.path.exists(self.__stateFile): - self.clear(id) - - stateFile = open(self.__stateFile, 'w') - cPickle.dump(info, stateFile) - stateFile.close() - - def clear(self, id=None): - self.__set_state_file(id) - if self.__stateFile and os.path.exists(self.__stateFile): - os.remove(self.__stateFile) - else: - for item in os.listdir(self.__store): - if item.endswith(self.__STORE_EXT): - os.remove(item) - -class hodRunner: - - def __init__(self, cfg, log=None, 
cluster=None): - self.__hodhelp = hodHelp() - self.__ops = self.__hodhelp.ops - self.__cfg = cfg - self.__npd = self.__cfg['nodepooldesc'] - self.__opCode = 0 - self.__user = getpass.getuser() - self.__registry = None - self.__baseLogger = None - # Allowing to pass in log object to help testing - a stub can be passed in - if log is None: - self.__setup_logger() - else: - self.__log = log - - self.__userState = hodState(self.__cfg['hod']['user_state']) - - self.__clusterState = None - self.__clusterStateInfo = { 'env' : None, 'hdfs' : None, 'mapred' : None } - - # Allowing to pass in log object to help testing - a stib can be passed in - if cluster is None: - self.__cluster = hadoopCluster(self.__cfg, self.__log) - else: - self.__cluster = cluster - - def __setup_logger(self): - self.__baseLogger = hodlib.Common.logger.hodLog('hod') - self.__log = self.__baseLogger.add_logger(self.__user ) - - if self.__cfg['hod']['stream']: - self.__baseLogger.add_stream(level=self.__cfg['hod']['debug'], - addToLoggerNames=(self.__user ,)) - - if self.__cfg['hod'].has_key('syslog-address'): - self.__baseLogger.add_syslog(self.__cfg['hod']['syslog-address'], - level=self.__cfg['hod']['debug'], - addToLoggerNames=(self.__user ,)) - - def get_logger(self): - return self.__log - - def __setup_cluster_logger(self, directory): - self.__baseLogger.add_file(logDirectory=directory, level=4, - backupCount=self.__cfg['hod']['log-rollover-count'], - addToLoggerNames=(self.__user ,)) - - def __setup_cluster_state(self, directory): - self.__clusterState = hodState(directory) - - def __norm_cluster_dir(self, directory): - directory = os.path.expanduser(directory) - if not os.path.isabs(directory): - directory = os.path.join(self.__cfg['hod']['original-dir'], directory) - directory = os.path.abspath(directory) - - return directory - - def __setup_service_registry(self): - cfg = self.__cfg['hod'].copy() - cfg['debug'] = 0 - self.__registry = svcrgy(cfg, self.__log) - self.__registry.start() - 
self.__log.debug(self.__registry.getXMLRPCAddr()) - self.__cfg['hod']['xrs-address'] = self.__registry.getXMLRPCAddr() - self.__cfg['ringmaster']['svcrgy-addr'] = self.__cfg['hod']['xrs-address'] - - def __set_cluster_state_info(self, env, hdfs, mapred, ring, jobid, min, max): - self.__clusterStateInfo['env'] = env - self.__clusterStateInfo['hdfs'] = "http://%s" % hdfs - self.__clusterStateInfo['mapred'] = "http://%s" % mapred - self.__clusterStateInfo['ring'] = ring - self.__clusterStateInfo['jobid'] = jobid - self.__clusterStateInfo['min'] = min - self.__clusterStateInfo['max'] = max - - def __set_user_state_info(self, info): - userState = self.__userState.read(CLUSTER_DATA_FILE) - for key in info.keys(): - userState[key] = info[key] - - self.__userState.write(CLUSTER_DATA_FILE, userState) - - def __remove_cluster(self, clusterDir): - clusterInfo = self.__userState.read(CLUSTER_DATA_FILE) - if clusterDir in clusterInfo: - del(clusterInfo[clusterDir]) - self.__userState.write(CLUSTER_DATA_FILE, clusterInfo) - - def __cleanup(self): - if self.__registry: self.__registry.stop() - - def __check_operation(self, operation): - opList = operation.split() - - if not opList[0] in self.__ops: - self.__log.critical("Invalid hod operation specified: %s" % operation) - self._op_help(None) - self.__opCode = 2 - - return opList - - def __adjustMasterFailureCountConfig(self, nodeCount): - # This method adjusts the ringmaster.max-master-failures variable - # to a value that is bounded by the a function of the number of - # nodes. - - maxFailures = self.__cfg['ringmaster']['max-master-failures'] - # Count number of masters required - depends on which services - # are external - masters = 0 - if not self.__cfg['gridservice-hdfs']['external']: - masters += 1 - if not self.__cfg['gridservice-mapred']['external']: - masters += 1 - - # So, if there are n nodes and m masters, we look atleast for - # all masters to come up. 
Therefore, atleast m nodes should be - # good, which means a maximum of n-m master nodes can fail. - maxFailedNodes = nodeCount - masters - - # The configured max number of failures is now bounded by this - # number. - self.__cfg['ringmaster']['max-master-failures'] = \ - min(maxFailures, maxFailedNodes) - - def _op_allocate(self, args): - operation = "allocate" - argLength = len(args) - min = 0 - max = 0 - errorFlag = False - errorMsgs = [] - - if argLength == 3: - nodes = args[2] - clusterDir = self.__norm_cluster_dir(args[1]) - - if not os.path.exists(clusterDir): - try: - os.makedirs(clusterDir) - except OSError, err: - errorFlag = True - errorMsgs.append("Could not create cluster directory. %s" \ - % (str(err))) - elif not os.path.isdir(clusterDir): - errorFlag = True - errorMsgs.append( \ - "Invalid cluster directory (--hod.clusterdir or -d) : " + \ - clusterDir + " : Not a directory") - - if int(nodes) < 3 : - errorFlag = True - errorMsgs.append("Invalid nodecount (--hod.nodecount or -n) : " + \ - "Must be >= 3. Given nodes: %s" % nodes) - if errorFlag: - for msg in errorMsgs: - self.__log.critical(msg) - self.__opCode = 3 - return - - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, \ - (os.R_OK, os.W_OK)): - self.__log.critical(INVALID_STATE_FILE_MSGS[2] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return - - clusterList = self.__userState.read(CLUSTER_DATA_FILE) - if clusterDir in clusterList.keys(): - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - # Check if the job is not running. Only then can we safely - # allocate another cluster. Otherwise the user would need - # to deallocate and free up resources himself. - if clusterInfo.has_key('jobid') and \ - self.__cluster.is_cluster_deallocated(clusterInfo['jobid']): - self.__log.warn("Found a dead cluster at cluster directory '%s'. Deallocating it to allocate a new one." 
% (clusterDir)) - self.__remove_cluster(clusterDir) - self.__clusterState.clear() - else: - self.__log.critical("Found a previously allocated cluster at cluster directory '%s'. HOD cannot determine if this cluster can be automatically deallocated. Deallocate the cluster if it is unused." % (clusterDir)) - self.__opCode = 12 - return - - self.__setup_cluster_logger(clusterDir) - - (status, message) = self.__cluster.is_valid_account() - if status is not 0: - if message: - for line in message: - self.__log.critical("verify-account output: %s" % line) - self.__log.critical("Cluster cannot be allocated because account verification failed. " \ - + "verify-account returned exit code: %s." % status) - self.__opCode = 4 - return - else: - self.__log.debug("verify-account returned zero exit code.") - if message: - self.__log.debug("verify-account output: %s" % message) - - if re.match('\d+-\d+', nodes): - (min, max) = nodes.split("-") - min = int(min) - max = int(max) - else: - try: - nodes = int(nodes) - min = nodes - max = nodes - except ValueError: - print self.__hodhelp.help(operation) - self.__log.critical( - "%s operation requires a pos_int value for n(nodecount)." % - operation) - self.__opCode = 3 - else: - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - self.__opCode = self.__cluster.check_cluster(clusterInfo) - if self.__opCode == 0 or self.__opCode == 15: - self.__setup_service_registry() - if hodInterrupt.isSet(): - self.__cleanup() - raise HodInterruptException() - self.__log.debug("Service Registry started.") - - self.__adjustMasterFailureCountConfig(nodes) - - try: - allocateStatus = self.__cluster.allocate(clusterDir, min, max) - except HodInterruptException, h: - self.__cleanup() - raise h - # Allocation has gone through. 
- # Don't care about interrupts any more - - try: - if allocateStatus == 0: - self.__set_cluster_state_info(os.environ, - self.__cluster.hdfsInfo, - self.__cluster.mapredInfo, - self.__cluster.ringmasterXRS, - self.__cluster.jobId, - min, max) - self.__setup_cluster_state(clusterDir) - self.__clusterState.write(self.__cluster.jobId, - self.__clusterStateInfo) - # Do we need to check for interrupts here ?? - - self.__set_user_state_info( - { clusterDir : self.__cluster.jobId, } ) - self.__opCode = allocateStatus - except Exception, e: - # Some unknown problem. - self.__cleanup() - self.__cluster.deallocate(clusterDir, self.__clusterStateInfo) - self.__opCode = 1 - raise Exception(e) - elif self.__opCode == 12: - self.__log.critical("Cluster %s already allocated." % clusterDir) - elif self.__opCode == 10: - self.__log.critical("dead\t%s\t%s" % (clusterInfo['jobid'], - clusterDir)) - elif self.__opCode == 13: - self.__log.warn("hdfs dead\t%s\t%s" % (clusterInfo['jobid'], - clusterDir)) - elif self.__opCode == 14: - self.__log.warn("mapred dead\t%s\t%s" % (clusterInfo['jobid'], - clusterDir)) - - if self.__opCode > 0 and self.__opCode != 15: - self.__log.critical("Cannot allocate cluster %s" % clusterDir) - else: - print self.__hodhelp.help(operation) - self.__log.critical("%s operation requires two arguments. 
" % operation - + "A cluster directory and a nodecount.") - self.__opCode = 3 - - def _is_cluster_allocated(self, clusterDir): - if os.path.isdir(clusterDir): - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - if clusterInfo != {}: - return True - return False - - def _op_deallocate(self, args): - operation = "deallocate" - argLength = len(args) - if argLength == 2: - clusterDir = self.__norm_cluster_dir(args[1]) - if os.path.isdir(clusterDir): - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - if clusterInfo == {}: - self.__handle_invalid_cluster_directory(clusterDir, cleanUp=True) - else: - self.__opCode = \ - self.__cluster.deallocate(clusterDir, clusterInfo) - # irrespective of whether deallocate failed or not\ - # remove the cluster state. - self.__clusterState.clear() - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.W_OK,)): - self.__log.critical(INVALID_STATE_FILE_MSGS[3] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return - self.__remove_cluster(clusterDir) - else: - self.__handle_invalid_cluster_directory(clusterDir, cleanUp=True) - else: - print self.__hodhelp.help(operation) - self.__log.critical("%s operation requires one argument. " % operation - + "A cluster path.") - self.__opCode = 3 - - def _op_list(self, args): - operation = 'list' - clusterList = self.__userState.read(CLUSTER_DATA_FILE) - for path in clusterList.keys(): - if not os.path.isdir(path): - self.__log.info("cluster state unknown\t%s\t%s" % (clusterList[path], path)) - continue - self.__setup_cluster_state(path) - clusterInfo = self.__clusterState.read() - if clusterInfo == {}: - # something wrong with the cluster directory. 
- self.__log.info("cluster state unknown\t%s\t%s" % (clusterList[path], path)) - continue - clusterStatus = self.__cluster.check_cluster(clusterInfo) - if clusterStatus == 12: - self.__log.info("alive\t%s\t%s" % (clusterList[path], path)) - elif clusterStatus == 10: - self.__log.info("dead\t%s\t%s" % (clusterList[path], path)) - elif clusterStatus == 13: - self.__log.info("hdfs dead\t%s\t%s" % (clusterList[path], path)) - elif clusterStatus == 14: - self.__log.info("mapred dead\t%s\t%s" % (clusterList[path], path)) - - def _op_info(self, args): - operation = 'info' - argLength = len(args) - if argLength == 2: - clusterDir = self.__norm_cluster_dir(args[1]) - if os.path.isdir(clusterDir): - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - if clusterInfo == {}: - # something wrong with the cluster directory. - self.__handle_invalid_cluster_directory(clusterDir) - else: - clusterStatus = self.__cluster.check_cluster(clusterInfo) - if clusterStatus == 12: - self.__print_cluster_info(clusterInfo) - self.__log.info("hadoop-site.xml at %s" % clusterDir) - elif clusterStatus == 10: - self.__log.critical("%s cluster is dead" % clusterDir) - elif clusterStatus == 13: - self.__log.warn("%s cluster hdfs is dead" % clusterDir) - elif clusterStatus == 14: - self.__log.warn("%s cluster mapred is dead" % clusterDir) - - if clusterStatus != 12: - if clusterStatus == 15: - self.__log.critical("Cluster %s not allocated." % clusterDir) - else: - self.__print_cluster_info(clusterInfo) - self.__log.info("hadoop-site.xml at %s" % clusterDir) - - self.__opCode = clusterStatus - else: - self.__handle_invalid_cluster_directory(clusterDir) - else: - print self.__hodhelp.help(operation) - self.__log.critical("%s operation requires one argument. 
" % operation - + "A cluster path.") - self.__opCode = 3 - - def __handle_invalid_cluster_directory(self, clusterDir, cleanUp=False): - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.R_OK,)): - self.__log.critical(INVALID_STATE_FILE_MSGS[0] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return - - clusterList = self.__userState.read(CLUSTER_DATA_FILE) - if clusterDir in clusterList.keys(): - # previously allocated cluster. - self.__log.critical("Cannot find information for cluster with id '%s' in previously allocated cluster directory '%s'." % (clusterList[clusterDir], clusterDir)) - if cleanUp: - self.__cluster.delete_job(clusterList[clusterDir]) - self.__log.critical("Freeing resources allocated to the cluster.") - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.W_OK,)): - self.__log.critical(INVALID_STATE_FILE_MSGS[1] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return - self.__remove_cluster(clusterDir) - self.__opCode = 3 - else: - if not os.path.exists(clusterDir): - self.__log.critical( \ - "Invalid hod.clusterdir(--hod.clusterdir or -d). " + \ - clusterDir + " : No such directory") - elif not os.path.isdir(clusterDir): - self.__log.critical( \ - "Invalid hod.clusterdir(--hod.clusterdir or -d). " + \ - clusterDir + " : Not a directory") - else: - self.__log.critical( \ - "Invalid hod.clusterdir(--hod.clusterdir or -d). 
" + \ - clusterDir + " : Not tied to any allocated cluster.") - self.__opCode = 15 - - def __print_cluster_info(self, clusterInfo): - keys = clusterInfo.keys() - - _dict = { - 'jobid' : 'Cluster Id', 'min' : 'Nodecount', - 'hdfs' : 'HDFS UI at' , 'mapred' : 'Mapred UI at' - } - - for key in _dict.keys(): - if clusterInfo.has_key(key): - self.__log.info("%s %s" % (_dict[key], clusterInfo[key])) - - if clusterInfo.has_key('ring'): - self.__log.debug("%s\t%s" % ('Ringmaster at ', clusterInfo['ring'])) - - if self.__cfg['hod']['debug'] == 4: - for var in clusterInfo['env'].keys(): - self.__log.debug("%s = %s" % (var, clusterInfo['env'][var])) - - def _op_help(self, arg): - if arg == None or arg.__len__() != 2: - print "hod commands:\n" - for op in self.__ops: - print self.__hodhelp.help(op) - else: - if arg[1] not in self.__ops: - print self.__hodhelp.help('help') - self.__log.critical("Help requested for invalid operation : %s"%arg[1]) - self.__opCode = 3 - else: print self.__hodhelp.help(arg[1]) - - def operation(self): - operation = self.__cfg['hod']['operation'] - try: - opList = self.__check_operation(operation) - if self.__opCode == 0: - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.R_OK,)): - self.__log.critical(INVALID_STATE_FILE_MSGS[0] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return self.__opCode - getattr(self, "_op_%s" % opList[0])(opList) - except HodInterruptException, h: - self.__log.critical("op: %s failed because of a process interrupt." 
\ - % operation) - self.__opCode = HOD_INTERRUPTED_CODE - except: - self.__log.critical("op: %s failed: %s" % (operation, - get_exception_error_string())) - self.__log.debug(get_exception_string()) - - self.__cleanup() - - self.__log.debug("return code: %s" % self.__opCode) - - return self.__opCode - - def script(self): - errorFlag = False - errorMsgs = [] - scriptRet = 0 # return from the script, if run - - script = self.__cfg['hod']['script'] - nodes = self.__cfg['hod']['nodecount'] - clusterDir = self.__cfg['hod']['clusterdir'] - - if not os.path.exists(script): - errorFlag = True - errorMsgs.append("Invalid script file (--hod.script or -s) : " + \ - script + " : No such file") - elif not os.path.isfile(script): - errorFlag = True - errorMsgs.append("Invalid script file (--hod.script or -s) : " + \ - script + " : Not a file.") - else: - isExecutable = os.access(script, os.X_OK) - if not isExecutable: - errorFlag = True - errorMsgs.append("Invalid script file (--hod.script or -s) : " + \ - script + " : Not an executable.") - - if not os.path.exists(clusterDir): - try: - os.makedirs(clusterDir) - except OSError, err: - errorFlag = True - errorMsgs.append("Could not create cluster directory. %s" % (str(err))) - elif not os.path.isdir(clusterDir): - errorFlag = True - errorMsgs.append( \ - "Invalid cluster directory (--hod.clusterdir or -d) : " + \ - clusterDir + " : Not a directory") - - if int(self.__cfg['hod']['nodecount']) < 3 : - errorFlag = True - errorMsgs.append("Invalid nodecount (--hod.nodecount or -n) : " + \ - "Must be >= 3. Given nodes: %s" % nodes) - - if errorFlag: - for msg in errorMsgs: - self.__log.critical(msg) - self.handle_script_exit_code(scriptRet, clusterDir) - sys.exit(3) - - try: - self._op_allocate(('allocate', clusterDir, str(nodes))) - if self.__opCode == 0: - if self.__cfg['hod'].has_key('script-wait-time'): - time.sleep(self.__cfg['hod']['script-wait-time']) - self.__log.debug('Slept for %d time. 
Now going to run the script' % self.__cfg['hod']['script-wait-time']) - if hodInterrupt.isSet(): - self.__log.debug('Hod interrupted - not executing script') - else: - scriptRunner = hadoopScript(clusterDir, - self.__cfg['hod']['original-dir']) - self.__opCode = scriptRunner.run(script) - scriptRet = self.__opCode - self.__log.info("Exit code from running the script: %d" % self.__opCode) - else: - self.__log.critical("Error %d in allocating the cluster. Cannot run the script." % self.__opCode) - - if hodInterrupt.isSet(): - # Got interrupt while executing script. Unsetting it for deallocating - hodInterrupt.setFlag(False) - if self._is_cluster_allocated(clusterDir): - self._op_deallocate(('deallocate', clusterDir)) - except HodInterruptException, h: - self.__log.critical("Script failed because of a process interrupt.") - self.__opCode = HOD_INTERRUPTED_CODE - except: - self.__log.critical("script: %s failed: %s" % (script, - get_exception_error_string())) - self.__log.debug(get_exception_string()) - - self.__cleanup() - - self.handle_script_exit_code(scriptRet, clusterDir) - - return self.__opCode - - def handle_script_exit_code(self, scriptRet, clusterDir): - # We want to give importance to a failed script's exit code, and write out exit code to a file separately - # so users can easily get it if required. This way they can differentiate between the script's exit code - # and hod's exit code. 
- if os.path.exists(clusterDir): - exit_code_file_name = (os.path.join(clusterDir, 'script.exitcode')) - if scriptRet != 0: - exit_code_file = open(exit_code_file_name, 'w') - print >>exit_code_file, scriptRet - exit_code_file.close() - self.__opCode = scriptRet - else: - #ensure script exit code file is not there: - if (os.path.exists(exit_code_file_name)): - os.remove(exit_code_file_name) - -class hodHelp: - def __init__(self): - self.ops = ['allocate', 'deallocate', 'info', 'list','script', 'help'] - - self.usage_strings = \ - { - 'allocate' : 'hod allocate -d <clusterdir> -n <nodecount> [OPTIONS]', - 'deallocate' : 'hod deallocate -d <clusterdir> [OPTIONS]', - 'list' : 'hod list [OPTIONS]', - 'info' : 'hod info -d <clusterdir> [OPTIONS]', - 'script' : - 'hod script -d <clusterdir> -n <nodecount> -s <script> [OPTIONS]', - 'help' : 'hod help <OPERATION>', - } - - self.description_strings = \ - { - 'allocate' : "Allocates a cluster of n nodes using the specified \n" + \ - " cluster directory to store cluster state \n" + \ - " information. The Hadoop site XML is also stored \n" + \ - " in this location.\n", - - 'deallocate' : "Deallocates a cluster using the specified \n" + \ - " cluster directory. 
This operation is also \n" + \ - " required to clean up a dead cluster.\n", - - 'list' : "List all clusters currently allocated by a user, \n" + \ - " along with limited status information and the \n" + \ - " cluster ID.\n", - - 'info' : "Provide detailed information on an allocated cluster.\n", - - 'script' : "Allocates a cluster of n nodes with the given \n" +\ - " cluster directory, runs the specified script \n" + \ - " using the allocated cluster, and then \n" + \ - " deallocates the cluster.\n", - - 'help' : "Print help for the operation and exit.\n" + \ - "Available operations : %s.\n" % self.ops, - } - - def usage(self, op): - return "Usage : " + self.usage_strings[op] + "\n" + \ - "For full description: hod help " + op + ".\n" - - def help(self, op=None): - if op is None: - return "hod <operation> [ARGS] [OPTIONS]\n" + \ - "Available operations : %s\n" % self.ops + \ - "For help on a particular operation : hod help <operation>.\n" + \ - "For all options : hod help options." - else: - return "Usage : " + self.usage_strings[op] + "\n" + \ - "Description : " + self.description_strings[op] + \ - "For all options : hod help options.\n" diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/nodePool.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/nodePool.py deleted file mode 100644 index 4eafddb0c9..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/nodePool.py +++ /dev/null @@ -1,128 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -"""defines nodepool and nodeset as abstract interface for batch system""" -# -*- python -*- - -from hodlib.GridServices.service import * - -class NodeSet: - """a set of nodes as one allocation unit""" - - PENDING, COMMITTED, COMPLETE = range(3) - - def __init__(self, id, numNodes, preferredList, isPreemptee): - self.id = id - self.numNodes = numNodes - self.isPreemptee = isPreemptee - self.preferredList = preferredList - self.cmdDescSet = [] - - def getId(self): - """returns a unique id of the nodeset""" - return self.id - - def registerCommand(self, cmdDesc): - """register a command to the nodeset""" - self.cmdDescSet.append(cmdDesc) - - def getAddrList(self): - """get list of node host names - May return empty list if node set is not allocated yet""" - raise NotImplementedError - - def _getNumNodes(self): - return self.numNodes - - def _isPreemptee(self): - return self.isPreemptee - - def _getPreferredList(self): - return self.preferredList - - def _getCmdSet(self): - return self.cmdDescSet - -class NodePool: - """maintains a collection of node sets as they get allocated. - Also the base class for all kinds of nodepools. 
""" - - def __init__(self, nodePoolDesc, cfg, log): - self.nodePoolDesc = nodePoolDesc - self.nodeSetDict = {} - self._cfg = cfg - self.nextNodeSetId = 0 - self._log = log - - - def newNodeSet(self, numNodes, preferred=[], isPreemptee=True, id=None): - """create a nodeset possibly with asked properties""" - raise NotImplementedError - - def submitNodeSet(self, nodeSet, walltime = None, qosLevel = None, - account = None, resourcelist = None): - """submit the nodeset request to nodepool - return False if error happened""" - raise NotImplementedError - - def pollNodeSet(self, nodeSet): - """return status of node set""" - raise NotImplementedError - - def getWorkers(self): - """return the hosts that comprise this nodepool""" - raise NotImplementedError - - def runWorkers(self, nodeSet = None, args = []): - """Run node set workers.""" - - raise NotImplementedError - - def freeNodeSet(self, nodeset): - """free a node set""" - raise NotImplementedError - - def finalize(self): - """cleans up all nodesets""" - raise NotImplementedError - - def getServiceId(self): - raise NotImplementedError - - def getJobInfo(self, jobId=None): - raise NotImplementedError - - def deleteJob(self, jobId): - """Delete a job, given it's id""" - raise NotImplementedError - - def isJobFeasible(self): - """Check if job can run by looking at any user/job limits""" - raise NotImplementedError - - def updateWorkerInfo(self, workerInfoMap, jobId): - """Update information about the workers started by this NodePool.""" - raise NotImplementedError - - def getAccountString(self): - """Return the account string for this job""" - raise NotImplementedError - - def getNextNodeSetId(self): - id = self.nextNodeSetId - self.nextNodeSetId += 1 - - return id - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- 
a/third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/hodRing.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/hodRing.py deleted file mode 100644 index 02d6dbfca0..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/hodRing.py +++ /dev/null @@ -1,928 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-#!/usr/bin/env python -"""hodring launches hadoop commands on work node and - cleans up all the work dirs afterward -""" -# -*- python -*- -import os, sys, time, shutil, getpass, xml.dom.minidom, xml.dom.pulldom -import socket, sets, urllib, csv, signal, pprint, random, re, httplib - -from xml.dom import getDOMImplementation -from pprint import pformat -from optparse import OptionParser -from urlparse import urlparse -from hodlib.Common.util import local_fqdn, parseEquals, getMapredSystemDirectory, isProcessRunning -from hodlib.Common.tcp import tcpSocket, tcpError - -binfile = sys.path[0] -libdir = os.path.dirname(binfile) -sys.path.append(libdir) - -import hodlib.Common.logger - -from hodlib.GridServices.service import * -from hodlib.Common.util import * -from hodlib.Common.socketServers import threadedHTTPServer -from hodlib.Common.hodsvc import hodBaseService -from hodlib.Common.threads import simpleCommand -from hodlib.Common.xmlrpc import hodXRClient - -mswindows = (sys.platform == "win32") -originalcwd = os.getcwd() - -reHdfsURI = re.compile("hdfs://(.*?:\d+)(.*)") - -class CommandDesc: - """A class that represents the commands that - are run by hodring""" - def __init__(self, dict, log): - self.log = log - self.log.debug("In command desc") - self.log.debug("Done in command desc") - dict.setdefault('argv', []) - dict.setdefault('version', None) - dict.setdefault('envs', {}) - dict.setdefault('workdirs', []) - dict.setdefault('attrs', {}) - dict.setdefault('final-attrs', {}) - dict.setdefault('fg', False) - dict.setdefault('ignorefailures', False) - dict.setdefault('stdin', None) - - self.log.debug("Printing dict") - self._checkRequired(dict) - self.dict = dict - - def _checkRequired(self, dict): - if 'name' not in dict: - raise ValueError, "Command description lacks 'name'" - if 'program' not in dict: - raise ValueError, "Command description lacks 'program'" - if 'pkgdirs' not in dict: - raise ValueError, "Command description lacks 'pkgdirs'" - - def 
getName(self): - return self.dict['name'] - - def getProgram(self): - return self.dict['program'] - - def getArgv(self): - return self.dict['argv'] - - def getVersion(self): - return self.dict['version'] - - def getEnvs(self): - return self.dict['envs'] - - def getPkgDirs(self): - return self.dict['pkgdirs'] - - def getWorkDirs(self): - return self.dict['workdirs'] - - def getAttrs(self): - return self.dict['attrs'] - - def getfinalAttrs(self): - return self.dict['final-attrs'] - - def isForeground(self): - return self.dict['fg'] - - def isIgnoreFailures(self): - return self.dict['ignorefailures'] - - def getStdin(self): - return self.dict['stdin'] - - def parseDesc(str): - - dict = CommandDesc._parseMap(str) - - dict['argv'] = CommandDesc._parseList(dict['argv']) - dict['envs'] = CommandDesc._parseMap(dict['envs']) - dict['pkgdirs'] = CommandDesc._parseList(dict['pkgdirs'], ':') - dict['workdirs'] = CommandDesc._parseList(dict['workdirs'], ':') - dict['attrs'] = CommandDesc._parseMap(dict['attrs']) - dict['final-attrs'] = CommandDesc._parseMap(dict['final-attrs']) - - return CommandDesc(dict) - - parseDesc = staticmethod(parseDesc) - - def _parseList(str, delim = ','): - list = [] - for row in csv.reader([str], delimiter=delim, escapechar='\\', - quoting=csv.QUOTE_NONE, doublequote=False): - list.extend(row) - return list - - _parseList = staticmethod(_parseList) - - def _parseMap(str): - """Parses key value pairs""" - dict = {} - for row in csv.reader([str], escapechar='\\', quoting=csv.QUOTE_NONE, doublequote=False): - for f in row: - [k, v] = f.split('=', 1) - dict[k] = v - return dict - - _parseMap = staticmethod(_parseMap) - -class MRSystemDirectoryManager: - """Class that is responsible for managing the MapReduce system directory""" - - def __init__(self, jtPid, mrSysDir, fsName, hadoopPath, log, retries=120): - self.__jtPid = jtPid - self.__mrSysDir = mrSysDir - self.__fsName = fsName - self.__hadoopPath = hadoopPath - self.__log = log - self.__retries = 
retries - - def toCleanupArgs(self): - return " --jt-pid %s --mr-sys-dir %s --fs-name %s --hadoop-path %s " \ - % (self.__jtPid, self.__mrSysDir, self.__fsName, self.__hadoopPath) - - def removeMRSystemDirectory(self): - - jtActive = isProcessRunning(self.__jtPid) - count = 0 # try for a max of a minute for the process to end - while jtActive and (count<self.__retries): - time.sleep(0.5) - jtActive = isProcessRunning(self.__jtPid) - count += 1 - - if count == self.__retries: - self.__log.warn('Job Tracker did not exit even after a minute. Not going to try and cleanup the system directory') - return - - self.__log.debug('jt is now inactive') - - cmd = "%s dfs -fs hdfs://%s -rmr %s" % (self.__hadoopPath, self.__fsName, \ - self.__mrSysDir) - self.__log.debug('Command to run to remove system directory: %s' % (cmd)) - try: - hadoopCommand = simpleCommand('mr-sys-dir-cleaner', cmd) - hadoopCommand.start() - hadoopCommand.wait() - hadoopCommand.join() - ret = hadoopCommand.exit_code() - if ret != 0: - self.__log.warn("Error in removing MapReduce system directory '%s' from '%s' using path '%s'" \ - % (self.__mrSysDir, self.__fsName, self.__hadoopPath)) - self.__log.warn(pprint.pformat(hadoopCommand.output())) - else: - self.__log.info("Removed MapReduce system directory successfully.") - except: - self.__log.error('Exception while cleaning up MapReduce system directory. May not be cleaned up. 
%s', \ - get_exception_error_string()) - self.__log.debug(get_exception_string()) - - -def createMRSystemDirectoryManager(dict, log): - keys = [ 'jt-pid', 'mr-sys-dir', 'fs-name', 'hadoop-path' ] - for key in keys: - if (not dict.has_key(key)) or (dict[key] is None): - return None - - mrSysDirManager = MRSystemDirectoryManager(int(dict['jt-pid']), dict['mr-sys-dir'], \ - dict['fs-name'], dict['hadoop-path'], log) - return mrSysDirManager - -class HadoopCommand: - """Runs a single hadoop command""" - - def __init__(self, id, desc, tempdir, tardir, log, javahome, - mrSysDir, restart=False): - self.desc = desc - self.log = log - self.javahome = javahome - self.__mrSysDir = mrSysDir - self.program = desc.getProgram() - self.name = desc.getName() - self.workdirs = desc.getWorkDirs() - self.hadoopdir = tempdir - self.confdir = os.path.join(self.hadoopdir, '%d-%s' % (id, self.name), - "confdir") - self.logdir = os.path.join(self.hadoopdir, '%d-%s' % (id, self.name), - "logdir") - self.out = os.path.join(self.logdir, '%s.out' % self.name) - self.err = os.path.join(self.logdir, '%s.err' % self.name) - - self.child = None - self.restart = restart - self.filledInKeyVals = [] - self._createWorkDirs() - self._createHadoopSiteXml() - self._createHadoopLogDir() - self.__hadoopThread = None - self.stdErrContents = "" # store list of contents for returning to user - - def _createWorkDirs(self): - for dir in self.workdirs: - if os.path.exists(dir): - if not os.access(dir, os.F_OK | os.R_OK | os.W_OK | os.X_OK): - raise ValueError, "Workdir %s does not allow rwx permission." 
% (dir) - continue - try: - os.makedirs(dir) - except: - pass - - def getFilledInKeyValues(self): - return self.filledInKeyVals - - def createXML(self, doc, attr, topElement, final): - for k,v in attr.iteritems(): - self.log.debug('_createHadoopSiteXml: ' + str(k) + " " + str(v)) - if ( v == "fillinport" ): - v = "%d" % (ServiceUtil.getUniqRandomPort(low=50000, log=self.log)) - - keyvalpair = '' - if isinstance(v, (tuple, list)): - for item in v: - keyvalpair = "%s%s=%s," % (keyvalpair, k, item) - keyvalpair = keyvalpair[:-1] - else: - keyvalpair = k + '=' + v - - self.filledInKeyVals.append(keyvalpair) - if(k == "mapred.job.tracker"): # total hack for time's sake - keyvalpair = k + "=" + v - self.filledInKeyVals.append(keyvalpair) - - if ( v == "fillinhostport"): - port = "%d" % (ServiceUtil.getUniqRandomPort(low=50000, log=self.log)) - self.log.debug('Setting hostname to: %s' % local_fqdn()) - v = local_fqdn() + ':' + port - - keyvalpair = '' - if isinstance(v, (tuple, list)): - for item in v: - keyvalpair = "%s%s=%s," % (keyvalpair, k, item) - keyvalpair = keyvalpair[:-1] - else: - keyvalpair = k + '=' + v - - self.filledInKeyVals.append(keyvalpair) - if ( v == "fillindir"): - v = self.__mrSysDir - pass - - prop = None - if isinstance(v, (tuple, list)): - for item in v: - prop = self._createXmlElement(doc, k, item, "No description", final) - topElement.appendChild(prop) - else: - if k == 'fs.default.name': - prop = self._createXmlElement(doc, k, "hdfs://" + v, "No description", final) - else: - prop = self._createXmlElement(doc, k, v, "No description", final) - topElement.appendChild(prop) - - def _createHadoopSiteXml(self): - if self.restart: - if not os.path.exists(self.confdir): - os.makedirs(self.confdir) - else: - assert os.path.exists(self.confdir) == False - os.makedirs(self.confdir) - - implementation = getDOMImplementation() - doc = implementation.createDocument('', 'configuration', None) - comment = doc.createComment("This is an auto generated 
hadoop-site.xml, do not modify") - topElement = doc.documentElement - topElement.appendChild(comment) - - finalAttr = self.desc.getfinalAttrs() - self.createXML(doc, finalAttr, topElement, True) - attr = {} - attr1 = self.desc.getAttrs() - for k,v in attr1.iteritems(): - if not finalAttr.has_key(k): - attr[k] = v - self.createXML(doc, attr, topElement, False) - - - siteName = os.path.join(self.confdir, "hadoop-site.xml") - sitefile = file(siteName, 'w') - print >> sitefile, topElement.toxml() - sitefile.close() - self.log.debug('created %s' % (siteName)) - - def _createHadoopLogDir(self): - if self.restart: - if not os.path.exists(self.logdir): - os.makedirs(self.logdir) - else: - assert os.path.exists(self.logdir) == False - os.makedirs(self.logdir) - - def _createXmlElement(self, doc, name, value, description, final): - prop = doc.createElement("property") - nameP = doc.createElement("name") - string = doc.createTextNode(name) - nameP.appendChild(string) - valueP = doc.createElement("value") - string = doc.createTextNode(value) - valueP.appendChild(string) - desc = doc.createElement("description") - string = doc.createTextNode(description) - desc.appendChild(string) - prop.appendChild(nameP) - prop.appendChild(valueP) - prop.appendChild(desc) - if (final): - felement = doc.createElement("final") - string = doc.createTextNode("true") - felement.appendChild(string) - prop.appendChild(felement) - pass - - return prop - - def getMRSystemDirectoryManager(self): - return MRSystemDirectoryManager(self.__hadoopThread.getPid(), self.__mrSysDir, \ - self.desc.getfinalAttrs()['fs.default.name'], \ - self.path, self.log) - - def run(self, dir): - status = True - args = [] - desc = self.desc - - self.log.debug(pprint.pformat(desc.dict)) - - - self.log.debug("Got package dir of %s" % dir) - - self.path = os.path.join(dir, self.program) - - self.log.debug("path: %s" % self.path) - args.append(self.path) - args.extend(desc.getArgv()) - envs = desc.getEnvs() - fenvs = os.environ 
- - for k, v in envs.iteritems(): - fenvs[k] = v - - if envs.has_key('HADOOP_OPTS'): - fenvs['HADOOP_OPTS'] = envs['HADOOP_OPTS'] - self.log.debug("HADOOP_OPTS : %s" % fenvs['HADOOP_OPTS']) - - fenvs['JAVA_HOME'] = self.javahome - fenvs['HADOOP_CONF_DIR'] = self.confdir - fenvs['HADOOP_LOG_DIR'] = self.logdir - - self.log.info(pprint.pformat(fenvs)) - - hadoopCommand = '' - for item in args: - hadoopCommand = "%s%s " % (hadoopCommand, item) - - # Redirecting output and error to self.out and self.err - hadoopCommand = hadoopCommand + ' 1>%s 2>%s ' % (self.out, self.err) - - self.log.debug('running command: %s' % (hadoopCommand)) - self.log.debug('hadoop env: %s' % fenvs) - self.log.debug('Command stdout will be redirected to %s ' % self.out + \ - 'and command stderr to %s' % self.err) - - self.__hadoopThread = simpleCommand('hadoop', hadoopCommand, env=fenvs) - self.__hadoopThread.start() - - while self.__hadoopThread.stdin == None: - time.sleep(.2) - self.log.debug("hadoopThread still == None ...") - - input = desc.getStdin() - self.log.debug("hadoop input: %s" % input) - if input: - if self.__hadoopThread.is_running(): - print >>self.__hadoopThread.stdin, input - else: - self.log.error("hadoop command failed to start") - - self.__hadoopThread.stdin.close() - - self.log.debug("isForground: %s" % desc.isForeground()) - if desc.isForeground(): - self.log.debug("Waiting on hadoop to finish...") - self.__hadoopThread.wait() - - self.log.debug("Joining hadoop thread...") - self.__hadoopThread.join() - if self.__hadoopThread.exit_code() != 0: - status = False - else: - status = self.getCommandStatus() - - self.log.debug("hadoop run status: %s" % status) - - if status == False: - self.handleFailedCommand() - - if (status == True) or (not desc.isIgnoreFailures()): - return status - else: - self.log.error("Ignoring Failure") - return True - - def kill(self): - self.__hadoopThread.kill() - if self.__hadoopThread: - self.__hadoopThread.join() - - def addCleanup(self, list): - 
list.extend(self.workdirs) - list.append(self.confdir) - - def getCommandStatus(self): - status = True - ec = self.__hadoopThread.exit_code() - if (ec != 0) and (ec != None): - status = False - return status - - def handleFailedCommand(self): - self.log.error('hadoop error: %s' % ( - self.__hadoopThread.exit_status_string())) - # read the contents of redirected stderr to print information back to user - if os.path.exists(self.err): - f = None - try: - f = open(self.err) - lines = f.readlines() - # format - for line in lines: - self.stdErrContents = "%s%s" % (self.stdErrContents, line) - finally: - if f is not None: - f.close() - self.log.error('See %s.out and/or %s.err for details. They are ' % \ - (self.name, self.name) + \ - 'located at subdirectories under either ' + \ - 'hodring.work-dirs or hodring.log-destination-uri.') - -class HodRing(hodBaseService): - """The main class for hodring that - polls the commands it runs""" - def __init__(self, config): - hodBaseService.__init__(self, 'hodring', config['hodring']) - self.log = self.logs['main'] - self._http = None - self.__pkg = None - self.__pkgDir = None - self.__tempDir = None - self.__running = {} - self.__hadoopLogDirs = [] - self.__init_temp_dir() - - def __init_temp_dir(self): - self.__tempDir = os.path.join(self._cfg['temp-dir'], - "%s.%s.hodring" % (self._cfg['userid'], - self._cfg['service-id'])) - if not os.path.exists(self.__tempDir): - os.makedirs(self.__tempDir) - os.chdir(self.__tempDir) - - def __fetch(self, url, spath): - retry = 3 - success = False - while (retry != 0 and success != True): - try: - input = urllib.urlopen(url) - bufsz = 81920 - buf = input.read(bufsz) - out = open(spath, 'w') - while len(buf) > 0: - out.write(buf) - buf = input.read(bufsz) - input.close() - out.close() - success = True - except: - self.log.debug("Failed to copy file") - retry = retry - 1 - if (retry == 0 and success != True): - raise IOError, "Failed to copy the files" - - - def __get_name(self, addr): - 
parsedUrl = urlparse(addr) - path = parsedUrl[2] - split = path.split('/', 1) - return split[1] - - def __get_dir(self, name): - """Return the root directory inside the tarball - specified by name. Assumes that the tarball begins - with a root directory.""" - import tarfile - myTarFile = tarfile.open(name) - hadoopPackage = myTarFile.getnames()[0] - self.log.debug("tarball name : %s hadoop package name : %s" %(name,hadoopPackage)) - return hadoopPackage - - def getRunningValues(self): - return self.__running.values() - - def getTempDir(self): - return self.__tempDir - - def getHadoopLogDirs(self): - return self.__hadoopLogDirs - - def __download_package(self, ringClient): - self.log.debug("Found download address: %s" % - self._cfg['download-addr']) - try: - addr = 'none' - downloadTime = self._cfg['tarball-retry-initial-time'] # download time depends on tarball size and network bandwidth - - increment = 0 - - addr = ringClient.getTarList(self.hostname) - - while(addr == 'none'): - rand = self._cfg['tarball-retry-initial-time'] + increment + \ - random.uniform(0,self._cfg['tarball-retry-interval']) - increment = increment + 1 - self.log.debug("got no tarball. Retrying again in %s seconds." % rand) - time.sleep(rand) - addr = ringClient.getTarList(self.hostname) - - - self.log.debug("got this address %s" % addr) - - tarName = self.__get_name(addr) - self.log.debug("tar package name: %s" % tarName) - - fetchPath = os.path.join(os.getcwd(), tarName) - self.log.debug("fetch path: %s" % fetchPath) - - self.__fetch(addr, fetchPath) - self.log.debug("done fetching") - - tarUrl = "http://%s:%d/%s" % (self._http.server_address[0], - self._http.server_address[1], - tarName) - try: - ringClient.registerTarSource(self.hostname, tarUrl,addr) - #ringClient.tarDone(addr) - except KeyError, e: - self.log.error("registerTarSource and tarDone failed: ", e) - raise KeyError(e) - - check = untar(fetchPath, os.getcwd()) - - if (check == False): - raise IOError, "Untarring failed." 
- - self.__pkg = self.__get_dir(tarName) - self.__pkgDir = os.path.join(os.getcwd(), self.__pkg) - except Exception, e: - self.log.error("Failed download tar package: %s" % - get_exception_error_string()) - raise Exception(e) - - def __run_hadoop_commands(self, restart=True): - id = 0 - for desc in self._cfg['commanddesc']: - self.log.debug(pprint.pformat(desc.dict)) - mrSysDir = getMapredSystemDirectory(self._cfg['mapred-system-dir-root'], - self._cfg['userid'], self._cfg['service-id']) - self.log.debug('mrsysdir is %s' % mrSysDir) - cmd = HadoopCommand(id, desc, self.__tempDir, self.__pkgDir, self.log, - self._cfg['java-home'], mrSysDir, restart) - - self.__hadoopLogDirs.append(cmd.logdir) - self.log.debug("hadoop log directory: %s" % self.__hadoopLogDirs) - - try: - # if the tarball isn't there, we use the pkgs dir given. - if self.__pkgDir == None: - pkgdir = desc.getPkgDirs() - else: - pkgdir = self.__pkgDir - - self.log.debug('This is the packcage dir %s ' % (pkgdir)) - if not cmd.run(pkgdir): - addnInfo = "" - if cmd.stdErrContents is not "": - addnInfo = " Information from stderr of the command:\n%s" % (cmd.stdErrContents) - raise Exception("Could not launch the %s using %s/bin/hadoop.%s" % (desc.getName(), pkgdir, addnInfo)) - except Exception, e: - self.log.debug("Exception running hadoop command: %s\n%s" % (get_exception_error_string(), get_exception_string())) - self.__running[id] = cmd - raise Exception(e) - - id += 1 - if desc.isForeground(): - continue - self.__running[id-1] = cmd - - # ok.. now command is running. If this HodRing got jobtracker, - # Check if it is ready for accepting jobs, and then only return - self.__check_jobtracker(desc, id-1, pkgdir) - - def __check_jobtracker(self, desc, id, pkgdir): - # Check jobtracker status. Return properly if it is ready to accept jobs. - # Currently Checks for Jetty to come up, the last thing that can be checked - # before JT completes initialisation. 
To be perfectly reliable, we need - # hadoop support - name = desc.getName() - if name == 'jobtracker': - # Yes I am the Jobtracker - self.log.debug("Waiting for jobtracker to initialise") - version = desc.getVersion() - self.log.debug("jobtracker version : %s" % version) - hadoopCmd = self.getRunningValues()[id] - attrs = hadoopCmd.getFilledInKeyValues() - attrs = parseEquals(attrs) - jobTrackerAddr = attrs['mapred.job.tracker'] - self.log.debug("jobtracker rpc server : %s" % jobTrackerAddr) - if version < 16: - jettyAddr = jobTrackerAddr.split(':')[0] + ':' + \ - attrs['mapred.job.tracker.info.port'] - else: - jettyAddr = attrs['mapred.job.tracker.http.address'] - self.log.debug("Jobtracker jetty : %s" % jettyAddr) - - # Check for Jetty to come up - # For this do a http head, and then look at the status - defaultTimeout = socket.getdefaulttimeout() - # socket timeout isn`t exposed at httplib level. Setting explicitly. - socket.setdefaulttimeout(1) - sleepTime = 0.5 - jettyStatus = False - jettyStatusmsg = "" - while sleepTime <= 32: - # There is a possibility that the command might fail after a while. - # This code will check if the command failed so that a better - # error message can be returned to the user. - if not hadoopCmd.getCommandStatus(): - self.log.critical('Hadoop command found to have failed when ' \ - 'checking for jobtracker status') - hadoopCmd.handleFailedCommand() - addnInfo = "" - if hadoopCmd.stdErrContents is not "": - addnInfo = " Information from stderr of the command:\n%s" \ - % (hadoopCmd.stdErrContents) - raise Exception("Could not launch the %s using %s/bin/hadoop.%s" \ - % (desc.getName(), pkgdir, addnInfo)) - - try: - jettyConn = httplib.HTTPConnection(jettyAddr) - jettyConn.request("HEAD", "/jobtracker.jsp") - # httplib inherently retries the following till socket timeout - resp = jettyConn.getresponse() - if resp.status != 200: - # Some problem? 
- jettyStatus = False - jettyStatusmsg = "Jetty gave a non-200 response to a HTTP-HEAD" +\ - " request. HTTP Status (Code, Msg): (%s, %s)" % \ - ( resp.status, resp.reason ) - break - else: - self.log.info("Jetty returned a 200 status (%s)" % resp.reason) - self.log.info("JobTracker successfully initialised") - return - except socket.error: - self.log.debug("Jetty gave a socket error. Sleeping for %s" \ - % sleepTime) - time.sleep(sleepTime) - sleepTime = sleepTime * 2 - except Exception, e: - jettyStatus = False - jettyStatusmsg = ("Process(possibly other than jetty) running on" + \ - " port assigned to jetty is returning invalid http response") - break - socket.setdefaulttimeout(defaultTimeout) - if not jettyStatus: - self.log.critical("Jobtracker failed to initialise.") - if jettyStatusmsg: - self.log.critical( "Reason: %s" % jettyStatusmsg ) - else: self.log.critical( "Reason: Jetty failed to give response") - raise Exception("JobTracker failed to initialise") - - def stop(self): - self.log.debug("Entered hodring stop.") - if self._http: - self.log.debug("stopping http server...") - self._http.stop() - - self.log.debug("call hodsvcrgy stop...") - hodBaseService.stop(self) - - def _xr_method_clusterStart(self, initialize=True): - return self.clusterStart(initialize) - - def _xr_method_clusterStop(self): - return self.clusterStop() - - def start(self): - """Run and maintain hodring commands""" - - try: - if self._cfg.has_key('download-addr'): - self._http = threadedHTTPServer('', self._cfg['http-port-range']) - self.log.info("Starting http server...") - self._http.serve_forever() - self.log.debug("http://%s:%d" % (self._http.server_address[0], - self._http.server_address[1])) - - hodBaseService.start(self) - - ringXRAddress = None - if self._cfg.has_key('ringmaster-xrs-addr'): - ringXRAddress = "http://%s:%s/" % (self._cfg['ringmaster-xrs-addr'][0], - self._cfg['ringmaster-xrs-addr'][1]) - self.log.debug("Ringmaster at %s" % ringXRAddress) - - 
self.log.debug("Creating service registry XML-RPC client.") - serviceClient = hodXRClient(to_http_url( - self._cfg['svcrgy-addr'])) - if ringXRAddress == None: - self.log.info("Did not get ringmaster XML-RPC address. Fetching information from service registry.") - ringList = serviceClient.getServiceInfo(self._cfg['userid'], - self._cfg['service-id'], 'ringmaster', 'hod') - - self.log.debug(pprint.pformat(ringList)) - - if len(ringList): - if isinstance(ringList, list): - ringXRAddress = ringList[0]['xrs'] - - count = 0 - while (ringXRAddress == None and count < 3000): - ringList = serviceClient.getServiceInfo(self._cfg['userid'], - self._cfg['service-id'], 'ringmaster', 'hod') - - if len(ringList): - if isinstance(ringList, list): - ringXRAddress = ringList[0]['xrs'] - - count = count + 1 - time.sleep(.2) - - if ringXRAddress == None: - raise Exception("Could not get ringmaster XML-RPC server address.") - - self.log.debug("Creating ringmaster XML-RPC client.") - ringClient = hodXRClient(ringXRAddress) - - id = self.hostname + "_" + str(os.getpid()) - - if 'download-addr' in self._cfg: - self.__download_package(ringClient) - else: - self.log.debug("Did not find a download address.") - - cmdlist = [] - firstTime = True - increment = 0 - hadoopStartupTime = 2 - - cmdlist = ringClient.getCommand(id) - - while (cmdlist == []): - if firstTime: - sleepTime = increment + self._cfg['cmd-retry-initial-time'] + hadoopStartupTime\ - + random.uniform(0,self._cfg['cmd-retry-interval']) - firstTime = False - else: - sleepTime = increment + self._cfg['cmd-retry-initial-time'] + \ - + random.uniform(0,self._cfg['cmd-retry-interval']) - self.log.debug("Did not get command list. Waiting for %s seconds." 
% (sleepTime)) - time.sleep(sleepTime) - increment = increment + 1 - cmdlist = ringClient.getCommand(id) - - self.log.debug(pformat(cmdlist)) - cmdDescs = [] - for cmds in cmdlist: - cmdDescs.append(CommandDesc(cmds['dict'], self.log)) - - self._cfg['commanddesc'] = cmdDescs - - self.log.info("Running hadoop commands...") - - self.__run_hadoop_commands(False) - - masterParams = [] - for k, cmd in self.__running.iteritems(): - masterParams.extend(cmd.filledInKeyVals) - - self.log.debug("printing getparams") - self.log.debug(pformat(id)) - self.log.debug(pformat(masterParams)) - # when this is on a required host, the ringMaster already has our masterParams - if(len(masterParams) > 0): - ringClient.addMasterParams(id, masterParams) - except Exception, e: - raise Exception(e) - - def clusterStart(self, initialize=True): - """Start a stopped mapreduce/dfs cluster""" - if initialize: - self.log.debug('clusterStart Method Invoked - Initialize') - else: - self.log.debug('clusterStart Method Invoked - No Initialize') - try: - self.log.debug("Creating service registry XML-RPC client.") - serviceClient = hodXRClient(to_http_url(self._cfg['svcrgy-addr']), - None, None, 0, 0, 0) - - self.log.info("Fetching ringmaster information from service registry.") - count = 0 - ringXRAddress = None - while (ringXRAddress == None and count < 3000): - ringList = serviceClient.getServiceInfo(self._cfg['userid'], - self._cfg['service-id'], 'ringmaster', 'hod') - if len(ringList): - if isinstance(ringList, list): - ringXRAddress = ringList[0]['xrs'] - count = count + 1 - - if ringXRAddress == None: - raise Exception("Could not get ringmaster XML-RPC server address.") - - self.log.debug("Creating ringmaster XML-RPC client.") - ringClient = hodXRClient(ringXRAddress, None, None, 0, 0, 0) - - id = self.hostname + "_" + str(os.getpid()) - - cmdlist = [] - if initialize: - if 'download-addr' in self._cfg: - self.__download_package(ringClient) - else: - self.log.debug("Did not find a download 
address.") - while (cmdlist == []): - cmdlist = ringClient.getCommand(id) - else: - while (cmdlist == []): - cmdlist = ringClient.getAdminCommand(id) - - self.log.debug(pformat(cmdlist)) - cmdDescs = [] - for cmds in cmdlist: - cmdDescs.append(CommandDesc(cmds['dict'], self.log)) - - self._cfg['commanddesc'] = cmdDescs - - if initialize: - self.log.info("Running hadoop commands again... - Initialize") - self.__run_hadoop_commands() - masterParams = [] - for k, cmd in self.__running.iteritems(): - self.log.debug(cmd) - masterParams.extend(cmd.filledInKeyVals) - - self.log.debug("printing getparams") - self.log.debug(pformat(id)) - self.log.debug(pformat(masterParams)) - # when this is on a required host, the ringMaster already has our masterParams - if(len(masterParams) > 0): - ringClient.addMasterParams(id, masterParams) - else: - self.log.info("Running hadoop commands again... - No Initialize") - self.__run_hadoop_commands() - - except: - self.log.error(get_exception_string()) - - return True - - def clusterStop(self): - """Stop a running mapreduce/dfs cluster without stopping the hodring""" - self.log.debug('clusterStop Method Invoked') - try: - for cmd in self.__running.values(): - cmd.kill() - self.__running = {} - except: - self.log.error(get_exception_string()) - - return True diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/torque.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/torque.py deleted file mode 100644 index 49b03dcfbe..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/torque.py +++ /dev/null @@ -1,334 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-"""Maui/Torque implementation of NodePool""" -# -*- python -*- - -import os, sys, csv, socket, time, re, pprint - -from hodlib.Hod.nodePool import * -from hodlib.Schedulers.torque import torqueInterface -from hodlib.Common.threads import simpleCommand -from hodlib.Common.util import get_exception_string, args_to_string, local_fqdn, \ - TORQUE_USER_LIMITS_COMMENT_FIELD - -class TorqueNodeSet(NodeSet): - def __init__(self, id, numNodes, preferredList, isPreemptee): - NodeSet.__init__(self, id, numNodes, preferredList, isPreemptee) - self.qsubId = None - self.addrList = [] - - def _setQsubId(self, qsubId): - self.qsubId = qsubId - - def _setAddrList(self, addrList): - self.addrList = addrList - - def getAddrList(self): - return self.addrList - -class TorquePool(NodePool): - def __init__(self, nodePoolDesc, cfg, log): - NodePool.__init__(self, nodePoolDesc, cfg, log) - - environ = os.environ.copy() - - if self._cfg['resource_manager'].has_key('pbs-server'): - environ['PBS_DEFAULT'] = self._cfg['resource_manager']['pbs-server'] - - self.__torque = torqueInterface( - self._cfg['resource_manager']['batch-home'], environ, self._log) - - def getAccountString(self): - account = '' - if self._cfg['resource_manager'].has_key('pbs-account'): - account = self._cfg['resource_manager']['pbs-account'] - return account - - def __gen_submit_params(self, nodeSet, walltime = None, qosLevel = None, - account = None): - argList = [] - stdinList = [] - - npd = self.nodePoolDesc - - def gen_stdin_list(): - # Here we are basically generating the standard input for qsub. - # Specifically a script to exec ringmaster. 
- stdinList.append('#!/bin/sh') - - ringBin = os.path.join(self._cfg['hod']['base-dir'], 'bin', - 'ringmaster') - ringArgs = [ringBin,] - ringArgs.extend(self._cfg.get_args(exclude=('hod'))) - - ringMasterCommand = args_to_string(ringArgs) - - self._log.debug("ringmaster cmd: %s" % ringMasterCommand) - - stdinList.append(ringMasterCommand) - - def gen_arg_list(): - def process_qsub_attributes(): - rawAttributes = self.nodePoolDesc.getAttrs() - - # 'W:x' is used to specify torque management extentensions ie -W x= ... - resourceManagementExtensions = '' - if 'W:x' in rawAttributes: - resourceManagementExtensions = rawAttributes['W:x'] - - if qosLevel: - if len(resourceManagementExtensions) > 0: - resourceManagementExtensions += ';' - resourceManagementExtensions += 'QOS:%s' % (qosLevel) - - rawAttributes['W:x'] = resourceManagementExtensions - - hostname = local_fqdn() - - # key values are expected to have string values. - rawAttributes['l:nodes'] = "%s" % nodeSet._getNumNodes() - - if walltime: - rawAttributes['l:walltime'] = "%s" % walltime - - #create a dict of dictionaries for - # various arguments of torque - cmds = {} - for key in rawAttributes: - value = rawAttributes[key] - - if key.find(':') == -1: - raise ValueError, 'Syntax error: missing colon after %s in %s=%s' % ( - key, key, value) - - [option, subOption] = key.split(':', 1) - if not option in cmds: - cmds[option] = {} - cmds[option][subOption] = value - - opts = [] - #create a string from this - #dictionary of dictionaries createde above - for k in cmds: - csv = [] - nv = cmds[k] - for n in nv: - v = nv[n] - if len(n) == 0: - csv.append(v) - else: - csv.append('%s=%s' % (n, v)) - opts.append('-%s' % (k)) - opts.append(','.join(csv)) - - for option in cmds: - commandList = [] - for subOption in cmds[option]: - value = cmds[option][subOption] - if len(subOption) == 0: - commandList.append(value) - else: - commandList.append("%s=%s" % (subOption, value)) - opts.append('-%s' % option) - 
opts.append(','.join(commandList)) - - return opts - - pkgdir = npd.getPkgDir() - - qsub = os.path.join(pkgdir, 'bin', 'qsub') - sdd = self._cfg['servicedesc'] - - gsvc = None - for key in sdd: - gsvc = sdd[key] - break - - argList.extend(process_qsub_attributes()) - - argList.extend(('-N', '"' + self._cfg['hod']['title'] + '"')) - argList.extend(('-r','n')) - - if 'pbs-user' in self._cfg['resource_manager']: - argList.extend(('-u', self._cfg['resource_manager']['pbs-user'])) - - argList.extend(('-d','/tmp/')) - if 'queue' in self._cfg['resource_manager']: - queue = self._cfg['resource_manager']['queue'] - argList.extend(('-q',queue)) - - # In HOD 0.4, we pass in an account string only if it is mentioned. - # Also, we don't append userid to the account string, as HOD jobs run as the - # user running them, not as 'HOD' user. - if self._cfg['resource_manager'].has_key('pbs-account'): - argList.extend(('-A', (self._cfg['resource_manager']['pbs-account']))) - - if 'env-vars' in self._cfg['resource_manager']: - qsub_envs = self._cfg['resource_manager']['env-vars'] - argList.extend(('-v', self.__keyValToString(qsub_envs))) - - gen_arg_list() - gen_stdin_list() - - return argList, stdinList - - def __keyValToString(self, keyValList): - ret = "" - for key in keyValList: - ret = "%s%s=%s," % (ret, key, keyValList[key]) - return ret[:-1] - - def newNodeSet(self, numNodes, preferred=[], isPreemptee=True, id=None): - if not id: - id = self.getNextNodeSetId() - - nodeSet = TorqueNodeSet(id, numNodes, preferred, isPreemptee) - - self.nodeSetDict[nodeSet.getId()] = nodeSet - - return nodeSet - - def submitNodeSet(self, nodeSet, walltime = None, qosLevel = None, - account = None): - - argList, stdinList = self.__gen_submit_params(nodeSet, walltime, qosLevel, - account) - - jobId, exitCode = self.__torque.qsub(argList, stdinList) - - ## UNUSED CODE: LINE ## - nodeSet.qsubId = jobId - - return jobId, exitCode - - def freeNodeSet(self, nodeSet): - - exitCode = 
self.deleteJob(nodeSet.getId()) - - del self.nodeSetDict[nodeSet.getId()] - - return exitCode - - def finalize(self): - status = 0 - exitCode = 0 - for nodeSet in self.nodeSetDict.values(): - exitCode = self.freeNodeSet(nodeSet) - - if exitCode > 0 and exitCode != 153: - status = 4 - - return status - - ## UNUSED METHOD ?? ## - def getWorkers(self): - hosts = [] - - qstatInfo = self.__torque(self.getServiceId()) - if qstatInfo: - hosts = qstatInfop['exec_host'] - - return hosts - - ## UNUSED METHOD ?? ## - def pollNodeSet(self, nodeSet): - status = NodeSet.COMPLETE - nodeSet = self.nodeSetDict[0] - - qstatInfo = self.__torque(self.getServiceId()) - - if qstatMap: - jobstate = qstatMap['job_state'] - exechost = qstatMap['exec_host'] - - if jobstate == 'Q': - status = NodeSet.PENDING - elif exechost == None: - status = NodeSet.COMMITTED - else: - nodeSet._setAddrList(exec_host) - - return status - - def getServiceId(self): - id = None - - nodeSets = self.nodeSetDict.values() - if len(nodeSets): - id = nodeSets[0].qsubId - - if id == None: - id = os.getenv('PBS_JOBID') - - return id - - def getJobInfo(self, jobId=None): - - jobNonExistentErrorCode = 153 - self.__jobInfo = { 'job_state' : False } - - if jobId == None: - jobId = self.getServiceId() - - qstatInfo, exitCode = self.__torque.qstat(jobId) - if exitCode == 0: - self.__jobInfo = qstatInfo - elif exitCode == jobNonExistentErrorCode: - # This really means that the job completed - # However, setting only job_state for now, not - # any other attributes, as none seem required. 
- self.__jobInfo = { 'job_state' : 'C' } - - return self.__jobInfo - - def deleteJob(self, jobId): - exitCode = self.__torque.qdel(jobId) - return exitCode - - def isJobFeasible(self): - comment = None - msg = None - if self.__jobInfo.has_key('comment'): - comment = self.__jobInfo['comment'] - try: - if comment: - commentField = re.compile(self._cfg['hod']['job-feasibility-attr']) - match = commentField.search(comment) - if match: - reqUsage = int(match.group(1)) - currentUsage = int(match.group(2)) - maxUsage = int(match.group(3)) - msg = "Current Usage:%s, Requested:%s, Maximum Limit:%s " % \ - (currentUsage, reqUsage, maxUsage) - if reqUsage > maxUsage: - return "Never", msg - if reqUsage + currentUsage > maxUsage: - return False, msg - except Exception, e: - self._log.error("Error in isJobFeasible : %s" %e) - raise Exception(e) - return True, msg - - def runWorkers(self, args): - return self.__torque.pbsdsh(args) - - def updateWorkerInfo(self, workerInfoMap, jobId): - workerInfoStr = '' - for key in workerInfoMap.keys(): - workerInfoStr = '%s,%s:%s' % (workerInfoStr, key, workerInfoMap[key]) - exitCode = self.__torque.qalter("notes", workerInfoStr[1:], jobId) - return exitCode - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/idleJobTracker.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/idleJobTracker.py deleted file mode 100644 index 33f145baab..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/idleJobTracker.py +++ /dev/null @@ -1,218 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import os, re, time -from hodlib.Common.threads import loop, func -from hodlib.Common.threads import simpleCommand -from hodlib.Common.util import get_exception_string, hadoopVersion - -class HadoopJobStatus: - """This class represents the status of a single Hadoop job""" - - def __init__(self, jobId, status): - self.__jobId = jobId - self.__status = status - - def getJobId(self): - return self.__jobId - - def getStatus(self): - return self.__status - -class HadoopClientException(Exception): - """This class represents an exception that is raised when we fail in - running the job client.""" - - def __init__(self, errorCode): - self.errorCode = errorCode - -class JobTrackerMonitor: - """This class monitors the JobTracker of an allocated cluster - periodically to detect whether it is idle. If it is found - to be idle for more than a configured limit, it calls back - registered handlers who can act upon the idle cluster.""" - - def __init__(self, log, idleJTHandler, interval, limit, - hadoopDir, javaHome, servInfoProvider): - self.__log = log - self.__idlenessLimit = limit - self.__idleJobTrackerHandler = idleJTHandler - self.__hadoopDir = hadoopDir - hadoopPath = os.path.join(self.__hadoopDir, "bin", "hadoop") - #hadoop directory can be from pkgs or a temp location like tarball. Verify once. - if not os.path.exists(hadoopPath): - raise Exception('Invalid Hadoop path specified: %s' % hadoopPath) - self.__javaHome = javaHome - # Note that when this object is created, we don't yet know the JT URL. - # The service info provider will be polled until we get the URL. - self.__serviceInfoProvider = servInfoProvider - self.__jobCountRegExp = re.compile("([0-9]+) jobs currently running.*") - self.__jobStatusRegExp = re.compile("(\S+)\s+(\d)\s+\d+\s+\S+$") - self.__firstIdleTime = 0 - self.__hadoop15Version = { 'major' : '0', 'minor' : '15' } - #Assumption: we are not going to support versions older than 0.15 for Idle Job tracker. 
- if not self.__isCompatibleHadoopVersion(self.__hadoop15Version): - raise Exception('Incompatible Hadoop Version: Cannot check status') - self.__stopFlag = False - self.__jtURLFinderThread = func(name='JTURLFinderThread', functionRef=self.getJobTrackerURL) - self.__jtMonitorThread = loop(name='JTMonitorThread', functionRef=self.monitorJobTracker, - sleep=interval) - self.__jobTrackerURL = None - - def start(self): - """This method starts a thread that will determine the JobTracker URL""" - self.__jtURLFinderThread.start() - - def stop(self): - self.__log.debug('Joining the monitoring thread.') - self.__stopFlag = True - if self.__jtMonitorThread.isAlive(): - self.__jtMonitorThread.join() - self.__log.debug('Joined the monitoring thread.') - - def getJobTrackerURL(self): - """This method periodically checks the service info provider for the JT URL""" - self.__jobTrackerURL = self.__serviceInfoProvider.getServiceAddr('mapred') - while not self.__stopFlag and not self.__isValidJobTrackerURL(): - time.sleep(10) - if not self.__stopFlag: - self.__jobTrackerURL = self.__serviceInfoProvider.getServiceAddr('mapred') - else: - break - - if self.__isValidJobTrackerURL(): - self.__log.debug('Got URL %s. Starting monitoring' % self.__jobTrackerURL) - self.__jtMonitorThread.start() - - def monitorJobTracker(self): - """This method is periodically called to monitor the JobTracker of the cluster.""" - try: - if self.__isIdle(): - if self.__idleJobTrackerHandler: - self.__log.info('Detected cluster as idle. Calling registered callback handler.') - self.__idleJobTrackerHandler.handleIdleJobTracker() - except: - self.__log.debug('Exception while monitoring job tracker. 
%s' % get_exception_string()) - - def getJobsStatus(self): - """This method should return the status of all jobs that are run on the HOD allocated - hadoop cluster""" - jobStatusList = [] - try: - hadoop16Version = { 'major' : '0', 'minor' : '16' } - if self.__isCompatibleHadoopVersion(hadoop16Version): - jtStatusCommand = self.__initStatusCommand(option='-list all') - jtStatusCommand.start() - jtStatusCommand.wait() - jtStatusCommand.join() - if jtStatusCommand.exit_code() == 0: - for line in jtStatusCommand.output(): - jobStatus = self.__extractJobStatus(line) - if jobStatus is not None: - jobStatusList.append(jobStatus) - except: - self.__log.debug('Exception while getting job statuses. %s' % get_exception_string()) - return jobStatusList - - def __isValidJobTrackerURL(self): - """This method checks that the passed in URL is not one of the special case strings - returned by the getServiceAddr API""" - return ((self.__jobTrackerURL != None) and (self.__jobTrackerURL != 'not found') \ - and (not self.__jobTrackerURL.startswith('Error'))) - - def __extractJobStatus(self, line): - """This method parses an output line from the job status command and creates - the JobStatus object if there is a match""" - jobStatus = None - line = line.strip() - jsMatch = self.__jobStatusRegExp.match(line) - if jsMatch: - jobStatus = HadoopJobStatus(jsMatch.group(1), int(jsMatch.group(2))) - return jobStatus - - def __isIdle(self): - """This method checks if the JobTracker is idle beyond a certain limit.""" - jobCount = 0 - err = False - - try: - jobCount = self.__getJobCount() - except HadoopClientException, hce: - self.__log.debug('HadoopClientException handled in getting job count. 
\ - Error code: %s' % hce.errorCode) - err = True - - if (jobCount==0) or err: - if self.__firstIdleTime == 0: - #detecting idleness for the first time - self.__firstIdleTime = time.time() - else: - if ((time.time()-self.__firstIdleTime) >= self.__idlenessLimit): - self.__log.info('Idleness limit crossed for cluster') - return True - else: - # reset idleness time - self.__firstIdleTime = 0 - - return False - - def __getJobCount(self): - """This method executes the hadoop job -list command and parses the output to detect - the number of running jobs.""" - - # We assume here that the poll interval is small enough to detect running jobs. - # If jobs start and stop within the poll interval, the cluster would be incorrectly - # treated as idle. Hadoop 2266 will provide a better mechanism than this. - jobs = -1 - jtStatusCommand = self.__initStatusCommand() - jtStatusCommand.start() - jtStatusCommand.wait() - jtStatusCommand.join() - if jtStatusCommand.exit_code() == 0: - for line in jtStatusCommand.output(): - match = self.__jobCountRegExp.match(line) - if match: - jobs = int(match.group(1)) - elif jtStatusCommand.exit_code() == 1: - # for now, exit code 1 comes for any exception raised by JobClient. If hadoop gets - # to differentiate and give more granular exit codes, we can check for those errors - # corresponding to network errors etc. - raise HadoopClientException(jtStatusCommand.exit_code()) - return jobs - - def __isCompatibleHadoopVersion(self, expectedVersion): - """This method determines whether the version of hadoop being used is one that - is higher than the expectedVersion. 
- This can be used for checking if a particular feature is available or not""" - ver = hadoopVersion(self.__hadoopDir, self.__javaHome, self.__log) - ret = False - - if (ver['major']!=None) and (int(ver['major']) >= int(expectedVersion['major'])) \ - and (ver['minor']!=None) and (int(ver['minor']) >= int(expectedVersion['minor'])): - ret = True - return ret - - def __initStatusCommand(self, option="-list"): - """This method initializes the command to run to check the JT status""" - cmd = None - hadoopPath = os.path.join(self.__hadoopDir, 'bin', 'hadoop') - cmdStr = "%s job -jt %s" % (hadoopPath, self.__jobTrackerURL) - cmdStr = "%s %s" % (cmdStr, option) - self.__log.debug('cmd str %s' % cmdStr) - env = os.environ - env['JAVA_HOME'] = self.__javaHome - cmd = simpleCommand('HadoopStatus', cmdStr, env) - return cmd - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/ringMaster.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/ringMaster.py deleted file mode 100644 index a289d95d75..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/ringMaster.py +++ /dev/null @@ -1,1019 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-#!/usr/bin/env python -"""manages services and nodepool""" -# -*- python -*- - -import os, sys, random, time, sets, shutil, threading -import urllib, urlparse, re, getpass, pprint, signal, shutil - -from pprint import pformat -from HTMLParser import HTMLParser - -binfile = sys.path[0] -libdir = os.path.dirname(binfile) -sys.path.append(libdir) - -import hodlib.Common.logger -from hodlib.RingMaster.idleJobTracker import JobTrackerMonitor, HadoopJobStatus - -from hodlib.Common.threads import func - -from hodlib.Hod.nodePool import * -from hodlib.Common.util import * -from hodlib.Common.nodepoolutil import NodePoolUtil -from hodlib.Common.socketServers import hodXMLRPCServer -from hodlib.Common.socketServers import threadedHTTPServer -from hodlib.NodePools import * -from hodlib.NodePools.torque import * -from hodlib.GridServices import * -from hodlib.Common.descGenerator import * -from hodlib.Common.xmlrpc import hodXRClient -from hodlib.Common.miniHTMLParser import miniHTMLParser -from hodlib.Common.threads import simpleCommand - -class ringMasterServer: - """The RPC server that exposes all the master config - changes. Also, one of these RPC servers runs as a proxy - and all the hodring instances register with this proxy""" - instance = None - xmlrpc = None - - def __init__(self, cfg, log, logMasterSources, retry=5): - try: - from hodlib.Common.socketServers import twistedXMLRPCServer - ringMasterServer.xmlrpc = twistedXMLRPCServer("", - cfg['ringmaster']['xrs-port-range']) - except ImportError: - log.info("Twisted interface not found. 
Using hodXMLRPCServer.") - ringMasterServer.xmlrpc = hodXMLRPCServer("", - cfg['ringmaster']['xrs-port-range']) - - ringMasterServer.xmlrpc.register_instance(logMasterSources) - self.logMasterSources = logMasterSources - ringMasterServer.xmlrpc.serve_forever() - - while not ringMasterServer.xmlrpc.is_alive(): - time.sleep(.5) - - log.debug('Ringmaster RPC Server at %d' % - ringMasterServer.xmlrpc.server_address[1]) - - def startService(ss, cfg, np, log, rm): - logMasterSources = _LogMasterSources(ss, cfg, np, log, rm) - ringMasterServer.instance = ringMasterServer(cfg, log, logMasterSources) - - def stopService(): - ringMasterServer.xmlrpc.stop() - - def getPort(): - return ringMasterServer.instance.port - - def getAddress(): - return 'http://%s:%d/' % (socket.gethostname(), - ringMasterServer.xmlrpc.server_address[1]) - - startService = staticmethod(startService) - stopService = staticmethod(stopService) - getPort = staticmethod(getPort) - getAddress = staticmethod(getAddress) - -class _LogMasterSources: - """All the methods that are run by the RPC server are - added into this class """ - - def __init__(self, serviceDict, cfg, np, log, rm): - self.serviceDict = serviceDict - self.tarSource = [] - self.tarSourceLock = threading.Lock() - self.dict = {} - self.count = {} - self.logsourceList = [] - self.logsourceListLock = threading.Lock() - self.masterParam = [] - self.masterParamLock = threading.Lock() - self.verify = 'none' - self.cmdLock = threading.Lock() - self.cfg = cfg - self.log = log - self.np = np - self.rm = rm - self.hdfsHost = None - self.mapredHost = None - self.maxconnect = self.cfg['ringmaster']['max-connect'] - self.log.debug("Using max-connect value %s"%self.maxconnect) - - - def registerTarSource(self, hostname, url, addr=None): - self.log.debug("registering: " + url) - lock = self.tarSourceLock - lock.acquire() - self.dict[url] = url - self.count[url] = 0 - # addr is None when ringMaster himself invokes this method - if addr: - c = 
self.count[addr] - self.count[addr] = c - 1 - lock.release() - if addr: - str = "%s is done" % (addr) - self.log.debug(str) - return url - - def getTarList(self,hodring): # this looks useful - lock = self.tarSourceLock - lock.acquire() - leastkey = None - leastval = -1 - for k, v in self.count.iteritems(): - if (leastval == -1): - leastval = v - pass - if (v <= leastval and v < self.maxconnect): - leastkey = k - leastval = v - if (leastkey == None): - url = 'none' - else: - url = self.dict[leastkey] - self.count[leastkey] = leastval + 1 - self.log.debug("%s %d" % (leastkey, self.count[leastkey])) - lock.release() - self.log.debug('sending url ' + url+" to "+hodring) # this looks useful - return url - - def tarDone(self, uri): - str = "%s is done" % (uri) - self.log.debug(str) - lock = self.tarSourceLock - lock.acquire() - c = self.count[uri] - self.count[uri] = c - 1 - lock.release() - return uri - - def status(self): - return True - -# FIXME: this code is broken, it relies on a central service registry -# -# def clusterStart(self, changedClusterParams=[]): -# self.log.debug("clusterStart method invoked.") -# self.dict = {} -# self.count = {} -# try: -# if (len(changedClusterParams) > 0): -# self.log.debug("Updating config.") -# for param in changedClusterParams: -# (key, sep1, val) = param.partition('=') -# (i1, sep2, i2) = key.partition('.') -# try: -# prev = self.cfg[i1][i2] -# self.rm.cfg[i1][i2] = val -# self.cfg[i1][i2] = val -# self.log.debug("\nModified [%s][%s]=%s to [%s][%s]=%s" % (i1, i2, prev, i1, i2, val)) -# except KeyError, e: -# self.log.info("Skipping %s as no such config parameter found in ringmaster" % param) -# self.log.debug("Regenerating Service Description.") -# dGen = DescGenerator(self.rm.cfg) -# self.rm.cfg['servicedesc'] = dGen.createServiceDescDict() -# self.cfg['servicedesc'] = self.rm.cfg['servicedesc'] -# -# self.rm.tar = None -# if self.rm.cfg['ringmaster'].has_key('hadoop-tar-ball'): -# self.rm.download = True -# self.rm.tar = 
self.rm.cfg['ringmaster']['hadoop-tar-ball'] -# self.log.debug("self.rm.tar=%s" % self.rm.tar) -# -# self.rm.cd_to_tempdir() -# -# self.rm.tarAddress = None -# hostname = socket.gethostname() -# if (self.rm.download): -# self.rm.basename = os.path.basename(self.rm.tar) -# dest = os.path.join(os.getcwd(), self.rm.basename) -# src = self.rm.tar -# self.log.debug("cp %s -> %s" % (src, dest)) -# shutil.copy(src, dest) -# self.rm.tarAddress = "%s%s" % (self.rm.httpAddress, self.rm.basename) -# self.registerTarSource(hostname, self.rm.tarAddress) -# self.log.debug("Registered new tarAddress %s" % self.rm.tarAddress) -# else: -# self.log.debug("Download not set.") -# -# if (self.rm.tar != None): -# self.cfg['hodring']['download-addr'] = self.rm.tarAddress -# self.rm.cfg['hodring']['download-addr'] = self.rm.tarAddress -# -# sdl = self.rm.cfg['servicedesc'] -# workDirs = self.rm.getWorkDirs(self.rm.cfg, True) -# hdfsDesc = sdl['hdfs'] -# hdfs = None -# if hdfsDesc.isExternal(): -# hdfs = HdfsExternal(hdfsDesc, workDirs) -# else: -# hdfs = Hdfs(hdfsDesc, workDirs, 0, False, True) -# -# self.rm.serviceDict[hdfs.getName()] = hdfs -# mrDesc = sdl['mapred'] -# mr = None -# if mrDesc.isExternal(): -# mr = MapReduceExternal(mrDesc, workDirs) -# else: -# mr = MapReduce(mrDesc, workDirs, 1) -# self.rm.serviceDict[mr.getName()] = mr -# -# ringList = self.rm.serviceClient.getServiceInfo(self.cfg['hodring']['userid'], -# self.np.getServiceId(), 'hodring', 'hod') -# -# slaveList = ringList -# hdfsringXRAddress = None -# # Start HDFS Master - Step 1 -# if not hdfsDesc.isExternal(): -# masterFound = False -# for ring in ringList: -# ringXRAddress = ring['xrs'] -# if ringXRAddress == None: -# raise Exception("Could not get hodring XML-RPC server address.") -# if (ringXRAddress.find(self.hdfsHost) != -1): -# ringClient = hodXRClient(ringXRAddress, None, None, 0, 0, 0, False, 0) -# hdfsringXRAddress = ringXRAddress -# self.log.debug("Invoking clusterStart on " + ringXRAddress + " (HDFS 
Master)") -# ringClient.clusterStart() -# masterFound = True -# slaveList.remove(ring) -# break -# if not masterFound: -# raise Exception("HDFS Master host not found") -# while hdfs.getInfoAddrs() == None: -# self.log.debug("Waiting for HDFS Master (Name Node) to register dfs.info.port") -# time.sleep(1) -# -# # Start MAPRED Master - Step 2 -# if not mrDesc.isExternal(): -# masterFound = False -# for ring in ringList: -# ringXRAddress = ring['xrs'] -# if ringXRAddress == None: -# raise Exception("Could not get hodring XML-RPC server address.") -# if (not mrDesc.isExternal() and ringXRAddress.find(self.mapredHost) != -1): -# ringClient = hodXRClient(ringXRAddress, None, None, 0, 0, 0, False, 0) -# self.log.debug("Invoking clusterStart on " + ringXRAddress + " (MAPRED Master)") -# ringClient.clusterStart() -# masterFound = True -# slaveList.remove(ring) -# break -# if not masterFound: -# raise Excpetion("MAPRED Master host not found") -# while mr.getInfoAddrs() == None: -# self.log.debug("Waiting for MAPRED Master (Job Tracker) to register \ -# mapred.job.tracker.info.port") -# time.sleep(1) -# -# # Start Slaves - Step 3 -# for ring in slaveList: -# ringXRAddress = ring['xrs'] -# if ringXRAddress == None: -# raise Exception("Could not get hodring XML-RPC server address.") -# ringClient = hodXRClient(ringXRAddress, None, None, 0, 0, 0, False, 0) -# self.log.debug("Invoking clusterStart on " + ringXRAddress + " (Slaves)") -# ringThread = func(name='hodring_slaves_start', functionRef=ringClient.clusterStart()) -# ring['thread'] = ringThread -# ringThread.start() -# -# for ring in slaveList: -# ringThread = ring['thread'] -# if ringThread == None: -# raise Exception("Could not get hodring thread (Slave).") -# ringThread.join() -# self.log.debug("Completed clusterStart on " + ring['xrs'] + " (Slave)") -# -# # Run Admin Commands on HDFS Master - Step 4 -# if not hdfsDesc.isExternal(): -# if hdfsringXRAddress == None: -# raise Exception("HDFS Master host not found (to Run 
Admin Commands)") -# ringClient = hodXRClient(hdfsringXRAddress, None, None, 0, 0, 0, False, 0) -# self.log.debug("Invoking clusterStart(False) - Admin on " -# + hdfsringXRAddress + " (HDFS Master)") -# ringClient.clusterStart(False) -# -# except: -# self.log.debug(get_exception_string()) -# return False -# -# self.log.debug("Successfully started cluster.") -# return True -# -# def clusterStop(self): -# self.log.debug("clusterStop method invoked.") -# try: -# hdfsAddr = self.getServiceAddr('hdfs') -# if hdfsAddr.find(':') != -1: -# h, p = hdfsAddr.split(':', 1) -# self.hdfsHost = h -# self.log.debug("hdfsHost: " + self.hdfsHost) -# mapredAddr = self.getServiceAddr('mapred') -# if mapredAddr.find(':') != -1: -# h, p = mapredAddr.split(':', 1) -# self.mapredHost = h -# self.log.debug("mapredHost: " + self.mapredHost) -# ringList = self.rm.serviceClient.getServiceInfo(self.cfg['hodring']['userid'], -# self.np.getServiceId(), -# 'hodring', 'hod') -# for ring in ringList: -# ringXRAddress = ring['xrs'] -# if ringXRAddress == None: -# raise Exception("Could not get hodring XML-RPC server address.") -# ringClient = hodXRClient(ringXRAddress, None, None, 0, 0, 0, False) -# self.log.debug("Invoking clusterStop on " + ringXRAddress) -# ringThread = func(name='hodring_stop', functionRef=ringClient.clusterStop()) -# ring['thread'] = ringThread -# ringThread.start() -# -# for ring in ringList: -# ringThread = ring['thread'] -# if ringThread == None: -# raise Exception("Could not get hodring thread.") -# ringThread.join() -# self.log.debug("Completed clusterStop on " + ring['xrs']) -# -# except: -# self.log.debug(get_exception_string()) -# return False -# -# self.log.debug("Successfully stopped cluster.") -# -# return True - - def getCommand(self, addr): - """This method is called by the - hodrings to get commands from - the ringmaster""" - lock = self.cmdLock - cmdList = [] - lock.acquire() - try: - try: - for v in self.serviceDict.itervalues(): - if (not v.isExternal()): - if 
v.isLaunchable(self.serviceDict): - # If a master is still not launched, or the number of - # retries for launching master is not reached, - # launch master - if not v.isMasterLaunched() and \ - (v.getMasterFailureCount() <= \ - self.cfg['ringmaster']['max-master-failures']): - cmdList = v.getMasterCommands(self.serviceDict) - v.setlaunchedMaster() - v.setMasterAddress(addr) - break - if cmdList == []: - for s in self.serviceDict.itervalues(): - if (not v.isExternal()): - if s.isMasterInitialized(): - cl = s.getWorkerCommands(self.serviceDict) - cmdList.extend(cl) - else: - cmdList = [] - break - except: - self.log.debug(get_exception_string()) - finally: - lock.release() - pass - - cmd = addr + pformat(cmdList) - self.log.debug("getCommand returning " + cmd) - return cmdList - - def getAdminCommand(self, addr): - """This method is called by the - hodrings to get admin commands from - the ringmaster""" - lock = self.cmdLock - cmdList = [] - lock.acquire() - try: - try: - for v in self.serviceDict.itervalues(): - cmdList = v.getAdminCommands(self.serviceDict) - if cmdList != []: - break - except Exception, e: - self.log.debug(get_exception_string()) - finally: - lock.release() - pass - cmd = addr + pformat(cmdList) - self.log.debug("getAdminCommand returning " + cmd) - return cmdList - - def addMasterParams(self, addr, vals): - """This method is called by - hodring to update any parameters - its changed for the commands it was - running""" - self.log.debug('Comment: adding master params from %s' % addr) - self.log.debug(pformat(vals)) - lock = self.masterParamLock - lock.acquire() - try: - for v in self.serviceDict.itervalues(): - if v.isMasterLaunched(): - if (v.getMasterAddress() == addr): - v.setMasterParams(vals) - v.setMasterInitialized() - except: - self.log.debug(get_exception_string()) - pass - lock.release() - - return addr - - def setHodRingErrors(self, addr, errors): - """This method is called by the hodrings to update errors - it encountered while 
starting up""" - self.log.critical("Hodring at %s failed with following errors:\n%s" \ - % (addr, errors)) - lock = self.masterParamLock - lock.acquire() - try: - for v in self.serviceDict.itervalues(): - if v.isMasterLaunched(): - if (v.getMasterAddress() == addr): - # strip the PID part. - idx = addr.rfind('_') - if idx is not -1: - addr = addr[:idx] - v.setMasterFailed("Hodring at %s failed with following" \ - " errors:\n%s" % (addr, errors)) - except: - self.log.debug(get_exception_string()) - pass - lock.release() - return True - - def getKeys(self): - lock= self.masterParamLock - lock.acquire() - keys = self.serviceDict.keys() - lock.release() - - return keys - - def getServiceAddr(self, name): - addr = 'not found' - self.log.debug("getServiceAddr name: %s" % name) - lock= self.masterParamLock - lock.acquire() - try: - service = self.serviceDict[name] - except KeyError: - pass - else: - self.log.debug("getServiceAddr service: %s" % service) - # Check if we should give up ! If the limit on max failures is hit, - # give up. - err = service.getMasterFailed() - if (err is not None) and \ - (service.getMasterFailureCount() > \ - self.cfg['ringmaster']['max-master-failures']): - self.log.critical("Detected errors (%s) beyond allowed number"\ - " of failures (%s). 
Flagging error to client" \ - % (service.getMasterFailureCount(), \ - self.cfg['ringmaster']['max-master-failures'])) - addr = "Error: " + err - elif (service.isMasterInitialized()): - addr = service.getMasterAddrs()[0] - else: - addr = 'not found' - lock.release() - self.log.debug("getServiceAddr addr %s: %s" % (name, addr)) - - return addr - - def getURLs(self, name): - addr = 'none' - lock = self.masterParamLock - lock.acquire() - - try: - service = self.serviceDict[name] - except KeyError: - pass - else: - if (service.isMasterInitialized()): - addr = service.getInfoAddrs()[0] - - lock.release() - - return addr - - def stopRM(self): - """An XMLRPC call which will spawn a thread to stop the Ringmaster program.""" - # We spawn a thread here because we want the XMLRPC call to return. Calling - # stop directly from here will also stop the XMLRPC server. - try: - self.log.debug("inside xml-rpc call to stop ringmaster") - rmStopperThread = func('RMStopper', self.rm.stop) - rmStopperThread.start() - self.log.debug("returning from xml-rpc call to stop ringmaster") - return True - except: - self.log.debug("Exception in stop: %s" % get_exception_string()) - return False - -class RingMaster: - def __init__(self, cfg, log, **kwds): - """starts nodepool and services""" - self.download = False - self.httpServer = None - self.cfg = cfg - self.log = log - self.__hostname = local_fqdn() - self.workDirs = None - - # ref to the idle job tracker object. 
- self.__jtMonitor = None - self.__idlenessDetected = False - self.__stopInProgress = False - self.__isStopped = False # to let main exit - self.__exitCode = 0 # exit code with which the ringmaster main method should return - - self.workers_per_ring = self.cfg['ringmaster']['workers_per_ring'] - - self.__initialize_signal_handlers() - - sdd = self.cfg['servicedesc'] - gsvc = None - for key in sdd: - gsvc = sdd[key] - break - - npd = self.cfg['nodepooldesc'] - self.np = NodePoolUtil.getNodePool(npd, cfg, log) - - self.log.debug("Getting service ID.") - - self.serviceId = self.np.getServiceId() - - self.log.debug("Got service ID: %s" % self.serviceId) - - self.tarSrcLoc = None - if self.cfg['ringmaster'].has_key('hadoop-tar-ball'): - self.download = True - self.tarSrcLoc = self.cfg['ringmaster']['hadoop-tar-ball'] - - self.cd_to_tempdir() - - if (self.download): - self.__copy_tarball(os.getcwd()) - self.basename = self.__find_tarball_in_dir(os.getcwd()) - if self.basename is None: - raise Exception('Did not find tarball copied from %s in %s.' - % (self.tarSrcLoc, os.getcwd())) - - self.serviceAddr = to_http_url(self.cfg['ringmaster']['svcrgy-addr']) - - self.log.debug("Service registry @ %s" % self.serviceAddr) - - self.serviceClient = hodXRClient(self.serviceAddr) - self.serviceDict = {} - try: - sdl = self.cfg['servicedesc'] - - workDirs = self.getWorkDirs(cfg) - - hdfsDesc = sdl['hdfs'] - hdfs = None - - # Determine hadoop Version - hadoopVers = hadoopVersion(self.__getHadoopDir(), \ - self.cfg['hodring']['java-home'], self.log) - - if (hadoopVers['major']==None) or (hadoopVers['minor']==None): - raise Exception('Could not retrive the version of Hadoop.' 
- + ' Check the Hadoop installation or the value of the hodring.java-home variable.') - if hdfsDesc.isExternal(): - hdfs = HdfsExternal(hdfsDesc, workDirs, version=int(hadoopVers['minor'])) - hdfs.setMasterParams( self.cfg['gridservice-hdfs'] ) - else: - hdfs = Hdfs(hdfsDesc, workDirs, 0, version=int(hadoopVers['minor']), - workers_per_ring = self.workers_per_ring) - - self.serviceDict[hdfs.getName()] = hdfs - - mrDesc = sdl['mapred'] - mr = None - if mrDesc.isExternal(): - mr = MapReduceExternal(mrDesc, workDirs, version=int(hadoopVers['minor'])) - mr.setMasterParams( self.cfg['gridservice-mapred'] ) - else: - mr = MapReduce(mrDesc, workDirs,1, version=int(hadoopVers['minor']), - workers_per_ring = self.workers_per_ring) - - self.serviceDict[mr.getName()] = mr - except: - self.log.critical("Exception in creating Hdfs and Map/Reduce descriptor objects: \ - %s." % get_exception_error_string()) - self.log.debug(get_exception_string()) - raise - - # should not be starting these in a constructor - ringMasterServer.startService(self.serviceDict, cfg, self.np, log, self) - - self.rpcserver = ringMasterServer.getAddress() - - self.httpAddress = None - self.tarAddress = None - hostname = socket.gethostname() - if (self.download): - self.httpServer = threadedHTTPServer(hostname, - self.cfg['ringmaster']['http-port-range']) - - self.httpServer.serve_forever() - self.httpAddress = "http://%s:%d/" % (self.httpServer.server_address[0], - self.httpServer.server_address[1]) - self.tarAddress = "%s%s" % (self.httpAddress, self.basename) - - ringMasterServer.instance.logMasterSources.registerTarSource(hostname, - self.tarAddress) - else: - self.log.debug("Download not set.") - - self.log.debug("%s %s %s %s %s" % (self.cfg['ringmaster']['userid'], - self.serviceId, self.__hostname, 'ringmaster', 'hod')) - - if self.cfg['ringmaster']['register']: - if self.httpAddress: - self.serviceClient.registerService(self.cfg['ringmaster']['userid'], - self.serviceId, self.__hostname, 
'ringmaster', 'hod', { - 'xrs' : self.rpcserver, 'http' : self.httpAddress }) - else: - self.serviceClient.registerService(self.cfg['ringmaster']['userid'], - self.serviceId, self.__hostname, 'ringmaster', 'hod', { - 'xrs' : self.rpcserver, }) - - self.log.debug("Registered with serivce registry: %s." % self.serviceAddr) - - hodRingPath = os.path.join(cfg['ringmaster']['base-dir'], 'bin', 'hodring') - hodRingWorkDir = os.path.join(cfg['hodring']['temp-dir'], 'hodring' + '_' - + getpass.getuser()) - - self.cfg['hodring']['hodring'] = [hodRingWorkDir,] - self.cfg['hodring']['svcrgy-addr'] = self.cfg['ringmaster']['svcrgy-addr'] - self.cfg['hodring']['service-id'] = self.np.getServiceId() - - self.cfg['hodring']['ringmaster-xrs-addr'] = self.__url_to_addr(self.rpcserver) - - if (self.tarSrcLoc != None): - cfg['hodring']['download-addr'] = self.tarAddress - - self.__init_job_tracker_monitor(ringMasterServer.instance.logMasterSources) - - def __init_job_tracker_monitor(self, logMasterSources): - hadoopDir = self.__getHadoopDir() - self.log.debug('hadoopdir=%s, java-home=%s' % \ - (hadoopDir, self.cfg['hodring']['java-home'])) - try: - self.__jtMonitor = JobTrackerMonitor(self.log, self, - self.cfg['ringmaster']['jt-poll-interval'], - self.cfg['ringmaster']['idleness-limit'], - hadoopDir, self.cfg['hodring']['java-home'], - logMasterSources) - self.log.debug('starting jt monitor') - self.__jtMonitor.start() - except: - self.log.critical('Exception in running idle job tracker. This cluster cannot be deallocated if idle.\ - Exception message: %s' % get_exception_error_string()) - self.log.debug('Exception details: %s' % get_exception_string()) - - - def __getHadoopDir(self): - hadoopDir = None - if self.cfg['ringmaster'].has_key('hadoop-tar-ball'): - tarFile = os.path.join(os.getcwd(), self.basename) - ret = untar(tarFile, os.getcwd()) - if not ret: - raise Exception('Untarring tarfile %s to directory %s failed. Cannot find hadoop directory.' 
\ - % (tarFile, os.getcwd())) - hadoopDir = os.path.join(os.getcwd(), self.__get_dir(tarFile)) - else: - hadoopDir = self.cfg['gridservice-mapred']['pkgs'] - self.log.debug('Returning Hadoop directory as: %s' % hadoopDir) - return hadoopDir - - def __get_dir(self, name): - """Return the root directory inside the tarball - specified by name. Assumes that the tarball begins - with a root directory.""" - import tarfile - myTarFile = tarfile.open(name) - hadoopPackage = myTarFile.getnames()[0] - self.log.debug("tarball name : %s hadoop package name : %s" %(name,hadoopPackage)) - return hadoopPackage - - def __find_tarball_in_dir(self, dir): - """Find the tarball among files specified in the given - directory. We need this method because how the tarball - source URI is given depends on the method of copy and - we can't get the tarball name from that. - This method will fail if there are multiple tarballs - in the directory with the same suffix.""" - files = os.listdir(dir) - for file in files: - if self.tarSrcLoc.endswith(file): - return file - return None - - def __copy_tarball(self, destDir): - """Copy the hadoop tar ball from a remote location to the - specified destination directory. Based on the URL it executes - an appropriate copy command. Throws an exception if the command - returns a non-zero exit code.""" - # for backwards compatibility, treat the default case as file:// - url = '' - if self.tarSrcLoc.startswith('/'): - url = 'file:/' - src = '%s%s' % (url, self.tarSrcLoc) - if src.startswith('file://'): - src = src[len('file://')-1:] - cpCmd = '/bin/cp' - cmd = '%s %s %s' % (cpCmd, src, destDir) - self.log.debug('Command to execute: %s' % cmd) - copyProc = simpleCommand('remote copy', cmd) - copyProc.start() - copyProc.wait() - copyProc.join() - ret = copyProc.exit_code() - self.log.debug('Completed command execution. Exit Code: %s.' % ret) - - if ret != 0: - output = copyProc.output() - raise Exception('Could not copy tarball using command %s. Exit code: %s. 
Output: %s' - % (cmd, ret, output)) - else: - raise Exception('Unsupported URL for file: %s' % src) - -# input: http://hostname:port/. output: [hostname,port] - def __url_to_addr(self, url): - addr = url.rstrip('/') - if addr.startswith('http://'): - addr = addr.replace('http://', '', 1) - addr_parts = addr.split(':') - return [addr_parts[0], int(addr_parts[1])] - - def __initialize_signal_handlers(self): - def sigStop(sigNum, handler): - sig_wrapper(sigNum, self.stop) - - signal.signal(signal.SIGTERM, sigStop) - signal.signal(signal.SIGINT, sigStop) - signal.signal(signal.SIGQUIT, sigStop) - - def __clean_up(self): - tempDir = self.__get_tempdir() - os.chdir(os.path.split(tempDir)[0]) - if os.path.exists(tempDir): - shutil.rmtree(tempDir, True) - - self.log.debug("Cleaned up temporary dir: %s" % tempDir) - - def __get_tempdir(self): - dir = os.path.join(self.cfg['ringmaster']['temp-dir'], - "%s.%s.ringmaster" % (self.cfg['ringmaster']['userid'], - self.np.getServiceId())) - return dir - - def getWorkDirs(self, cfg, reUse=False): - - if (not reUse) or (self.workDirs == None): - import math - frand = random.random() - while math.ceil(frand) != math.floor(frand): - frand = frand * 100 - - irand = int(frand) - uniq = '%s-%d-%s' % (socket.gethostname(), os.getpid(), irand) - dirs = [] - parentDirs = cfg['ringmaster']['work-dirs'] - for p in parentDirs: - dir = os.path.join(p, uniq) - dirs.append(dir) - self.workDirs = dirs - - return self.workDirs - - def _fetchLink(self, link, parentDir): - parser = miniHTMLParser() - self.log.debug("Checking link %s" %link) - while link: - - # Get the file from the site and link - input = urllib.urlopen(link) - out = None - contentType = input.info().gettype() - isHtml = contentType == 'text/html' - - #print contentType - if isHtml: - parser.setBaseUrl(input.geturl()) - else: - parsed = urlparse.urlparse(link) - hp = parsed[1] - h = hp - p = None - if hp.find(':') != -1: - h, p = hp.split(':', 1) - path = parsed[2] - path = 
path.split('/') - file = os.path.join(parentDir, h, p) - for c in path: - if c == '': - continue - file = os.path.join(file, c) - - try: - self.log.debug('Creating %s' % file) - dir, tail = os.path.split(file) - if not os.path.exists(dir): - os.makedirs(dir) - except: - self.log.debug(get_exception_string()) - - out = open(file, 'w') - - bufSz = 8192 - buf = input.read(bufSz) - while len(buf) > 0: - if isHtml: - # Feed the file into the HTML parser - parser.feed(buf) - if out: - out.write(buf) - buf = input.read(bufSz) - - input.close() - if out: - out.close() - - # Search the retfile here - - # Get the next link in level traversal order - link = parser.getNextLink() - - parser.close() - - def _finalize(self): - try: - # FIXME: get dir from config - dir = 'HOD-log-P%d' % (os.getpid()) - dir = os.path.join('.', dir) - except: - self.log.debug(get_exception_string()) - - self.np.finalize() - - def handleIdleJobTracker(self): - self.log.critical("Detected idle job tracker for %s seconds. The allocation will be cleaned up." 
\ - % self.cfg['ringmaster']['idleness-limit']) - self.__idlenessDetected = True - - def cd_to_tempdir(self): - dir = self.__get_tempdir() - - if not os.path.exists(dir): - os.makedirs(dir) - os.chdir(dir) - - return dir - - def getWorkload(self): - return self.workload - - def getHostName(self): - return self.__hostname - - def start(self): - """run the thread main loop""" - - self.log.debug("Entered start method.") - hodring = os.path.join(self.cfg['ringmaster']['base-dir'], - 'bin', 'hodring') - largs = [hodring] - targs = self.cfg.get_args(section='hodring') - largs.extend(targs) - - hodringCmd = "" - for item in largs: - hodringCmd = "%s%s " % (hodringCmd, item) - - self.log.debug(hodringCmd) - - if self.np.runWorkers(largs) > 0: - self.log.critical("Failed to start worker.") - - self.log.debug("Returned from runWorkers.") - - self._finalize() - - def __findExitCode(self): - """Determine the exit code based on the status of the cluster or jobs run on them""" - xmlrpcServer = ringMasterServer.instance.logMasterSources - if xmlrpcServer.getServiceAddr('hdfs') == 'not found' or \ - xmlrpcServer.getServiceAddr('hdfs').startswith("Error: "): - self.__exitCode = 7 - elif xmlrpcServer.getServiceAddr('mapred') == 'not found' or \ - xmlrpcServer.getServiceAddr('mapred').startswith("Error: "): - self.__exitCode = 8 - else: - clusterStatus = get_cluster_status(xmlrpcServer.getServiceAddr('hdfs'), - xmlrpcServer.getServiceAddr('mapred')) - if clusterStatus != 0: - self.__exitCode = clusterStatus - else: - self.__exitCode = self.__findHadoopJobsExitCode() - self.log.debug('exit code %s' % self.__exitCode) - - def __findHadoopJobsExitCode(self): - """Determine the consolidate exit code of hadoop jobs run on this cluster, provided - this information is available. 
Return 0 otherwise""" - ret = 0 - failureStatus = 3 - failureCount = 0 - if self.__jtMonitor: - jobStatusList = self.__jtMonitor.getJobsStatus() - try: - if len(jobStatusList) > 0: - for jobStatus in jobStatusList: - self.log.debug('job status for %s: %s' % (jobStatus.getJobId(), - jobStatus.getStatus())) - if jobStatus.getStatus() == failureStatus: - failureCount = failureCount+1 - if failureCount > 0: - if failureCount == len(jobStatusList): # all jobs failed - ret = 16 - else: - ret = 17 - except: - self.log.debug('exception in finding hadoop jobs exit code' % get_exception_string()) - return ret - - def stop(self): - self.log.debug("RingMaster stop method invoked.") - if self.__stopInProgress or self.__isStopped: - return - self.__stopInProgress = True - if ringMasterServer.instance is not None: - self.log.debug('finding exit code') - self.__findExitCode() - self.log.debug('stopping ringmaster instance') - ringMasterServer.stopService() - else: - self.__exitCode = 6 - if self.__jtMonitor is not None: - self.__jtMonitor.stop() - if self.httpServer: - self.httpServer.stop() - - self.__clean_up() - self.__isStopped = True - - def shouldStop(self): - """Indicates whether the main loop should exit, either due to idleness condition, - or a stop signal was received""" - return self.__idlenessDetected or self.__isStopped - - def getExitCode(self): - """return the exit code of the program""" - return self.__exitCode - -def main(cfg,log): - try: - rm = None - dGen = DescGenerator(cfg) - cfg = dGen.initializeDesc() - rm = RingMaster(cfg, log) - rm.start() - while not rm.shouldStop(): - time.sleep(1) - rm.stop() - log.debug('returning from main') - return rm.getExitCode() - except Exception, e: - if log: - log.critical(get_exception_string()) - raise Exception(e) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- 
a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/torque.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/torque.py deleted file mode 100644 index a4e8f95dfd..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/torque.py +++ /dev/null @@ -1,175 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import os, pprint, re, time - -from hodlib.Common.threads import simpleCommand -from hodlib.Common.util import args_to_string -from hodlib.Common.logger import hodDummyLogger - -reQstatLine = re.compile("^\s*(\w+)\s*=\s*(.*)\s*$") - -class torqueInterface: - def __init__(self, torqueDir, environment, log=None): - self.__qsub = os.path.join(torqueDir, 'bin', 'qsub') - self.__qdel = os.path.join(torqueDir, 'bin', 'qdel') - self.__qstat = os.path.join(torqueDir, 'bin', 'qstat') - self.__pbsNodes = os.path.join(torqueDir, 'bin', 'pbsnodes') - self.__pbsdsh = os.path.join(torqueDir, 'bin', 'pbsdsh') - self.__qalter = os.path.join(torqueDir, 'bin', 'qalter') - self.__env = environment - - self.__log = log - if not self.__log: - self.__log = hodDummyLogger() - - def qsub(self, argList, stdinList): - jobID = False - exitCode = 0 - - qsubCommand = "%s %s" % (self.__qsub, args_to_string(argList)) - - self.__log.debug("qsub -> %s" % qsubCommand) - - qsubProcess = simpleCommand('qsub', qsubCommand, env=self.__env) - qsubProcess.start() - - while qsubProcess.stdin == None: - time.sleep(.2) - - try: - for line in stdinList: - self.__log.debug("qsub stdin: %s" % line) - print >>qsubProcess.stdin, line - qsubProcess.stdin.close() - except IOError, i: - # If torque's qsub is given invalid params, it fails & returns immediately - # Check for such errors here - # Wait for command execution to finish - qsubProcess.wait() - qsubProcess.join() - output = qsubProcess.output() - if output!=[]: - self.__log.critical("qsub Failure : %s " % output[0].strip()) - self.__log.critical("qsub Command : %s" % qsubCommand) - return None, qsubProcess.exit_code() - - qsubProcess.wait() - qsubProcess.join() - - exitCode = qsubProcess.exit_code() - if exitCode == 0: - buffer = qsubProcess.output() - jobID = buffer[0].rstrip('\n') - self.__log.debug("qsub jobid: %s" % jobID) - else: - self.__log.critical("qsub error: %s" % qsubProcess.exit_status_string()) - - return jobID, exitCode - - def qstat(self, 
jobID): - qstatInfo = None - - qstatCommand = "%s -f -1 %s" % (self.__qstat, jobID) - - self.__log.debug(qstatCommand) - - qstatProcess = simpleCommand('qstat', qstatCommand, env=self.__env) - qstatProcess.start() - qstatProcess.wait() - qstatProcess.join() - - exitCode = qstatProcess.exit_code() - if exitCode > 0: - self.__log.warn('qstat error: %s' % qstatProcess.exit_status_string()) - else: - qstatInfo = {} - for line in qstatProcess.output(): - line = line.rstrip() - if line.find('=') != -1: - qstatMatch = reQstatLine.match(line) - if qstatMatch: - key = qstatMatch.group(1) - value = qstatMatch.group(2) - qstatInfo[key] = value - - if 'exec_host' in qstatInfo: - list = qstatInfo['exec_host'].split('+') - addrList = [] - - for item in list: - [head, end] = item.split('/', 1) - addrList.append(head) - - qstatInfo['exec_host'] = addrList - - return qstatInfo, exitCode - - def pbs_nodes(self, argString): - pass - - def qdel(self, jobId, force=False): - exitCode = 0 - qdel = self.__qdel - if force: - qdel = "%s -p %s" % (qdel, jobId) - else: - qdel = "%s %s" % (qdel, jobId) - - self.__log.debug(qdel) - - qdelProcess = simpleCommand('qdel', qdel, env=self.__env) - qdelProcess.start() - qdelProcess.wait() - qdelProcess.join() - - exitCode = qdelProcess.exit_code() - - return exitCode - - def pbsdsh(self, arguments): - status = None - - pbsdshCommand = "%s %s" % (self.__pbsdsh, args_to_string(arguments)) - - self.__log.debug("pbsdsh command: %s" % pbsdshCommand) - - pbsdsh = simpleCommand('pbsdsh', pbsdshCommand, env=self.__env) - pbsdsh.start() - - for i in range(0, 30): - status = pbsdsh.exit_code() - if status: - self.__log.error("pbsdsh failed: %s" % pbsdsh.exit_status_string()) - break - - if not status: status = 0 - - return status - - def qalter(self, fieldName, fieldValue, jobId): - """Update the job field with fieldName with the fieldValue. - The fieldValue must be modifiable after the job is submitted.""" - - # E.g. 
to alter comment: qalter -W notes='value` jobId - qalterCmd = '%s -W %s=\"%s\" %s' % (self.__qalter, fieldName, fieldValue, jobId) - self.__log.debug("qalter command: %s" % qalterCmd) - qalterProcess = simpleCommand('qalter', qalterCmd, env=self.__env) - qalterProcess.start() - qalterProcess.wait() - qalterProcess.join() - exitCode = qalterProcess.exit_code() - - return exitCode diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/serviceProxy.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/serviceProxy.py deleted file mode 100644 index 0e80d20ce6..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/serviceProxy.py +++ /dev/null @@ -1,49 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. 
See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -"""HOD Service Proxy Implementation""" -# -*- python -*- - -import sys, time, signal, httplib, socket, threading -import sha, base64, hmac -import xml.dom.minidom - -from hodlib.Common.socketServers import hodHTTPServer -from hodlib.Common.hodsvc import hodBaseService -from hodlib.Common.threads import loop -from hodlib.Common.tcp import tcpSocket -from hodlib.Common.util import get_exception_string -from hodlib.Common.AllocationManagerUtil import * - -class svcpxy(hodBaseService): - def __init__(self, config): - hodBaseService.__init__(self, 'serviceProxy', config['service_proxy'], - xrtype='twisted') - self.amcfg=config['allocation_manager'] - - def _xr_method_isProjectUserValid(self, userid, project, ignoreErrors = False, timeOut = 15): - return self.isProjectUserValid(userid, project, ignoreErrors, timeOut) - - def isProjectUserValid(self, userid, project, ignoreErrors, timeOut): - """Method thats called upon by - the hodshell to verify if the - specified (user, project) combination - is valid""" - self.logs['main'].info("Begin isProjectUserValid()") - am = AllocationManagerUtil.getAllocationManager(self.amcfg['id'], - self.amcfg, - self.logs['main']) - self.logs['main'].info("End isProjectUserValid()") - return am.getQuote(userid, project) diff --git 
a/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py deleted file mode 100644 index ac18ff77d4..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py +++ /dev/null @@ -1,127 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import sys, time, socket, threading, copy, pprint - -from hodlib.Common.hodsvc import hodBaseService -from hodlib.Common.threads import loop -from hodlib.Common.tcp import tcpSocket -from hodlib.Common.util import get_exception_string -import logging - -class svcrgy(hodBaseService): - def __init__(self, config, log=None): - hodBaseService.__init__(self, 'serviceRegistry', config) - - self.__serviceDict = {} - self.__failCount = {} - self.__released = {} - self.__locked = {} - - self.__serviceDictLock = threading.Lock() - self.RMErrorMsgs = None # Ringmaster error messages - self.log = log - if self.log is None: - self.log = logging.getLogger() - - def __get_job_key(self, userid, job): - return "%s-%s" % (userid, job) - - def _xr_method_registerService(self, userid, job, host, name, type, dict): - return self.registerService(userid, job, host, name, type, dict) - - def _xr_method_getServiceInfo(self, userid=None, job=None, name=None, - type=None): - return self.getServiceInfo(userid, job, name, type) - - def _xr_method_setRMError(self, args): - self.log.debug("setRMError called with %s" % args) - self.RMErrorMsgs = args - return True - - def _xr_method_getRMError(self): - self.log.debug("getRMError called") - if self.RMErrorMsgs is not None: - return self.RMErrorMsgs - else: - self.log.debug("no Ringmaster error messages") - return False - - def registerService(self, userid, job, host, name, type, dict): - """Method thats called upon by - the ringmaster to register to the - the service registry""" - lock = self.__serviceDictLock - lock.acquire() - try: - 
self.logs['main'].debug("Registering %s.%s.%s.%s.%s..." % ( - userid, job, host, name, type)) - id = "%s.%s" % (name, type) - - if userid in self.__serviceDict: - if job in self.__serviceDict[userid]: - if host in self.__serviceDict[userid][job]: - self.__serviceDict[userid][job][host].append( - {id : dict,}) - else: - self.__serviceDict[userid][job][host] = [ - {id : dict,},] - else: - self.__serviceDict[userid][job] = {host : [ - { id : dict,},]} - else: - self.__serviceDict[userid] = {job : {host : [ - { id : dict,},]}} - - finally: - lock.release() - - return True - - def getXMLRPCAddr(self): - """return the xml rpc server address""" - return self._xrc.server_address - - def getServiceInfo(self, userid=None, job=None, name=None, type=None): - """This method is called upon by others - to query for a particular service returns - a dictionary of elements""" - - self.logs['main'].debug("inside getServiceInfo: %s.%s.%s" % (userid, job, name)) - retdict = {} - lock = self.__serviceDictLock - lock.acquire() - try: - if userid in self.__serviceDict: - if job in self.__serviceDict[userid]: - if name and type: - retdict = [] - id = "%s.%s" % (name, type) - for host in self.__serviceDict[userid][job]: - for dict in self.__serviceDict[userid][job][host]: - [loopID, ] = dict.keys() - if loopID == id: - retdict.append(dict[id]) - else: - retdict = copy.deepcopy( - self.__serviceDict[userid][job]) - elif not job: - retdict = copy.deepcopy(self.__serviceDict[userid]) - elif not userid: - retdict = copy.deepcopy(self.__serviceDict) - finally: - lock.release() - - return retdict diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/__init__.py deleted file mode 100644 index 56759d7963..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. 
See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - diff --git a/third_party/hadoop-0.20.0/contrib/hod/ivy.xml b/third_party/hadoop-0.20.0/contrib/hod/ivy.xml deleted file mode 100644 index e775663256..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/ivy.xml +++ /dev/null @@ -1,22 +0,0 @@ -<?xml version="1.0" ?> -<ivy-module version="1.0"> - <info organisation="org.apache.hadoop" module="${ant.project.name}"> - <license name="Apache 2.0"/> - <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/> - <description> - Apache Hadoop - </description> - </info> - <configurations defaultconfmapping="default"> - <!--these match the Maven configurations--> - <conf name="default" extends="master,runtime"/> - <conf name="master" description="contains the artifact but no dependencies"/> - <conf name="runtime" description="runtime but not the artifact" /> - <!--Private configurations. 
--> - - <conf name="common" visibility="private" - description="artifacts needed to compile/test the application"/> - </configurations> - <dependencies> - </dependencies> -</ivy-module> diff --git a/third_party/hadoop-0.20.0/contrib/hod/ivy/libraries.properties b/third_party/hadoop-0.20.0/contrib/hod/ivy/libraries.properties deleted file mode 100644 index a470b372ad..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/ivy/libraries.properties +++ /dev/null @@ -1,5 +0,0 @@ -#This properties file lists the versions of the various artifacts used by streaming. -#It drives ivy and the generation of a maven POM - -#Please list the dependencies name with version if they are different from the ones -#listed in the global libraries.properties file (in alphabetical order) diff --git a/third_party/hadoop-0.20.0/contrib/hod/support/checklimits.sh b/third_party/hadoop-0.20.0/contrib/hod/support/checklimits.sh deleted file mode 100644 index 61de9cddf9..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/support/checklimits.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -COMMANDS=( "qstat" "qalter" "checkjob" ) -ERROR=0 -for (( i=0; i<${#COMMANDS[@]}; i++ )) -do - cmd=${COMMANDS[$i]} - CMD_PATH=`which $cmd 2>/dev/null` - if [ $? -ne 0 ] - then - echo Could not find $cmd in PATH - ERROR=1 - fi -done -if [ $ERROR -ne 0 ] -then - exit 1 -fi - -jobs=`qstat -i |grep -o -e '^[0-9]*'` -for job in $jobs -do - echo -en "$job\t" - PATTERN="job [^ ]* violates active HARD MAXPROC limit of \([0-9]*\) for user [^ ]*[ ]*(R: \([0-9]*\), U: \([0-9]*\))" - OUT=`checkjob $job 2>&1|grep -o -e "$PATTERN"` - if [ $? -eq 0 ] - then - echo -en "| Exceeds resource limits\t" - COMMENT_FIELD=`echo $OUT|sed -e "s/$PATTERN/User-limits exceeded. Requested:\2 Used:\3 MaxLimit:\1/"` - qstat -f $job|grep '^[ \t]*comment = .*$' >/dev/null - if [ $? -ne 0 ] - then - echo -en "| Comment field updated\t" - qalter $job -W comment="$COMMENT_FIELD" >/dev/null - else - echo -en "| Comment field already set\t" - fi - else - echo -en "| Doesn't exceed limits.\t" - fi - echo -done diff --git a/third_party/hadoop-0.20.0/contrib/hod/support/logcondense.py b/third_party/hadoop-0.20.0/contrib/hod/support/logcondense.py deleted file mode 100644 index c8fd4dbc02..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/support/logcondense.py +++ /dev/null @@ -1,212 +0,0 @@ -#!/bin/sh - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-#See the License for the specific language governing permissions and -#limitations under the License. -""":" -work_dir=$(dirname $0) -base_name=$(basename $0) -cd $work_dir - -if [ $HOD_PYTHON_HOME ]; then -exec $HOD_PYTHON_HOME -OO -u $base_name ${1+"$@"} -elif [ -e /usr/bin/python ]; then -exec /usr/bin/python -OO -u $base_name ${1+"$@"} -elif [ -e /usr/local/bin/python ]; then -exec /usr/local/bin/python -OO -u $base_name ${1+"$@"} -else -exec python -OO -u $base_name ${1+"$@"} -fi -":""" - -from os import popen3 -import os, sys -import re -import time -from datetime import datetime -from optparse import OptionParser - -myName = os.path.basename(sys.argv[0]) -myName = re.sub(".*/", "", myName) - -reVersion = re.compile(".*(\d+_\d+).*") - -VERSION = '$HeadURL: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20/src/contrib/hod/support/logcondense.py $' - -reMatch = reVersion.match(VERSION) -if reMatch: - VERSION = reMatch.group(1) - VERSION = re.sub("_", ".", VERSION) -else: - VERSION = 'DEV' - -options = ( {'short' : "-p", - 'long' : "--package", - 'type' : "string", - 'action' : "store", - 'dest' : "package", - 'metavar' : " ", - 'default' : 'hadoop', - 'help' : "Bin file for hadoop"}, - - {'short' : "-d", - 'long' : "--days", - 'type' : "int", - 'action' : "store", - 'dest' : "days", - 'metavar' : " ", - 'default' : 7, - 'help' : "Number of days before logs are deleted"}, - - {'short' : "-c", - 'long' : "--config", - 'type' : "string", - 'action' : "store", - 'dest' : "config", - 'metavar' : " ", - 'default' : None, - 'help' : "config directory for hadoop"}, - - {'short' : "-l", - 'long' : "--logs", - 'type' : "string", - 'action' : "store", - 'dest' : "log", - 'metavar' : " ", - 'default' : "/user", - 'help' : "directory prefix under which logs are stored per user"}, - - {'short' : "-n", - 'long' : "--dynamicdfs", - 'type' : "string", - 'action' : "store", - 'dest' : "dynamicdfs", - 'metavar' : " ", - 'default' : "false", - 'help' : "'true', if 
the cluster is used to bring up dynamic dfs clusters, 'false' otherwise"} - ) - -def getDfsCommand(options, args): - if (options.config == None): - cmd = options.package + " " + "dfs " + args - else: - cmd = options.package + " " + "--config " + options.config + " dfs " + args - return cmd - -def runcondense(): - import shutil - - options = process_args() - # if the cluster is used to bring up dynamic dfs, we must leave NameNode and JobTracker logs, - # otherwise only JobTracker logs. Likewise, in case of dynamic dfs, we must also look for - # deleting datanode logs - filteredNames = ['jobtracker'] - deletedNamePrefixes = ['*-tasktracker-*'] - if options.dynamicdfs == 'true': - filteredNames.append('namenode') - deletedNamePrefixes.append('*-datanode-*') - - filepath = '%s/\*/hod-logs/' % (options.log) - cmd = getDfsCommand(options, "-lsr " + filepath) - (stdin, stdout, stderr) = popen3(cmd) - lastjobid = 'none' - toPurge = { } - for line in stdout: - try: - m = re.match("^.*\s(.*)\n$", line) - filename = m.group(1) - # file name format: <prefix>/<user>/hod-logs/<jobid>/[0-9]*-[jobtracker|tasktracker|datanode|namenode|]-hostname-YYYYMMDDtime-random.tar.gz - # first strip prefix: - if filename.startswith(options.log): - filename = filename.lstrip(options.log) - if not filename.startswith('/'): - filename = '/' + filename - else: - continue - - # Now get other details from filename. - k = re.match("/(.*)/hod-logs/(.*)/.*-.*-([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*$", filename) - if k: - username = k.group(1) - jobid = k.group(2) - datetimefile = datetime(int(k.group(3)), int(k.group(4)), int(k.group(5))) - datetimenow = datetime.utcnow() - diff = datetimenow - datetimefile - filedate = k.group(3) + k.group(4) + k.group(5) - newdate = datetimenow.strftime("%Y%m%d") - print "%s %s %s %d" % (filename, filedate, newdate, diff.days) - - # if the cluster is used to bring up dynamic dfs, we must also leave NameNode logs. 
- foundFilteredName = False - for name in filteredNames: - if filename.find(name) >= 0: - foundFilteredName = True - break - - if foundFilteredName: - continue - - if (diff.days > options.days): - desttodel = filename - if not toPurge.has_key(jobid): - toPurge[jobid] = options.log.rstrip("/") + "/" + username + "/hod-logs/" + jobid - except Exception, e: - print >> sys.stderr, e - - for job in toPurge.keys(): - try: - for prefix in deletedNamePrefixes: - cmd = getDfsCommand(options, "-rm " + toPurge[job] + '/' + prefix) - print cmd - ret = 0 - ret = os.system(cmd) - if (ret != 0): - print >> sys.stderr, "Command failed to delete file " + cmd - except Exception, e: - print >> sys.stderr, e - - -def process_args(): - global options, myName, VERSION - - usage = "usage: %s <ARGS>" % (myName) - - version = "%s %s" % (myName, VERSION) - - argParser = OptionParser(usage=usage, version=VERSION) - - for option_element in options: - argParser.add_option(option_element['short'], option_element['long'], - type=option_element['type'], action=option_element['action'], - dest=option_element['dest'], default=option_element['default'], - metavar=option_element['metavar'], help=option_element['help']) - - (parsedOptions, args) = argParser.parse_args() - - if not os.path.exists(parsedOptions.package): - argParser.error("Could not find path to hadoop binary: %s" % parsedOptions.package) - if not os.path.exists(parsedOptions.config): - argParser.error("Could not find config: %s" % parsedOptions.config) - if parsedOptions.days <= 0: - argParser.error("Invalid number of days specified, must be > 0: %s" % parsedOptions.config) - if parsedOptions.dynamicdfs!='true' and parsedOptions.dynamicdfs!='false': - argParser.error("Invalid option for dynamicdfs, must be true or false: %s" % parsedOptions.dynamicdfs) - - return parsedOptions - - -if __name__ == '__main__': - runcondense() - diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/__init__.py 
b/third_party/hadoop-0.20.0/contrib/hod/testing/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/helper.py b/third_party/hadoop-0.20.0/contrib/hod/testing/helper.py deleted file mode 100644 index 5645d388b7..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/helper.py +++ /dev/null @@ -1,33 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-#See the License for the specific language governing permissions and -#limitations under the License. - -import sys - -sampleText = "Hello World!" - -if __name__=="__main__": - args = sys.argv[1:] - if args[0] == "1": - # print sample text to stderr - sys.stdout.write(sampleText) - - elif args[0] == "2": - # print sample text to stderr - sys.stderr.write(sampleText) - - # Add any other helper programs here, with different values for args[0] - pass - diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/lib.py b/third_party/hadoop-0.20.0/contrib/hod/testing/lib.py deleted file mode 100644 index 578d812cc0..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/lib.py +++ /dev/null @@ -1,113 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import unittest, re, sys - -class BaseTestSuite(): - def __init__(self, name, excludes): - self.name = name - self.excludes = excludes - pass - - def runTests(self): - # Create a runner - self.runner = unittest.TextTestRunner() - - # Get all the test-case classes - # From module import * - mod = __import__(self.name, fromlist=['*']) - modItemsList = dir(mod) - - allsuites = [] - - # Create all the test suites - for modItem in modItemsList: - if re.search(r"^test_", modItem): - # Yes this is a test class - if modItem not in self.excludes: - test_class = getattr(mod, modItem) - allsuites.append(unittest.makeSuite(test_class)) - - # Create a master suite to be run. - alltests = unittest.TestSuite(tuple(allsuites)) - - # Run the master test suite. - runner = self.runner.run(alltests) - if(runner.wasSuccessful()): return 0 - printLine( "%s test(s) failed." % runner.failures.__len__()) - printLine( "%s test(s) threw errors." % runner.errors.__len__()) - return runner.failures.__len__() + runner.errors.__len__() - - def cleanUp(self): - # suite tearDown - pass - -def printLine(str): - print >>sys.stderr, str - -def printSeparator(): - str = "" - for i in range(0,79): - str = str + "*" - print >>sys.stderr, "\n", str, "\n" - -# This class captures all log messages logged by hodRunner and other classes. -# It is then used to verify that certain log messages have come. This is one -# way to validate that messages printed to the logger are correctly written. -class MockLogger: - def __init__(self): - self.__logLines = {} - - def info(self, message): - self.__logLines[message] = 'info' - - def critical(self, message): - self.__logLines[message] = 'critical' - - def warn(self, message): - self.__logLines[message] = 'warn' - - def debug(self, message): - # don't track debug lines. - pass - - # verify a certain message has been logged at the defined level of severity. 
- def hasMessage(self, message, level): - if not self.__logLines.has_key(message): - return False - return self.__logLines[message] == level - -# Stub class to test cluster manipulation operations. -class MockHadoopCluster: - - def __init__(self): - # store the operations received. - self.__operations = {} - - def delete_job(self, jobid): - self.__operations['delete_job'] = [jobid] - - def is_cluster_deallocated(self, dummy): - return False - - def wasOperationPerformed(self, operation, args): - if self.__operations.has_key(operation): - actualArgs = self.__operations[operation] - for arg in actualArgs: - if arg not in args: - break - else: - return True - return False diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/main.py b/third_party/hadoop-0.20.0/contrib/hod/testing/main.py deleted file mode 100644 index ec4d4fdd01..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/main.py +++ /dev/null @@ -1,83 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import unittest, os, sys, re - -myPath = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myPath) -testingDir = os.path.join(rootDirectory, "testing") - -sys.path.append(rootDirectory) - -from testing.lib import printSeparator, printLine - -moduleList = [] -allList = [] -excludes = [ - ] - -# Build a module list by scanning through all files in testingDir -for file in os.listdir(testingDir): - if(re.search(r".py$", file) and re.search(r"^test", file)): - # All .py files with names starting in 'test' - module = re.sub(r"^test","",file) - module = re.sub(r".py$","",module) - allList.append(module) - if module not in excludes: - moduleList.append(module) - -printLine("All testcases - %s" % allList) -printLine("Excluding the testcases - %s" % excludes) -printLine("Executing the testcases - %s" % moduleList) - -testsResult = 0 -# Now import each of these modules and start calling the corresponding -#testSuite methods -for moduleBaseName in moduleList: - try: - module = "testing.test" + moduleBaseName - suiteCaller = "Run" + moduleBaseName + "Tests" - printSeparator() - printLine("Running %s" % suiteCaller) - - # Import the corresponding test cases module - imported_module = __import__(module , fromlist=[suiteCaller] ) - - # Call the corresponding suite method now - testRes = getattr(imported_module, suiteCaller)() - testsResult = testsResult + testRes - printLine("Finished %s. TestSuite Result : %s\n" % \ - (suiteCaller, testRes)) - except ImportError, i: - # Failed to import a test module - printLine(i) - testsResult = testsResult + 1 - pass - except AttributeError, n: - # Failed to get suiteCaller from a test module - printLine(n) - testsResult = testsResult + 1 - pass - except Exception, e: - # Test module suiteCaller threw some exception - printLine("%s failed. 
\nReason : %s" % (suiteCaller, e)) - printLine("Skipping %s" % suiteCaller) - testsResult = testsResult + 1 - pass - -if testsResult != 0: - printSeparator() - printLine("Total testcases with failure or error : %s" % testsResult) -sys.exit(testsResult) diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/testHadoop.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testHadoop.py deleted file mode 100644 index b15f6803dd..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testHadoop.py +++ /dev/null @@ -1,123 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import unittest, os, sys, re, threading, time - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -from testing.lib import BaseTestSuite - -excludes = [] - -import tempfile, getpass -from xml.dom import minidom - -from hodlib.Hod.hadoop import hadoopConfig - -# All test-case classes should have the naming convention test_.* -class test_hadoopConfig(unittest.TestCase): - def setUp(self): - self.__hadoopConfig = hadoopConfig() - self.rootDir = '/tmp/hod-%s' % getpass.getuser() - if not os.path.exists(self.rootDir): - os.mkdir(self.rootDir) - self.testingDir = tempfile.mkdtemp( dir=self.rootDir, - prefix='HadoopTestSuite.test_hadoopConfig') - self.confDir = tempfile.mkdtemp(dir=self.rootDir, - prefix='HadoopTestSuite.test_hadoopConfig') - self.tempDir = '/tmp/hod-%s/something' % getpass.getuser() - self.hadoopSite = os.path.join(self.confDir,'hadoop-site.xml') - self.numNodes = 4 - self.hdfsAddr = 'nosuchhost1.apache.org:50505' - self.mapredAddr = 'nosuchhost2.apache.org:50506' - self.finalServerParams = { - 'mapred.child.java.opts' : '-Xmx1024m', - 'mapred.compress.map.output' : 'false', - } - self.serverParams = { - 'mapred.userlog.limit' : '200', - 'mapred.userlog.retain.hours' : '10', - 'mapred.reduce.parallel.copies' : '20', - } - self.clientParams = { - 'mapred.tasktracker.tasks.maximum' : '2', - 'io.sort.factor' : '100', - 'io.sort.mb' : '200', - 'mapred.userlog.limit.kb' : '1024', - 'io.file.buffer.size' : '262144', - } - self.clusterFactor = 1.9 - self.mySysDir = '/user/' + getpass.getuser() + '/mapredsystem' - pass - - def testSuccess(self): - self.__hadoopConfig.gen_site_conf( - confDir = self.confDir,\ - tempDir = self.tempDir,\ - numNodes = self.numNodes,\ - hdfsAddr = self.hdfsAddr,\ - mrSysDir = self.mySysDir,\ - mapredAddr = self.mapredAddr,\ - clientParams = self.clientParams,\ - serverParams = self.serverParams,\ - finalServerParams = self.finalServerParams,\ - 
clusterFactor = self.clusterFactor - - ) - xmldoc = minidom.parse(self.hadoopSite) - xmldoc = xmldoc.childNodes[0] # leave out xml spec - properties = xmldoc.childNodes # children of tag configuration - keyvals = {} - for prop in properties: - if not isinstance(prop,minidom.Comment): - # ---------- tag -------------------- -value elem-- data -- - name = prop.getElementsByTagName('name')[0].childNodes[0].data - value = prop.getElementsByTagName('value')[0].childNodes[0].data - keyvals[name] = value - - # fs.default.name should start with hdfs:// - assert(keyvals['fs.default.name'].startswith('hdfs://')) - assert(keyvals['hadoop.tmp.dir'] == self.tempDir) - - # TODO other tests - pass - - def tearDown(self): - if os.path.exists(self.hadoopSite): os.unlink(self.hadoopSite) - if os.path.exists(self.confDir) : os.rmdir(self.confDir) - if os.path.exists(self.testingDir) : os.rmdir(self.testingDir) - pass - -class HadoopTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - pass - - def cleanUp(self): - # suite tearDown - pass - -def RunHadoopTests(): - suite = HadoopTestSuite() - testResult = suite.runTests() - suite.cleanUp() - return testResult - -if __name__ == "__main__": - RunHadoopTests() diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/testHod.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testHod.py deleted file mode 100644 index 350cccb6e3..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testHod.py +++ /dev/null @@ -1,310 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import unittest, getpass, os, sys, re, threading, time - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -import tempfile -from testing.lib import BaseTestSuite, MockLogger, MockHadoopCluster -from hodlib.Hod.hod import hodRunner, hodState -from hodlib.Common.desc import NodePoolDesc - -excludes = [] - -# Information about all clusters is written to a file called clusters.state. -from hodlib.Hod.hod import CLUSTER_DATA_FILE as TEST_CLUSTER_DATA_FILE, \ - INVALID_STATE_FILE_MSGS - -# Temp directory prefix -TMP_DIR_PREFIX=os.path.join('/tmp', 'hod-%s' % (getpass.getuser())) - -# build a config object with all required keys for initializing hod. -def setupConf(): - cfg = { - 'hod' : { - 'original-dir' : os.getcwd(), - 'stream' : True, - # store all the info about clusters in this directory - 'user_state' : '/tmp/hodtest', - 'debug' : 3, - 'java-home' : os.getenv('JAVA_HOME'), - 'cluster' : 'dummy', - 'cluster-factor' : 1.8, - 'xrs-port-range' : (32768,65536), - 'allocate-wait-time' : 3600, - 'temp-dir' : '/tmp/hod' - }, - # just set everything to dummy. Need something to initialize the - # node pool description object. - 'resource_manager' : { - 'id' : 'dummy', - 'batch-home' : 'dummy', - 'queue' : 'dummy', - } - } - cfg['nodepooldesc'] = NodePoolDesc(cfg['resource_manager']) - return cfg - -# Test class that defines methods to test invalid arguments to hod operations. 
-class test_InvalidArgsOperations(unittest.TestCase): - def setUp(self): - - self.cfg = setupConf() - # initialize the mock objects - self.log = MockLogger() - self.cluster = MockHadoopCluster() - - # Use the test logger. This will be used for test verification. - self.client = hodRunner(self.cfg, log=self.log, cluster=self.cluster) - # Create the hodState object to set the test state you want. - self.state = hodState(self.cfg['hod']['user_state']) - if not os.path.exists(self.cfg['hod']['user_state']): - os.path.mkdir(self.cfg['hod']['user_state']) - p = os.path.join(self.cfg['hod']['user_state'], '%s.state' % TEST_CLUSTER_DATA_FILE) - # ensure cluster data file exists, so write works in the tests. - f = open(p, 'w') - f.close() - - def tearDown(self): - # clean up cluster data file and directory - p = os.path.join(self.cfg['hod']['user_state'], '%s.state' % TEST_CLUSTER_DATA_FILE) - os.remove(p) - os.rmdir(self.cfg['hod']['user_state']) - - # Test that list works with deleted cluster directories - more than one entries which are invalid. - def testListInvalidDirectory(self): - userState = { os.path.join(TMP_DIR_PREFIX, 'testListInvalidDirectory1') : '123.dummy.id1', - os.path.join(TMP_DIR_PREFIX, 'testListInvalidDirectory2') : '123.dummy.id2' } - self.__setupClusterState(userState) - self.client._op_list(['list']) - # assert that required errors are logged. 
- for clusterDir in userState.keys(): - self.assertTrue(self.log.hasMessage('cluster state unknown\t%s\t%s' \ - % (userState[clusterDir], clusterDir), 'info')) - - # simulate a test where a directory is deleted, and created again, without deallocation - clusterDir = os.path.join(TMP_DIR_PREFIX, 'testListEmptyDirectory') - os.makedirs(clusterDir) - self.assertTrue(os.path.isdir(clusterDir)) - userState = { clusterDir : '123.dummy.id3' } - self.__setupClusterState(userState, False) - self.client._op_list(['list']) - self.assertTrue(self.log.hasMessage('cluster state unknown\t%s\t%s' \ - % (userState[clusterDir], clusterDir), 'info')) - os.rmdir(clusterDir) - - # Test that info works with a deleted cluster directory - def testInfoInvalidDirectory(self): - clusterDir = os.path.join(TMP_DIR_PREFIX, 'testInfoInvalidDirectory') - userState = { clusterDir : '456.dummy.id' } - self.__setupClusterState(userState) - self.client._op_info(['info', clusterDir]) - self.assertTrue(self.log.hasMessage("Cannot find information for cluster with id '%s' in previously allocated cluster directory '%s'." % (userState[clusterDir], clusterDir), 'critical')) - - # simulate a test where a directory is deleted, and created again, without deallocation - clusterDir = os.path.join(TMP_DIR_PREFIX, 'testInfoEmptyDirectory') - os.makedirs(clusterDir) - self.assertTrue(os.path.isdir(clusterDir)) - userState = { clusterDir : '456.dummy.id1' } - self.__setupClusterState(userState, False) - self.client._op_info(['info', clusterDir]) - self.assertTrue(self.log.hasMessage("Cannot find information for cluster with id '%s' in previously allocated cluster directory '%s'." 
% (userState[clusterDir], clusterDir), 'critical')) - os.rmdir(clusterDir) - - # Test info works with an invalid cluster directory - def testInfoNonExistentDirectory(self): - clusterDir = '/tmp/hod/testInfoNonExistentDirectory' - self.client._op_info(['info', clusterDir]) - self.assertTrue(self.log.hasMessage("Invalid hod.clusterdir(--hod.clusterdir or -d). %s : No such directory" % (clusterDir), 'critical')) - - # Test that deallocation works on a deleted cluster directory - # by clearing the job, and removing the state - def testDeallocateInvalidDirectory(self): - clusterDir = os.path.join(TMP_DIR_PREFIX,'testDeallocateInvalidDirectory') - jobid = '789.dummy.id' - userState = { clusterDir : jobid } - self.__setupClusterState(userState) - self.client._op_deallocate(['deallocate', clusterDir]) - # verify job was deleted - self.assertTrue(self.cluster.wasOperationPerformed('delete_job', jobid)) - # verify appropriate message was logged. - self.assertTrue(self.log.hasMessage("Cannot find information for cluster with id '%s' in previously allocated cluster directory '%s'." % (userState[clusterDir], clusterDir), 'critical')) - self.assertTrue(self.log.hasMessage("Freeing resources allocated to the cluster.", 'critical')) - # verify that the state information was cleared. - userState = self.state.read(TEST_CLUSTER_DATA_FILE) - self.assertFalse(clusterDir in userState.keys()) - - # simulate a test where a directory is deleted, and created again, without deallocation - clusterDir = os.path.join(TMP_DIR_PREFIX,'testDeallocateEmptyDirectory') - os.makedirs(clusterDir) - self.assertTrue(os.path.isdir(clusterDir)) - jobid = '789.dummy.id1' - userState = { clusterDir : jobid } - self.__setupClusterState(userState, False) - self.client._op_deallocate(['deallocate', clusterDir]) - # verify job was deleted - self.assertTrue(self.cluster.wasOperationPerformed('delete_job', jobid)) - # verify appropriate message was logged. 
- self.assertTrue(self.log.hasMessage("Cannot find information for cluster with id '%s' in previously allocated cluster directory '%s'." % (userState[clusterDir], clusterDir), 'critical')) - self.assertTrue(self.log.hasMessage("Freeing resources allocated to the cluster.", 'critical')) - # verify that the state information was cleared. - userState = self.state.read(TEST_CLUSTER_DATA_FILE) - self.assertFalse(clusterDir in userState.keys()) - os.rmdir(clusterDir) - - # Test that deallocation works on a nonexistent directory. - def testDeallocateNonExistentDirectory(self): - clusterDir = os.path.join(TMP_DIR_PREFIX,'testDeallocateNonExistentDirectory') - self.client._op_deallocate(['deallocate', clusterDir]) - # there should be no call.. - self.assertFalse(self.cluster.wasOperationPerformed('delete_job', None)) - self.assertTrue(self.log.hasMessage("Invalid hod.clusterdir(--hod.clusterdir or -d). %s : No such directory" % (clusterDir), 'critical')) - - # Test that allocation on an previously deleted directory fails. - def testAllocateOnDeletedDirectory(self): - clusterDir = os.path.join(TMP_DIR_PREFIX, 'testAllocateOnDeletedDirectory') - os.makedirs(clusterDir) - self.assertTrue(os.path.isdir(clusterDir)) - jobid = '1234.abc.com' - userState = { clusterDir : jobid } - self.__setupClusterState(userState, False) - self.client._op_allocate(['allocate', clusterDir, '3']) - self.assertTrue(self.log.hasMessage("Found a previously allocated cluster at "\ - "cluster directory '%s'. HOD cannot determine if this cluster "\ - "can be automatically deallocated. Deallocate the cluster if it "\ - "is unused." % (clusterDir), 'critical')) - os.rmdir(clusterDir) - - def __setupClusterState(self, clusterStateMap, verifyDirIsAbsent=True): - for clusterDir in clusterStateMap.keys(): - # ensure directory doesn't exist, just in case. - if verifyDirIsAbsent: - self.assertFalse(os.path.exists(clusterDir)) - # set up required state. 
- self.state.write(TEST_CLUSTER_DATA_FILE, clusterStateMap) - # verify everything is stored correctly. - state = self.state.read(TEST_CLUSTER_DATA_FILE) - for clusterDir in clusterStateMap.keys(): - self.assertTrue(clusterDir in state.keys()) - self.assertEquals(clusterStateMap[clusterDir], state[clusterDir]) - -class test_InvalidHodStateFiles(unittest.TestCase): - def setUp(self): - self.rootDir = '/tmp/hod-%s' % getpass.getuser() - self.cfg = setupConf() # creat a conf - # Modify hod.user_state - self.cfg['hod']['user_state'] = tempfile.mkdtemp(dir=self.rootDir, - prefix='HodTestSuite.test_InvalidHodStateFiles_') - self.log = MockLogger() # mock logger - self.cluster = MockHadoopCluster() # mock hadoop cluster - self.client = hodRunner(self.cfg, log=self.log, cluster=self.cluster) - self.state = hodState(self.cfg['hod']['user_state']) - self.statePath = os.path.join(self.cfg['hod']['user_state'], '%s.state' % \ - TEST_CLUSTER_DATA_FILE) - self.clusterDir = tempfile.mkdtemp(dir=self.rootDir, - prefix='HodTestSuite.test_InvalidHodStateFiles_') - - def testOperationWithInvalidStateFile(self): - jobid = '1234.hadoop.apache.org' - # create user state file with invalid permissions - stateFile = open(self.statePath, "w") - os.chmod(self.statePath, 000) # has no read/write permissions - self.client._hodRunner__cfg['hod']['operation'] = \ - "info %s" % self.clusterDir - ret = self.client.operation() - os.chmod(self.statePath, 700) # restore permissions - stateFile.close() - os.remove(self.statePath) - - # print self.log._MockLogger__logLines - self.assertTrue(self.log.hasMessage(INVALID_STATE_FILE_MSGS[0] % \ - os.path.realpath(self.statePath), 'critical')) - self.assertEquals(ret, 1) - - def testAllocateWithInvalidStateFile(self): - jobid = '1234.hadoop.apache.org' - # create user state file with invalid permissions - stateFile = open(self.statePath, "w") - os.chmod(self.statePath, 0400) # has no write permissions - self.client._hodRunner__cfg['hod']['operation'] = \ - 
"allocate %s %s" % (self.clusterDir, '3') - ret = self.client.operation() - os.chmod(self.statePath, 700) # restore permissions - stateFile.close() - os.remove(self.statePath) - - # print self.log._MockLogger__logLines - self.assertTrue(self.log.hasMessage(INVALID_STATE_FILE_MSGS[2] % \ - os.path.realpath(self.statePath), 'critical')) - self.assertEquals(ret, 1) - - def testAllocateWithInvalidStateStore(self): - jobid = '1234.hadoop.apache.org' - self.client._hodRunner__cfg['hod']['operation'] = \ - "allocate %s %s" % (self.clusterDir, 3) - - ###### check with no executable permissions ###### - stateFile = open(self.statePath, "w") # create user state file - os.chmod(self.cfg['hod']['user_state'], 0600) - ret = self.client.operation() - os.chmod(self.cfg['hod']['user_state'], 0700) # restore permissions - stateFile.close() - os.remove(self.statePath) - # print self.log._MockLogger__logLines - self.assertTrue(self.log.hasMessage(INVALID_STATE_FILE_MSGS[0] % \ - os.path.realpath(self.statePath), 'critical')) - self.assertEquals(ret, 1) - - ###### check with no write permissions ###### - stateFile = open(self.statePath, "w") # create user state file - os.chmod(self.cfg['hod']['user_state'], 0500) - ret = self.client.operation() - os.chmod(self.cfg['hod']['user_state'], 0700) # restore permissions - stateFile.close() - os.remove(self.statePath) - # print self.log._MockLogger__logLines - self.assertTrue(self.log.hasMessage(INVALID_STATE_FILE_MSGS[0] % \ - os.path.realpath(self.statePath), 'critical')) - self.assertEquals(ret, 1) - - def tearDown(self): - if os.path.exists(self.clusterDir): os.rmdir(self.clusterDir) - if os.path.exists(self.cfg['hod']['user_state']): - os.rmdir(self.cfg['hod']['user_state']) - - -class HodTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - pass - - def cleanUp(self): - # suite tearDown - pass - -def RunHodTests(): - # modulename_suite - suite = HodTestSuite() - testResult = 
suite.runTests() - suite.cleanUp() - return testResult - -if __name__ == "__main__": - RunHodTests() diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/testHodCleanup.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testHodCleanup.py deleted file mode 100644 index 93e18333fd..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testHodCleanup.py +++ /dev/null @@ -1,113 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import unittest, os, sys, re, threading, time - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -from testing.lib import BaseTestSuite -from hodlib.HodRing.hodRing import MRSystemDirectoryManager, createMRSystemDirectoryManager -from hodlib.Common.threads import simpleCommand - -excludes = [] - -# duplicating temporarily until HADOOP-2848 is committed. -class MyMockLogger: - def __init__(self): - self.__logLines = {} - - def info(self, message): - self.__logLines[message] = 'info' - - def critical(self, message): - self.__logLines[message] = 'critical' - - def warn(self, message): - self.__logLines[message] = 'warn' - - def debug(self, message): - # don't track debug lines. 
- pass - - # verify a certain message has been logged at the defined level of severity. - def hasMessage(self, message, level): - if not self.__logLines.has_key(message): - return False - return self.__logLines[message] == level - -class test_MRSystemDirectoryManager(unittest.TestCase): - - def setUp(self): - self.log = MyMockLogger() - - def testCleanupArgsString(self): - sysDirMgr = MRSystemDirectoryManager(1234, '/user/hod/mapredsystem/hoduser.123.abc.com', \ - 'def.com:5678', '/usr/bin/hadoop', self.log) - str = sysDirMgr.toCleanupArgs() - self.assertTrue(" --jt-pid 1234 --mr-sys-dir /user/hod/mapredsystem/hoduser.123.abc.com --fs-name def.com:5678 --hadoop-path /usr/bin/hadoop ", str) - - def testCreateMRSysDirInvalidParams(self): - # test that no mr system directory manager is created if required keys are not present - # this case will test scenarios of non jobtracker daemons. - keys = [ 'jt-pid', 'mr-sys-dir', 'fs-name', 'hadoop-path' ] - map = { 'jt-pid' : 1234, - 'mr-sys-dir' : '/user/hod/mapredsystem/hoduser.def.com', - 'fs-name' : 'ghi.com:1234', - 'hadoop-path' : '/usr/bin/hadoop' - } - for key in keys: - val = map[key] - map[key] = None - self.assertEquals(createMRSystemDirectoryManager(map, self.log), None) - map[key] = val - - def testUnresponsiveJobTracker(self): - # simulate an unresponsive job tracker, by giving a command that runs longer than the retries - # verify that the program returns with the right error message. - sc = simpleCommand("sleep", "sleep 300") - sc.start() - pid = sc.getPid() - while pid is None: - pid = sc.getPid() - sysDirMgr = MRSystemDirectoryManager(pid, '/user/yhemanth/mapredsystem/hoduser.123.abc.com', \ - 'def.com:5678', '/usr/bin/hadoop', self.log, retries=3) - sysDirMgr.removeMRSystemDirectory() - self.log.hasMessage("Job Tracker did not exit even after a minute. 
Not going to try and cleanup the system directory", 'warn') - sc.kill() - sc.wait() - sc.join() - -class HodCleanupTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - pass - - def cleanUp(self): - # suite tearDown - pass - -def RunHodCleanupTests(): - # modulename_suite - suite = HodCleanupTestSuite() - testResult = suite.runTests() - suite.cleanUp() - return testResult - -if __name__ == "__main__": - RunHodCleanupTests() diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/testHodRing.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testHodRing.py deleted file mode 100644 index 609c19908a..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testHodRing.py +++ /dev/null @@ -1,117 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import unittest, os, sys, re, threading, time - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -from testing.lib import BaseTestSuite - -excludes = [] - -import tempfile, getpass, logging -from xml.dom import minidom - -from hodlib.Hod.hadoop import hadoopConfig -from hodlib.HodRing.hodRing import CommandDesc, HadoopCommand - -# All test-case classes should have the naming convention test_.* -class test_HadoopCommand(unittest.TestCase): - def setUp(self): - self.rootDir = '/tmp/hod-%s' % getpass.getuser() - self.id = 0 - self.desc = None - self.tempDir = os.path.join(self.rootDir,'test_HadoopCommand_tempDir') - self.pkgDir = os.path.join(self.rootDir,'test_HadoopCommand_pkgDir') - self.log = logging.getLogger() # TODO Use MockLogger - self.javaHome = '/usr/java/bin/' - self.mrSysDir = '/user/' + getpass.getuser() + '/mapredsystem' - - self.attrs = {} - self.finalAttrs = { - 'fs.default.name': 'nohost.apache.com:56366', - 'mapred.child.java.opts' : '-Xmx1024m', - 'mapred.compress.map.output' : 'false', - } - self.attrs = { - 'mapred.userlog.limit' : '200', - 'mapred.userlog.retain.hours' : '10', - 'mapred.reduce.parallel.copies' : '20', - } - self.desc = CommandDesc( - { - 'name' : 'dummyHadoop', - 'program' : 'bin/hadoop', - 'pkgdirs' : self.pkgDir, - 'final-attrs' : self.finalAttrs, - 'attrs' : self.attrs, - }, self.log - ) - # TODO - # 4th arg to HadoopCommand 'tardir' is not used at all. Instead pkgdir is - # specified through HadoopCommand.run(pkgdir). This could be changed so - # that pkgdir is specified at the time of object creation. 
- # END OF TODO - self.hadoopCommand = HadoopCommand(self.id, self.desc, self.tempDir, - self.pkgDir, self.log, self.javaHome, - self.mrSysDir, restart=True) - self.hadoopSite = os.path.join( self.hadoopCommand.confdir, - 'hadoop-site.xml') - pass - - def test_createHadoopSiteXml(self): - self.hadoopCommand._createHadoopSiteXml() - xmldoc = minidom.parse(self.hadoopSite) - xmldoc = xmldoc.childNodes[0] # leave out xml spec - properties = xmldoc.childNodes # children of tag configuration - keyvals = {} - for prop in properties: - if not isinstance(prop,minidom.Comment): - # ---------- tag -------------------- -value elem-- data -- - name = prop.getElementsByTagName('name')[0].childNodes[0].data - value = prop.getElementsByTagName('value')[0].childNodes[0].data - keyvals[name] = value - - # fs.default.name should start with hdfs:// - assert(keyvals['fs.default.name'].startswith('hdfs://')) - - # TODO other tests - pass - - def tearDown(self): - pass - -class HodRingTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - pass - - def cleanUp(self): - # suite tearDown - pass - -def RunHodRingTests(): - # modulename_suite - suite = HodRingTestSuite() - testResult = suite.runTests() - suite.cleanUp() - return testResult - -if __name__ == "__main__": - RunHodRingTests() diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/testModule.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testModule.py deleted file mode 100644 index a09fd04709..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testModule.py +++ /dev/null @@ -1,88 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. 
The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import unittest, os, sys, re, threading, time - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -from testing.lib import BaseTestSuite - -excludes = ['test_MINITEST3'] - -# All test-case classes should have the naming convention test_.* -class test_MINITEST1(unittest.TestCase): - def setUp(self): - pass - - # All testMethods have to have their names start with 'test' - def testSuccess(self): - pass - - def testFailure(self): - pass - - def tearDown(self): - pass - -class test_MINITEST2(unittest.TestCase): - def setUp(self): - pass - - # All testMethods have to have their names start with 'test' - def testSuccess(self): - pass - - def testFailure(self): - pass - - def tearDown(self): - pass - -class test_MINITEST3(unittest.TestCase): - def setUp(self): - pass - - # All testMethods have to have their names start with 'test' - def testSuccess(self): - pass - - def testFailure(self): - pass - - def tearDown(self): - pass - -class ModuleTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - pass - - def cleanUp(self): - # suite tearDown - pass - -def RunModuleTests(): - # modulename_suite - suite = ModuleTestSuite() - testResult = suite.runTests() - suite.cleanUp() - return testResult - -if __name__ == "__main__": - RunModuleTests() diff --git 
a/third_party/hadoop-0.20.0/contrib/hod/testing/testRingmasterRPCs.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testRingmasterRPCs.py deleted file mode 100644 index 5a02e066b0..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testRingmasterRPCs.py +++ /dev/null @@ -1,171 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import unittest, os, sys, re, threading, time - -import logging - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -from testing.lib import BaseTestSuite - -excludes = ['test_MINITEST1', 'test_MINITEST2'] - -from hodlib.GridServices import * -from hodlib.Common.desc import ServiceDesc -from hodlib.RingMaster.ringMaster import _LogMasterSources - -configuration = { - 'hod': {}, - 'resource_manager': { - 'id': 'torque', - 'batch-home': '/home/y/' - }, - 'ringmaster': { - 'max-connect' : 2, - 'max-master-failures' : 5 - }, - 'hodring': { - }, - 'gridservice-mapred': { - 'id': 'mapred' - } , - 'gridservice-hdfs': { - 'id': 'hdfs' - }, - 'servicedesc' : {} , - 'nodepooldesc': {} , - } - -# All test-case classes should have the naming convention test_.* -class test_MINITEST1(unittest.TestCase): - def setUp(self): - pass - - # All testMethods have to have their names start with 'test' - def testSuccess(self): - pass - - def testFailure(self): - pass - - def tearDown(self): - pass - -class test_Multiple_Workers(unittest.TestCase): - def setUp(self): - self.config = configuration - self.config['ringmaster']['workers_per_ring'] = 2 - - hdfsDesc = self.config['servicedesc']['hdfs'] = ServiceDesc(self.config['gridservice-hdfs']) - mrDesc = self.config['servicedesc']['mapred'] = ServiceDesc(self.config['gridservice-mapred']) - - self.hdfs = Hdfs(hdfsDesc, [], 0, 19, workers_per_ring = \ - self.config['ringmaster']['workers_per_ring']) - self.mr = MapReduce(mrDesc, [],1, 19, workers_per_ring = \ - self.config['ringmaster']['workers_per_ring']) - - self.log = logging.getLogger() - pass - - # All testMethods have to have their names start with 'test' - def testWorkersCount(self): - self.serviceDict = {} - self.serviceDict[self.hdfs.getName()] = self.hdfs - self.serviceDict[self.mr.getName()] = self.mr - self.rpcSet = _LogMasterSources(self.serviceDict, self.config, None, self.log, None) - - 
cmdList = self.rpcSet.getCommand('host1') - self.assertEquals(len(cmdList), 2) - self.assertEquals(cmdList[0].dict['argv'][0], 'namenode') - self.assertEquals(cmdList[1].dict['argv'][0], 'namenode') - addParams = ['fs.default.name=host1:51234', 'dfs.http.address=host1:5125' ] - self.rpcSet.addMasterParams('host1', addParams) - # print "NN is launched" - - cmdList = self.rpcSet.getCommand('host2') - self.assertEquals(len(cmdList), 1) - self.assertEquals(cmdList[0].dict['argv'][0], 'jobtracker') - addParams = ['mapred.job.tracker=host2:51236', - 'mapred.job.tracker.http.address=host2:51237'] - self.rpcSet.addMasterParams('host2', addParams) - # print "JT is launched" - - cmdList = self.rpcSet.getCommand('host3') - # Verify the workers count per ring : TTs + DNs - self.assertEquals(len(cmdList), - self.config['ringmaster']['workers_per_ring'] * 2) - pass - - def testFailure(self): - pass - - def tearDown(self): - pass - -class test_GetCommand(unittest.TestCase): - def setUp(self): - self.config = configuration - - hdfsDesc = self.config['servicedesc']['hdfs'] = ServiceDesc(self.config['gridservice-hdfs']) - mrDesc = self.config['servicedesc']['mapred'] = ServiceDesc(self.config['gridservice-mapred']) - - # API : serviceObj = service(desc, workDirs, reqNodes, version) - self.hdfs = Hdfs(hdfsDesc, [], 0, 17) - self.hdfsExternal = HdfsExternal(hdfsDesc, [], 17) - self.mr = MapReduce(mrDesc, [],1, 17) - self.mrExternal = MapReduceExternal(mrDesc, [], 17) - - self.log = logging.getLogger() - pass - - # All testMethods have to have their names start with 'test' - def testBothInternal(self): - self.serviceDict = {} - self.serviceDict[self.hdfs.getName()] = self.hdfs - self.serviceDict[self.mr.getName()] = self.mr - self.rpcSet = _LogMasterSources(self.serviceDict, self.config, None, self.log, None) - - cmdList = self.rpcSet.getCommand('localhost') - self.assertEquals(cmdList.__len__(), 2) - self.assertEquals(cmdList[0].dict['argv'][0], 'namenode') - 
self.assertEquals(cmdList[1].dict['argv'][0], 'namenode') - pass - - def tearDown(self): - pass - -class RingmasterRPCsTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - pass - - def cleanUp(self): - # suite tearDown - pass - -def RunRingmasterRPCsTests(): - # modulename_suite - suite = RingmasterRPCsTestSuite() - testResult = suite.runTests() - suite.cleanUp() - return testResult - -if __name__ == "__main__": - RunRingmasterRPCsTests() diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/testThreads.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testThreads.py deleted file mode 100644 index 22753cfe90..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testThreads.py +++ /dev/null @@ -1,99 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import unittest, os, sys, re, threading, time - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -from testing.lib import BaseTestSuite - -# module specific imports -import os, tempfile, random - -excludes = [] - -import getpass -from hodlib.Common.threads import simpleCommand -from testing.helper import sampleText - -# All test-case classes should have the naming convention test_.* -class test_SimpleCommand(unittest.TestCase): - def setUp(self): - self.rootDir = '/tmp/hod-%s' % getpass.getuser() - if not os.path.exists(self.rootDir): - os.mkdir(self.rootDir) - self.prefix= 'ThreadsTestSuite.test_SimpleCommand' - self.testFile = None - pass - - def testRedirectedStdout(self): - self.testFile= tempfile.NamedTemporaryFile(dir=self.rootDir, \ - prefix=self.prefix) - cmd=simpleCommand('helper','%s %s 1 1>%s' % \ - (sys.executable, \ - os.path.join(rootDirectory, "testing", "helper.py"), \ - self.testFile.name)) - - cmd.start() - cmd.join() - - self.testFile.seek(0) - stdout = self.testFile.read() - # print stdout, sampleText - assert(stdout == sampleText) - pass - - def testRedirectedStderr(self): - self.testFile= tempfile.NamedTemporaryFile(dir=self.rootDir, \ - prefix=self.prefix) - cmd=simpleCommand('helper','%s %s 2 2>%s' % \ - (sys.executable, \ - os.path.join(rootDirectory, "testing", "helper.py"), \ - self.testFile.name)) - cmd.start() - cmd.join() - - self.testFile.seek(0) - stderror = self.testFile.read() - # print stderror, sampleText - assert(stderror == sampleText) - pass - - def tearDown(self): - if self.testFile: self.testFile.close() - pass - -class ThreadsTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - pass - - def cleanUp(self): - # suite tearDown - pass - -def RunThreadsTests(): - # modulename_suite - suite = ThreadsTestSuite() - testResult = suite.runTests() - suite.cleanUp() - return testResult 
- -if __name__ == "__main__": - RunThreadsTests() diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/testTypes.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testTypes.py deleted file mode 100644 index 7e23dca3be..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testTypes.py +++ /dev/null @@ -1,180 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import unittest, os, sys, re, threading, time - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -from testing.lib import BaseTestSuite - -excludes = [''] - -import tempfile, shutil, getpass, random -from hodlib.Common.types import typeValidator - -# All test-case classes should have the naming convention test_.* -class test_typeValidator(unittest.TestCase): - def setUp(self): - self.originalDir = os.getcwd() - self.validator = typeValidator(self.originalDir) - self.tempDir = tempfile.mkdtemp(dir='/tmp/hod-%s' % getpass.getuser(), - prefix='test_Types_typeValidator_tempDir') - self.tempFile = tempfile.NamedTemporaryFile(dir=self.tempDir) - - # verification : error strings - self.errorStringsForVerify = { - 'pos_int' : 0, - 'uri' : '%s is an invalid uri', - 'directory' : 0, - 'file' : 0, - } - - # verification : valid vals - self.verifyValidVals = [ - ('pos_int', 0), - ('pos_int', 1), - ('directory', self.tempDir), - ('directory', '/tmp/hod-%s/../../%s' % \ - (getpass.getuser(), self.tempDir)), - ('file', self.tempFile.name), - ('file', '/tmp/hod-%s/../../%s' % \ - (getpass.getuser(), self.tempFile.name)), - ('uri', 'file://localhost/' + self.tempDir), - ('uri', 'file:///' + self.tempDir), - ('uri', 'file:///tmp/hod-%s/../../%s' % \ - (getpass.getuser(), self.tempDir)), - ('uri', 'file://localhost/tmp/hod-%s/../../%s' % \ - (getpass.getuser(), self.tempDir)), - ('uri', 'http://hadoop.apache.org/core/'), - ('uri', self.tempDir), - ('uri', '/tmp/hod-%s/../../%s' % \ - (getpass.getuser(), self.tempDir)), - ] - - # generate an invalid uri - randomNum = random.random() - while os.path.exists('/%s' % randomNum): - # Just to be sure :) - randomNum = random.random() - invalidUri = 'file://localhost/%s' % randomNum - - # verification : invalid vals - self.verifyInvalidVals = [ - ('pos_int', -1), - ('uri', invalidUri), - ('directory', self.tempFile.name), - ('file', self.tempDir), - ] - - 
# normalization : vals - self.normalizeVals = [ - ('pos_int', 1, 1), - ('pos_int', '1', 1), - ('directory', self.tempDir, self.tempDir), - ('directory', '/tmp/hod-%s/../../%s' % \ - (getpass.getuser(), self.tempDir), - self.tempDir), - ('file', self.tempFile.name, self.tempFile.name), - ('file', '/tmp/hod-%s/../../%s' % \ - (getpass.getuser(), self.tempFile.name), - self.tempFile.name), - ('uri', 'file://localhost' + self.tempDir, - 'file://' + self.tempDir), - ('uri', 'file://127.0.0.1' + self.tempDir, - 'file://' + self.tempDir), - ('uri', 'http://hadoop.apache.org/core', - 'http://hadoop.apache.org/core'), - ('uri', self.tempDir, self.tempDir), - ('uri', '/tmp/hod-%s/../../%s' % \ - (getpass.getuser(), self.tempDir), - self.tempDir), - ] - pass - - # All testMethods have to have their names start with 'test' - def testnormalize(self): - for (type, originalVal, normalizedVal) in self.normalizeVals: - # print type, originalVal, normalizedVal,\ - # self.validator.normalize(type, originalVal) - assert(self.validator.normalize(type, originalVal) == normalizedVal) - pass - - def test__normalize(self): - # Special test for functionality of private method __normalizedPath - tmpdir = tempfile.mkdtemp(dir=self.originalDir) #create in self.originalDir - oldWd = os.getcwd() - os.chdir('/') - tmpdirName = re.sub(".*/","",tmpdir) - # print re.sub(".*/","",tmpdirName) - # print os.path.join(self.originalDir,tmpdir) - (type, originalVal, normalizedVal) = \ - ('file', tmpdirName, \ - os.path.join(self.originalDir,tmpdirName)) - assert(self.validator.normalize(type, originalVal) == normalizedVal) - os.chdir(oldWd) - os.rmdir(tmpdir) - pass - - def testverify(self): - # test verify method - - # test valid vals - for (type,value) in self.verifyValidVals: - valueInfo = { 'isValid' : 0, 'normalized' : 0, 'errorData' : 0 } - valueInfo = self.validator.verify(type,value) - # print type, value, valueInfo - assert(valueInfo['isValid'] == 1) - - # test invalid vals - for (type,value) in 
self.verifyInvalidVals: - valueInfo = { 'isValid' : 0, 'normalized' : 0, 'errorData' : 0 } - valueInfo = self.validator.verify(type,value) - # print type, value, valueInfo - assert(valueInfo['isValid'] == 0) - if valueInfo['errorData'] != 0: - # if there is any errorData, check - assert(valueInfo['errorData'] == \ - self.errorStringsForVerify[type] % value) - - pass - - def tearDown(self): - self.tempFile.close() - if os.path.exists(self.tempDir): - shutil.rmtree(self.tempDir) - pass - -class TypesTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - pass - - def cleanUp(self): - # suite tearDown - pass - -def RunTypesTests(): - # modulename_suite - suite = TypesTestSuite() - testResult = suite.runTests() - suite.cleanUp() - return testResult - -if __name__ == "__main__": - RunTypesTests() diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/testUtil.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testUtil.py deleted file mode 100644 index 62003c99df..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testUtil.py +++ /dev/null @@ -1,62 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import unittest, os, sys, re, threading, time - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -from testing.lib import BaseTestSuite -from hodlib.Common.util import * -from hodlib.Common.threads import simpleCommand - -excludes = [] - -class test_Util(unittest.TestCase): - - def testProcessStatus(self): - sc = simpleCommand('testsleep', 'sleep 60') - sc.start() - pid = sc.getPid() - while pid is None: - pid = sc.getPid() - self.assertTrue(isProcessRunning(pid)) - sc.kill() - sc.wait() - sc.join() - self.assertFalse(isProcessRunning(pid)) - - -class UtilTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - pass - - def cleanUp(self): - # suite tearDown - pass - -def RunUtilTests(): - # modulename_suite - suite = UtilTestSuite() - testResult = suite.runTests() - suite.cleanUp() - return testResult - -if __name__ == "__main__": - RunUtilTests() diff --git a/third_party/hadoop-0.20.0/contrib/hod/testing/testXmlrpc.py b/third_party/hadoop-0.20.0/contrib/hod/testing/testXmlrpc.py deleted file mode 100644 index f630032dfa..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/testing/testXmlrpc.py +++ /dev/null @@ -1,109 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-#See the License for the specific language governing permissions and -#limitations under the License. -import unittest, os, sys, re, threading, time - -myDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/testing/.*", "", myDirectory) - -sys.path.append(rootDirectory) - -from hodlib.Common.xmlrpc import hodXRClient -from hodlib.Common.socketServers import hodXMLRPCServer -from hodlib.GridServices.service import ServiceUtil -from hodlib.Common.util import hodInterrupt, HodInterruptException - -from testing.lib import BaseTestSuite - -excludes = [] - -global serverPort -serverPort = None - -class test_HodXRClient(unittest.TestCase): - def setUp(self): - pass - - # All testMethods have to have their names start with 'test' - def testSuccess(self): - global serverPort - client = hodXRClient('http://localhost:' + str(serverPort), retryRequests=False) - self.assertEqual(client.testing(), True) - pass - - def testFailure(self): - """HOD should raise Exception when unregistered rpc is called""" - global serverPort - client = hodXRClient('http://localhost:' + str(serverPort), retryRequests=False) - self.assertRaises(Exception, client.noMethod) - pass - - def testTimeout(self): - """HOD should raise Exception when rpc call times out""" - # Give client some random nonexistent url - serverPort = ServiceUtil.getUniqRandomPort(h='localhost',low=40000,high=50000) - client = hodXRClient('http://localhost:' + str(serverPort), retryRequests=False) - self.assertRaises(Exception, client.testing) - pass - - def testInterrupt(self): - """ HOD should raise HodInterruptException when interrupted""" - - def interrupt(testClass): - testClass.assertRaises(HodInterruptException, client.testing) - - serverPort = ServiceUtil.getUniqRandomPort(h='localhost',low=40000,high=50000) - client = hodXRClient('http://localhost:' + str(serverPort)) - myThread = threading.Thread(name='testinterrupt', target=interrupt,args=(self,)) - # Set the global interrupt - hodInterrupt.setFlag() - 
myThread.start() - myThread.join() - pass - - def tearDown(self): - pass - -class XmlrpcTestSuite(BaseTestSuite): - def __init__(self): - # suite setup - BaseTestSuite.__init__(self, __name__, excludes) - - def rpcCall(): - return True - - global serverPort - serverPort = ServiceUtil.getUniqRandomPort(h='localhost',low=40000,high=50000) - self.server = hodXMLRPCServer('localhost', [serverPort]) - self.server.register_function(rpcCall, 'testing') - self.thread = threading.Thread(name="server", - target=self.server._serve_forever) - self.thread.start() - time.sleep(1) # give some time to start server - - def cleanUp(self): - # suite tearDown - self.server.stop() - self.thread.join() - -def RunXmlrpcTests(): - # modulename_suite - suite = XmlrpcTestSuite() - testResult = suite.runTests() - suite.cleanUp() - return testResult - -if __name__ == "__main__": - RunXmlrpcTests() |