/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.ClusterStatus.BlackListInfo;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.ClusterMetrics;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.QueueInfo;
import org.apache.hadoop.mapreduce.TaskTrackerInfo;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.mapreduce.tools.CLI;
import org.apache.hadoop.mapreduce.util.ConfigUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenRenewer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * <code>JobClient</code> is the primary interface for the user-job to interact
 * with the cluster.
 * 
 * <code>JobClient</code> provides facilities to submit jobs, track their 
 * progress, access component-tasks' reports/logs, get Map-Reduce cluster
 * status information, etc.
 * 
 * <p>The job submission process involves:
 * <ol>
 *   <li>
 *   Checking the input and output specifications of the job.
 *   </li>
 *   <li>
 *   Computing the {@link InputSplit}s for the job.
 *   </li>
 *   <li>
 *   Setting up the requisite accounting information for the 
 *   {@link DistributedCache} of the job, if necessary.
 *   </li>
 *   <li>
 *   Copying the job's jar and configuration to the map-reduce system directory 
 *   on the distributed file-system. 
 *   </li>
 *   <li>
 *   Submitting the job to the cluster and optionally monitoring
 *   its status.
 *   </li>
 * </ol></p>
 *  
 * Normally the user creates the application, describes various facets of the
 * job via {@link JobConf} and then uses the <code>JobClient</code> to submit 
 * the job and monitor its progress.
 * 
 * <p>Here is an example on how to use <code>JobClient</code>:</p>
 * <p><blockquote><pre>
 *     // Create a new JobConf
 *     JobConf job = new JobConf(new Configuration(), MyJob.class);
 *     
 *     // Specify various job-specific parameters     
 *     job.setJobName("myjob");
 *     
 *     FileInputFormat.setInputPaths(job, new Path("in"));
 *     FileOutputFormat.setOutputPath(job, new Path("out"));
 *     
 *     job.setMapperClass(MyJob.MyMapper.class);
 *     job.setReducerClass(MyJob.MyReducer.class);
 *
 *     // Submit the job, then poll for progress until the job is complete
 *     JobClient.runJob(job);
 * </pre></blockquote></p>
 * 
 * <h4 id="JobControl">Job Control</h4>
 * 
 * <p>At times clients chain map-reduce jobs to accomplish complex tasks 
 * which cannot be done via a single map-reduce job. This is fairly easy since 
 * the output of a job typically goes to the distributed file-system, and that
 * output, in turn, can be used as the input for the next job.</p>
 * 
 * <p>However, this also means that the onus of ensuring jobs are complete 
 * (success/failure) lies squarely on the clients. In such situations the 
 * various job-control options are:
 * <ol>
 *   <li>
 *   {@link #runJob(JobConf)} : submits the job and returns only after 
 *   the job has completed.
 *   </li>
 *   <li>
 *   {@link #submitJob(JobConf)} : only submits the job; the client can then 
 *   poll the returned handle to the {@link RunningJob} to query status and 
 *   make scheduling decisions, as sketched below.
 *   </li>
 *   <li>
 *   {@link JobConf#setJobEndNotificationURI(String)} : sets up a notification
 *   on job-completion, thus avoiding polling.
 *   </li>
 * </ol></p>
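 *
 * <p>For example, a minimal submit-then-poll sketch (error handling and
 * <code>InterruptedException</code> handling omitted; the poll interval is
 * an arbitrary choice):</p>
 * <p><blockquote><pre>
 *     JobClient jc = new JobClient(job);
 *     RunningJob rj = jc.submitJob(job);
 *     while (!rj.isComplete()) {
 *       Thread.sleep(5000);   // poll the cluster at a modest interval
 *     }
 *     if (!rj.isSuccessful()) {
 *       System.err.println("Job failed: " + rj.getFailureInfo());
 *     }
 * </pre></blockquote></p>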
 * 
 * @see JobConf
 * @see ClusterStatus
 * @see Tool
 * @see DistributedCache
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class JobClient extends CLI {

  @InterfaceAudience.Private
  public static final String MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_KEY =
      "mapreduce.jobclient.retry.policy.enabled";
  @InterfaceAudience.Private
  public static final boolean MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_DEFAULT =
      false;
  @InterfaceAudience.Private
  public static final String MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_KEY =
      "mapreduce.jobclient.retry.policy.spec";
  @InterfaceAudience.Private
  public static final String MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_DEFAULT =
      "10000,6,60000,10"; // t1,n1,t2,n2,...: i.e. sleep 10000ms between up to
                          // 6 retries, then 60000ms between up to 10 retries

  public static enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }
  private TaskStatusFilter taskOutputFilter = TaskStatusFilter.FAILED; 

  static {
    ConfigUtil.loadResources();
  }

  /**
   * A NetworkedJob is an implementation of RunningJob.  It wraps a
   * {@link Job} object to provide job information, and interacts with the
   * remote service to provide certain functionality.
   */
  static class NetworkedJob implements RunningJob {
    Job job;
    /**
     * Construct a handle from a {@link JobStatus} snapshot. The underlying
     * job might no longer be known to the cluster (e.g., 24 hours after
     * the job completes), in which case calls through this handle will fail.
     */
    public NetworkedJob(JobStatus status, Cluster cluster) throws IOException {
      job = Job.getInstance(cluster, status, new JobConf(status.getJobFile()));
    }

    public NetworkedJob(Job job) throws IOException {
      this.job = job;
    }

    public Configuration getConfiguration() {
      return job.getConfiguration();
    }

    /**
     * An identifier for the job
     */
    public JobID getID() {
      return JobID.downgrade(job.getJobID());
    }

    /** @deprecated This method is deprecated and will be removed. Applications should 
     * rather use {@link #getID()}.*/
    @Deprecated
    public String getJobID() {
      return getID().toString();
    }

    /**
     * The user-specified job name
     */
    public String getJobName() {
      return job.getJobName();
    }

    /**
     * The name of the job file
     */
    public String getJobFile() {
      return job.getJobFile();
    }

    /**
     * A URL where the job's status can be seen
     */
    public String getTrackingURL() {
      return job.getTrackingURL();
    }

    /**
     * A float between 0.0 and 1.0, indicating the % of map work
     * completed.
     */
    public float mapProgress() throws IOException {
      return job.mapProgress();
    }

    /**
     * A float between 0.0 and 1.0, indicating the % of reduce work
     * completed.
     */
    public float reduceProgress() throws IOException {
      return job.reduceProgress();
    }

    /**
     * A float between 0.0 and 1.0, indicating the % of cleanup work
     * completed.
     */
    public float cleanupProgress() throws IOException {
      try {
        return job.cleanupProgress();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * A float between 0.0 and 1.0, indicating the % of setup work
     * completed.
     */
    public float setupProgress() throws IOException {
      return job.setupProgress();
    }

    /**
     * Returns immediately (without blocking) whether the whole job is
     * done yet or not.
     */
    public synchronized boolean isComplete() throws IOException {
      return job.isComplete();
    }

    /**
     * True iff job completed successfully.
     */
    public synchronized boolean isSuccessful() throws IOException {
      return job.isSuccessful();
    }

    /**
     * Blocks until the job is finished
     */
    public void waitForCompletion() throws IOException {
      try {
        job.waitForCompletion(false);
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      } catch (ClassNotFoundException ce) {
        throw new IOException(ce);
      }
    }

    /**
     * Tells the service to get the state of the current job.
     */
    public synchronized int getJobState() throws IOException {
      try {
        return job.getJobState().getValue();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Tells the service to terminate the current job.
     */
    public synchronized void killJob() throws IOException {
      job.killJob();
    }

    /**
     * Set the priority of the job.
     * @param priority new priority of the job. 
     */
    public synchronized void setJobPriority(String priority) 
                                                throws IOException {
      try {
        job.setPriority(
          org.apache.hadoop.mapreduce.JobPriority.valueOf(priority));
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Kill indicated task attempt.
     * @param taskId the id of the task to kill.
     * @param shouldFail if true the task is failed and added to the failed
     * tasks list; otherwise it is just killed, without affecting the job's
     * failure status.
     */
    public synchronized void killTask(TaskAttemptID taskId,
        boolean shouldFail) throws IOException {
      if (shouldFail) {
        job.failTask(taskId);
      } else {
        job.killTask(taskId);
      }
    }

    /** @deprecated Applications should rather use {@link #killTask(TaskAttemptID, boolean)}*/
    @Deprecated
    public synchronized void killTask(String taskId, boolean shouldFail) throws IOException {
      killTask(TaskAttemptID.forName(taskId), shouldFail);
    }

    /**
     * Fetch task completion events from cluster for this job. 
     */
    public synchronized TaskCompletionEvent[] getTaskCompletionEvents(
        int startFrom) throws IOException {
      try {
        org.apache.hadoop.mapreduce.TaskCompletionEvent[] events = 
          job.getTaskCompletionEvents(startFrom, 10);
        TaskCompletionEvent[] ret = new TaskCompletionEvent[events.length];
        for (int i = 0; i < events.length; i++) {
          ret[i] = TaskCompletionEvent.downgrade(events[i]);
        }
        return ret;
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Returns a string dump of the job's status.
     */
    @Override
    public String toString() {
      return job.toString();
    }

    /**
     * Returns the counters for this job
     */
    public Counters getCounters() throws IOException {
      Counters result = null;
      org.apache.hadoop.mapreduce.Counters temp = job.getCounters();
      if (temp != null) {
        result = Counters.downgrade(temp);
      }
      return result;
    }

    @Override
    public String[] getTaskDiagnostics(TaskAttemptID id) throws IOException {
      try { 
        return job.getTaskDiagnostics(id);
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    public String getHistoryUrl() throws IOException {
      try {
        return job.getHistoryUrl();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    public boolean isRetired() throws IOException {
      try {
        return job.isRetired();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

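    /**
     * Blocks while the job runs, printing status and task failures as
     * progress is made; returns true if the job succeeded.
     */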
    boolean monitorAndPrintJob() throws IOException, InterruptedException {
      return job.monitorAndPrintJob();
    }

    @Override
    public String getFailureInfo() throws IOException {
      try {
        return job.getStatus().getFailureInfo();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    @Override
    public JobStatus getJobStatus() throws IOException {
      try {
        return JobStatus.downgrade(job.getStatus());
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }
  }

  /**
   * Ugi of the client. We store this ugi when the client is created and 
   * then make sure that the same ugi is used to run the various protocols.
   */
  UserGroupInformation clientUgi;

  /**
   * Create a job client.
   */
  public JobClient() {
  }

  /**
   * Build a job client with the given {@link JobConf}, and connect to the 
   * default cluster.
   * 
   * @param conf the job configuration.
   * @throws IOException
   */
  public JobClient(JobConf conf) throws IOException {
    init(conf);
  }

  /**
   * Build a job client with the given {@link Configuration}, 
   * and connect to the default cluster.
   * 
   * @param conf the configuration.
   * @throws IOException
   */
  public JobClient(Configuration conf) throws IOException {
    init(new JobConf(conf));
  }

  /**
   * Connect to the default cluster.
   * @param conf the job configuration.
   * @throws IOException
   */
  public void init(JobConf conf) throws IOException {
    setConf(conf);
    cluster = new Cluster(conf);
    clientUgi = UserGroupInformation.getCurrentUser();
  }

  /**
   * Build a job client, connect to the indicated job tracker.
   * 
   * @param jobTrackAddr the job tracker to connect to.
   * @param conf configuration.
   */
  public JobClient(InetSocketAddress jobTrackAddr, 
                   Configuration conf) throws IOException {
    cluster = new Cluster(jobTrackAddr, conf);
    clientUgi = UserGroupInformation.getCurrentUser();
  }

  /**
   * Close the <code>JobClient</code>.
   */
  public synchronized void close() throws IOException {
    cluster.close();
  }

  /**
   * Get a filesystem handle.  We need this to prepare jobs
   * for submission to the MapReduce system.
   * 
   * @return the filesystem handle.
   */
  public synchronized FileSystem getFs() throws IOException {
    try { 
      return cluster.getFileSystem();
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get a handle to the Cluster
   */
  public Cluster getClusterHandle() {
    return cluster;
  }

  /**
   * Submit a job to the MR system.
   * 
   * This returns a handle to the {@link RunningJob} which can be used to track
   * the running-job.
   * 
   * @param jobFile the job configuration.
   * @return a handle to the {@link RunningJob} which can be used to track the
   *         running-job.
   * @throws FileNotFoundException
   * @throws InvalidJobConfException
   * @throws IOException
   */
  public RunningJob submitJob(String jobFile) throws FileNotFoundException, 
                                                     InvalidJobConfException, 
                                                     IOException {
    // Load in the submitted job details
    JobConf job = new JobConf(jobFile);
    return submitJob(job);
  }

  /**
   * Submit a job to the MR system.
   * This returns a handle to the {@link RunningJob} which can be used to track
   * the running-job.
   * 
   * @param conf the job configuration.
   * @return a handle to the {@link RunningJob} which can be used to track the
   *         running-job.
   * @throws FileNotFoundException
   * @throws IOException
   */
  public RunningJob submitJob(final JobConf conf) throws FileNotFoundException,
                                                  IOException {
    return submitJobInternal(conf);
  }

  @InterfaceAudience.Private
  public RunningJob submitJobInternal(final JobConf conf)
      throws FileNotFoundException, IOException {
    try {
      conf.setBooleanIfUnset("mapred.mapper.new-api", false);
      conf.setBooleanIfUnset("mapred.reducer.new-api", false);
      Job job = clientUgi.doAs(new PrivilegedExceptionAction<Job> () {
        @Override
        public Job run() throws IOException, ClassNotFoundException, 
          InterruptedException {
          Job job = Job.getInstance(conf);
          job.submit();
          return job;
        }
      });
      // update our Cluster instance with the one created by Job for submission
      // (we can't pass our Cluster instance to Job, since Job wraps the config
      // instance, and the two configs would then diverge)
      cluster = job.getCluster();
      return new NetworkedJob(job);
    } catch (InterruptedException ie) {
      throw new IOException("interrupted", ie);
    }
  }

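  /**
   * Look up the {@link Job} for the given id, running the remote call as the
   * client's UGI.
   */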
  private Job getJobUsingCluster(final JobID jobid) throws IOException,
      InterruptedException {
    return clientUgi.doAs(new PrivilegedExceptionAction<Job>() {
      public Job run() throws IOException, InterruptedException  {
        return cluster.getJob(jobid);
      }
    });
  }

  /**
   * Get a {@link RunningJob} object to track an ongoing job.  Returns
   * null if the id does not correspond to any known job.
   * 
   * @param jobid the jobid of the job.
   * @return the {@link RunningJob} handle to track the job, null if the 
   *         <code>jobid</code> doesn't correspond to any known job.
   * @throws IOException
   */
  public RunningJob getJob(final JobID jobid) throws IOException {
    try {
      Job job = getJobUsingCluster(jobid);
      if (job != null) {
        JobStatus status = JobStatus.downgrade(job.getStatus());
        if (status != null) {
          return new NetworkedJob(status, cluster);
        } 
      }
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
    return null;
  }

  /**@deprecated Applications should rather use {@link #getJob(JobID)}. 
   */
  @Deprecated
  public RunningJob getJob(String jobid) throws IOException {
    return getJob(JobID.forName(jobid));
  }

  private static final TaskReport[] EMPTY_TASK_REPORTS = new TaskReport[0];

  /**
   * Get the information of the current state of the map tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the map tips.
   * @throws IOException
   */
  public TaskReport[] getMapTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.MAP);
  }

  private TaskReport[] getTaskReports(final JobID jobId, TaskType type) throws 
    IOException {
    try {
      Job j = getJobUsingCluster(jobId);
      if (j == null) {
        return EMPTY_TASK_REPORTS;
      }
      return TaskReport.downgradeArray(j.getTaskReports(type));
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}*/
  @Deprecated
  public TaskReport[] getMapTaskReports(String jobId) throws IOException {
    return getMapTaskReports(JobID.forName(jobId));
  }

  /**
   * Get the information of the current state of the reduce tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the reduce tips.
   * @throws IOException
   */    
  public TaskReport[] getReduceTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.REDUCE);
  }

  /**
   * Get the information of the current state of the cleanup tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the cleanup tips.
   * @throws IOException
   */    
  public TaskReport[] getCleanupTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.JOB_CLEANUP);
  }

  /**
   * Get the information of the current state of the setup tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the setup tips.
   * @throws IOException
   */    
  public TaskReport[] getSetupTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.JOB_SETUP);
  }

  /**@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}*/
  @Deprecated
  public TaskReport[] getReduceTaskReports(String jobId) throws IOException {
    return getReduceTaskReports(JobID.forName(jobId));
  }

  /**
   * Display the information about a job's tasks, of a particular type and
   * in a particular state.
   * 
   * @param jobId the ID of the job
   * @param type the type of the task (map/reduce/setup/cleanup)
   * @param state the state of the task 
   * (pending/running/completed/failed/killed)
   */
  public void displayTasks(final JobID jobId, String type, String state) 
      throws IOException {
    try {
      Job job = getJobUsingCluster(jobId);
      super.displayTasks(job, type, state);
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get status information about the Map-Reduce cluster.
   *  
   * @return the status information about the Map-Reduce cluster as an object
   *         of {@link ClusterStatus}.
   * @throws IOException
   */
  public ClusterStatus getClusterStatus() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<ClusterStatus>() {
        public ClusterStatus run() throws IOException, InterruptedException {
          ClusterMetrics metrics = cluster.getClusterStatus();
          return new ClusterStatus(metrics.getTaskTrackerCount(), metrics
            .getBlackListedTaskTrackerCount(), cluster
            .getTaskTrackerExpiryInterval(), metrics.getOccupiedMapSlots(),
            metrics.getOccupiedReduceSlots(), metrics.getMapSlotCapacity(),
            metrics.getReduceSlotCapacity(), cluster.getJobTrackerStatus(),
            metrics.getDecommissionedTaskTrackerCount(), metrics
              .getGrayListedTaskTrackerCount());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

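  // Convert an array of TaskTrackerInfo into a collection of tracker names.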
  private Collection<String> arrayToStringList(TaskTrackerInfo[] objs) {
    Collection<String> list = new ArrayList<String>();
    for (TaskTrackerInfo info: objs) {
      list.add(info.getTaskTrackerName());
    }
    return list;
  }

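  // Convert an array of TaskTrackerInfo into BlackListInfo entries for the
  // old-API ClusterStatus.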
  private Collection<BlackListInfo> arrayToBlackListInfo(TaskTrackerInfo[] objs) {
    Collection<BlackListInfo> list = new ArrayList<BlackListInfo>();
    for (TaskTrackerInfo info: objs) {
      BlackListInfo binfo = new BlackListInfo();
      binfo.setTrackerName(info.getTaskTrackerName());
      binfo.setReasonForBlackListing(info.getReasonForBlacklist());
      binfo.setBlackListReport(info.getBlacklistReport());
      list.add(binfo);
    }
    return list;
  }

  /**
   * Get status information about the Map-Reduce cluster.
   *  
   * @param  detailed if true then get a detailed status including the
   *         tracker names
   * @return the status information about the Map-Reduce cluster as an object
   *         of {@link ClusterStatus}.
   * @throws IOException
   */
  public ClusterStatus getClusterStatus(boolean detailed) throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<ClusterStatus>() {
        public ClusterStatus run() throws IOException, InterruptedException {
          ClusterMetrics metrics = cluster.getClusterStatus();
          return new ClusterStatus(arrayToStringList(cluster.getActiveTaskTrackers()),
            arrayToBlackListInfo(cluster.getBlackListedTaskTrackers()),
            cluster.getTaskTrackerExpiryInterval(), metrics.getOccupiedMapSlots(),
            metrics.getOccupiedReduceSlots(), metrics.getMapSlotCapacity(),
            metrics.getReduceSlotCapacity(), 
            cluster.getJobTrackerStatus());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /** 
   * Get the jobs that are not completed and not failed.
   * 
   * @return array of {@link JobStatus} for the running/to-be-run jobs.
   * @throws IOException
   */
  public JobStatus[] jobsToComplete() throws IOException {
    List<JobStatus> stats = new ArrayList<JobStatus>();
    for (JobStatus stat : getAllJobs()) {
      if (!stat.isJobComplete()) {
        stats.add(stat);
      }
    }
    return stats.toArray(new JobStatus[0]);
  }

  /** 
   * Get the jobs that are submitted.
   * 
   * @return array of {@link JobStatus} for the submitted jobs.
   * @throws IOException
   */
  public JobStatus[] getAllJobs() throws IOException {
    try {
      org.apache.hadoop.mapreduce.JobStatus[] jobs = 
          clientUgi.doAs(new PrivilegedExceptionAction<
              org.apache.hadoop.mapreduce.JobStatus[]> () {
            public org.apache.hadoop.mapreduce.JobStatus[] run() 
                throws IOException, InterruptedException {
              return cluster.getAllJobStatuses();
            }
          });
      JobStatus[] stats = new JobStatus[jobs.length];
      for (int i = 0; i < jobs.length; i++) {
        stats[i] = JobStatus.downgrade(jobs[i]);
      }
      return stats;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /** 
   * Utility that submits a job, then polls for progress until the job is
   * complete.
   * 
   * @param job the job configuration.
   * @throws IOException if the job fails
   */
  public static RunningJob runJob(JobConf job) throws IOException {
    JobClient jc = new JobClient(job);
    RunningJob rj = jc.submitJob(job);
    try {
      if (!jc.monitorAndPrintJob(job, rj)) {
        throw new IOException("Job failed!");
      }
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
    }
    return rj;
  }

  /**
   * Monitor a job and print status in real-time as progress is made and tasks 
   * fail.
   * @param conf the job's configuration
   * @param job the job to track
   * @return true if the job succeeded
   * @throws IOException if communication to the JobTracker fails
   */
  public boolean monitorAndPrintJob(JobConf conf, 
                                    RunningJob job
  ) throws IOException, InterruptedException {
    return ((NetworkedJob)job).monitorAndPrintJob();
  }

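  /** Build the URL of a task attempt's log page from the tracker's base URL. */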
  static String getTaskLogURL(TaskAttemptID taskId, String baseUrl) {
    return (baseUrl + "/tasklog?plaintext=true&attemptid=" + taskId); 
  }

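  /**
   * Build a {@link Configuration} from a job tracker specification: either a
   * <code>host:port</code> address, which is set as "mapred.job.tracker", or
   * a symbolic cluster name that must resolve to a
   * <code>hadoop-&lt;name&gt;.xml</code> resource on the CLASSPATH.
   */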
  static Configuration getConfiguration(String jobTrackerSpec) {
    Configuration conf = new Configuration();
    if (jobTrackerSpec != null) {        
      if (jobTrackerSpec.indexOf(":") >= 0) {
        conf.set("mapred.job.tracker", jobTrackerSpec);
      } else {
        String classpathFile = "hadoop-" + jobTrackerSpec + ".xml";
        URL validate = conf.getResource(classpathFile);
        if (validate == null) {
          throw new RuntimeException(classpathFile + " not found on CLASSPATH");
        }
        conf.addResource(classpathFile);
      }
    }
    return conf;
  }

  /**
   * Sets the output filter for tasks. Only those tasks whose output matches
   * the filter are printed. 
   * @param newValue task filter.
   */
  @Deprecated
  public void setTaskOutputFilter(TaskStatusFilter newValue) {
    this.taskOutputFilter = newValue;
  }

  /**
   * Get the task output filter out of the JobConf.
   * 
   * @param job the JobConf to examine.
   * @return the filter level.
   */
  public static TaskStatusFilter getTaskOutputFilter(JobConf job) {
    return TaskStatusFilter.valueOf(job.get("jobclient.output.filter", 
                                            "FAILED"));
  }

  /**
   * Modify the JobConf to set the task output filter.
   * 
   * @param job the JobConf to modify.
   * @param newValue the value to set.
   */
  public static void setTaskOutputFilter(JobConf job, 
                                         TaskStatusFilter newValue) {
    job.set("jobclient.output.filter", newValue.toString());
  }

  /**
   * Returns the task output filter.
   * @return task filter. 
   */
  @Deprecated
  public TaskStatusFilter getTaskOutputFilter() {
    return this.taskOutputFilter; 
  }

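  /**
   * Look up a counter's value by group and counter name, converting from the
   * new-API {@link org.apache.hadoop.mapreduce.Counters}.
   */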
  protected long getCounter(org.apache.hadoop.mapreduce.Counters cntrs,
      String counterGroupName, String counterName) throws IOException {
    Counters counters = Counters.downgrade(cntrs);
    return counters.findCounter(counterGroupName, counterName).getValue();
  }

  /**
   * Get status information about the max available Maps in the cluster.
   *  
   * @return the max available Maps in the cluster
   * @throws IOException
   */
  public int getDefaultMaps() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Integer>() {
        @Override
        public Integer run() throws IOException, InterruptedException {
          return cluster.getClusterStatus().getMapSlotCapacity();
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get status information about the max available Reduces in the cluster.
   *  
   * @return the max available Reduces in the cluster
   * @throws IOException
   */
  public int getDefaultReduces() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Integer>() {
        @Override
        public Integer run() throws IOException, InterruptedException {
          return cluster.getClusterStatus().getReduceSlotCapacity();
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Grab the jobtracker system directory path where job-specific files are to be placed.
   * 
   * @return the system directory where job-specific files are to be placed.
   */
  public Path getSystemDir() {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Path>() {
        @Override
        public Path run() throws IOException, InterruptedException {
          return cluster.getSystemDir();
        }
      });
    } catch (IOException ioe) {
      return null;
    } catch (InterruptedException ie) {
      return null;
    }
  }

  /**
   * Checks if the job directory is clean and has all the required components
   * (<code>job.xml</code> and <code>job.split</code>) for (re)starting the job.
   */
  public static boolean isJobDirValid(Path jobDirPath, FileSystem fs)
      throws IOException {
    FileStatus[] contents = fs.listStatus(jobDirPath);
    int matchCount = 0;
    if (contents != null && contents.length >= 2) {
      for (FileStatus status : contents) {
        if ("job.xml".equals(status.getPath().getName())) {
          ++matchCount;
        }
        if ("job.split".equals(status.getPath().getName())) {
          ++matchCount;
        }
      }
      if (matchCount == 2) {
        return true;
      }
    }
    return false;
  }

  /**
   * Fetch the staging area directory for the application
   * 
   * @return path to staging area directory
   * @throws IOException
   */
  public Path getStagingAreaDir() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Path>() {
        @Override
        public Path run() throws IOException, InterruptedException {
          return cluster.getStagingAreaDir();
        }
      });
    } catch (InterruptedException ie) {
      // throw RuntimeException instead for compatibility reasons
      throw new RuntimeException(ie);
    }
  }

  private JobQueueInfo getJobQueueInfo(QueueInfo queue) {
    JobQueueInfo ret = new JobQueueInfo(queue);
    // make sure to convert any children
    if (!queue.getQueueChildren().isEmpty()) {
      List<JobQueueInfo> childQueues = new ArrayList<JobQueueInfo>(queue
          .getQueueChildren().size());
      for (QueueInfo child : queue.getQueueChildren()) {
        childQueues.add(getJobQueueInfo(child));
      }
      ret.setChildren(childQueues);
    }
    return ret;
  }

  private JobQueueInfo[] getJobQueueInfoArray(QueueInfo[] queues)
      throws IOException {
    JobQueueInfo[] ret = new JobQueueInfo[queues.length];
    for (int i = 0; i < queues.length; i++) {
      ret[i] = getJobQueueInfo(queues[i]);
    }
    return ret;
  }

  /**
   * Returns an array of queue information objects about the root-level queues
   * configured.
   *
   * @return the array of root-level JobQueueInfo objects
   * @throws IOException
   */
  public JobQueueInfo[] getRootQueues() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<JobQueueInfo[]>() {
        public JobQueueInfo[] run() throws IOException, InterruptedException {
          return getJobQueueInfoArray(cluster.getRootQueues());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Returns an array of queue information objects about the immediate children
   * of queue <code>queueName</code>.
   * 
   * @param queueName the name of the parent queue.
   * @return the array of immediate-children JobQueueInfo objects
   * @throws IOException
   */
  public JobQueueInfo[] getChildQueues(final String queueName) throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<JobQueueInfo[]>() {
        public JobQueueInfo[] run() throws IOException, InterruptedException {
          return getJobQueueInfoArray(cluster.getChildQueues(queueName));
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Return an array of queue information objects about all the Job Queues
   * configured.
   * 
   * @return Array of JobQueueInfo objects
   * @throws IOException
   */
  public JobQueueInfo[] getQueues() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<JobQueueInfo[]>() {
        public JobQueueInfo[] run() throws IOException, InterruptedException {
          return getJobQueueInfoArray(cluster.getQueues());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Gets all the jobs which were added to a particular Job Queue.
   * 
   * @param queueName name of the Job Queue
   * @return Array of jobs present in the job queue
   * @throws IOException
   */
  public JobStatus[] getJobsFromQueue(final String queueName) throws IOException {
    try {
      QueueInfo queue = clientUgi.doAs(new PrivilegedExceptionAction<QueueInfo>() {
        @Override
        public QueueInfo run() throws IOException, InterruptedException {
          return cluster.getQueue(queueName);
        }
      });
      if (queue == null) {
        return null;
      }
      org.apache.hadoop.mapreduce.JobStatus[] stats = 
        queue.getJobStatuses();
      JobStatus[] ret = new JobStatus[stats.length];
      for (int i = 0; i < stats.length; i++) {
        ret[i] = JobStatus.downgrade(stats[i]);
      }
      return ret;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Gets the queue information associated with a particular Job Queue.
   * 
   * @param queueName name of the job queue.
   * @return Queue information associated with the particular queue.
   * @throws IOException
   */
  public JobQueueInfo getQueueInfo(final String queueName) throws IOException {
    try {
      QueueInfo queueInfo = clientUgi.doAs(new 
          PrivilegedExceptionAction<QueueInfo>() {
        public QueueInfo run() throws IOException, InterruptedException {
          return cluster.getQueue(queueName);
        }
      });
      if (queueInfo != null) {
        return new JobQueueInfo(queueInfo);
      }
      return null;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Gets the Queue ACLs for the current user.
   * @return array of QueueAclsInfo objects for the current user.
   * @throws IOException
   */
  public QueueAclsInfo[] getQueueAclsForCurrentUser() throws IOException {
    try {
      org.apache.hadoop.mapreduce.QueueAclsInfo[] acls = 
        clientUgi.doAs(new 
            PrivilegedExceptionAction
            <org.apache.hadoop.mapreduce.QueueAclsInfo[]>() {
              public org.apache.hadoop.mapreduce.QueueAclsInfo[] run() 
              throws IOException, InterruptedException {
                return cluster.getQueueAclsForCurrentUser();
              }
        });
      QueueAclsInfo[] ret = new QueueAclsInfo[acls.length];
      for (int i = 0; i < acls.length; i++) {
        ret[i] = QueueAclsInfo.downgrade(acls[i]);
      }
      return ret;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get a delegation token for the user from the JobTracker.
   * @param renewer the user who can renew the token
   * @return the new token
   * @throws IOException
   */
  public Token<DelegationTokenIdentifier> 
    getDelegationToken(final Text renewer) throws IOException, InterruptedException {
    return clientUgi.doAs(new 
        PrivilegedExceptionAction<Token<DelegationTokenIdentifier>>() {
      public Token<DelegationTokenIdentifier> run() throws IOException, 
      InterruptedException {
        return cluster.getDelegationToken(renewer);
      }
    });
  }

  /**
   * Renew a delegation token.
   * @param token the token to renew
   * @return the new expiration time if the renewal went well
   * @throws InvalidToken
   * @throws IOException
   * @deprecated Use {@link Token#renew} instead
   */
  public long renewDelegationToken(Token<DelegationTokenIdentifier> token
                                   ) throws InvalidToken, IOException, 
                                            InterruptedException {
    return token.renew(getConf());
  }

  /**
   * Cancel a delegation token from the JobTracker.
   * @param token the token to cancel
   * @throws IOException
   * @deprecated Use {@link Token#cancel} instead
   */
  public void cancelDelegationToken(Token<DelegationTokenIdentifier> token
                                    ) throws InvalidToken, IOException, 
                                             InterruptedException {
    token.cancel(getConf());
  }

  /**
   * Entry point for the <code>JobClient</code> command-line tool.
   */
  public static void main(String[] argv) throws Exception {
    int res = ToolRunner.run(new JobClient(), argv);
    System.exit(res);
  }
}