001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.yarn.applications.distributedshell; 020 021 import java.io.File; 022 import java.io.IOException; 023 import java.util.ArrayList; 024 import java.util.EnumSet; 025 import java.util.HashMap; 026 import java.util.List; 027 import java.util.Map; 028 import java.util.Vector; 029 030 import org.apache.commons.cli.CommandLine; 031 import org.apache.commons.cli.GnuParser; 032 import org.apache.commons.cli.HelpFormatter; 033 import org.apache.commons.cli.Options; 034 import org.apache.commons.cli.ParseException; 035 import org.apache.commons.logging.Log; 036 import org.apache.commons.logging.LogFactory; 037 import org.apache.hadoop.classification.InterfaceAudience; 038 import org.apache.hadoop.classification.InterfaceStability; 039 import org.apache.hadoop.conf.Configuration; 040 import org.apache.hadoop.fs.FileStatus; 041 import org.apache.hadoop.fs.FileSystem; 042 import org.apache.hadoop.fs.Path; 043 import org.apache.hadoop.yarn.api.ApplicationConstants; 044 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; 045 import org.apache.hadoop.yarn.api.ApplicationClientProtocol; 046 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; 047 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; 048 import org.apache.hadoop.yarn.api.records.ApplicationId; 049 import org.apache.hadoop.yarn.api.records.ApplicationReport; 050 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; 051 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; 052 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; 053 import org.apache.hadoop.yarn.api.records.LocalResource; 054 import org.apache.hadoop.yarn.api.records.LocalResourceType; 055 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; 056 import org.apache.hadoop.yarn.api.records.NodeReport; 057 import org.apache.hadoop.yarn.api.records.NodeState; 058 import org.apache.hadoop.yarn.api.records.Priority; 059 import org.apache.hadoop.yarn.api.records.QueueACL; 060 import org.apache.hadoop.yarn.api.records.QueueInfo; 061 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; 062 import org.apache.hadoop.yarn.api.records.Resource; 063 import org.apache.hadoop.yarn.api.records.YarnApplicationState; 064 import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; 065 import org.apache.hadoop.yarn.client.api.YarnClient; 066 import org.apache.hadoop.yarn.client.api.YarnClientApplication; 067 import org.apache.hadoop.yarn.conf.YarnConfiguration; 068 import org.apache.hadoop.yarn.exceptions.YarnException; 069 import org.apache.hadoop.yarn.util.ConverterUtils; 070 import org.apache.hadoop.yarn.util.Records; 071 072 /** 073 * Client for Distributed Shell application submission to YARN. 074 * 075 * <p> The distributed shell client allows an application master to be launched that in turn would run 076 * the provided shell command on a set of containers. </p> 077 * 078 * <p>This client is meant to act as an example on how to write yarn-based applications. </p> 079 * 080 * <p> To submit an application, a client first needs to connect to the <code>ResourceManager</code> 081 * aka ApplicationsManager or ASM via the {@link ApplicationClientProtocol}. The {@link ApplicationClientProtocol} 082 * provides a way for the client to get access to cluster information and to request for a 083 * new {@link ApplicationId}. <p> 084 * 085 * <p> For the actual job submission, the client first has to create an {@link ApplicationSubmissionContext}. 086 * The {@link ApplicationSubmissionContext} defines the application details such as {@link ApplicationId} 087 * and application name, the priority assigned to the application and the queue 088 * to which this application needs to be assigned. In addition to this, the {@link ApplicationSubmissionContext} 089 * also defines the {@link ContainerLaunchContext} which describes the <code>Container</code> with which 090 * the {@link ApplicationMaster} is launched. </p> 091 * 092 * <p> The {@link ContainerLaunchContext} in this scenario defines the resources to be allocated for the 093 * {@link ApplicationMaster}'s container, the local resources (jars, configuration files) to be made available 094 * and the environment to be set for the {@link ApplicationMaster} and the commands to be executed to run the 095 * {@link ApplicationMaster}. <p> 096 * 097 * <p> Using the {@link ApplicationSubmissionContext}, the client submits the application to the 098 * <code>ResourceManager</code> and then monitors the application by requesting the <code>ResourceManager</code> 099 * for an {@link ApplicationReport} at regular time intervals. In case of the application taking too long, the client 100 * kills the application by submitting a {@link KillApplicationRequest} to the <code>ResourceManager</code>. </p> 101 * 102 */ 103 @InterfaceAudience.Public 104 @InterfaceStability.Unstable 105 public class Client { 106 107 private static final Log LOG = LogFactory.getLog(Client.class); 108 109 // Configuration 110 private Configuration conf; 111 private YarnClient yarnClient; 112 // Application master specific info to register a new Application with RM/ASM 113 private String appName = ""; 114 // App master priority 115 private int amPriority = 0; 116 // Queue for App master 117 private String amQueue = ""; 118 // Amt. of memory resource to request for to run the App Master 119 private int amMemory = 10; 120 121 // Application master jar file 122 private String appMasterJar = ""; 123 // Main class to invoke application master 124 private final String appMasterMainClass = 125 "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster"; 126 127 // Shell command to be executed 128 private String shellCommand = ""; 129 // Location of shell script 130 private String shellScriptPath = ""; 131 // Args to be passed to the shell command 132 private String shellArgs = ""; 133 // Env variables to be setup for the shell command 134 private Map<String, String> shellEnv = new HashMap<String, String>(); 135 // Shell Command Container priority 136 private int shellCmdPriority = 0; 137 138 // Amt of memory to request for container in which shell script will be executed 139 private int containerMemory = 10; 140 // No. of containers in which the shell script needs to be executed 141 private int numContainers = 1; 142 143 // log4j.properties file 144 // if available, add to local resources and set into classpath 145 private String log4jPropFile = ""; 146 147 // Start time for client 148 private final long clientStartTime = System.currentTimeMillis(); 149 // Timeout threshold for client. Kill app after time interval expires. 150 private long clientTimeout = 600000; 151 152 // Debug flag 153 boolean debugFlag = false; 154 155 // Command line options 156 private Options opts; 157 158 /** 159 * @param args Command line arguments 160 */ 161 public static void main(String[] args) { 162 boolean result = false; 163 try { 164 Client client = new Client(); 165 LOG.info("Initializing Client"); 166 try { 167 boolean doRun = client.init(args); 168 if (!doRun) { 169 System.exit(0); 170 } 171 } catch (IllegalArgumentException e) { 172 System.err.println(e.getLocalizedMessage()); 173 client.printUsage(); 174 System.exit(-1); 175 } 176 result = client.run(); 177 } catch (Throwable t) { 178 LOG.fatal("Error running CLient", t); 179 System.exit(1); 180 } 181 if (result) { 182 LOG.info("Application completed successfully"); 183 System.exit(0); 184 } 185 LOG.error("Application failed to complete successfully"); 186 System.exit(2); 187 } 188 189 /** 190 */ 191 public Client(Configuration conf) throws Exception { 192 193 this.conf = conf; 194 yarnClient = YarnClient.createYarnClient(); 195 yarnClient.init(conf); 196 opts = new Options(); 197 opts.addOption("appname", true, "Application Name. Default value - DistributedShell"); 198 opts.addOption("priority", true, "Application Priority. Default 0"); 199 opts.addOption("queue", true, "RM Queue in which this application is to be submitted"); 200 opts.addOption("timeout", true, "Application timeout in milliseconds"); 201 opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master"); 202 opts.addOption("jar", true, "Jar file containing the application master"); 203 opts.addOption("shell_command", true, "Shell command to be executed by the Application Master"); 204 opts.addOption("shell_script", true, "Location of the shell script to be executed"); 205 opts.addOption("shell_args", true, "Command line args for the shell script"); 206 opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs"); 207 opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers"); 208 opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command"); 209 opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed"); 210 opts.addOption("log_properties", true, "log4j.properties file"); 211 opts.addOption("debug", false, "Dump out debug information"); 212 opts.addOption("help", false, "Print usage"); 213 } 214 215 /** 216 */ 217 public Client() throws Exception { 218 this(new YarnConfiguration()); 219 } 220 221 /** 222 * Helper function to print out usage 223 */ 224 private void printUsage() { 225 new HelpFormatter().printHelp("Client", opts); 226 } 227 228 /** 229 * Parse command line options 230 * @param args Parsed command line options 231 * @return Whether the init was successful to run the client 232 * @throws ParseException 233 */ 234 public boolean init(String[] args) throws ParseException { 235 236 CommandLine cliParser = new GnuParser().parse(opts, args); 237 238 if (args.length == 0) { 239 throw new IllegalArgumentException("No args specified for client to initialize"); 240 } 241 242 if (cliParser.hasOption("help")) { 243 printUsage(); 244 return false; 245 } 246 247 if (cliParser.hasOption("debug")) { 248 debugFlag = true; 249 250 } 251 252 appName = cliParser.getOptionValue("appname", "DistributedShell"); 253 amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0")); 254 amQueue = cliParser.getOptionValue("queue", "default"); 255 amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10")); 256 257 if (amMemory < 0) { 258 throw new IllegalArgumentException("Invalid memory specified for application master, exiting." 259 + " Specified memory=" + amMemory); 260 } 261 262 if (!cliParser.hasOption("jar")) { 263 throw new IllegalArgumentException("No jar file specified for application master"); 264 } 265 266 appMasterJar = cliParser.getOptionValue("jar"); 267 268 if (!cliParser.hasOption("shell_command")) { 269 throw new IllegalArgumentException("No shell command specified to be executed by application master"); 270 } 271 shellCommand = cliParser.getOptionValue("shell_command"); 272 273 if (cliParser.hasOption("shell_script")) { 274 shellScriptPath = cliParser.getOptionValue("shell_script"); 275 } 276 if (cliParser.hasOption("shell_args")) { 277 shellArgs = cliParser.getOptionValue("shell_args"); 278 } 279 if (cliParser.hasOption("shell_env")) { 280 String envs[] = cliParser.getOptionValues("shell_env"); 281 for (String env : envs) { 282 env = env.trim(); 283 int index = env.indexOf('='); 284 if (index == -1) { 285 shellEnv.put(env, ""); 286 continue; 287 } 288 String key = env.substring(0, index); 289 String val = ""; 290 if (index < (env.length()-1)) { 291 val = env.substring(index+1); 292 } 293 shellEnv.put(key, val); 294 } 295 } 296 shellCmdPriority = Integer.parseInt(cliParser.getOptionValue("shell_cmd_priority", "0")); 297 298 containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10")); 299 numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1")); 300 301 if (containerMemory < 0 || numContainers < 1) { 302 throw new IllegalArgumentException("Invalid no. of containers or container memory specified, exiting." 303 + " Specified containerMemory=" + containerMemory 304 + ", numContainer=" + numContainers); 305 } 306 307 clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000")); 308 309 log4jPropFile = cliParser.getOptionValue("log_properties", ""); 310 311 return true; 312 } 313 314 /** 315 * Main run function for the client 316 * @return true if application completed successfully 317 * @throws IOException 318 * @throws YarnException 319 */ 320 public boolean run() throws IOException, YarnException { 321 322 LOG.info("Running Client"); 323 yarnClient.start(); 324 325 YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); 326 LOG.info("Got Cluster metric info from ASM" 327 + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); 328 329 List<NodeReport> clusterNodeReports = yarnClient.getNodeReports( 330 NodeState.RUNNING); 331 LOG.info("Got Cluster node info from ASM"); 332 for (NodeReport node : clusterNodeReports) { 333 LOG.info("Got node report from ASM for" 334 + ", nodeId=" + node.getNodeId() 335 + ", nodeAddress" + node.getHttpAddress() 336 + ", nodeRackName" + node.getRackName() 337 + ", nodeNumContainers" + node.getNumContainers()); 338 } 339 340 QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); 341 LOG.info("Queue info" 342 + ", queueName=" + queueInfo.getQueueName() 343 + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() 344 + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() 345 + ", queueApplicationCount=" + queueInfo.getApplications().size() 346 + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); 347 348 List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); 349 for (QueueUserACLInfo aclInfo : listAclInfo) { 350 for (QueueACL userAcl : aclInfo.getUserAcls()) { 351 LOG.info("User ACL Info for Queue" 352 + ", queueName=" + aclInfo.getQueueName() 353 + ", userAcl=" + userAcl.name()); 354 } 355 } 356 357 // Get a new application id 358 YarnClientApplication app = yarnClient.createApplication(); 359 GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); 360 // TODO get min/max resource capabilities from RM and change memory ask if needed 361 // If we do not have min/max, we may not be able to correctly request 362 // the required resources from the RM for the app master 363 // Memory ask has to be a multiple of min and less than max. 364 // Dump out information about cluster capability as seen by the resource manager 365 int maxMem = appResponse.getMaximumResourceCapability().getMemory(); 366 LOG.info("Max mem capabililty of resources in this cluster " + maxMem); 367 368 // A resource ask cannot exceed the max. 369 if (amMemory > maxMem) { 370 LOG.info("AM memory specified above max threshold of cluster. Using max value." 371 + ", specified=" + amMemory 372 + ", max=" + maxMem); 373 amMemory = maxMem; 374 } 375 376 // set the application name 377 ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); 378 ApplicationId appId = appContext.getApplicationId(); 379 appContext.setApplicationName(appName); 380 381 // Set up the container launch context for the application master 382 ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); 383 384 // set local resources for the application master 385 // local files or archives as needed 386 // In this scenario, the jar file for the application master is part of the local resources 387 Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); 388 389 LOG.info("Copy App Master jar from local filesystem and add to local environment"); 390 // Copy the application master jar to the filesystem 391 // Create a local resource to point to the destination jar path 392 FileSystem fs = FileSystem.get(conf); 393 Path src = new Path(appMasterJar); 394 String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar"; 395 Path dst = new Path(fs.getHomeDirectory(), pathSuffix); 396 fs.copyFromLocalFile(false, true, src, dst); 397 FileStatus destStatus = fs.getFileStatus(dst); 398 LocalResource amJarRsrc = Records.newRecord(LocalResource.class); 399 400 // Set the type of resource - file or archive 401 // archives are untarred at destination 402 // we don't need the jar file to be untarred for now 403 amJarRsrc.setType(LocalResourceType.FILE); 404 // Set visibility of the resource 405 // Setting to most private option 406 amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); 407 // Set the resource to be copied over 408 amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); 409 // Set timestamp and length of file so that the framework 410 // can do basic sanity checks for the local resource 411 // after it has been copied over to ensure it is the same 412 // resource the client intended to use with the application 413 amJarRsrc.setTimestamp(destStatus.getModificationTime()); 414 amJarRsrc.setSize(destStatus.getLen()); 415 localResources.put("AppMaster.jar", amJarRsrc); 416 417 // Set the log4j properties if needed 418 if (!log4jPropFile.isEmpty()) { 419 Path log4jSrc = new Path(log4jPropFile); 420 Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props"); 421 fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); 422 FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); 423 LocalResource log4jRsrc = Records.newRecord(LocalResource.class); 424 log4jRsrc.setType(LocalResourceType.FILE); 425 log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); 426 log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); 427 log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); 428 log4jRsrc.setSize(log4jFileStatus.getLen()); 429 localResources.put("log4j.properties", log4jRsrc); 430 } 431 432 // The shell script has to be made available on the final container(s) 433 // where it will be executed. 434 // To do this, we need to first copy into the filesystem that is visible 435 // to the yarn framework. 436 // We do not need to set this as a local resource for the application 437 // master as the application master does not need it. 438 String hdfsShellScriptLocation = ""; 439 long hdfsShellScriptLen = 0; 440 long hdfsShellScriptTimestamp = 0; 441 if (!shellScriptPath.isEmpty()) { 442 Path shellSrc = new Path(shellScriptPath); 443 String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh"; 444 Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); 445 fs.copyFromLocalFile(false, true, shellSrc, shellDst); 446 hdfsShellScriptLocation = shellDst.toUri().toString(); 447 FileStatus shellFileStatus = fs.getFileStatus(shellDst); 448 hdfsShellScriptLen = shellFileStatus.getLen(); 449 hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); 450 } 451 452 // Set local resource info into app master container launch context 453 amContainer.setLocalResources(localResources); 454 455 // Set the necessary security tokens as needed 456 //amContainer.setContainerTokens(containerToken); 457 458 // Set the env variables to be setup in the env where the application master will be run 459 LOG.info("Set the environment for the application master"); 460 Map<String, String> env = new HashMap<String, String>(); 461 462 // put location of shell script into env 463 // using the env info, the application master will create the correct local resource for the 464 // eventual containers that will be launched to execute the shell scripts 465 env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); 466 env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); 467 env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); 468 469 // Add AppMaster.jar location to classpath 470 // At some point we should not be required to add 471 // the hadoop specific classpaths to the env. 472 // It should be provided out of the box. 473 // For now setting all required classpaths including 474 // the classpath to "." for the application jar 475 StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()) 476 .append(File.pathSeparatorChar).append("./*"); 477 for (String c : conf.getStrings( 478 YarnConfiguration.YARN_APPLICATION_CLASSPATH, 479 YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { 480 classPathEnv.append(File.pathSeparatorChar); 481 classPathEnv.append(c.trim()); 482 } 483 classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties"); 484 485 // add the runtime classpath needed for tests to work 486 if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { 487 classPathEnv.append(':'); 488 classPathEnv.append(System.getProperty("java.class.path")); 489 } 490 491 env.put("CLASSPATH", classPathEnv.toString()); 492 493 amContainer.setEnvironment(env); 494 495 // Set the necessary command to execute the application master 496 Vector<CharSequence> vargs = new Vector<CharSequence>(30); 497 498 // Set java executable command 499 LOG.info("Setting up app master command"); 500 vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); 501 // Set Xmx based on am memory size 502 vargs.add("-Xmx" + amMemory + "m"); 503 // Set class name 504 vargs.add(appMasterMainClass); 505 // Set params for Application Master 506 vargs.add("--container_memory " + String.valueOf(containerMemory)); 507 vargs.add("--num_containers " + String.valueOf(numContainers)); 508 vargs.add("--priority " + String.valueOf(shellCmdPriority)); 509 if (!shellCommand.isEmpty()) { 510 vargs.add("--shell_command " + shellCommand + ""); 511 } 512 if (!shellArgs.isEmpty()) { 513 vargs.add("--shell_args " + shellArgs + ""); 514 } 515 for (Map.Entry<String, String> entry : shellEnv.entrySet()) { 516 vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); 517 } 518 if (debugFlag) { 519 vargs.add("--debug"); 520 } 521 522 vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); 523 vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); 524 525 // Get final commmand 526 StringBuilder command = new StringBuilder(); 527 for (CharSequence str : vargs) { 528 command.append(str).append(" "); 529 } 530 531 LOG.info("Completed setting up app master command " + command.toString()); 532 List<String> commands = new ArrayList<String>(); 533 commands.add(command.toString()); 534 amContainer.setCommands(commands); 535 536 // Set up resource type requirements 537 // For now, only memory is supported so we set memory requirements 538 Resource capability = Records.newRecord(Resource.class); 539 capability.setMemory(amMemory); 540 appContext.setResource(capability); 541 542 // Service data is a binary blob that can be passed to the application 543 // Not needed in this scenario 544 // amContainer.setServiceData(serviceData); 545 546 // The following are not required for launching an application master 547 // amContainer.setContainerId(containerId); 548 549 appContext.setAMContainerSpec(amContainer); 550 551 // Set the priority for the application master 552 Priority pri = Records.newRecord(Priority.class); 553 // TODO - what is the range for priority? how to decide? 554 pri.setPriority(amPriority); 555 appContext.setPriority(pri); 556 557 // Set the queue to which this application is to be submitted in the RM 558 appContext.setQueue(amQueue); 559 560 // Submit the application to the applications manager 561 // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); 562 // Ignore the response as either a valid response object is returned on success 563 // or an exception thrown to denote some form of a failure 564 LOG.info("Submitting application to ASM"); 565 566 yarnClient.submitApplication(appContext); 567 568 // TODO 569 // Try submitting the same request again 570 // app submission failure? 571 572 // Monitor the application 573 return monitorApplication(appId); 574 575 } 576 577 /** 578 * Monitor the submitted application for completion. 579 * Kill application if time expires. 580 * @param appId Application Id of application to be monitored 581 * @return true if application completed successfully 582 * @throws YarnException 583 * @throws IOException 584 */ 585 private boolean monitorApplication(ApplicationId appId) 586 throws YarnException, IOException { 587 588 while (true) { 589 590 // Check app status every 1 second. 591 try { 592 Thread.sleep(1000); 593 } catch (InterruptedException e) { 594 LOG.debug("Thread sleep in monitoring loop interrupted"); 595 } 596 597 // Get application report for the appId we are interested in 598 ApplicationReport report = yarnClient.getApplicationReport(appId); 599 600 LOG.info("Got application report from ASM for" 601 + ", appId=" + appId.getId() 602 + ", clientToAMToken=" + report.getClientToAMToken() 603 + ", appDiagnostics=" + report.getDiagnostics() 604 + ", appMasterHost=" + report.getHost() 605 + ", appQueue=" + report.getQueue() 606 + ", appMasterRpcPort=" + report.getRpcPort() 607 + ", appStartTime=" + report.getStartTime() 608 + ", yarnAppState=" + report.getYarnApplicationState().toString() 609 + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() 610 + ", appTrackingUrl=" + report.getTrackingUrl() 611 + ", appUser=" + report.getUser()); 612 613 YarnApplicationState state = report.getYarnApplicationState(); 614 FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); 615 if (YarnApplicationState.FINISHED == state) { 616 if (FinalApplicationStatus.SUCCEEDED == dsStatus) { 617 LOG.info("Application has completed successfully. Breaking monitoring loop"); 618 return true; 619 } 620 else { 621 LOG.info("Application did finished unsuccessfully." 622 + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() 623 + ". Breaking monitoring loop"); 624 return false; 625 } 626 } 627 else if (YarnApplicationState.KILLED == state 628 || YarnApplicationState.FAILED == state) { 629 LOG.info("Application did not finish." 630 + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() 631 + ". Breaking monitoring loop"); 632 return false; 633 } 634 635 if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { 636 LOG.info("Reached client specified timeout for application. Killing application"); 637 forceKillApplication(appId); 638 return false; 639 } 640 } 641 642 } 643 644 /** 645 * Kill a submitted application by sending a call to the ASM 646 * @param appId Application Id to be killed. 647 * @throws YarnException 648 * @throws IOException 649 */ 650 private void forceKillApplication(ApplicationId appId) 651 throws YarnException, IOException { 652 // TODO clarify whether multiple jobs with the same app id can be submitted and be running at 653 // the same time. 654 // If yes, can we kill a particular attempt only? 655 656 // Response can be ignored as it is non-null on success or 657 // throws an exception in case of failures 658 yarnClient.killApplication(appId); 659 } 660 661 }