1 /* 2 * scsi_error.c Copyright (C) 1997 Eric Youngdale 3 * 4 * SCSI error/timeout handling 5 * Initial versions: Eric Youngdale. Based upon conversations with 6 * Leonard Zubkoff and David Miller at Linux Expo, 7 * ideas originating from all over the place. 8 * 9 * Restructured scsi_unjam_host and associated functions. 10 * September 04, 2002 Mike Anderson (andmike@us.ibm.com) 11 * 12 * Forward port of Russell King's (rmk@arm.linux.org.uk) changes and 13 * minor cleanups. 14 * September 30, 2002 Mike Anderson (andmike@us.ibm.com) 15 */ 16 17 #include <linux/module.h> 18 #include <linux/sched.h> 19 #include <linux/timer.h> 20 #include <linux/string.h> 21 #include <linux/slab.h> 22 #include <linux/kernel.h> 23 #include <linux/kthread.h> 24 #include <linux/interrupt.h> 25 #include <linux/blkdev.h> 26 #include <linux/delay.h> 27 28 #include <scsi/scsi.h> 29 #include <scsi/scsi_dbg.h> 30 #include <scsi/scsi_device.h> 31 #include <scsi/scsi_eh.h> 32 #include <scsi/scsi_host.h> 33 #include <scsi/scsi_ioctl.h> 34 #include <scsi/scsi_request.h> 35 36 #include "scsi_priv.h" 37 #include "scsi_logging.h" 38 39 #define SENSE_TIMEOUT (10*HZ) 40 #define START_UNIT_TIMEOUT (30*HZ) 41 42 /* 43 * These should *probably* be handled by the host itself. 44 * Since it is allowed to sleep, it probably should. 45 */ 46 #define BUS_RESET_SETTLE_TIME (10) 47 #define HOST_RESET_SETTLE_TIME (10) 48 49 /* called with shost->host_lock held */ 50 void scsi_eh_wakeup(struct Scsi_Host *shost) 51 { 52 if (shost->host_busy == shost->host_failed) { 53 wake_up_process(shost->ehandler); 54 SCSI_LOG_ERROR_RECOVERY(5, 55 printk("Waking error handler thread\n")); 56 } 57 } 58 59 /** 60 * scsi_eh_scmd_add - add scsi cmd to error handling. 61 * @scmd: scmd to run eh on. 62 * @eh_flag: optional SCSI_EH flag. 63 * 64 * Return value: 65 * 0 on failure. 66 **/ 67 int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) 68 { 69 struct Scsi_Host *shost = scmd->device->host; 70 unsigned long flags; 71 int ret = 0; 72 73 if (!shost->ehandler) 74 return 0; 75 76 spin_lock_irqsave(shost->host_lock, flags); 77 if (scsi_host_set_state(shost, SHOST_RECOVERY)) 78 if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) 79 goto out_unlock; 80 81 ret = 1; 82 scmd->eh_eflags |= eh_flag; 83 list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); 84 shost->host_failed++; 85 scsi_eh_wakeup(shost); 86 out_unlock: 87 spin_unlock_irqrestore(shost->host_lock, flags); 88 return ret; 89 } 90 91 /** 92 * scsi_add_timer - Start timeout timer for a single scsi command. 93 * @scmd: scsi command that is about to start running. 94 * @timeout: amount of time to allow this command to run. 95 * @complete: timeout function to call if timer isn't canceled. 96 * 97 * Notes: 98 * This should be turned into an inline function. Each scsi command 99 * has its own timer, and as it is added to the queue, we set up the 100 * timer. When the command completes, we cancel the timer. 101 **/ 102 void scsi_add_timer(struct scsi_cmnd *scmd, int timeout, 103 void (*complete)(struct scsi_cmnd *)) 104 { 105 106 /* 107 * If the clock was already running for this command, then 108 * first delete the timer. The timer handling code gets rather 109 * confused if we don't do this. 110 */ 111 if (scmd->eh_timeout.function) 112 del_timer(&scmd->eh_timeout); 113 114 scmd->eh_timeout.data = (unsigned long)scmd; 115 scmd->eh_timeout.expires = jiffies + timeout; 116 scmd->eh_timeout.function = (void (*)(unsigned long)) complete; 117 118 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:" 119 " %d, (%p)\n", __FUNCTION__, 120 scmd, timeout, complete)); 121 122 add_timer(&scmd->eh_timeout); 123 } 124 125 /** 126 * scsi_delete_timer - Delete/cancel timer for a given function. 127 * @scmd: Cmd that we are canceling timer for 128 * 129 * Notes: 130 * This should be turned into an inline function. 131 * 132 * Return value: 133 * 1 if we were able to detach the timer. 0 if we blew it, and the 134 * timer function has already started to run. 135 **/ 136 int scsi_delete_timer(struct scsi_cmnd *scmd) 137 { 138 int rtn; 139 140 rtn = del_timer(&scmd->eh_timeout); 141 142 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p," 143 " rtn: %d\n", __FUNCTION__, 144 scmd, rtn)); 145 146 scmd->eh_timeout.data = (unsigned long)NULL; 147 scmd->eh_timeout.function = NULL; 148 149 return rtn; 150 } 151 152 /** 153 * scsi_times_out - Timeout function for normal scsi commands. 154 * @scmd: Cmd that is timing out. 155 * 156 * Notes: 157 * We do not need to lock this. There is the potential for a race 158 * only in that the normal completion handling might run, but if the 159 * normal completion function determines that the timer has already 160 * fired, then it mustn't do anything. 161 **/ 162 void scsi_times_out(struct scsi_cmnd *scmd) 163 { 164 scsi_log_completion(scmd, TIMEOUT_ERROR); 165 166 if (scmd->device->host->hostt->eh_timed_out) 167 switch (scmd->device->host->hostt->eh_timed_out(scmd)) { 168 case EH_HANDLED: 169 __scsi_done(scmd); 170 return; 171 case EH_RESET_TIMER: 172 /* This allows a single retry even of a command 173 * with allowed == 0 */ 174 if (scmd->retries++ > scmd->allowed) 175 break; 176 scsi_add_timer(scmd, scmd->timeout_per_command, 177 scsi_times_out); 178 return; 179 case EH_NOT_HANDLED: 180 break; 181 } 182 183 if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { 184 scmd->result |= DID_TIME_OUT << 16; 185 __scsi_done(scmd); 186 } 187 } 188 189 /** 190 * scsi_block_when_processing_errors - Prevent cmds from being queued. 191 * @sdev: Device on which we are performing recovery. 192 * 193 * Description: 194 * We block until the host is out of error recovery, and then check to 195 * see whether the host or the device is offline. 196 * 197 * Return value: 198 * 0 when dev was taken offline by error recovery. 1 OK to proceed. 199 **/ 200 int scsi_block_when_processing_errors(struct scsi_device *sdev) 201 { 202 int online; 203 204 wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host)); 205 206 online = scsi_device_online(sdev); 207 208 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__, 209 online)); 210 211 return online; 212 } 213 EXPORT_SYMBOL(scsi_block_when_processing_errors); 214 215 #ifdef CONFIG_SCSI_LOGGING 216 /** 217 * scsi_eh_prt_fail_stats - Log info on failures. 218 * @shost: scsi host being recovered. 219 * @work_q: Queue of scsi cmds to process. 220 **/ 221 static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost, 222 struct list_head *work_q) 223 { 224 struct scsi_cmnd *scmd; 225 struct scsi_device *sdev; 226 int total_failures = 0; 227 int cmd_failed = 0; 228 int cmd_cancel = 0; 229 int devices_failed = 0; 230 231 shost_for_each_device(sdev, shost) { 232 list_for_each_entry(scmd, work_q, eh_entry) { 233 if (scmd->device == sdev) { 234 ++total_failures; 235 if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) 236 ++cmd_cancel; 237 else 238 ++cmd_failed; 239 } 240 } 241 242 if (cmd_cancel || cmd_failed) { 243 SCSI_LOG_ERROR_RECOVERY(3, 244 printk("%s: %d:%d:%d:%d cmds failed: %d," 245 " cancel: %d\n", 246 __FUNCTION__, shost->host_no, 247 sdev->channel, sdev->id, sdev->lun, 248 cmd_failed, cmd_cancel)); 249 cmd_cancel = 0; 250 cmd_failed = 0; 251 ++devices_failed; 252 } 253 } 254 255 SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d" 256 " devices require eh work\n", 257 total_failures, devices_failed)); 258 } 259 #endif 260 261 /** 262 * scsi_check_sense - Examine scsi cmd sense 263 * @scmd: Cmd to have sense checked. 264 * 265 * Return value: 266 * SUCCESS or FAILED or NEEDS_RETRY 267 * 268 * Notes: 269 * When a deferred error is detected the current command has 270 * not been executed and needs retrying. 271 **/ 272 static int scsi_check_sense(struct scsi_cmnd *scmd) 273 { 274 struct scsi_sense_hdr sshdr; 275 276 if (! scsi_command_normalize_sense(scmd, &sshdr)) 277 return FAILED; /* no valid sense data */ 278 279 if (scsi_sense_is_deferred(&sshdr)) 280 return NEEDS_RETRY; 281 282 /* 283 * Previous logic looked for FILEMARK, EOM or ILI which are 284 * mainly associated with tapes and returned SUCCESS. 285 */ 286 if (sshdr.response_code == 0x70) { 287 /* fixed format */ 288 if (scmd->sense_buffer[2] & 0xe0) 289 return SUCCESS; 290 } else { 291 /* 292 * descriptor format: look for "stream commands sense data 293 * descriptor" (see SSC-3). Assume single sense data 294 * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG. 295 */ 296 if ((sshdr.additional_length > 3) && 297 (scmd->sense_buffer[8] == 0x4) && 298 (scmd->sense_buffer[11] & 0xe0)) 299 return SUCCESS; 300 } 301 302 switch (sshdr.sense_key) { 303 case NO_SENSE: 304 return SUCCESS; 305 case RECOVERED_ERROR: 306 return /* soft_error */ SUCCESS; 307 308 case ABORTED_COMMAND: 309 return NEEDS_RETRY; 310 case NOT_READY: 311 case UNIT_ATTENTION: 312 /* 313 * if we are expecting a cc/ua because of a bus reset that we 314 * performed, treat this just as a retry. otherwise this is 315 * information that we should pass up to the upper-level driver 316 * so that we can deal with it there. 317 */ 318 if (scmd->device->expecting_cc_ua) { 319 scmd->device->expecting_cc_ua = 0; 320 return NEEDS_RETRY; 321 } 322 /* 323 * if the device is in the process of becoming ready, we 324 * should retry. 325 */ 326 if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) 327 return NEEDS_RETRY; 328 /* 329 * if the device is not started, we need to wake 330 * the error handler to start the motor 331 */ 332 if (scmd->device->allow_restart && 333 (sshdr.asc == 0x04) && (sshdr.ascq == 0x02)) 334 return FAILED; 335 return SUCCESS; 336 337 /* these three are not supported */ 338 case COPY_ABORTED: 339 case VOLUME_OVERFLOW: 340 case MISCOMPARE: 341 return SUCCESS; 342 343 case MEDIUM_ERROR: 344 return NEEDS_RETRY; 345 346 case HARDWARE_ERROR: 347 if (scmd->device->retry_hwerror) 348 return NEEDS_RETRY; 349 else 350 return SUCCESS; 351 352 case ILLEGAL_REQUEST: 353 case BLANK_CHECK: 354 case DATA_PROTECT: 355 default: 356 return SUCCESS; 357 } 358 } 359 360 /** 361 * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD. 362 * @scmd: SCSI cmd to examine. 363 * 364 * Notes: 365 * This is *only* called when we are examining the status of commands 366 * queued during error recovery. the main difference here is that we 367 * don't allow for the possibility of retries here, and we are a lot 368 * more restrictive about what we consider acceptable. 369 **/ 370 static int scsi_eh_completed_normally(struct scsi_cmnd *scmd) 371 { 372 /* 373 * first check the host byte, to see if there is anything in there 374 * that would indicate what we need to do. 375 */ 376 if (host_byte(scmd->result) == DID_RESET) { 377 /* 378 * rats. we are already in the error handler, so we now 379 * get to try and figure out what to do next. if the sense 380 * is valid, we have a pretty good idea of what to do. 381 * if not, we mark it as FAILED. 382 */ 383 return scsi_check_sense(scmd); 384 } 385 if (host_byte(scmd->result) != DID_OK) 386 return FAILED; 387 388 /* 389 * next, check the message byte. 390 */ 391 if (msg_byte(scmd->result) != COMMAND_COMPLETE) 392 return FAILED; 393 394 /* 395 * now, check the status byte to see if this indicates 396 * anything special. 397 */ 398 switch (status_byte(scmd->result)) { 399 case GOOD: 400 case COMMAND_TERMINATED: 401 return SUCCESS; 402 case CHECK_CONDITION: 403 return scsi_check_sense(scmd); 404 case CONDITION_GOOD: 405 case INTERMEDIATE_GOOD: 406 case INTERMEDIATE_C_GOOD: 407 /* 408 * who knows? FIXME(eric) 409 */ 410 return SUCCESS; 411 case BUSY: 412 case QUEUE_FULL: 413 case RESERVATION_CONFLICT: 414 default: 415 return FAILED; 416 } 417 return FAILED; 418 } 419 420 /** 421 * scsi_eh_times_out - timeout function for error handling. 422 * @scmd: Cmd that is timing out. 423 * 424 * Notes: 425 * During error handling, the kernel thread will be sleeping waiting 426 * for some action to complete on the device. our only job is to 427 * record that it timed out, and to wake up the thread. 428 **/ 429 static void scsi_eh_times_out(struct scsi_cmnd *scmd) 430 { 431 scmd->eh_eflags |= SCSI_EH_REC_TIMEOUT; 432 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__, 433 scmd)); 434 435 up(scmd->device->host->eh_action); 436 } 437 438 /** 439 * scsi_eh_done - Completion function for error handling. 440 * @scmd: Cmd that is done. 441 **/ 442 static void scsi_eh_done(struct scsi_cmnd *scmd) 443 { 444 /* 445 * if the timeout handler is already running, then just set the 446 * flag which says we finished late, and return. we have no 447 * way of stopping the timeout handler from running, so we must 448 * always defer to it. 449 */ 450 if (del_timer(&scmd->eh_timeout)) { 451 scmd->request->rq_status = RQ_SCSI_DONE; 452 453 SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n", 454 __FUNCTION__, scmd, scmd->result)); 455 456 up(scmd->device->host->eh_action); 457 } 458 } 459 460 /** 461 * scsi_send_eh_cmnd - send a cmd to a device as part of error recovery. 462 * @scmd: SCSI Cmd to send. 463 * @timeout: Timeout for cmd. 464 * 465 * Notes: 466 * The initialization of the structures is quite a bit different in 467 * this case, and furthermore, there is a different completion handler 468 * vs scsi_dispatch_cmd. 469 * Return value: 470 * SUCCESS or FAILED or NEEDS_RETRY 471 **/ 472 static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) 473 { 474 struct scsi_device *sdev = scmd->device; 475 struct Scsi_Host *shost = sdev->host; 476 DECLARE_MUTEX_LOCKED(sem); 477 unsigned long flags; 478 int rtn = SUCCESS; 479 480 /* 481 * we will use a queued command if possible, otherwise we will 482 * emulate the queuing and calling of completion function ourselves. 483 */ 484 if (sdev->scsi_level <= SCSI_2) 485 scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) | 486 (sdev->lun << 5 & 0xe0); 487 488 scsi_add_timer(scmd, timeout, scsi_eh_times_out); 489 490 /* 491 * set up the semaphore so we wait for the command to complete. 492 */ 493 shost->eh_action = &sem; 494 scmd->request->rq_status = RQ_SCSI_BUSY; 495 496 spin_lock_irqsave(shost->host_lock, flags); 497 scsi_log_send(scmd); 498 shost->hostt->queuecommand(scmd, scsi_eh_done); 499 spin_unlock_irqrestore(shost->host_lock, flags); 500 501 down(&sem); 502 scsi_log_completion(scmd, SUCCESS); 503 504 shost->eh_action = NULL; 505 506 /* 507 * see if timeout. if so, tell the host to forget about it. 508 * in other words, we don't want a callback any more. 509 */ 510 if (scmd->eh_eflags & SCSI_EH_REC_TIMEOUT) { 511 scmd->eh_eflags &= ~SCSI_EH_REC_TIMEOUT; 512 513 /* 514 * as far as the low level driver is 515 * concerned, this command is still active, so 516 * we must give the low level driver a chance 517 * to abort it. (db) 518 * 519 * FIXME(eric) - we are not tracking whether we could 520 * abort a timed out command or not. not sure how 521 * we should treat them differently anyways. 522 */ 523 if (shost->hostt->eh_abort_handler) 524 shost->hostt->eh_abort_handler(scmd); 525 526 scmd->request->rq_status = RQ_SCSI_DONE; 527 rtn = FAILED; 528 } 529 530 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n", 531 __FUNCTION__, scmd, rtn)); 532 533 /* 534 * now examine the actual status codes to see whether the command 535 * actually did complete normally. 536 */ 537 if (rtn == SUCCESS) { 538 rtn = scsi_eh_completed_normally(scmd); 539 SCSI_LOG_ERROR_RECOVERY(3, 540 printk("%s: scsi_eh_completed_normally %x\n", 541 __FUNCTION__, rtn)); 542 switch (rtn) { 543 case SUCCESS: 544 case NEEDS_RETRY: 545 case FAILED: 546 break; 547 default: 548 rtn = FAILED; 549 break; 550 } 551 } 552 553 return rtn; 554 } 555 556 /** 557 * scsi_request_sense - Request sense data from a particular target. 558 * @scmd: SCSI cmd for request sense. 559 * 560 * Notes: 561 * Some hosts automatically obtain this information, others require 562 * that we obtain it on our own. This function will *not* return until 563 * the command either times out, or it completes. 564 **/ 565 static int scsi_request_sense(struct scsi_cmnd *scmd) 566 { 567 static unsigned char generic_sense[6] = 568 {REQUEST_SENSE, 0, 0, 0, 252, 0}; 569 unsigned char *scsi_result; 570 int saved_result; 571 int rtn; 572 573 memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense)); 574 575 scsi_result = kmalloc(252, GFP_ATOMIC | ((scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0)); 576 577 578 if (unlikely(!scsi_result)) { 579 printk(KERN_ERR "%s: cannot allocate scsi_result.\n", 580 __FUNCTION__); 581 return FAILED; 582 } 583 584 /* 585 * zero the sense buffer. some host adapters automatically always 586 * request sense, so it is not a good idea that 587 * scmd->request_buffer and scmd->sense_buffer point to the same 588 * address (db). 0 is not a valid sense code. 589 */ 590 memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); 591 memset(scsi_result, 0, 252); 592 593 saved_result = scmd->result; 594 scmd->request_buffer = scsi_result; 595 scmd->request_bufflen = 252; 596 scmd->use_sg = 0; 597 scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); 598 scmd->sc_data_direction = DMA_FROM_DEVICE; 599 scmd->underflow = 0; 600 601 rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT); 602 603 /* last chance to have valid sense data */ 604 if(!SCSI_SENSE_VALID(scmd)) { 605 memcpy(scmd->sense_buffer, scmd->request_buffer, 606 sizeof(scmd->sense_buffer)); 607 } 608 609 kfree(scsi_result); 610 611 /* 612 * when we eventually call scsi_finish, we really wish to complete 613 * the original request, so let's restore the original data. (db) 614 */ 615 scsi_setup_cmd_retry(scmd); 616 scmd->result = saved_result; 617 return rtn; 618 } 619 620 /** 621 * scsi_eh_finish_cmd - Handle a cmd that eh is finished with. 622 * @scmd: Original SCSI cmd that eh has finished. 623 * @done_q: Queue for processed commands. 624 * 625 * Notes: 626 * We don't want to use the normal command completion while we are are 627 * still handling errors - it may cause other commands to be queued, 628 * and that would disturb what we are doing. thus we really want to 629 * keep a list of pending commands for final completion, and once we 630 * are ready to leave error handling we handle completion for real. 631 **/ 632 static void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, 633 struct list_head *done_q) 634 { 635 scmd->device->host->host_failed--; 636 scmd->eh_eflags = 0; 637 638 /* 639 * set this back so that the upper level can correctly free up 640 * things. 641 */ 642 scsi_setup_cmd_retry(scmd); 643 list_move_tail(&scmd->eh_entry, done_q); 644 } 645 646 /** 647 * scsi_eh_get_sense - Get device sense data. 648 * @work_q: Queue of commands to process. 649 * @done_q: Queue of proccessed commands.. 650 * 651 * Description: 652 * See if we need to request sense information. if so, then get it 653 * now, so we have a better idea of what to do. 654 * 655 * Notes: 656 * This has the unfortunate side effect that if a shost adapter does 657 * not automatically request sense information, that we end up shutting 658 * it down before we request it. 659 * 660 * All drivers should request sense information internally these days, 661 * so for now all I have to say is tough noogies if you end up in here. 662 * 663 * XXX: Long term this code should go away, but that needs an audit of 664 * all LLDDs first. 665 **/ 666 static int scsi_eh_get_sense(struct list_head *work_q, 667 struct list_head *done_q) 668 { 669 struct scsi_cmnd *scmd, *next; 670 int rtn; 671 672 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 673 if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) || 674 SCSI_SENSE_VALID(scmd)) 675 continue; 676 677 SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense" 678 " for id: %d\n", 679 current->comm, 680 scmd->device->id)); 681 rtn = scsi_request_sense(scmd); 682 if (rtn != SUCCESS) 683 continue; 684 685 SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p" 686 " result %x\n", scmd, 687 scmd->result)); 688 SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense("bh", scmd)); 689 690 rtn = scsi_decide_disposition(scmd); 691 692 /* 693 * if the result was normal, then just pass it along to the 694 * upper level. 695 */ 696 if (rtn == SUCCESS) 697 /* we don't want this command reissued, just 698 * finished with the sense data, so set 699 * retries to the max allowed to ensure it 700 * won't get reissued */ 701 scmd->retries = scmd->allowed; 702 else if (rtn != NEEDS_RETRY) 703 continue; 704 705 scsi_eh_finish_cmd(scmd, done_q); 706 } 707 708 return list_empty(work_q); 709 } 710 711 /** 712 * scsi_try_to_abort_cmd - Ask host to abort a running command. 713 * @scmd: SCSI cmd to abort from Lower Level. 714 * 715 * Notes: 716 * This function will not return until the user's completion function 717 * has been called. there is no timeout on this operation. if the 718 * author of the low-level driver wishes this operation to be timed, 719 * they can provide this facility themselves. helper functions in 720 * scsi_error.c can be supplied to make this easier to do. 721 **/ 722 static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) 723 { 724 if (!scmd->device->host->hostt->eh_abort_handler) 725 return FAILED; 726 727 /* 728 * scsi_done was called just after the command timed out and before 729 * we had a chance to process it. (db) 730 */ 731 if (scmd->serial_number == 0) 732 return SUCCESS; 733 return scmd->device->host->hostt->eh_abort_handler(scmd); 734 } 735 736 /** 737 * scsi_eh_tur - Send TUR to device. 738 * @scmd: Scsi cmd to send TUR 739 * 740 * Return value: 741 * 0 - Device is ready. 1 - Device NOT ready. 742 **/ 743 static int scsi_eh_tur(struct scsi_cmnd *scmd) 744 { 745 static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0}; 746 int retry_cnt = 1, rtn; 747 int saved_result; 748 749 retry_tur: 750 memcpy(scmd->cmnd, tur_command, sizeof(tur_command)); 751 752 /* 753 * zero the sense buffer. the scsi spec mandates that any 754 * untransferred sense data should be interpreted as being zero. 755 */ 756 memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); 757 758 saved_result = scmd->result; 759 scmd->request_buffer = NULL; 760 scmd->request_bufflen = 0; 761 scmd->use_sg = 0; 762 scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); 763 scmd->underflow = 0; 764 scmd->sc_data_direction = DMA_NONE; 765 766 rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT); 767 768 /* 769 * when we eventually call scsi_finish, we really wish to complete 770 * the original request, so let's restore the original data. (db) 771 */ 772 scsi_setup_cmd_retry(scmd); 773 scmd->result = saved_result; 774 775 /* 776 * hey, we are done. let's look to see what happened. 777 */ 778 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n", 779 __FUNCTION__, scmd, rtn)); 780 if (rtn == SUCCESS) 781 return 0; 782 else if (rtn == NEEDS_RETRY) { 783 if (retry_cnt--) 784 goto retry_tur; 785 return 0; 786 } 787 return 1; 788 } 789 790 /** 791 * scsi_eh_abort_cmds - abort canceled commands. 792 * @shost: scsi host being recovered. 793 * @eh_done_q: list_head for processed commands. 794 * 795 * Decription: 796 * Try and see whether or not it makes sense to try and abort the 797 * running command. this only works out to be the case if we have one 798 * command that has timed out. if the command simply failed, it makes 799 * no sense to try and abort the command, since as far as the shost 800 * adapter is concerned, it isn't running. 801 **/ 802 static int scsi_eh_abort_cmds(struct list_head *work_q, 803 struct list_head *done_q) 804 { 805 struct scsi_cmnd *scmd, *next; 806 int rtn; 807 808 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 809 if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD)) 810 continue; 811 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:" 812 "0x%p\n", current->comm, 813 scmd)); 814 rtn = scsi_try_to_abort_cmd(scmd); 815 if (rtn == SUCCESS) { 816 scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; 817 if (!scsi_device_online(scmd->device) || 818 !scsi_eh_tur(scmd)) { 819 scsi_eh_finish_cmd(scmd, done_q); 820 } 821 822 } else 823 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting" 824 " cmd failed:" 825 "0x%p\n", 826 current->comm, 827 scmd)); 828 } 829 830 return list_empty(work_q); 831 } 832 833 /** 834 * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev 835 * @scmd: SCSI cmd used to send BDR 836 * 837 * Notes: 838 * There is no timeout for this operation. if this operation is 839 * unreliable for a given host, then the host itself needs to put a 840 * timer on it, and set the host back to a consistent state prior to 841 * returning. 842 **/ 843 static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd) 844 { 845 int rtn; 846 847 if (!scmd->device->host->hostt->eh_device_reset_handler) 848 return FAILED; 849 850 rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd); 851 if (rtn == SUCCESS) { 852 scmd->device->was_reset = 1; 853 scmd->device->expecting_cc_ua = 1; 854 } 855 856 return rtn; 857 } 858 859 /** 860 * scsi_eh_try_stu - Send START_UNIT to device. 861 * @scmd: Scsi cmd to send START_UNIT 862 * 863 * Return value: 864 * 0 - Device is ready. 1 - Device NOT ready. 865 **/ 866 static int scsi_eh_try_stu(struct scsi_cmnd *scmd) 867 { 868 static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0}; 869 int rtn; 870 int saved_result; 871 872 if (!scmd->device->allow_restart) 873 return 1; 874 875 memcpy(scmd->cmnd, stu_command, sizeof(stu_command)); 876 877 /* 878 * zero the sense buffer. the scsi spec mandates that any 879 * untransferred sense data should be interpreted as being zero. 880 */ 881 memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); 882 883 saved_result = scmd->result; 884 scmd->request_buffer = NULL; 885 scmd->request_bufflen = 0; 886 scmd->use_sg = 0; 887 scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); 888 scmd->underflow = 0; 889 scmd->sc_data_direction = DMA_NONE; 890 891 rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT); 892 893 /* 894 * when we eventually call scsi_finish, we really wish to complete 895 * the original request, so let's restore the original data. (db) 896 */ 897 scsi_setup_cmd_retry(scmd); 898 scmd->result = saved_result; 899 900 /* 901 * hey, we are done. let's look to see what happened. 902 */ 903 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n", 904 __FUNCTION__, scmd, rtn)); 905 if (rtn == SUCCESS) 906 return 0; 907 return 1; 908 } 909 910 /** 911 * scsi_eh_stu - send START_UNIT if needed 912 * @shost: scsi host being recovered. 913 * @eh_done_q: list_head for processed commands. 914 * 915 * Notes: 916 * If commands are failing due to not ready, initializing command required, 917 * try revalidating the device, which will end up sending a start unit. 918 **/ 919 static int scsi_eh_stu(struct Scsi_Host *shost, 920 struct list_head *work_q, 921 struct list_head *done_q) 922 { 923 struct scsi_cmnd *scmd, *stu_scmd, *next; 924 struct scsi_device *sdev; 925 926 shost_for_each_device(sdev, shost) { 927 stu_scmd = NULL; 928 list_for_each_entry(scmd, work_q, eh_entry) 929 if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) && 930 scsi_check_sense(scmd) == FAILED ) { 931 stu_scmd = scmd; 932 break; 933 } 934 935 if (!stu_scmd) 936 continue; 937 938 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:" 939 " 0x%p\n", current->comm, sdev)); 940 941 if (!scsi_eh_try_stu(stu_scmd)) { 942 if (!scsi_device_online(sdev) || 943 !scsi_eh_tur(stu_scmd)) { 944 list_for_each_entry_safe(scmd, next, 945 work_q, eh_entry) { 946 if (scmd->device == sdev) 947 scsi_eh_finish_cmd(scmd, done_q); 948 } 949 } 950 } else { 951 SCSI_LOG_ERROR_RECOVERY(3, 952 printk("%s: START_UNIT failed to sdev:" 953 " 0x%p\n", current->comm, sdev)); 954 } 955 } 956 957 return list_empty(work_q); 958 } 959 960 961 /** 962 * scsi_eh_bus_device_reset - send bdr if needed 963 * @shost: scsi host being recovered. 964 * @eh_done_q: list_head for processed commands. 965 * 966 * Notes: 967 * Try a bus device reset. still, look to see whether we have multiple 968 * devices that are jammed or not - if we have multiple devices, it 969 * makes no sense to try bus_device_reset - we really would need to try 970 * a bus_reset instead. 971 **/ 972 static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, 973 struct list_head *work_q, 974 struct list_head *done_q) 975 { 976 struct scsi_cmnd *scmd, *bdr_scmd, *next; 977 struct scsi_device *sdev; 978 int rtn; 979 980 shost_for_each_device(sdev, shost) { 981 bdr_scmd = NULL; 982 list_for_each_entry(scmd, work_q, eh_entry) 983 if (scmd->device == sdev) { 984 bdr_scmd = scmd; 985 break; 986 } 987 988 if (!bdr_scmd) 989 continue; 990 991 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:" 992 " 0x%p\n", current->comm, 993 sdev)); 994 rtn = scsi_try_bus_device_reset(bdr_scmd); 995 if (rtn == SUCCESS) { 996 if (!scsi_device_online(sdev) || 997 !scsi_eh_tur(bdr_scmd)) { 998 list_for_each_entry_safe(scmd, next, 999 work_q, eh_entry) { 1000 if (scmd->device == sdev) 1001 scsi_eh_finish_cmd(scmd, 1002 done_q); 1003 } 1004 } 1005 } else { 1006 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR" 1007 " failed sdev:" 1008 "0x%p\n", 1009 current->comm, 1010 sdev)); 1011 } 1012 } 1013 1014 return list_empty(work_q); 1015 } 1016 1017 /** 1018 * scsi_try_bus_reset - ask host to perform a bus reset 1019 * @scmd: SCSI cmd to send bus reset. 1020 **/ 1021 static int scsi_try_bus_reset(struct scsi_cmnd *scmd) 1022 { 1023 unsigned long flags; 1024 int rtn; 1025 1026 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n", 1027 __FUNCTION__)); 1028 1029 if (!scmd->device->host->hostt->eh_bus_reset_handler) 1030 return FAILED; 1031 1032 rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd); 1033 1034 if (rtn == SUCCESS) { 1035 if (!scmd->device->host->hostt->skip_settle_delay) 1036 ssleep(BUS_RESET_SETTLE_TIME); 1037 spin_lock_irqsave(scmd->device->host->host_lock, flags); 1038 scsi_report_bus_reset(scmd->device->host, scmd->device->channel); 1039 spin_unlock_irqrestore(scmd->device->host->host_lock, flags); 1040 } 1041 1042 return rtn; 1043 } 1044 1045 /** 1046 * scsi_try_host_reset - ask host adapter to reset itself 1047 * @scmd: SCSI cmd to send hsot reset. 1048 **/ 1049 static int scsi_try_host_reset(struct scsi_cmnd *scmd) 1050 { 1051 unsigned long flags; 1052 int rtn; 1053 1054 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n", 1055 __FUNCTION__)); 1056 1057 if (!scmd->device->host->hostt->eh_host_reset_handler) 1058 return FAILED; 1059 1060 rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd); 1061 1062 if (rtn == SUCCESS) { 1063 if (!scmd->device->host->hostt->skip_settle_delay) 1064 ssleep(HOST_RESET_SETTLE_TIME); 1065 spin_lock_irqsave(scmd->device->host->host_lock, flags); 1066 scsi_report_bus_reset(scmd->device->host, scmd->device->channel); 1067 spin_unlock_irqrestore(scmd->device->host->host_lock, flags); 1068 } 1069 1070 return rtn; 1071 } 1072 1073 /** 1074 * scsi_eh_bus_reset - send a bus reset 1075 * @shost: scsi host being recovered. 1076 * @eh_done_q: list_head for processed commands. 1077 **/ 1078 static int scsi_eh_bus_reset(struct Scsi_Host *shost, 1079 struct list_head *work_q, 1080 struct list_head *done_q) 1081 { 1082 struct scsi_cmnd *scmd, *chan_scmd, *next; 1083 unsigned int channel; 1084 int rtn; 1085 1086 /* 1087 * we really want to loop over the various channels, and do this on 1088 * a channel by channel basis. we should also check to see if any 1089 * of the failed commands are on soft_reset devices, and if so, skip 1090 * the reset. 1091 */ 1092 1093 for (channel = 0; channel <= shost->max_channel; channel++) { 1094 chan_scmd = NULL; 1095 list_for_each_entry(scmd, work_q, eh_entry) { 1096 if (channel == scmd->device->channel) { 1097 chan_scmd = scmd; 1098 break; 1099 /* 1100 * FIXME add back in some support for 1101 * soft_reset devices. 1102 */ 1103 } 1104 } 1105 1106 if (!chan_scmd) 1107 continue; 1108 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:" 1109 " %d\n", current->comm, 1110 channel)); 1111 rtn = scsi_try_bus_reset(chan_scmd); 1112 if (rtn == SUCCESS) { 1113 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 1114 if (channel == scmd->device->channel) 1115 if (!scsi_device_online(scmd->device) || 1116 !scsi_eh_tur(scmd)) 1117 scsi_eh_finish_cmd(scmd, 1118 done_q); 1119 } 1120 } else { 1121 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST" 1122 " failed chan: %d\n", 1123 current->comm, 1124 channel)); 1125 } 1126 } 1127 return list_empty(work_q); 1128 } 1129 1130 /** 1131 * scsi_eh_host_reset - send a host reset 1132 * @work_q: list_head for processed commands. 1133 * @done_q: list_head for processed commands. 1134 **/ 1135 static int scsi_eh_host_reset(struct list_head *work_q, 1136 struct list_head *done_q) 1137 { 1138 struct scsi_cmnd *scmd, *next; 1139 int rtn; 1140 1141 if (!list_empty(work_q)) { 1142 scmd = list_entry(work_q->next, 1143 struct scsi_cmnd, eh_entry); 1144 1145 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n" 1146 , current->comm)); 1147 1148 rtn = scsi_try_host_reset(scmd); 1149 if (rtn == SUCCESS) { 1150 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 1151 if (!scsi_device_online(scmd->device) || 1152 (!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) || 1153 !scsi_eh_tur(scmd)) 1154 scsi_eh_finish_cmd(scmd, done_q); 1155 } 1156 } else { 1157 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST" 1158 " failed\n", 1159 current->comm)); 1160 } 1161 } 1162 return list_empty(work_q); 1163 } 1164 1165 /** 1166 * scsi_eh_offline_sdevs - offline scsi devices that fail to recover 1167 * @work_q: list_head for processed commands. 1168 * @done_q: list_head for processed commands. 1169 * 1170 **/ 1171 static void scsi_eh_offline_sdevs(struct list_head *work_q, 1172 struct list_head *done_q) 1173 { 1174 struct scsi_cmnd *scmd, *next; 1175 1176 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 1177 printk(KERN_INFO "scsi: Device offlined - not" 1178 " ready after error recovery: host" 1179 " %d channel %d id %d lun %d\n", 1180 scmd->device->host->host_no, 1181 scmd->device->channel, 1182 scmd->device->id, 1183 scmd->device->lun); 1184 scsi_device_set_state(scmd->device, SDEV_OFFLINE); 1185 if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) { 1186 /* 1187 * FIXME: Handle lost cmds. 1188 */ 1189 } 1190 scsi_eh_finish_cmd(scmd, done_q); 1191 } 1192 return; 1193 } 1194 1195 /** 1196 * scsi_decide_disposition - Disposition a cmd on return from LLD. 1197 * @scmd: SCSI cmd to examine. 1198 * 1199 * Notes: 1200 * This is *only* called when we are examining the status after sending 1201 * out the actual data command. any commands that are queued for error 1202 * recovery (e.g. test_unit_ready) do *not* come through here. 1203 * 1204 * When this routine returns failed, it means the error handler thread 1205 * is woken. In cases where the error code indicates an error that 1206 * doesn't require the error handler read (i.e. we don't need to 1207 * abort/reset), this function should return SUCCESS. 1208 **/ 1209 int scsi_decide_disposition(struct scsi_cmnd *scmd) 1210 { 1211 int rtn; 1212 1213 /* 1214 * if the device is offline, then we clearly just pass the result back 1215 * up to the top level. 1216 */ 1217 if (!scsi_device_online(scmd->device)) { 1218 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report" 1219 " as SUCCESS\n", 1220 __FUNCTION__)); 1221 return SUCCESS; 1222 } 1223 1224 /* 1225 * first check the host byte, to see if there is anything in there 1226 * that would indicate what we need to do. 1227 */ 1228 switch (host_byte(scmd->result)) { 1229 case DID_PASSTHROUGH: 1230 /* 1231 * no matter what, pass this through to the upper layer. 1232 * nuke this special code so that it looks like we are saying 1233 * did_ok. 1234 */ 1235 scmd->result &= 0xff00ffff; 1236 return SUCCESS; 1237 case DID_OK: 1238 /* 1239 * looks good. drop through, and check the next byte. 1240 */ 1241 break; 1242 case DID_NO_CONNECT: 1243 case DID_BAD_TARGET: 1244 case DID_ABORT: 1245 /* 1246 * note - this means that we just report the status back 1247 * to the top level driver, not that we actually think 1248 * that it indicates SUCCESS. 1249 */ 1250 return SUCCESS; 1251 /* 1252 * when the low level driver returns did_soft_error, 1253 * it is responsible for keeping an internal retry counter 1254 * in order to avoid endless loops (db) 1255 * 1256 * actually this is a bug in this function here. we should 1257 * be mindful of the maximum number of retries specified 1258 * and not get stuck in a loop. 1259 */ 1260 case DID_SOFT_ERROR: 1261 goto maybe_retry; 1262 case DID_IMM_RETRY: 1263 return NEEDS_RETRY; 1264 1265 case DID_REQUEUE: 1266 return ADD_TO_MLQUEUE; 1267 1268 case DID_ERROR: 1269 if (msg_byte(scmd->result) == COMMAND_COMPLETE && 1270 status_byte(scmd->result) == RESERVATION_CONFLICT) 1271 /* 1272 * execute reservation conflict processing code 1273 * lower down 1274 */ 1275 break; 1276 /* fallthrough */ 1277 1278 case DID_BUS_BUSY: 1279 case DID_PARITY: 1280 goto maybe_retry; 1281 case DID_TIME_OUT: 1282 /* 1283 * when we scan the bus, we get timeout messages for 1284 * these commands if there is no device available. 1285 * other hosts report did_no_connect for the same thing. 1286 */ 1287 if ((scmd->cmnd[0] == TEST_UNIT_READY || 1288 scmd->cmnd[0] == INQUIRY)) { 1289 return SUCCESS; 1290 } else { 1291 return FAILED; 1292 } 1293 case DID_RESET: 1294 return SUCCESS; 1295 default: 1296 return FAILED; 1297 } 1298 1299 /* 1300 * next, check the message byte. 1301 */ 1302 if (msg_byte(scmd->result) != COMMAND_COMPLETE) 1303 return FAILED; 1304 1305 /* 1306 * check the status byte to see if this indicates anything special. 1307 */ 1308 switch (status_byte(scmd->result)) { 1309 case QUEUE_FULL: 1310 /* 1311 * the case of trying to send too many commands to a 1312 * tagged queueing device. 1313 */ 1314 case BUSY: 1315 /* 1316 * device can't talk to us at the moment. Should only 1317 * occur (SAM-3) when the task queue is empty, so will cause 1318 * the empty queue handling to trigger a stall in the 1319 * device. 1320 */ 1321 return ADD_TO_MLQUEUE; 1322 case GOOD: 1323 case COMMAND_TERMINATED: 1324 case TASK_ABORTED: 1325 return SUCCESS; 1326 case CHECK_CONDITION: 1327 rtn = scsi_check_sense(scmd); 1328 if (rtn == NEEDS_RETRY) 1329 goto maybe_retry; 1330 /* if rtn == FAILED, we have no sense information; 1331 * returning FAILED will wake the error handler thread 1332 * to collect the sense and redo the decide 1333 * disposition */ 1334 return rtn; 1335 case CONDITION_GOOD: 1336 case INTERMEDIATE_GOOD: 1337 case INTERMEDIATE_C_GOOD: 1338 case ACA_ACTIVE: 1339 /* 1340 * who knows? FIXME(eric) 1341 */ 1342 return SUCCESS; 1343 1344 case RESERVATION_CONFLICT: 1345 printk(KERN_INFO "scsi: reservation conflict: host" 1346 " %d channel %d id %d lun %d\n", 1347 scmd->device->host->host_no, scmd->device->channel, 1348 scmd->device->id, scmd->device->lun); 1349 return SUCCESS; /* causes immediate i/o error */ 1350 default: 1351 return FAILED; 1352 } 1353 return FAILED; 1354 1355 maybe_retry: 1356 1357 /* we requeue for retry because the error was retryable, and 1358 * the request was not marked fast fail. Note that above, 1359 * even if the request is marked fast fail, we still requeue 1360 * for queue congestion conditions (QUEUE_FULL or BUSY) */ 1361 if ((++scmd->retries) < scmd->allowed 1362 && !blk_noretry_request(scmd->request)) { 1363 return NEEDS_RETRY; 1364 } else { 1365 /* 1366 * no more retries - report this one back to upper level. 1367 */ 1368 return SUCCESS; 1369 } 1370 } 1371 1372 /** 1373 * scsi_eh_lock_done - done function for eh door lock request 1374 * @scmd: SCSI command block for the door lock request 1375 * 1376 * Notes: 1377 * We completed the asynchronous door lock request, and it has either 1378 * locked the door or failed. We must free the command structures 1379 * associated with this request. 1380 **/ 1381 static void scsi_eh_lock_done(struct scsi_cmnd *scmd) 1382 { 1383 struct scsi_request *sreq = scmd->sc_request; 1384 1385 scsi_release_request(sreq); 1386 } 1387 1388 1389 /** 1390 * scsi_eh_lock_door - Prevent medium removal for the specified device 1391 * @sdev: SCSI device to prevent medium removal 1392 * 1393 * Locking: 1394 * We must be called from process context; scsi_allocate_request() 1395 * may sleep. 1396 * 1397 * Notes: 1398 * We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the 1399 * head of the devices request queue, and continue. 1400 * 1401 * Bugs: 1402 * scsi_allocate_request() may sleep waiting for existing requests to 1403 * be processed. However, since we haven't kicked off any request 1404 * processing for this host, this may deadlock. 1405 * 1406 * If scsi_allocate_request() fails for what ever reason, we 1407 * completely forget to lock the door. 1408 **/ 1409 static void scsi_eh_lock_door(struct scsi_device *sdev) 1410 { 1411 struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL); 1412 1413 if (unlikely(!sreq)) { 1414 printk(KERN_ERR "%s: request allocate failed," 1415 "prevent media removal cmd not sent\n", __FUNCTION__); 1416 return; 1417 } 1418 1419 sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL; 1420 sreq->sr_cmnd[1] = 0; 1421 sreq->sr_cmnd[2] = 0; 1422 sreq->sr_cmnd[3] = 0; 1423 sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT; 1424 sreq->sr_cmnd[5] = 0; 1425 sreq->sr_data_direction = DMA_NONE; 1426 sreq->sr_bufflen = 0; 1427 sreq->sr_buffer = NULL; 1428 sreq->sr_allowed = 5; 1429 sreq->sr_done = scsi_eh_lock_done; 1430 sreq->sr_timeout_per_command = 10 * HZ; 1431 sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]); 1432 1433 scsi_insert_special_req(sreq, 1); 1434 } 1435 1436 1437 /** 1438 * scsi_restart_operations - restart io operations to the specified host. 1439 * @shost: Host we are restarting. 1440 * 1441 * Notes: 1442 * When we entered the error handler, we blocked all further i/o to 1443 * this device. we need to 'reverse' this process. 1444 **/ 1445 static void scsi_restart_operations(struct Scsi_Host *shost) 1446 { 1447 struct scsi_device *sdev; 1448 unsigned long flags; 1449 1450 /* 1451 * If the door was locked, we need to insert a door lock request 1452 * onto the head of the SCSI request queue for the device. There 1453 * is no point trying to lock the door of an off-line device. 1454 */ 1455 shost_for_each_device(sdev, shost) { 1456 if (scsi_device_online(sdev) && sdev->locked) 1457 scsi_eh_lock_door(sdev); 1458 } 1459 1460 /* 1461 * next free up anything directly waiting upon the host. this 1462 * will be requests for character device operations, and also for 1463 * ioctls to queued block devices. 1464 */ 1465 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n", 1466 __FUNCTION__)); 1467 1468 spin_lock_irqsave(shost->host_lock, flags); 1469 if (scsi_host_set_state(shost, SHOST_RUNNING)) 1470 if (scsi_host_set_state(shost, SHOST_CANCEL)) 1471 BUG_ON(scsi_host_set_state(shost, SHOST_DEL)); 1472 spin_unlock_irqrestore(shost->host_lock, flags); 1473 1474 wake_up(&shost->host_wait); 1475 1476 /* 1477 * finally we need to re-initiate requests that may be pending. we will 1478 * have had everything blocked while error handling is taking place, and 1479 * now that error recovery is done, we will need to ensure that these 1480 * requests are started. 1481 */ 1482 scsi_run_host_queues(shost); 1483 } 1484 1485 /** 1486 * scsi_eh_ready_devs - check device ready state and recover if not. 1487 * @shost: host to be recovered. 1488 * @eh_done_q: list_head for processed commands. 1489 * 1490 **/ 1491 static void scsi_eh_ready_devs(struct Scsi_Host *shost, 1492 struct list_head *work_q, 1493 struct list_head *done_q) 1494 { 1495 if (!scsi_eh_stu(shost, work_q, done_q)) 1496 if (!scsi_eh_bus_device_reset(shost, work_q, done_q)) 1497 if (!scsi_eh_bus_reset(shost, work_q, done_q)) 1498 if (!scsi_eh_host_reset(work_q, done_q)) 1499 scsi_eh_offline_sdevs(work_q, done_q); 1500 } 1501 1502 /** 1503 * scsi_eh_flush_done_q - finish processed commands or retry them. 1504 * @done_q: list_head of processed commands. 1505 * 1506 **/ 1507 static void scsi_eh_flush_done_q(struct list_head *done_q) 1508 { 1509 struct scsi_cmnd *scmd, *next; 1510 1511 list_for_each_entry_safe(scmd, next, done_q, eh_entry) { 1512 list_del_init(&scmd->eh_entry); 1513 if (scsi_device_online(scmd->device) && 1514 !blk_noretry_request(scmd->request) && 1515 (++scmd->retries < scmd->allowed)) { 1516 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush" 1517 " retry cmd: %p\n", 1518 current->comm, 1519 scmd)); 1520 scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); 1521 } else { 1522 /* 1523 * If just we got sense for the device (called 1524 * scsi_eh_get_sense), scmd->result is already 1525 * set, do not set DRIVER_TIMEOUT. 1526 */ 1527 if (!scmd->result) 1528 scmd->result |= (DRIVER_TIMEOUT << 24); 1529 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish" 1530 " cmd: %p\n", 1531 current->comm, scmd)); 1532 scsi_finish_command(scmd); 1533 } 1534 } 1535 } 1536 1537 /** 1538 * scsi_unjam_host - Attempt to fix a host which has a cmd that failed. 1539 * @shost: Host to unjam. 1540 * 1541 * Notes: 1542 * When we come in here, we *know* that all commands on the bus have 1543 * either completed, failed or timed out. we also know that no further 1544 * commands are being sent to the host, so things are relatively quiet 1545 * and we have freedom to fiddle with things as we wish. 1546 * 1547 * This is only the *default* implementation. it is possible for 1548 * individual drivers to supply their own version of this function, and 1549 * if the maintainer wishes to do this, it is strongly suggested that 1550 * this function be taken as a template and modified. this function 1551 * was designed to correctly handle problems for about 95% of the 1552 * different cases out there, and it should always provide at least a 1553 * reasonable amount of error recovery. 1554 * 1555 * Any command marked 'failed' or 'timeout' must eventually have 1556 * scsi_finish_cmd() called for it. we do all of the retry stuff 1557 * here, so when we restart the host after we return it should have an 1558 * empty queue. 1559 **/ 1560 static void scsi_unjam_host(struct Scsi_Host *shost) 1561 { 1562 unsigned long flags; 1563 LIST_HEAD(eh_work_q); 1564 LIST_HEAD(eh_done_q); 1565 1566 spin_lock_irqsave(shost->host_lock, flags); 1567 list_splice_init(&shost->eh_cmd_q, &eh_work_q); 1568 spin_unlock_irqrestore(shost->host_lock, flags); 1569 1570 SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q)); 1571 1572 if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q)) 1573 if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q)) 1574 scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); 1575 1576 scsi_eh_flush_done_q(&eh_done_q); 1577 } 1578 1579 /** 1580 * scsi_error_handler - Handle errors/timeouts of SCSI cmds. 1581 * @data: Host for which we are running. 1582 * 1583 * Notes: 1584 * This is always run in the context of a kernel thread. The idea is 1585 * that we start this thing up when the kernel starts up (one per host 1586 * that we detect), and it immediately goes to sleep and waits for some 1587 * event (i.e. failure). When this takes place, we have the job of 1588 * trying to unjam the bus and restarting things. 1589 **/ 1590 int scsi_error_handler(void *data) 1591 { 1592 struct Scsi_Host *shost = (struct Scsi_Host *) data; 1593 int rtn; 1594 1595 current->flags |= PF_NOFREEZE; 1596 1597 1598 /* 1599 * Note - we always use TASK_INTERRUPTIBLE even if the module 1600 * was loaded as part of the kernel. The reason is that 1601 * UNINTERRUPTIBLE would cause this thread to be counted in 1602 * the load average as a running process, and an interruptible 1603 * wait doesn't. 1604 */ 1605 set_current_state(TASK_INTERRUPTIBLE); 1606 while (!kthread_should_stop()) { 1607 if (shost->host_failed == 0 || 1608 shost->host_failed != shost->host_busy) { 1609 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" 1610 " scsi_eh_%d" 1611 " sleeping\n", 1612 shost->host_no)); 1613 schedule(); 1614 set_current_state(TASK_INTERRUPTIBLE); 1615 continue; 1616 } 1617 1618 __set_current_state(TASK_RUNNING); 1619 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" 1620 " scsi_eh_%d waking" 1621 " up\n",shost->host_no)); 1622 1623 shost->eh_active = 1; 1624 1625 /* 1626 * We have a host that is failing for some reason. Figure out 1627 * what we need to do to get it up and online again (if we can). 1628 * If we fail, we end up taking the thing offline. 1629 */ 1630 if (shost->hostt->eh_strategy_handler) 1631 rtn = shost->hostt->eh_strategy_handler(shost); 1632 else 1633 scsi_unjam_host(shost); 1634 1635 shost->eh_active = 0; 1636 1637 /* 1638 * Note - if the above fails completely, the action is to take 1639 * individual devices offline and flush the queue of any 1640 * outstanding requests that may have been pending. When we 1641 * restart, we restart any I/O to any other devices on the bus 1642 * which are still online. 1643 */ 1644 scsi_restart_operations(shost); 1645 set_current_state(TASK_INTERRUPTIBLE); 1646 } 1647 1648 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d" 1649 " exiting\n",shost->host_no)); 1650 1651 /* 1652 * Make sure that nobody tries to wake us up again. 1653 */ 1654 shost->ehandler = NULL; 1655 return 0; 1656 } 1657 1658 /* 1659 * Function: scsi_report_bus_reset() 1660 * 1661 * Purpose: Utility function used by low-level drivers to report that 1662 * they have observed a bus reset on the bus being handled. 1663 * 1664 * Arguments: shost - Host in question 1665 * channel - channel on which reset was observed. 1666 * 1667 * Returns: Nothing 1668 * 1669 * Lock status: Host lock must be held. 1670 * 1671 * Notes: This only needs to be called if the reset is one which 1672 * originates from an unknown location. Resets originated 1673 * by the mid-level itself don't need to call this, but there 1674 * should be no harm. 1675 * 1676 * The main purpose of this is to make sure that a CHECK_CONDITION 1677 * is properly treated. 1678 */ 1679 void scsi_report_bus_reset(struct Scsi_Host *shost, int channel) 1680 { 1681 struct scsi_device *sdev; 1682 1683 __shost_for_each_device(sdev, shost) { 1684 if (channel == sdev->channel) { 1685 sdev->was_reset = 1; 1686 sdev->expecting_cc_ua = 1; 1687 } 1688 } 1689 } 1690 EXPORT_SYMBOL(scsi_report_bus_reset); 1691 1692 /* 1693 * Function: scsi_report_device_reset() 1694 * 1695 * Purpose: Utility function used by low-level drivers to report that 1696 * they have observed a device reset on the device being handled. 1697 * 1698 * Arguments: shost - Host in question 1699 * channel - channel on which reset was observed 1700 * target - target on which reset was observed 1701 * 1702 * Returns: Nothing 1703 * 1704 * Lock status: Host lock must be held 1705 * 1706 * Notes: This only needs to be called if the reset is one which 1707 * originates from an unknown location. Resets originated 1708 * by the mid-level itself don't need to call this, but there 1709 * should be no harm. 1710 * 1711 * The main purpose of this is to make sure that a CHECK_CONDITION 1712 * is properly treated. 1713 */ 1714 void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target) 1715 { 1716 struct scsi_device *sdev; 1717 1718 __shost_for_each_device(sdev, shost) { 1719 if (channel == sdev->channel && 1720 target == sdev->id) { 1721 sdev->was_reset = 1; 1722 sdev->expecting_cc_ua = 1; 1723 } 1724 } 1725 } 1726 EXPORT_SYMBOL(scsi_report_device_reset); 1727 1728 static void 1729 scsi_reset_provider_done_command(struct scsi_cmnd *scmd) 1730 { 1731 } 1732 1733 /* 1734 * Function: scsi_reset_provider 1735 * 1736 * Purpose: Send requested reset to a bus or device at any phase. 1737 * 1738 * Arguments: device - device to send reset to 1739 * flag - reset type (see scsi.h) 1740 * 1741 * Returns: SUCCESS/FAILURE. 1742 * 1743 * Notes: This is used by the SCSI Generic driver to provide 1744 * Bus/Device reset capability. 1745 */ 1746 int 1747 scsi_reset_provider(struct scsi_device *dev, int flag) 1748 { 1749 struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL); 1750 struct request req; 1751 int rtn; 1752 1753 scmd->request = &req; 1754 memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout)); 1755 scmd->request->rq_status = RQ_SCSI_BUSY; 1756 1757 memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd)); 1758 1759 scmd->scsi_done = scsi_reset_provider_done_command; 1760 scmd->done = NULL; 1761 scmd->buffer = NULL; 1762 scmd->bufflen = 0; 1763 scmd->request_buffer = NULL; 1764 scmd->request_bufflen = 0; 1765 1766 scmd->cmd_len = 0; 1767 1768 scmd->sc_data_direction = DMA_BIDIRECTIONAL; 1769 scmd->sc_request = NULL; 1770 scmd->sc_magic = SCSI_CMND_MAGIC; 1771 1772 init_timer(&scmd->eh_timeout); 1773 1774 /* 1775 * Sometimes the command can get back into the timer chain, 1776 * so use the pid as an identifier. 1777 */ 1778 scmd->pid = 0; 1779 1780 switch (flag) { 1781 case SCSI_TRY_RESET_DEVICE: 1782 rtn = scsi_try_bus_device_reset(scmd); 1783 if (rtn == SUCCESS) 1784 break; 1785 /* FALLTHROUGH */ 1786 case SCSI_TRY_RESET_BUS: 1787 rtn = scsi_try_bus_reset(scmd); 1788 if (rtn == SUCCESS) 1789 break; 1790 /* FALLTHROUGH */ 1791 case SCSI_TRY_RESET_HOST: 1792 rtn = scsi_try_host_reset(scmd); 1793 break; 1794 default: 1795 rtn = FAILED; 1796 } 1797 1798 scsi_next_command(scmd); 1799 return rtn; 1800 } 1801 EXPORT_SYMBOL(scsi_reset_provider); 1802 1803 /** 1804 * scsi_normalize_sense - normalize main elements from either fixed or 1805 * descriptor sense data format into a common format. 1806 * 1807 * @sense_buffer: byte array containing sense data returned by device 1808 * @sb_len: number of valid bytes in sense_buffer 1809 * @sshdr: pointer to instance of structure that common 1810 * elements are written to. 1811 * 1812 * Notes: 1813 * The "main elements" from sense data are: response_code, sense_key, 1814 * asc, ascq and additional_length (only for descriptor format). 1815 * 1816 * Typically this function can be called after a device has 1817 * responded to a SCSI command with the CHECK_CONDITION status. 1818 * 1819 * Return value: 1820 * 1 if valid sense data information found, else 0; 1821 **/ 1822 int scsi_normalize_sense(const u8 *sense_buffer, int sb_len, 1823 struct scsi_sense_hdr *sshdr) 1824 { 1825 if (!sense_buffer || !sb_len) 1826 return 0; 1827 1828 memset(sshdr, 0, sizeof(struct scsi_sense_hdr)); 1829 1830 sshdr->response_code = (sense_buffer[0] & 0x7f); 1831 1832 if (!scsi_sense_valid(sshdr)) 1833 return 0; 1834 1835 if (sshdr->response_code >= 0x72) { 1836 /* 1837 * descriptor format 1838 */ 1839 if (sb_len > 1) 1840 sshdr->sense_key = (sense_buffer[1] & 0xf); 1841 if (sb_len > 2) 1842 sshdr->asc = sense_buffer[2]; 1843 if (sb_len > 3) 1844 sshdr->ascq = sense_buffer[3]; 1845 if (sb_len > 7) 1846 sshdr->additional_length = sense_buffer[7]; 1847 } else { 1848 /* 1849 * fixed format 1850 */ 1851 if (sb_len > 2) 1852 sshdr->sense_key = (sense_buffer[2] & 0xf); 1853 if (sb_len > 7) { 1854 sb_len = (sb_len < (sense_buffer[7] + 8)) ? 1855 sb_len : (sense_buffer[7] + 8); 1856 if (sb_len > 12) 1857 sshdr->asc = sense_buffer[12]; 1858 if (sb_len > 13) 1859 sshdr->ascq = sense_buffer[13]; 1860 } 1861 } 1862 1863 return 1; 1864 } 1865 EXPORT_SYMBOL(scsi_normalize_sense); 1866 1867 int scsi_request_normalize_sense(struct scsi_request *sreq, 1868 struct scsi_sense_hdr *sshdr) 1869 { 1870 return scsi_normalize_sense(sreq->sr_sense_buffer, 1871 sizeof(sreq->sr_sense_buffer), sshdr); 1872 } 1873 EXPORT_SYMBOL(scsi_request_normalize_sense); 1874 1875 int scsi_command_normalize_sense(struct scsi_cmnd *cmd, 1876 struct scsi_sense_hdr *sshdr) 1877 { 1878 return scsi_normalize_sense(cmd->sense_buffer, 1879 sizeof(cmd->sense_buffer), sshdr); 1880 } 1881 EXPORT_SYMBOL(scsi_command_normalize_sense); 1882 1883 /** 1884 * scsi_sense_desc_find - search for a given descriptor type in 1885 * descriptor sense data format. 1886 * 1887 * @sense_buffer: byte array of descriptor format sense data 1888 * @sb_len: number of valid bytes in sense_buffer 1889 * @desc_type: value of descriptor type to find 1890 * (e.g. 0 -> information) 1891 * 1892 * Notes: 1893 * only valid when sense data is in descriptor format 1894 * 1895 * Return value: 1896 * pointer to start of (first) descriptor if found else NULL 1897 **/ 1898 const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, 1899 int desc_type) 1900 { 1901 int add_sen_len, add_len, desc_len, k; 1902 const u8 * descp; 1903 1904 if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7]))) 1905 return NULL; 1906 if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73)) 1907 return NULL; 1908 add_sen_len = (add_sen_len < (sb_len - 8)) ? 1909 add_sen_len : (sb_len - 8); 1910 descp = &sense_buffer[8]; 1911 for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) { 1912 descp += desc_len; 1913 add_len = (k < (add_sen_len - 1)) ? descp[1]: -1; 1914 desc_len = add_len + 2; 1915 if (descp[0] == desc_type) 1916 return descp; 1917 if (add_len < 0) // short descriptor ?? 1918 break; 1919 } 1920 return NULL; 1921 } 1922 EXPORT_SYMBOL(scsi_sense_desc_find); 1923 1924 /** 1925 * scsi_get_sense_info_fld - attempts to get information field from 1926 * sense data (either fixed or descriptor format) 1927 * 1928 * @sense_buffer: byte array of sense data 1929 * @sb_len: number of valid bytes in sense_buffer 1930 * @info_out: pointer to 64 integer where 8 or 4 byte information 1931 * field will be placed if found. 1932 * 1933 * Return value: 1934 * 1 if information field found, 0 if not found. 1935 **/ 1936 int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len, 1937 u64 * info_out) 1938 { 1939 int j; 1940 const u8 * ucp; 1941 u64 ull; 1942 1943 if (sb_len < 7) 1944 return 0; 1945 switch (sense_buffer[0] & 0x7f) { 1946 case 0x70: 1947 case 0x71: 1948 if (sense_buffer[0] & 0x80) { 1949 *info_out = (sense_buffer[3] << 24) + 1950 (sense_buffer[4] << 16) + 1951 (sense_buffer[5] << 8) + sense_buffer[6]; 1952 return 1; 1953 } else 1954 return 0; 1955 case 0x72: 1956 case 0x73: 1957 ucp = scsi_sense_desc_find(sense_buffer, sb_len, 1958 0 /* info desc */); 1959 if (ucp && (0xa == ucp[1])) { 1960 ull = 0; 1961 for (j = 0; j < 8; ++j) { 1962 if (j > 0) 1963 ull <<= 8; 1964 ull |= ucp[4 + j]; 1965 } 1966 *info_out = ull; 1967 return 1; 1968 } else 1969 return 0; 1970 default: 1971 return 0; 1972 } 1973 } 1974 EXPORT_SYMBOL(scsi_get_sense_info_fld); 1975