1 /* 2 * scsi_error.c Copyright (C) 1997 Eric Youngdale 3 * 4 * SCSI error/timeout handling 5 * Initial versions: Eric Youngdale. Based upon conversations with 6 * Leonard Zubkoff and David Miller at Linux Expo, 7 * ideas originating from all over the place. 8 * 9 * Restructured scsi_unjam_host and associated functions. 10 * September 04, 2002 Mike Anderson (andmike@us.ibm.com) 11 * 12 * Forward port of Russell King's (rmk@arm.linux.org.uk) changes and 13 * minor cleanups. 14 * September 30, 2002 Mike Anderson (andmike@us.ibm.com) 15 */ 16 17 #include <linux/module.h> 18 #include <linux/sched.h> 19 #include <linux/gfp.h> 20 #include <linux/timer.h> 21 #include <linux/string.h> 22 #include <linux/kernel.h> 23 #include <linux/freezer.h> 24 #include <linux/kthread.h> 25 #include <linux/interrupt.h> 26 #include <linux/blkdev.h> 27 #include <linux/delay.h> 28 #include <linux/jiffies.h> 29 30 #include <scsi/scsi.h> 31 #include <scsi/scsi_cmnd.h> 32 #include <scsi/scsi_dbg.h> 33 #include <scsi/scsi_device.h> 34 #include <scsi/scsi_driver.h> 35 #include <scsi/scsi_eh.h> 36 #include <scsi/scsi_common.h> 37 #include <scsi/scsi_transport.h> 38 #include <scsi/scsi_host.h> 39 #include <scsi/scsi_ioctl.h> 40 #include <scsi/scsi_dh.h> 41 #include <scsi/sg.h> 42 43 #include "scsi_priv.h" 44 #include "scsi_logging.h" 45 #include "scsi_transport_api.h" 46 47 #include <trace/events/scsi.h> 48 49 #include <asm/unaligned.h> 50 51 static void scsi_eh_done(struct scsi_cmnd *scmd); 52 53 /* 54 * These should *probably* be handled by the host itself. 55 * Since it is allowed to sleep, it probably should. 56 */ 57 #define BUS_RESET_SETTLE_TIME (10) 58 #define HOST_RESET_SETTLE_TIME (10) 59 60 static int scsi_eh_try_stu(struct scsi_cmnd *scmd); 61 static int scsi_try_to_abort_cmd(struct scsi_host_template *, 62 struct scsi_cmnd *); 63 64 void scsi_eh_wakeup(struct Scsi_Host *shost) 65 { 66 lockdep_assert_held(shost->host_lock); 67 68 if (atomic_read(&shost->host_busy) == shost->host_failed) { 69 trace_scsi_eh_wakeup(shost); 70 wake_up_process(shost->ehandler); 71 SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, 72 "Waking error handler thread\n")); 73 } 74 } 75 76 /** 77 * scsi_schedule_eh - schedule EH for SCSI host 78 * @shost: SCSI host to invoke error handling on. 79 * 80 * Schedule SCSI EH without scmd. 81 */ 82 void scsi_schedule_eh(struct Scsi_Host *shost) 83 { 84 unsigned long flags; 85 86 spin_lock_irqsave(shost->host_lock, flags); 87 88 if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || 89 scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { 90 shost->host_eh_scheduled++; 91 scsi_eh_wakeup(shost); 92 } 93 94 spin_unlock_irqrestore(shost->host_lock, flags); 95 } 96 EXPORT_SYMBOL_GPL(scsi_schedule_eh); 97 98 static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) 99 { 100 if (!shost->last_reset || shost->eh_deadline == -1) 101 return 0; 102 103 /* 104 * 32bit accesses are guaranteed to be atomic 105 * (on all supported architectures), so instead 106 * of using a spinlock we can as well double check 107 * if eh_deadline has been set to 'off' during the 108 * time_before call. 109 */ 110 if (time_before(jiffies, shost->last_reset + shost->eh_deadline) && 111 shost->eh_deadline > -1) 112 return 0; 113 114 return 1; 115 } 116 117 /** 118 * scmd_eh_abort_handler - Handle command aborts 119 * @work: command to be aborted. 120 */ 121 void 122 scmd_eh_abort_handler(struct work_struct *work) 123 { 124 struct scsi_cmnd *scmd = 125 container_of(work, struct scsi_cmnd, abort_work.work); 126 struct scsi_device *sdev = scmd->device; 127 int rtn; 128 129 if (scsi_host_eh_past_deadline(sdev->host)) { 130 SCSI_LOG_ERROR_RECOVERY(3, 131 scmd_printk(KERN_INFO, scmd, 132 "eh timeout, not aborting\n")); 133 } else { 134 SCSI_LOG_ERROR_RECOVERY(3, 135 scmd_printk(KERN_INFO, scmd, 136 "aborting command\n")); 137 rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd); 138 if (rtn == SUCCESS) { 139 set_host_byte(scmd, DID_TIME_OUT); 140 if (scsi_host_eh_past_deadline(sdev->host)) { 141 SCSI_LOG_ERROR_RECOVERY(3, 142 scmd_printk(KERN_INFO, scmd, 143 "eh timeout, not retrying " 144 "aborted command\n")); 145 } else if (!scsi_noretry_cmd(scmd) && 146 (++scmd->retries <= scmd->allowed)) { 147 SCSI_LOG_ERROR_RECOVERY(3, 148 scmd_printk(KERN_WARNING, scmd, 149 "retry aborted command\n")); 150 scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); 151 return; 152 } else { 153 SCSI_LOG_ERROR_RECOVERY(3, 154 scmd_printk(KERN_WARNING, scmd, 155 "finish aborted command\n")); 156 scsi_finish_command(scmd); 157 return; 158 } 159 } else { 160 SCSI_LOG_ERROR_RECOVERY(3, 161 scmd_printk(KERN_INFO, scmd, 162 "cmd abort %s\n", 163 (rtn == FAST_IO_FAIL) ? 164 "not send" : "failed")); 165 } 166 } 167 168 scsi_eh_scmd_add(scmd); 169 } 170 171 /** 172 * scsi_abort_command - schedule a command abort 173 * @scmd: scmd to abort. 174 * 175 * We only need to abort commands after a command timeout 176 */ 177 static int 178 scsi_abort_command(struct scsi_cmnd *scmd) 179 { 180 struct scsi_device *sdev = scmd->device; 181 struct Scsi_Host *shost = sdev->host; 182 unsigned long flags; 183 184 if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { 185 /* 186 * Retry after abort failed, escalate to next level. 187 */ 188 SCSI_LOG_ERROR_RECOVERY(3, 189 scmd_printk(KERN_INFO, scmd, 190 "previous abort failed\n")); 191 BUG_ON(delayed_work_pending(&scmd->abort_work)); 192 return FAILED; 193 } 194 195 spin_lock_irqsave(shost->host_lock, flags); 196 if (shost->eh_deadline != -1 && !shost->last_reset) 197 shost->last_reset = jiffies; 198 spin_unlock_irqrestore(shost->host_lock, flags); 199 200 scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED; 201 SCSI_LOG_ERROR_RECOVERY(3, 202 scmd_printk(KERN_INFO, scmd, "abort scheduled\n")); 203 queue_delayed_work(shost->tmf_work_q, &scmd->abort_work, HZ / 100); 204 return SUCCESS; 205 } 206 207 /** 208 * scsi_eh_reset - call into ->eh_action to reset internal counters 209 * @scmd: scmd to run eh on. 210 * 211 * The scsi driver might be carrying internal state about the 212 * devices, so we need to call into the driver to reset the 213 * internal state once the error handler is started. 214 */ 215 static void scsi_eh_reset(struct scsi_cmnd *scmd) 216 { 217 if (!blk_rq_is_passthrough(scmd->request)) { 218 struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); 219 if (sdrv->eh_reset) 220 sdrv->eh_reset(scmd); 221 } 222 } 223 224 static void scsi_eh_inc_host_failed(struct rcu_head *head) 225 { 226 struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu); 227 struct Scsi_Host *shost = scmd->device->host; 228 unsigned long flags; 229 230 spin_lock_irqsave(shost->host_lock, flags); 231 shost->host_failed++; 232 scsi_eh_wakeup(shost); 233 spin_unlock_irqrestore(shost->host_lock, flags); 234 } 235 236 /** 237 * scsi_eh_scmd_add - add scsi cmd to error handling. 238 * @scmd: scmd to run eh on. 239 */ 240 void scsi_eh_scmd_add(struct scsi_cmnd *scmd) 241 { 242 struct Scsi_Host *shost = scmd->device->host; 243 unsigned long flags; 244 int ret; 245 246 WARN_ON_ONCE(!shost->ehandler); 247 248 spin_lock_irqsave(shost->host_lock, flags); 249 if (scsi_host_set_state(shost, SHOST_RECOVERY)) { 250 ret = scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY); 251 WARN_ON_ONCE(ret); 252 } 253 if (shost->eh_deadline != -1 && !shost->last_reset) 254 shost->last_reset = jiffies; 255 256 scsi_eh_reset(scmd); 257 list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); 258 spin_unlock_irqrestore(shost->host_lock, flags); 259 /* 260 * Ensure that all tasks observe the host state change before the 261 * host_failed change. 262 */ 263 call_rcu(&scmd->rcu, scsi_eh_inc_host_failed); 264 } 265 266 /** 267 * scsi_times_out - Timeout function for normal scsi commands. 268 * @req: request that is timing out. 269 * 270 * Notes: 271 * We do not need to lock this. There is the potential for a race 272 * only in that the normal completion handling might run, but if the 273 * normal completion function determines that the timer has already 274 * fired, then it mustn't do anything. 275 */ 276 enum blk_eh_timer_return scsi_times_out(struct request *req) 277 { 278 struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); 279 enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED; 280 struct Scsi_Host *host = scmd->device->host; 281 282 trace_scsi_dispatch_cmd_timeout(scmd); 283 scsi_log_completion(scmd, TIMEOUT_ERROR); 284 285 if (host->eh_deadline != -1 && !host->last_reset) 286 host->last_reset = jiffies; 287 288 if (host->hostt->eh_timed_out) 289 rtn = host->hostt->eh_timed_out(scmd); 290 291 if (rtn == BLK_EH_NOT_HANDLED) { 292 if (scsi_abort_command(scmd) != SUCCESS) { 293 set_host_byte(scmd, DID_TIME_OUT); 294 scsi_eh_scmd_add(scmd); 295 } 296 } 297 298 return rtn; 299 } 300 301 /** 302 * scsi_block_when_processing_errors - Prevent cmds from being queued. 303 * @sdev: Device on which we are performing recovery. 304 * 305 * Description: 306 * We block until the host is out of error recovery, and then check to 307 * see whether the host or the device is offline. 308 * 309 * Return value: 310 * 0 when dev was taken offline by error recovery. 1 OK to proceed. 311 */ 312 int scsi_block_when_processing_errors(struct scsi_device *sdev) 313 { 314 int online; 315 316 wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host)); 317 318 online = scsi_device_online(sdev); 319 320 SCSI_LOG_ERROR_RECOVERY(5, sdev_printk(KERN_INFO, sdev, 321 "%s: rtn: %d\n", __func__, online)); 322 323 return online; 324 } 325 EXPORT_SYMBOL(scsi_block_when_processing_errors); 326 327 #ifdef CONFIG_SCSI_LOGGING 328 /** 329 * scsi_eh_prt_fail_stats - Log info on failures. 330 * @shost: scsi host being recovered. 331 * @work_q: Queue of scsi cmds to process. 332 */ 333 static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost, 334 struct list_head *work_q) 335 { 336 struct scsi_cmnd *scmd; 337 struct scsi_device *sdev; 338 int total_failures = 0; 339 int cmd_failed = 0; 340 int cmd_cancel = 0; 341 int devices_failed = 0; 342 343 shost_for_each_device(sdev, shost) { 344 list_for_each_entry(scmd, work_q, eh_entry) { 345 if (scmd->device == sdev) { 346 ++total_failures; 347 if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) 348 ++cmd_cancel; 349 else 350 ++cmd_failed; 351 } 352 } 353 354 if (cmd_cancel || cmd_failed) { 355 SCSI_LOG_ERROR_RECOVERY(3, 356 shost_printk(KERN_INFO, shost, 357 "%s: cmds failed: %d, cancel: %d\n", 358 __func__, cmd_failed, 359 cmd_cancel)); 360 cmd_cancel = 0; 361 cmd_failed = 0; 362 ++devices_failed; 363 } 364 } 365 366 SCSI_LOG_ERROR_RECOVERY(2, shost_printk(KERN_INFO, shost, 367 "Total of %d commands on %d" 368 " devices require eh work\n", 369 total_failures, devices_failed)); 370 } 371 #endif 372 373 /** 374 * scsi_report_lun_change - Set flag on all *other* devices on the same target 375 * to indicate that a UNIT ATTENTION is expected. 376 * @sdev: Device reporting the UNIT ATTENTION 377 */ 378 static void scsi_report_lun_change(struct scsi_device *sdev) 379 { 380 sdev->sdev_target->expecting_lun_change = 1; 381 } 382 383 /** 384 * scsi_report_sense - Examine scsi sense information and log messages for 385 * certain conditions, also issue uevents for some of them. 386 * @sdev: Device reporting the sense code 387 * @sshdr: sshdr to be examined 388 */ 389 static void scsi_report_sense(struct scsi_device *sdev, 390 struct scsi_sense_hdr *sshdr) 391 { 392 enum scsi_device_event evt_type = SDEV_EVT_MAXBITS; /* i.e. none */ 393 394 if (sshdr->sense_key == UNIT_ATTENTION) { 395 if (sshdr->asc == 0x3f && sshdr->ascq == 0x03) { 396 evt_type = SDEV_EVT_INQUIRY_CHANGE_REPORTED; 397 sdev_printk(KERN_WARNING, sdev, 398 "Inquiry data has changed"); 399 } else if (sshdr->asc == 0x3f && sshdr->ascq == 0x0e) { 400 evt_type = SDEV_EVT_LUN_CHANGE_REPORTED; 401 scsi_report_lun_change(sdev); 402 sdev_printk(KERN_WARNING, sdev, 403 "Warning! Received an indication that the " 404 "LUN assignments on this target have " 405 "changed. The Linux SCSI layer does not " 406 "automatically remap LUN assignments.\n"); 407 } else if (sshdr->asc == 0x3f) 408 sdev_printk(KERN_WARNING, sdev, 409 "Warning! Received an indication that the " 410 "operating parameters on this target have " 411 "changed. The Linux SCSI layer does not " 412 "automatically adjust these parameters.\n"); 413 414 if (sshdr->asc == 0x38 && sshdr->ascq == 0x07) { 415 evt_type = SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED; 416 sdev_printk(KERN_WARNING, sdev, 417 "Warning! Received an indication that the " 418 "LUN reached a thin provisioning soft " 419 "threshold.\n"); 420 } 421 422 if (sshdr->asc == 0x29) { 423 evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED; 424 sdev_printk(KERN_WARNING, sdev, 425 "Power-on or device reset occurred\n"); 426 } 427 428 if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) { 429 evt_type = SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED; 430 sdev_printk(KERN_WARNING, sdev, 431 "Mode parameters changed"); 432 } else if (sshdr->asc == 0x2a && sshdr->ascq == 0x06) { 433 evt_type = SDEV_EVT_ALUA_STATE_CHANGE_REPORTED; 434 sdev_printk(KERN_WARNING, sdev, 435 "Asymmetric access state changed"); 436 } else if (sshdr->asc == 0x2a && sshdr->ascq == 0x09) { 437 evt_type = SDEV_EVT_CAPACITY_CHANGE_REPORTED; 438 sdev_printk(KERN_WARNING, sdev, 439 "Capacity data has changed"); 440 } else if (sshdr->asc == 0x2a) 441 sdev_printk(KERN_WARNING, sdev, 442 "Parameters changed"); 443 } 444 445 if (evt_type != SDEV_EVT_MAXBITS) { 446 set_bit(evt_type, sdev->pending_events); 447 schedule_work(&sdev->event_work); 448 } 449 } 450 451 /** 452 * scsi_check_sense - Examine scsi cmd sense 453 * @scmd: Cmd to have sense checked. 454 * 455 * Return value: 456 * SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE 457 * 458 * Notes: 459 * When a deferred error is detected the current command has 460 * not been executed and needs retrying. 461 */ 462 int scsi_check_sense(struct scsi_cmnd *scmd) 463 { 464 struct scsi_device *sdev = scmd->device; 465 struct scsi_sense_hdr sshdr; 466 467 if (! scsi_command_normalize_sense(scmd, &sshdr)) 468 return FAILED; /* no valid sense data */ 469 470 scsi_report_sense(sdev, &sshdr); 471 472 if (scsi_sense_is_deferred(&sshdr)) 473 return NEEDS_RETRY; 474 475 if (sdev->handler && sdev->handler->check_sense) { 476 int rc; 477 478 rc = sdev->handler->check_sense(sdev, &sshdr); 479 if (rc != SCSI_RETURN_NOT_HANDLED) 480 return rc; 481 /* handler does not care. Drop down to default handling */ 482 } 483 484 if (scmd->cmnd[0] == TEST_UNIT_READY && scmd->scsi_done != scsi_eh_done) 485 /* 486 * nasty: for mid-layer issued TURs, we need to return the 487 * actual sense data without any recovery attempt. For eh 488 * issued ones, we need to try to recover and interpret 489 */ 490 return SUCCESS; 491 492 /* 493 * Previous logic looked for FILEMARK, EOM or ILI which are 494 * mainly associated with tapes and returned SUCCESS. 495 */ 496 if (sshdr.response_code == 0x70) { 497 /* fixed format */ 498 if (scmd->sense_buffer[2] & 0xe0) 499 return SUCCESS; 500 } else { 501 /* 502 * descriptor format: look for "stream commands sense data 503 * descriptor" (see SSC-3). Assume single sense data 504 * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG. 505 */ 506 if ((sshdr.additional_length > 3) && 507 (scmd->sense_buffer[8] == 0x4) && 508 (scmd->sense_buffer[11] & 0xe0)) 509 return SUCCESS; 510 } 511 512 switch (sshdr.sense_key) { 513 case NO_SENSE: 514 return SUCCESS; 515 case RECOVERED_ERROR: 516 return /* soft_error */ SUCCESS; 517 518 case ABORTED_COMMAND: 519 if (sshdr.asc == 0x10) /* DIF */ 520 return SUCCESS; 521 522 return NEEDS_RETRY; 523 case NOT_READY: 524 case UNIT_ATTENTION: 525 /* 526 * if we are expecting a cc/ua because of a bus reset that we 527 * performed, treat this just as a retry. otherwise this is 528 * information that we should pass up to the upper-level driver 529 * so that we can deal with it there. 530 */ 531 if (scmd->device->expecting_cc_ua) { 532 /* 533 * Because some device does not queue unit 534 * attentions correctly, we carefully check 535 * additional sense code and qualifier so as 536 * not to squash media change unit attention. 537 */ 538 if (sshdr.asc != 0x28 || sshdr.ascq != 0x00) { 539 scmd->device->expecting_cc_ua = 0; 540 return NEEDS_RETRY; 541 } 542 } 543 /* 544 * we might also expect a cc/ua if another LUN on the target 545 * reported a UA with an ASC/ASCQ of 3F 0E - 546 * REPORTED LUNS DATA HAS CHANGED. 547 */ 548 if (scmd->device->sdev_target->expecting_lun_change && 549 sshdr.asc == 0x3f && sshdr.ascq == 0x0e) 550 return NEEDS_RETRY; 551 /* 552 * if the device is in the process of becoming ready, we 553 * should retry. 554 */ 555 if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) 556 return NEEDS_RETRY; 557 /* 558 * if the device is not started, we need to wake 559 * the error handler to start the motor 560 */ 561 if (scmd->device->allow_restart && 562 (sshdr.asc == 0x04) && (sshdr.ascq == 0x02)) 563 return FAILED; 564 /* 565 * Pass the UA upwards for a determination in the completion 566 * functions. 567 */ 568 return SUCCESS; 569 570 /* these are not supported */ 571 case DATA_PROTECT: 572 if (sshdr.asc == 0x27 && sshdr.ascq == 0x07) { 573 /* Thin provisioning hard threshold reached */ 574 set_host_byte(scmd, DID_ALLOC_FAILURE); 575 return SUCCESS; 576 } 577 /* FALLTHROUGH */ 578 case COPY_ABORTED: 579 case VOLUME_OVERFLOW: 580 case MISCOMPARE: 581 case BLANK_CHECK: 582 set_host_byte(scmd, DID_TARGET_FAILURE); 583 return SUCCESS; 584 585 case MEDIUM_ERROR: 586 if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */ 587 sshdr.asc == 0x13 || /* AMNF DATA FIELD */ 588 sshdr.asc == 0x14) { /* RECORD NOT FOUND */ 589 set_host_byte(scmd, DID_MEDIUM_ERROR); 590 return SUCCESS; 591 } 592 return NEEDS_RETRY; 593 594 case HARDWARE_ERROR: 595 if (scmd->device->retry_hwerror) 596 return ADD_TO_MLQUEUE; 597 else 598 set_host_byte(scmd, DID_TARGET_FAILURE); 599 /* FALLTHROUGH */ 600 601 case ILLEGAL_REQUEST: 602 if (sshdr.asc == 0x20 || /* Invalid command operation code */ 603 sshdr.asc == 0x21 || /* Logical block address out of range */ 604 sshdr.asc == 0x22 || /* Invalid function */ 605 sshdr.asc == 0x24 || /* Invalid field in cdb */ 606 sshdr.asc == 0x26 || /* Parameter value invalid */ 607 sshdr.asc == 0x27) { /* Write protected */ 608 set_host_byte(scmd, DID_TARGET_FAILURE); 609 } 610 return SUCCESS; 611 612 default: 613 return SUCCESS; 614 } 615 } 616 EXPORT_SYMBOL_GPL(scsi_check_sense); 617 618 static void scsi_handle_queue_ramp_up(struct scsi_device *sdev) 619 { 620 struct scsi_host_template *sht = sdev->host->hostt; 621 struct scsi_device *tmp_sdev; 622 623 if (!sht->track_queue_depth || 624 sdev->queue_depth >= sdev->max_queue_depth) 625 return; 626 627 if (time_before(jiffies, 628 sdev->last_queue_ramp_up + sdev->queue_ramp_up_period)) 629 return; 630 631 if (time_before(jiffies, 632 sdev->last_queue_full_time + sdev->queue_ramp_up_period)) 633 return; 634 635 /* 636 * Walk all devices of a target and do 637 * ramp up on them. 638 */ 639 shost_for_each_device(tmp_sdev, sdev->host) { 640 if (tmp_sdev->channel != sdev->channel || 641 tmp_sdev->id != sdev->id || 642 tmp_sdev->queue_depth == sdev->max_queue_depth) 643 continue; 644 645 scsi_change_queue_depth(tmp_sdev, tmp_sdev->queue_depth + 1); 646 sdev->last_queue_ramp_up = jiffies; 647 } 648 } 649 650 static void scsi_handle_queue_full(struct scsi_device *sdev) 651 { 652 struct scsi_host_template *sht = sdev->host->hostt; 653 struct scsi_device *tmp_sdev; 654 655 if (!sht->track_queue_depth) 656 return; 657 658 shost_for_each_device(tmp_sdev, sdev->host) { 659 if (tmp_sdev->channel != sdev->channel || 660 tmp_sdev->id != sdev->id) 661 continue; 662 /* 663 * We do not know the number of commands that were at 664 * the device when we got the queue full so we start 665 * from the highest possible value and work our way down. 666 */ 667 scsi_track_queue_full(tmp_sdev, tmp_sdev->queue_depth - 1); 668 } 669 } 670 671 /** 672 * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD. 673 * @scmd: SCSI cmd to examine. 674 * 675 * Notes: 676 * This is *only* called when we are examining the status of commands 677 * queued during error recovery. the main difference here is that we 678 * don't allow for the possibility of retries here, and we are a lot 679 * more restrictive about what we consider acceptable. 680 */ 681 static int scsi_eh_completed_normally(struct scsi_cmnd *scmd) 682 { 683 /* 684 * first check the host byte, to see if there is anything in there 685 * that would indicate what we need to do. 686 */ 687 if (host_byte(scmd->result) == DID_RESET) { 688 /* 689 * rats. we are already in the error handler, so we now 690 * get to try and figure out what to do next. if the sense 691 * is valid, we have a pretty good idea of what to do. 692 * if not, we mark it as FAILED. 693 */ 694 return scsi_check_sense(scmd); 695 } 696 if (host_byte(scmd->result) != DID_OK) 697 return FAILED; 698 699 /* 700 * next, check the message byte. 701 */ 702 if (msg_byte(scmd->result) != COMMAND_COMPLETE) 703 return FAILED; 704 705 /* 706 * now, check the status byte to see if this indicates 707 * anything special. 708 */ 709 switch (status_byte(scmd->result)) { 710 case GOOD: 711 scsi_handle_queue_ramp_up(scmd->device); 712 /* FALLTHROUGH */ 713 case COMMAND_TERMINATED: 714 return SUCCESS; 715 case CHECK_CONDITION: 716 return scsi_check_sense(scmd); 717 case CONDITION_GOOD: 718 case INTERMEDIATE_GOOD: 719 case INTERMEDIATE_C_GOOD: 720 /* 721 * who knows? FIXME(eric) 722 */ 723 return SUCCESS; 724 case RESERVATION_CONFLICT: 725 if (scmd->cmnd[0] == TEST_UNIT_READY) 726 /* it is a success, we probed the device and 727 * found it */ 728 return SUCCESS; 729 /* otherwise, we failed to send the command */ 730 return FAILED; 731 case QUEUE_FULL: 732 scsi_handle_queue_full(scmd->device); 733 /* fall through */ 734 case BUSY: 735 return NEEDS_RETRY; 736 default: 737 return FAILED; 738 } 739 return FAILED; 740 } 741 742 /** 743 * scsi_eh_done - Completion function for error handling. 744 * @scmd: Cmd that is done. 745 */ 746 static void scsi_eh_done(struct scsi_cmnd *scmd) 747 { 748 struct completion *eh_action; 749 750 SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, 751 "%s result: %x\n", __func__, scmd->result)); 752 753 eh_action = scmd->device->host->eh_action; 754 if (eh_action) 755 complete(eh_action); 756 } 757 758 /** 759 * scsi_try_host_reset - ask host adapter to reset itself 760 * @scmd: SCSI cmd to send host reset. 761 */ 762 static int scsi_try_host_reset(struct scsi_cmnd *scmd) 763 { 764 unsigned long flags; 765 int rtn; 766 struct Scsi_Host *host = scmd->device->host; 767 struct scsi_host_template *hostt = host->hostt; 768 769 SCSI_LOG_ERROR_RECOVERY(3, 770 shost_printk(KERN_INFO, host, "Snd Host RST\n")); 771 772 if (!hostt->eh_host_reset_handler) 773 return FAILED; 774 775 rtn = hostt->eh_host_reset_handler(scmd); 776 777 if (rtn == SUCCESS) { 778 if (!hostt->skip_settle_delay) 779 ssleep(HOST_RESET_SETTLE_TIME); 780 spin_lock_irqsave(host->host_lock, flags); 781 scsi_report_bus_reset(host, scmd_channel(scmd)); 782 spin_unlock_irqrestore(host->host_lock, flags); 783 } 784 785 return rtn; 786 } 787 788 /** 789 * scsi_try_bus_reset - ask host to perform a bus reset 790 * @scmd: SCSI cmd to send bus reset. 791 */ 792 static int scsi_try_bus_reset(struct scsi_cmnd *scmd) 793 { 794 unsigned long flags; 795 int rtn; 796 struct Scsi_Host *host = scmd->device->host; 797 struct scsi_host_template *hostt = host->hostt; 798 799 SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, 800 "%s: Snd Bus RST\n", __func__)); 801 802 if (!hostt->eh_bus_reset_handler) 803 return FAILED; 804 805 rtn = hostt->eh_bus_reset_handler(scmd); 806 807 if (rtn == SUCCESS) { 808 if (!hostt->skip_settle_delay) 809 ssleep(BUS_RESET_SETTLE_TIME); 810 spin_lock_irqsave(host->host_lock, flags); 811 scsi_report_bus_reset(host, scmd_channel(scmd)); 812 spin_unlock_irqrestore(host->host_lock, flags); 813 } 814 815 return rtn; 816 } 817 818 static void __scsi_report_device_reset(struct scsi_device *sdev, void *data) 819 { 820 sdev->was_reset = 1; 821 sdev->expecting_cc_ua = 1; 822 } 823 824 /** 825 * scsi_try_target_reset - Ask host to perform a target reset 826 * @scmd: SCSI cmd used to send a target reset 827 * 828 * Notes: 829 * There is no timeout for this operation. if this operation is 830 * unreliable for a given host, then the host itself needs to put a 831 * timer on it, and set the host back to a consistent state prior to 832 * returning. 833 */ 834 static int scsi_try_target_reset(struct scsi_cmnd *scmd) 835 { 836 unsigned long flags; 837 int rtn; 838 struct Scsi_Host *host = scmd->device->host; 839 struct scsi_host_template *hostt = host->hostt; 840 841 if (!hostt->eh_target_reset_handler) 842 return FAILED; 843 844 rtn = hostt->eh_target_reset_handler(scmd); 845 if (rtn == SUCCESS) { 846 spin_lock_irqsave(host->host_lock, flags); 847 __starget_for_each_device(scsi_target(scmd->device), NULL, 848 __scsi_report_device_reset); 849 spin_unlock_irqrestore(host->host_lock, flags); 850 } 851 852 return rtn; 853 } 854 855 /** 856 * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev 857 * @scmd: SCSI cmd used to send BDR 858 * 859 * Notes: 860 * There is no timeout for this operation. if this operation is 861 * unreliable for a given host, then the host itself needs to put a 862 * timer on it, and set the host back to a consistent state prior to 863 * returning. 864 */ 865 static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd) 866 { 867 int rtn; 868 struct scsi_host_template *hostt = scmd->device->host->hostt; 869 870 if (!hostt->eh_device_reset_handler) 871 return FAILED; 872 873 rtn = hostt->eh_device_reset_handler(scmd); 874 if (rtn == SUCCESS) 875 __scsi_report_device_reset(scmd->device, NULL); 876 return rtn; 877 } 878 879 /** 880 * scsi_try_to_abort_cmd - Ask host to abort a SCSI command 881 * @hostt: SCSI driver host template 882 * @scmd: SCSI cmd used to send a target reset 883 * 884 * Return value: 885 * SUCCESS, FAILED, or FAST_IO_FAIL 886 * 887 * Notes: 888 * SUCCESS does not necessarily indicate that the command 889 * has been aborted; it only indicates that the LLDDs 890 * has cleared all references to that command. 891 * LLDDs should return FAILED only if an abort was required 892 * but could not be executed. LLDDs should return FAST_IO_FAIL 893 * if the device is temporarily unavailable (eg due to a 894 * link down on FibreChannel) 895 */ 896 static int scsi_try_to_abort_cmd(struct scsi_host_template *hostt, 897 struct scsi_cmnd *scmd) 898 { 899 if (!hostt->eh_abort_handler) 900 return FAILED; 901 902 return hostt->eh_abort_handler(scmd); 903 } 904 905 static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd) 906 { 907 if (scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd) != SUCCESS) 908 if (scsi_try_bus_device_reset(scmd) != SUCCESS) 909 if (scsi_try_target_reset(scmd) != SUCCESS) 910 if (scsi_try_bus_reset(scmd) != SUCCESS) 911 scsi_try_host_reset(scmd); 912 } 913 914 /** 915 * scsi_eh_prep_cmnd - Save a scsi command info as part of error recovery 916 * @scmd: SCSI command structure to hijack 917 * @ses: structure to save restore information 918 * @cmnd: CDB to send. Can be NULL if no new cmnd is needed 919 * @cmnd_size: size in bytes of @cmnd (must be <= BLK_MAX_CDB) 920 * @sense_bytes: size of sense data to copy. or 0 (if != 0 @cmnd is ignored) 921 * 922 * This function is used to save a scsi command information before re-execution 923 * as part of the error recovery process. If @sense_bytes is 0 the command 924 * sent must be one that does not transfer any data. If @sense_bytes != 0 925 * @cmnd is ignored and this functions sets up a REQUEST_SENSE command 926 * and cmnd buffers to read @sense_bytes into @scmd->sense_buffer. 927 */ 928 void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, 929 unsigned char *cmnd, int cmnd_size, unsigned sense_bytes) 930 { 931 struct scsi_device *sdev = scmd->device; 932 933 /* 934 * We need saved copies of a number of fields - this is because 935 * error handling may need to overwrite these with different values 936 * to run different commands, and once error handling is complete, 937 * we will need to restore these values prior to running the actual 938 * command. 939 */ 940 ses->cmd_len = scmd->cmd_len; 941 ses->cmnd = scmd->cmnd; 942 ses->data_direction = scmd->sc_data_direction; 943 ses->sdb = scmd->sdb; 944 ses->next_rq = scmd->request->next_rq; 945 ses->result = scmd->result; 946 ses->underflow = scmd->underflow; 947 ses->prot_op = scmd->prot_op; 948 ses->eh_eflags = scmd->eh_eflags; 949 950 scmd->prot_op = SCSI_PROT_NORMAL; 951 scmd->eh_eflags = 0; 952 scmd->cmnd = ses->eh_cmnd; 953 memset(scmd->cmnd, 0, BLK_MAX_CDB); 954 memset(&scmd->sdb, 0, sizeof(scmd->sdb)); 955 scmd->request->next_rq = NULL; 956 scmd->result = 0; 957 958 if (sense_bytes) { 959 scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, 960 sense_bytes); 961 sg_init_one(&ses->sense_sgl, scmd->sense_buffer, 962 scmd->sdb.length); 963 scmd->sdb.table.sgl = &ses->sense_sgl; 964 scmd->sc_data_direction = DMA_FROM_DEVICE; 965 scmd->sdb.table.nents = scmd->sdb.table.orig_nents = 1; 966 scmd->cmnd[0] = REQUEST_SENSE; 967 scmd->cmnd[4] = scmd->sdb.length; 968 scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); 969 } else { 970 scmd->sc_data_direction = DMA_NONE; 971 if (cmnd) { 972 BUG_ON(cmnd_size > BLK_MAX_CDB); 973 memcpy(scmd->cmnd, cmnd, cmnd_size); 974 scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); 975 } 976 } 977 978 scmd->underflow = 0; 979 980 if (sdev->scsi_level <= SCSI_2 && sdev->scsi_level != SCSI_UNKNOWN) 981 scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) | 982 (sdev->lun << 5 & 0xe0); 983 984 /* 985 * Zero the sense buffer. The scsi spec mandates that any 986 * untransferred sense data should be interpreted as being zero. 987 */ 988 memset(scmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); 989 } 990 EXPORT_SYMBOL(scsi_eh_prep_cmnd); 991 992 /** 993 * scsi_eh_restore_cmnd - Restore a scsi command info as part of error recovery 994 * @scmd: SCSI command structure to restore 995 * @ses: saved information from a coresponding call to scsi_eh_prep_cmnd 996 * 997 * Undo any damage done by above scsi_eh_prep_cmnd(). 998 */ 999 void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses) 1000 { 1001 /* 1002 * Restore original data 1003 */ 1004 scmd->cmd_len = ses->cmd_len; 1005 scmd->cmnd = ses->cmnd; 1006 scmd->sc_data_direction = ses->data_direction; 1007 scmd->sdb = ses->sdb; 1008 scmd->request->next_rq = ses->next_rq; 1009 scmd->result = ses->result; 1010 scmd->underflow = ses->underflow; 1011 scmd->prot_op = ses->prot_op; 1012 scmd->eh_eflags = ses->eh_eflags; 1013 } 1014 EXPORT_SYMBOL(scsi_eh_restore_cmnd); 1015 1016 /** 1017 * scsi_send_eh_cmnd - submit a scsi command as part of error recovery 1018 * @scmd: SCSI command structure to hijack 1019 * @cmnd: CDB to send 1020 * @cmnd_size: size in bytes of @cmnd 1021 * @timeout: timeout for this request 1022 * @sense_bytes: size of sense data to copy or 0 1023 * 1024 * This function is used to send a scsi command down to a target device 1025 * as part of the error recovery process. See also scsi_eh_prep_cmnd() above. 1026 * 1027 * Return value: 1028 * SUCCESS or FAILED or NEEDS_RETRY 1029 */ 1030 static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd, 1031 int cmnd_size, int timeout, unsigned sense_bytes) 1032 { 1033 struct scsi_device *sdev = scmd->device; 1034 struct Scsi_Host *shost = sdev->host; 1035 DECLARE_COMPLETION_ONSTACK(done); 1036 unsigned long timeleft = timeout; 1037 struct scsi_eh_save ses; 1038 const unsigned long stall_for = msecs_to_jiffies(100); 1039 int rtn; 1040 1041 retry: 1042 scsi_eh_prep_cmnd(scmd, &ses, cmnd, cmnd_size, sense_bytes); 1043 shost->eh_action = &done; 1044 1045 scsi_log_send(scmd); 1046 scmd->scsi_done = scsi_eh_done; 1047 rtn = shost->hostt->queuecommand(shost, scmd); 1048 if (rtn) { 1049 if (timeleft > stall_for) { 1050 scsi_eh_restore_cmnd(scmd, &ses); 1051 timeleft -= stall_for; 1052 msleep(jiffies_to_msecs(stall_for)); 1053 goto retry; 1054 } 1055 /* signal not to enter either branch of the if () below */ 1056 timeleft = 0; 1057 rtn = FAILED; 1058 } else { 1059 timeleft = wait_for_completion_timeout(&done, timeout); 1060 rtn = SUCCESS; 1061 } 1062 1063 shost->eh_action = NULL; 1064 1065 scsi_log_completion(scmd, rtn); 1066 1067 SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, 1068 "%s timeleft: %ld\n", 1069 __func__, timeleft)); 1070 1071 /* 1072 * If there is time left scsi_eh_done got called, and we will examine 1073 * the actual status codes to see whether the command actually did 1074 * complete normally, else if we have a zero return and no time left, 1075 * the command must still be pending, so abort it and return FAILED. 1076 * If we never actually managed to issue the command, because 1077 * ->queuecommand() kept returning non zero, use the rtn = FAILED 1078 * value above (so don't execute either branch of the if) 1079 */ 1080 if (timeleft) { 1081 rtn = scsi_eh_completed_normally(scmd); 1082 SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, 1083 "%s: scsi_eh_completed_normally %x\n", __func__, rtn)); 1084 1085 switch (rtn) { 1086 case SUCCESS: 1087 case NEEDS_RETRY: 1088 case FAILED: 1089 break; 1090 case ADD_TO_MLQUEUE: 1091 rtn = NEEDS_RETRY; 1092 break; 1093 default: 1094 rtn = FAILED; 1095 break; 1096 } 1097 } else if (rtn != FAILED) { 1098 scsi_abort_eh_cmnd(scmd); 1099 rtn = FAILED; 1100 } 1101 1102 scsi_eh_restore_cmnd(scmd, &ses); 1103 1104 return rtn; 1105 } 1106 1107 /** 1108 * scsi_request_sense - Request sense data from a particular target. 1109 * @scmd: SCSI cmd for request sense. 1110 * 1111 * Notes: 1112 * Some hosts automatically obtain this information, others require 1113 * that we obtain it on our own. This function will *not* return until 1114 * the command either times out, or it completes. 1115 */ 1116 static int scsi_request_sense(struct scsi_cmnd *scmd) 1117 { 1118 return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0); 1119 } 1120 1121 static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn) 1122 { 1123 if (!blk_rq_is_passthrough(scmd->request)) { 1124 struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); 1125 if (sdrv->eh_action) 1126 rtn = sdrv->eh_action(scmd, rtn); 1127 } 1128 return rtn; 1129 } 1130 1131 /** 1132 * scsi_eh_finish_cmd - Handle a cmd that eh is finished with. 1133 * @scmd: Original SCSI cmd that eh has finished. 1134 * @done_q: Queue for processed commands. 1135 * 1136 * Notes: 1137 * We don't want to use the normal command completion while we are are 1138 * still handling errors - it may cause other commands to be queued, 1139 * and that would disturb what we are doing. Thus we really want to 1140 * keep a list of pending commands for final completion, and once we 1141 * are ready to leave error handling we handle completion for real. 1142 */ 1143 void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q) 1144 { 1145 list_move_tail(&scmd->eh_entry, done_q); 1146 } 1147 EXPORT_SYMBOL(scsi_eh_finish_cmd); 1148 1149 /** 1150 * scsi_eh_get_sense - Get device sense data. 1151 * @work_q: Queue of commands to process. 1152 * @done_q: Queue of processed commands. 1153 * 1154 * Description: 1155 * See if we need to request sense information. if so, then get it 1156 * now, so we have a better idea of what to do. 1157 * 1158 * Notes: 1159 * This has the unfortunate side effect that if a shost adapter does 1160 * not automatically request sense information, we end up shutting 1161 * it down before we request it. 1162 * 1163 * All drivers should request sense information internally these days, 1164 * so for now all I have to say is tough noogies if you end up in here. 1165 * 1166 * XXX: Long term this code should go away, but that needs an audit of 1167 * all LLDDs first. 1168 */ 1169 int scsi_eh_get_sense(struct list_head *work_q, 1170 struct list_head *done_q) 1171 { 1172 struct scsi_cmnd *scmd, *next; 1173 struct Scsi_Host *shost; 1174 int rtn; 1175 1176 /* 1177 * If SCSI_EH_ABORT_SCHEDULED has been set, it is timeout IO, 1178 * should not get sense. 1179 */ 1180 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 1181 if ((scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) || 1182 SCSI_SENSE_VALID(scmd)) 1183 continue; 1184 1185 shost = scmd->device->host; 1186 if (scsi_host_eh_past_deadline(shost)) { 1187 SCSI_LOG_ERROR_RECOVERY(3, 1188 scmd_printk(KERN_INFO, scmd, 1189 "%s: skip request sense, past eh deadline\n", 1190 current->comm)); 1191 break; 1192 } 1193 if (status_byte(scmd->result) != CHECK_CONDITION) 1194 /* 1195 * don't request sense if there's no check condition 1196 * status because the error we're processing isn't one 1197 * that has a sense code (and some devices get 1198 * confused by sense requests out of the blue) 1199 */ 1200 continue; 1201 1202 SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd, 1203 "%s: requesting sense\n", 1204 current->comm)); 1205 rtn = scsi_request_sense(scmd); 1206 if (rtn != SUCCESS) 1207 continue; 1208 1209 SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, 1210 "sense requested, result %x\n", scmd->result)); 1211 SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense(scmd)); 1212 1213 rtn = scsi_decide_disposition(scmd); 1214 1215 /* 1216 * if the result was normal, then just pass it along to the 1217 * upper level. 1218 */ 1219 if (rtn == SUCCESS) 1220 /* we don't want this command reissued, just 1221 * finished with the sense data, so set 1222 * retries to the max allowed to ensure it 1223 * won't get reissued */ 1224 scmd->retries = scmd->allowed; 1225 else if (rtn != NEEDS_RETRY) 1226 continue; 1227 1228 scsi_eh_finish_cmd(scmd, done_q); 1229 } 1230 1231 return list_empty(work_q); 1232 } 1233 EXPORT_SYMBOL_GPL(scsi_eh_get_sense); 1234 1235 /** 1236 * scsi_eh_tur - Send TUR to device. 1237 * @scmd: &scsi_cmnd to send TUR 1238 * 1239 * Return value: 1240 * 0 - Device is ready. 1 - Device NOT ready. 1241 */ 1242 static int scsi_eh_tur(struct scsi_cmnd *scmd) 1243 { 1244 static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0}; 1245 int retry_cnt = 1, rtn; 1246 1247 retry_tur: 1248 rtn = scsi_send_eh_cmnd(scmd, tur_command, 6, 1249 scmd->device->eh_timeout, 0); 1250 1251 SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, 1252 "%s return: %x\n", __func__, rtn)); 1253 1254 switch (rtn) { 1255 case NEEDS_RETRY: 1256 if (retry_cnt--) 1257 goto retry_tur; 1258 /*FALLTHRU*/ 1259 case SUCCESS: 1260 return 0; 1261 default: 1262 return 1; 1263 } 1264 } 1265 1266 /** 1267 * scsi_eh_test_devices - check if devices are responding from error recovery. 1268 * @cmd_list: scsi commands in error recovery. 1269 * @work_q: queue for commands which still need more error recovery 1270 * @done_q: queue for commands which are finished 1271 * @try_stu: boolean on if a STU command should be tried in addition to TUR. 1272 * 1273 * Decription: 1274 * Tests if devices are in a working state. Commands to devices now in 1275 * a working state are sent to the done_q while commands to devices which 1276 * are still failing to respond are returned to the work_q for more 1277 * processing. 1278 **/ 1279 static int scsi_eh_test_devices(struct list_head *cmd_list, 1280 struct list_head *work_q, 1281 struct list_head *done_q, int try_stu) 1282 { 1283 struct scsi_cmnd *scmd, *next; 1284 struct scsi_device *sdev; 1285 int finish_cmds; 1286 1287 while (!list_empty(cmd_list)) { 1288 scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry); 1289 sdev = scmd->device; 1290 1291 if (!try_stu) { 1292 if (scsi_host_eh_past_deadline(sdev->host)) { 1293 /* Push items back onto work_q */ 1294 list_splice_init(cmd_list, work_q); 1295 SCSI_LOG_ERROR_RECOVERY(3, 1296 sdev_printk(KERN_INFO, sdev, 1297 "%s: skip test device, past eh deadline", 1298 current->comm)); 1299 break; 1300 } 1301 } 1302 1303 finish_cmds = !scsi_device_online(scmd->device) || 1304 (try_stu && !scsi_eh_try_stu(scmd) && 1305 !scsi_eh_tur(scmd)) || 1306 !scsi_eh_tur(scmd); 1307 1308 list_for_each_entry_safe(scmd, next, cmd_list, eh_entry) 1309 if (scmd->device == sdev) { 1310 if (finish_cmds && 1311 (try_stu || 1312 scsi_eh_action(scmd, SUCCESS) == SUCCESS)) 1313 scsi_eh_finish_cmd(scmd, done_q); 1314 else 1315 list_move_tail(&scmd->eh_entry, work_q); 1316 } 1317 } 1318 return list_empty(work_q); 1319 } 1320 1321 /** 1322 * scsi_eh_try_stu - Send START_UNIT to device. 1323 * @scmd: &scsi_cmnd to send START_UNIT 1324 * 1325 * Return value: 1326 * 0 - Device is ready. 1 - Device NOT ready. 1327 */ 1328 static int scsi_eh_try_stu(struct scsi_cmnd *scmd) 1329 { 1330 static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0}; 1331 1332 if (scmd->device->allow_restart) { 1333 int i, rtn = NEEDS_RETRY; 1334 1335 for (i = 0; rtn == NEEDS_RETRY && i < 2; i++) 1336 rtn = scsi_send_eh_cmnd(scmd, stu_command, 6, scmd->device->request_queue->rq_timeout, 0); 1337 1338 if (rtn == SUCCESS) 1339 return 0; 1340 } 1341 1342 return 1; 1343 } 1344 1345 /** 1346 * scsi_eh_stu - send START_UNIT if needed 1347 * @shost: &scsi host being recovered. 1348 * @work_q: &list_head for pending commands. 1349 * @done_q: &list_head for processed commands. 1350 * 1351 * Notes: 1352 * If commands are failing due to not ready, initializing command required, 1353 * try revalidating the device, which will end up sending a start unit. 1354 */ 1355 static int scsi_eh_stu(struct Scsi_Host *shost, 1356 struct list_head *work_q, 1357 struct list_head *done_q) 1358 { 1359 struct scsi_cmnd *scmd, *stu_scmd, *next; 1360 struct scsi_device *sdev; 1361 1362 shost_for_each_device(sdev, shost) { 1363 if (scsi_host_eh_past_deadline(shost)) { 1364 SCSI_LOG_ERROR_RECOVERY(3, 1365 sdev_printk(KERN_INFO, sdev, 1366 "%s: skip START_UNIT, past eh deadline\n", 1367 current->comm)); 1368 break; 1369 } 1370 stu_scmd = NULL; 1371 list_for_each_entry(scmd, work_q, eh_entry) 1372 if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) && 1373 scsi_check_sense(scmd) == FAILED ) { 1374 stu_scmd = scmd; 1375 break; 1376 } 1377 1378 if (!stu_scmd) 1379 continue; 1380 1381 SCSI_LOG_ERROR_RECOVERY(3, 1382 sdev_printk(KERN_INFO, sdev, 1383 "%s: Sending START_UNIT\n", 1384 current->comm)); 1385 1386 if (!scsi_eh_try_stu(stu_scmd)) { 1387 if (!scsi_device_online(sdev) || 1388 !scsi_eh_tur(stu_scmd)) { 1389 list_for_each_entry_safe(scmd, next, 1390 work_q, eh_entry) { 1391 if (scmd->device == sdev && 1392 scsi_eh_action(scmd, SUCCESS) == SUCCESS) 1393 scsi_eh_finish_cmd(scmd, done_q); 1394 } 1395 } 1396 } else { 1397 SCSI_LOG_ERROR_RECOVERY(3, 1398 sdev_printk(KERN_INFO, sdev, 1399 "%s: START_UNIT failed\n", 1400 current->comm)); 1401 } 1402 } 1403 1404 return list_empty(work_q); 1405 } 1406 1407 1408 /** 1409 * scsi_eh_bus_device_reset - send bdr if needed 1410 * @shost: scsi host being recovered. 1411 * @work_q: &list_head for pending commands. 1412 * @done_q: &list_head for processed commands. 1413 * 1414 * Notes: 1415 * Try a bus device reset. Still, look to see whether we have multiple 1416 * devices that are jammed or not - if we have multiple devices, it 1417 * makes no sense to try bus_device_reset - we really would need to try 1418 * a bus_reset instead. 1419 */ 1420 static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, 1421 struct list_head *work_q, 1422 struct list_head *done_q) 1423 { 1424 struct scsi_cmnd *scmd, *bdr_scmd, *next; 1425 struct scsi_device *sdev; 1426 int rtn; 1427 1428 shost_for_each_device(sdev, shost) { 1429 if (scsi_host_eh_past_deadline(shost)) { 1430 SCSI_LOG_ERROR_RECOVERY(3, 1431 sdev_printk(KERN_INFO, sdev, 1432 "%s: skip BDR, past eh deadline\n", 1433 current->comm)); 1434 break; 1435 } 1436 bdr_scmd = NULL; 1437 list_for_each_entry(scmd, work_q, eh_entry) 1438 if (scmd->device == sdev) { 1439 bdr_scmd = scmd; 1440 break; 1441 } 1442 1443 if (!bdr_scmd) 1444 continue; 1445 1446 SCSI_LOG_ERROR_RECOVERY(3, 1447 sdev_printk(KERN_INFO, sdev, 1448 "%s: Sending BDR\n", current->comm)); 1449 rtn = scsi_try_bus_device_reset(bdr_scmd); 1450 if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { 1451 if (!scsi_device_online(sdev) || 1452 rtn == FAST_IO_FAIL || 1453 !scsi_eh_tur(bdr_scmd)) { 1454 list_for_each_entry_safe(scmd, next, 1455 work_q, eh_entry) { 1456 if (scmd->device == sdev && 1457 scsi_eh_action(scmd, rtn) != FAILED) 1458 scsi_eh_finish_cmd(scmd, 1459 done_q); 1460 } 1461 } 1462 } else { 1463 SCSI_LOG_ERROR_RECOVERY(3, 1464 sdev_printk(KERN_INFO, sdev, 1465 "%s: BDR failed\n", current->comm)); 1466 } 1467 } 1468 1469 return list_empty(work_q); 1470 } 1471 1472 /** 1473 * scsi_eh_target_reset - send target reset if needed 1474 * @shost: scsi host being recovered. 1475 * @work_q: &list_head for pending commands. 1476 * @done_q: &list_head for processed commands. 1477 * 1478 * Notes: 1479 * Try a target reset. 1480 */ 1481 static int scsi_eh_target_reset(struct Scsi_Host *shost, 1482 struct list_head *work_q, 1483 struct list_head *done_q) 1484 { 1485 LIST_HEAD(tmp_list); 1486 LIST_HEAD(check_list); 1487 1488 list_splice_init(work_q, &tmp_list); 1489 1490 while (!list_empty(&tmp_list)) { 1491 struct scsi_cmnd *next, *scmd; 1492 int rtn; 1493 unsigned int id; 1494 1495 if (scsi_host_eh_past_deadline(shost)) { 1496 /* push back on work queue for further processing */ 1497 list_splice_init(&check_list, work_q); 1498 list_splice_init(&tmp_list, work_q); 1499 SCSI_LOG_ERROR_RECOVERY(3, 1500 shost_printk(KERN_INFO, shost, 1501 "%s: Skip target reset, past eh deadline\n", 1502 current->comm)); 1503 return list_empty(work_q); 1504 } 1505 1506 scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry); 1507 id = scmd_id(scmd); 1508 1509 SCSI_LOG_ERROR_RECOVERY(3, 1510 shost_printk(KERN_INFO, shost, 1511 "%s: Sending target reset to target %d\n", 1512 current->comm, id)); 1513 rtn = scsi_try_target_reset(scmd); 1514 if (rtn != SUCCESS && rtn != FAST_IO_FAIL) 1515 SCSI_LOG_ERROR_RECOVERY(3, 1516 shost_printk(KERN_INFO, shost, 1517 "%s: Target reset failed" 1518 " target: %d\n", 1519 current->comm, id)); 1520 list_for_each_entry_safe(scmd, next, &tmp_list, eh_entry) { 1521 if (scmd_id(scmd) != id) 1522 continue; 1523 1524 if (rtn == SUCCESS) 1525 list_move_tail(&scmd->eh_entry, &check_list); 1526 else if (rtn == FAST_IO_FAIL) 1527 scsi_eh_finish_cmd(scmd, done_q); 1528 else 1529 /* push back on work queue for further processing */ 1530 list_move(&scmd->eh_entry, work_q); 1531 } 1532 } 1533 1534 return scsi_eh_test_devices(&check_list, work_q, done_q, 0); 1535 } 1536 1537 /** 1538 * scsi_eh_bus_reset - send a bus reset 1539 * @shost: &scsi host being recovered. 1540 * @work_q: &list_head for pending commands. 1541 * @done_q: &list_head for processed commands. 1542 */ 1543 static int scsi_eh_bus_reset(struct Scsi_Host *shost, 1544 struct list_head *work_q, 1545 struct list_head *done_q) 1546 { 1547 struct scsi_cmnd *scmd, *chan_scmd, *next; 1548 LIST_HEAD(check_list); 1549 unsigned int channel; 1550 int rtn; 1551 1552 /* 1553 * we really want to loop over the various channels, and do this on 1554 * a channel by channel basis. we should also check to see if any 1555 * of the failed commands are on soft_reset devices, and if so, skip 1556 * the reset. 1557 */ 1558 1559 for (channel = 0; channel <= shost->max_channel; channel++) { 1560 if (scsi_host_eh_past_deadline(shost)) { 1561 list_splice_init(&check_list, work_q); 1562 SCSI_LOG_ERROR_RECOVERY(3, 1563 shost_printk(KERN_INFO, shost, 1564 "%s: skip BRST, past eh deadline\n", 1565 current->comm)); 1566 return list_empty(work_q); 1567 } 1568 1569 chan_scmd = NULL; 1570 list_for_each_entry(scmd, work_q, eh_entry) { 1571 if (channel == scmd_channel(scmd)) { 1572 chan_scmd = scmd; 1573 break; 1574 /* 1575 * FIXME add back in some support for 1576 * soft_reset devices. 1577 */ 1578 } 1579 } 1580 1581 if (!chan_scmd) 1582 continue; 1583 SCSI_LOG_ERROR_RECOVERY(3, 1584 shost_printk(KERN_INFO, shost, 1585 "%s: Sending BRST chan: %d\n", 1586 current->comm, channel)); 1587 rtn = scsi_try_bus_reset(chan_scmd); 1588 if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { 1589 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 1590 if (channel == scmd_channel(scmd)) { 1591 if (rtn == FAST_IO_FAIL) 1592 scsi_eh_finish_cmd(scmd, 1593 done_q); 1594 else 1595 list_move_tail(&scmd->eh_entry, 1596 &check_list); 1597 } 1598 } 1599 } else { 1600 SCSI_LOG_ERROR_RECOVERY(3, 1601 shost_printk(KERN_INFO, shost, 1602 "%s: BRST failed chan: %d\n", 1603 current->comm, channel)); 1604 } 1605 } 1606 return scsi_eh_test_devices(&check_list, work_q, done_q, 0); 1607 } 1608 1609 /** 1610 * scsi_eh_host_reset - send a host reset 1611 * @shost: host to be reset. 1612 * @work_q: &list_head for pending commands. 1613 * @done_q: &list_head for processed commands. 1614 */ 1615 static int scsi_eh_host_reset(struct Scsi_Host *shost, 1616 struct list_head *work_q, 1617 struct list_head *done_q) 1618 { 1619 struct scsi_cmnd *scmd, *next; 1620 LIST_HEAD(check_list); 1621 int rtn; 1622 1623 if (!list_empty(work_q)) { 1624 scmd = list_entry(work_q->next, 1625 struct scsi_cmnd, eh_entry); 1626 1627 SCSI_LOG_ERROR_RECOVERY(3, 1628 shost_printk(KERN_INFO, shost, 1629 "%s: Sending HRST\n", 1630 current->comm)); 1631 1632 rtn = scsi_try_host_reset(scmd); 1633 if (rtn == SUCCESS) { 1634 list_splice_init(work_q, &check_list); 1635 } else if (rtn == FAST_IO_FAIL) { 1636 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 1637 scsi_eh_finish_cmd(scmd, done_q); 1638 } 1639 } else { 1640 SCSI_LOG_ERROR_RECOVERY(3, 1641 shost_printk(KERN_INFO, shost, 1642 "%s: HRST failed\n", 1643 current->comm)); 1644 } 1645 } 1646 return scsi_eh_test_devices(&check_list, work_q, done_q, 1); 1647 } 1648 1649 /** 1650 * scsi_eh_offline_sdevs - offline scsi devices that fail to recover 1651 * @work_q: &list_head for pending commands. 1652 * @done_q: &list_head for processed commands. 1653 */ 1654 static void scsi_eh_offline_sdevs(struct list_head *work_q, 1655 struct list_head *done_q) 1656 { 1657 struct scsi_cmnd *scmd, *next; 1658 struct scsi_device *sdev; 1659 1660 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 1661 sdev_printk(KERN_INFO, scmd->device, "Device offlined - " 1662 "not ready after error recovery\n"); 1663 sdev = scmd->device; 1664 1665 mutex_lock(&sdev->state_mutex); 1666 scsi_device_set_state(sdev, SDEV_OFFLINE); 1667 mutex_unlock(&sdev->state_mutex); 1668 1669 scsi_eh_finish_cmd(scmd, done_q); 1670 } 1671 return; 1672 } 1673 1674 /** 1675 * scsi_noretry_cmd - determine if command should be failed fast 1676 * @scmd: SCSI cmd to examine. 1677 */ 1678 int scsi_noretry_cmd(struct scsi_cmnd *scmd) 1679 { 1680 switch (host_byte(scmd->result)) { 1681 case DID_OK: 1682 break; 1683 case DID_TIME_OUT: 1684 goto check_type; 1685 case DID_BUS_BUSY: 1686 return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT); 1687 case DID_PARITY: 1688 return (scmd->request->cmd_flags & REQ_FAILFAST_DEV); 1689 case DID_ERROR: 1690 if (msg_byte(scmd->result) == COMMAND_COMPLETE && 1691 status_byte(scmd->result) == RESERVATION_CONFLICT) 1692 return 0; 1693 /* fall through */ 1694 case DID_SOFT_ERROR: 1695 return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER); 1696 } 1697 1698 if (status_byte(scmd->result) != CHECK_CONDITION) 1699 return 0; 1700 1701 check_type: 1702 /* 1703 * assume caller has checked sense and determined 1704 * the check condition was retryable. 1705 */ 1706 if (scmd->request->cmd_flags & REQ_FAILFAST_DEV || 1707 blk_rq_is_passthrough(scmd->request)) 1708 return 1; 1709 else 1710 return 0; 1711 } 1712 1713 /** 1714 * scsi_decide_disposition - Disposition a cmd on return from LLD. 1715 * @scmd: SCSI cmd to examine. 1716 * 1717 * Notes: 1718 * This is *only* called when we are examining the status after sending 1719 * out the actual data command. any commands that are queued for error 1720 * recovery (e.g. test_unit_ready) do *not* come through here. 1721 * 1722 * When this routine returns failed, it means the error handler thread 1723 * is woken. In cases where the error code indicates an error that 1724 * doesn't require the error handler read (i.e. we don't need to 1725 * abort/reset), this function should return SUCCESS. 1726 */ 1727 int scsi_decide_disposition(struct scsi_cmnd *scmd) 1728 { 1729 int rtn; 1730 1731 /* 1732 * if the device is offline, then we clearly just pass the result back 1733 * up to the top level. 1734 */ 1735 if (!scsi_device_online(scmd->device)) { 1736 SCSI_LOG_ERROR_RECOVERY(5, scmd_printk(KERN_INFO, scmd, 1737 "%s: device offline - report as SUCCESS\n", __func__)); 1738 return SUCCESS; 1739 } 1740 1741 /* 1742 * first check the host byte, to see if there is anything in there 1743 * that would indicate what we need to do. 1744 */ 1745 switch (host_byte(scmd->result)) { 1746 case DID_PASSTHROUGH: 1747 /* 1748 * no matter what, pass this through to the upper layer. 1749 * nuke this special code so that it looks like we are saying 1750 * did_ok. 1751 */ 1752 scmd->result &= 0xff00ffff; 1753 return SUCCESS; 1754 case DID_OK: 1755 /* 1756 * looks good. drop through, and check the next byte. 1757 */ 1758 break; 1759 case DID_ABORT: 1760 if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { 1761 set_host_byte(scmd, DID_TIME_OUT); 1762 return SUCCESS; 1763 } 1764 /* FALLTHROUGH */ 1765 case DID_NO_CONNECT: 1766 case DID_BAD_TARGET: 1767 /* 1768 * note - this means that we just report the status back 1769 * to the top level driver, not that we actually think 1770 * that it indicates SUCCESS. 1771 */ 1772 return SUCCESS; 1773 case DID_SOFT_ERROR: 1774 /* 1775 * when the low level driver returns did_soft_error, 1776 * it is responsible for keeping an internal retry counter 1777 * in order to avoid endless loops (db) 1778 */ 1779 goto maybe_retry; 1780 case DID_IMM_RETRY: 1781 return NEEDS_RETRY; 1782 1783 case DID_REQUEUE: 1784 return ADD_TO_MLQUEUE; 1785 case DID_TRANSPORT_DISRUPTED: 1786 /* 1787 * LLD/transport was disrupted during processing of the IO. 1788 * The transport class is now blocked/blocking, 1789 * and the transport will decide what to do with the IO 1790 * based on its timers and recovery capablilities if 1791 * there are enough retries. 1792 */ 1793 goto maybe_retry; 1794 case DID_TRANSPORT_FAILFAST: 1795 /* 1796 * The transport decided to failfast the IO (most likely 1797 * the fast io fail tmo fired), so send IO directly upwards. 1798 */ 1799 return SUCCESS; 1800 case DID_ERROR: 1801 if (msg_byte(scmd->result) == COMMAND_COMPLETE && 1802 status_byte(scmd->result) == RESERVATION_CONFLICT) 1803 /* 1804 * execute reservation conflict processing code 1805 * lower down 1806 */ 1807 break; 1808 /* fallthrough */ 1809 case DID_BUS_BUSY: 1810 case DID_PARITY: 1811 goto maybe_retry; 1812 case DID_TIME_OUT: 1813 /* 1814 * when we scan the bus, we get timeout messages for 1815 * these commands if there is no device available. 1816 * other hosts report did_no_connect for the same thing. 1817 */ 1818 if ((scmd->cmnd[0] == TEST_UNIT_READY || 1819 scmd->cmnd[0] == INQUIRY)) { 1820 return SUCCESS; 1821 } else { 1822 return FAILED; 1823 } 1824 case DID_RESET: 1825 return SUCCESS; 1826 default: 1827 return FAILED; 1828 } 1829 1830 /* 1831 * next, check the message byte. 1832 */ 1833 if (msg_byte(scmd->result) != COMMAND_COMPLETE) 1834 return FAILED; 1835 1836 /* 1837 * check the status byte to see if this indicates anything special. 1838 */ 1839 switch (status_byte(scmd->result)) { 1840 case QUEUE_FULL: 1841 scsi_handle_queue_full(scmd->device); 1842 /* 1843 * the case of trying to send too many commands to a 1844 * tagged queueing device. 1845 */ 1846 /* FALLTHROUGH */ 1847 case BUSY: 1848 /* 1849 * device can't talk to us at the moment. Should only 1850 * occur (SAM-3) when the task queue is empty, so will cause 1851 * the empty queue handling to trigger a stall in the 1852 * device. 1853 */ 1854 return ADD_TO_MLQUEUE; 1855 case GOOD: 1856 if (scmd->cmnd[0] == REPORT_LUNS) 1857 scmd->device->sdev_target->expecting_lun_change = 0; 1858 scsi_handle_queue_ramp_up(scmd->device); 1859 /* FALLTHROUGH */ 1860 case COMMAND_TERMINATED: 1861 return SUCCESS; 1862 case TASK_ABORTED: 1863 goto maybe_retry; 1864 case CHECK_CONDITION: 1865 rtn = scsi_check_sense(scmd); 1866 if (rtn == NEEDS_RETRY) 1867 goto maybe_retry; 1868 /* if rtn == FAILED, we have no sense information; 1869 * returning FAILED will wake the error handler thread 1870 * to collect the sense and redo the decide 1871 * disposition */ 1872 return rtn; 1873 case CONDITION_GOOD: 1874 case INTERMEDIATE_GOOD: 1875 case INTERMEDIATE_C_GOOD: 1876 case ACA_ACTIVE: 1877 /* 1878 * who knows? FIXME(eric) 1879 */ 1880 return SUCCESS; 1881 1882 case RESERVATION_CONFLICT: 1883 sdev_printk(KERN_INFO, scmd->device, 1884 "reservation conflict\n"); 1885 set_host_byte(scmd, DID_NEXUS_FAILURE); 1886 return SUCCESS; /* causes immediate i/o error */ 1887 default: 1888 return FAILED; 1889 } 1890 return FAILED; 1891 1892 maybe_retry: 1893 1894 /* we requeue for retry because the error was retryable, and 1895 * the request was not marked fast fail. Note that above, 1896 * even if the request is marked fast fail, we still requeue 1897 * for queue congestion conditions (QUEUE_FULL or BUSY) */ 1898 if ((++scmd->retries) <= scmd->allowed 1899 && !scsi_noretry_cmd(scmd)) { 1900 return NEEDS_RETRY; 1901 } else { 1902 /* 1903 * no more retries - report this one back to upper level. 1904 */ 1905 return SUCCESS; 1906 } 1907 } 1908 1909 static void eh_lock_door_done(struct request *req, blk_status_t status) 1910 { 1911 __blk_put_request(req->q, req); 1912 } 1913 1914 /** 1915 * scsi_eh_lock_door - Prevent medium removal for the specified device 1916 * @sdev: SCSI device to prevent medium removal 1917 * 1918 * Locking: 1919 * We must be called from process context. 1920 * 1921 * Notes: 1922 * We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the 1923 * head of the devices request queue, and continue. 1924 */ 1925 static void scsi_eh_lock_door(struct scsi_device *sdev) 1926 { 1927 struct request *req; 1928 struct scsi_request *rq; 1929 1930 /* 1931 * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a 1932 * request becomes available 1933 */ 1934 req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, GFP_KERNEL); 1935 if (IS_ERR(req)) 1936 return; 1937 rq = scsi_req(req); 1938 1939 rq->cmd[0] = ALLOW_MEDIUM_REMOVAL; 1940 rq->cmd[1] = 0; 1941 rq->cmd[2] = 0; 1942 rq->cmd[3] = 0; 1943 rq->cmd[4] = SCSI_REMOVAL_PREVENT; 1944 rq->cmd[5] = 0; 1945 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); 1946 1947 req->rq_flags |= RQF_QUIET; 1948 req->timeout = 10 * HZ; 1949 rq->retries = 5; 1950 1951 blk_execute_rq_nowait(req->q, NULL, req, 1, eh_lock_door_done); 1952 } 1953 1954 /** 1955 * scsi_restart_operations - restart io operations to the specified host. 1956 * @shost: Host we are restarting. 1957 * 1958 * Notes: 1959 * When we entered the error handler, we blocked all further i/o to 1960 * this device. we need to 'reverse' this process. 1961 */ 1962 static void scsi_restart_operations(struct Scsi_Host *shost) 1963 { 1964 struct scsi_device *sdev; 1965 unsigned long flags; 1966 1967 /* 1968 * If the door was locked, we need to insert a door lock request 1969 * onto the head of the SCSI request queue for the device. There 1970 * is no point trying to lock the door of an off-line device. 1971 */ 1972 shost_for_each_device(sdev, shost) { 1973 if (scsi_device_online(sdev) && sdev->was_reset && sdev->locked) { 1974 scsi_eh_lock_door(sdev); 1975 sdev->was_reset = 0; 1976 } 1977 } 1978 1979 /* 1980 * next free up anything directly waiting upon the host. this 1981 * will be requests for character device operations, and also for 1982 * ioctls to queued block devices. 1983 */ 1984 SCSI_LOG_ERROR_RECOVERY(3, 1985 shost_printk(KERN_INFO, shost, "waking up host to restart\n")); 1986 1987 spin_lock_irqsave(shost->host_lock, flags); 1988 if (scsi_host_set_state(shost, SHOST_RUNNING)) 1989 if (scsi_host_set_state(shost, SHOST_CANCEL)) 1990 BUG_ON(scsi_host_set_state(shost, SHOST_DEL)); 1991 spin_unlock_irqrestore(shost->host_lock, flags); 1992 1993 wake_up(&shost->host_wait); 1994 1995 /* 1996 * finally we need to re-initiate requests that may be pending. we will 1997 * have had everything blocked while error handling is taking place, and 1998 * now that error recovery is done, we will need to ensure that these 1999 * requests are started. 2000 */ 2001 scsi_run_host_queues(shost); 2002 2003 /* 2004 * if eh is active and host_eh_scheduled is pending we need to re-run 2005 * recovery. we do this check after scsi_run_host_queues() to allow 2006 * everything pent up since the last eh run a chance to make forward 2007 * progress before we sync again. Either we'll immediately re-run 2008 * recovery or scsi_device_unbusy() will wake us again when these 2009 * pending commands complete. 2010 */ 2011 spin_lock_irqsave(shost->host_lock, flags); 2012 if (shost->host_eh_scheduled) 2013 if (scsi_host_set_state(shost, SHOST_RECOVERY)) 2014 WARN_ON(scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)); 2015 spin_unlock_irqrestore(shost->host_lock, flags); 2016 } 2017 2018 /** 2019 * scsi_eh_ready_devs - check device ready state and recover if not. 2020 * @shost: host to be recovered. 2021 * @work_q: &list_head for pending commands. 2022 * @done_q: &list_head for processed commands. 2023 */ 2024 void scsi_eh_ready_devs(struct Scsi_Host *shost, 2025 struct list_head *work_q, 2026 struct list_head *done_q) 2027 { 2028 if (!scsi_eh_stu(shost, work_q, done_q)) 2029 if (!scsi_eh_bus_device_reset(shost, work_q, done_q)) 2030 if (!scsi_eh_target_reset(shost, work_q, done_q)) 2031 if (!scsi_eh_bus_reset(shost, work_q, done_q)) 2032 if (!scsi_eh_host_reset(shost, work_q, done_q)) 2033 scsi_eh_offline_sdevs(work_q, 2034 done_q); 2035 } 2036 EXPORT_SYMBOL_GPL(scsi_eh_ready_devs); 2037 2038 /** 2039 * scsi_eh_flush_done_q - finish processed commands or retry them. 2040 * @done_q: list_head of processed commands. 2041 */ 2042 void scsi_eh_flush_done_q(struct list_head *done_q) 2043 { 2044 struct scsi_cmnd *scmd, *next; 2045 2046 list_for_each_entry_safe(scmd, next, done_q, eh_entry) { 2047 list_del_init(&scmd->eh_entry); 2048 if (scsi_device_online(scmd->device) && 2049 !scsi_noretry_cmd(scmd) && 2050 (++scmd->retries <= scmd->allowed)) { 2051 SCSI_LOG_ERROR_RECOVERY(3, 2052 scmd_printk(KERN_INFO, scmd, 2053 "%s: flush retry cmd\n", 2054 current->comm)); 2055 scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); 2056 } else { 2057 /* 2058 * If just we got sense for the device (called 2059 * scsi_eh_get_sense), scmd->result is already 2060 * set, do not set DRIVER_TIMEOUT. 2061 */ 2062 if (!scmd->result) 2063 scmd->result |= (DRIVER_TIMEOUT << 24); 2064 SCSI_LOG_ERROR_RECOVERY(3, 2065 scmd_printk(KERN_INFO, scmd, 2066 "%s: flush finish cmd\n", 2067 current->comm)); 2068 scsi_finish_command(scmd); 2069 } 2070 } 2071 } 2072 EXPORT_SYMBOL(scsi_eh_flush_done_q); 2073 2074 /** 2075 * scsi_unjam_host - Attempt to fix a host which has a cmd that failed. 2076 * @shost: Host to unjam. 2077 * 2078 * Notes: 2079 * When we come in here, we *know* that all commands on the bus have 2080 * either completed, failed or timed out. we also know that no further 2081 * commands are being sent to the host, so things are relatively quiet 2082 * and we have freedom to fiddle with things as we wish. 2083 * 2084 * This is only the *default* implementation. it is possible for 2085 * individual drivers to supply their own version of this function, and 2086 * if the maintainer wishes to do this, it is strongly suggested that 2087 * this function be taken as a template and modified. this function 2088 * was designed to correctly handle problems for about 95% of the 2089 * different cases out there, and it should always provide at least a 2090 * reasonable amount of error recovery. 2091 * 2092 * Any command marked 'failed' or 'timeout' must eventually have 2093 * scsi_finish_cmd() called for it. we do all of the retry stuff 2094 * here, so when we restart the host after we return it should have an 2095 * empty queue. 2096 */ 2097 static void scsi_unjam_host(struct Scsi_Host *shost) 2098 { 2099 unsigned long flags; 2100 LIST_HEAD(eh_work_q); 2101 LIST_HEAD(eh_done_q); 2102 2103 spin_lock_irqsave(shost->host_lock, flags); 2104 list_splice_init(&shost->eh_cmd_q, &eh_work_q); 2105 spin_unlock_irqrestore(shost->host_lock, flags); 2106 2107 SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q)); 2108 2109 if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q)) 2110 scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); 2111 2112 spin_lock_irqsave(shost->host_lock, flags); 2113 if (shost->eh_deadline != -1) 2114 shost->last_reset = 0; 2115 spin_unlock_irqrestore(shost->host_lock, flags); 2116 scsi_eh_flush_done_q(&eh_done_q); 2117 } 2118 2119 /** 2120 * scsi_error_handler - SCSI error handler thread 2121 * @data: Host for which we are running. 2122 * 2123 * Notes: 2124 * This is the main error handling loop. This is run as a kernel thread 2125 * for every SCSI host and handles all error handling activity. 2126 */ 2127 int scsi_error_handler(void *data) 2128 { 2129 struct Scsi_Host *shost = data; 2130 2131 /* 2132 * We use TASK_INTERRUPTIBLE so that the thread is not 2133 * counted against the load average as a running process. 2134 * We never actually get interrupted because kthread_run 2135 * disables signal delivery for the created thread. 2136 */ 2137 while (true) { 2138 /* 2139 * The sequence in kthread_stop() sets the stop flag first 2140 * then wakes the process. To avoid missed wakeups, the task 2141 * should always be in a non running state before the stop 2142 * flag is checked 2143 */ 2144 set_current_state(TASK_INTERRUPTIBLE); 2145 if (kthread_should_stop()) 2146 break; 2147 2148 if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) || 2149 shost->host_failed != atomic_read(&shost->host_busy)) { 2150 SCSI_LOG_ERROR_RECOVERY(1, 2151 shost_printk(KERN_INFO, shost, 2152 "scsi_eh_%d: sleeping\n", 2153 shost->host_no)); 2154 schedule(); 2155 continue; 2156 } 2157 2158 __set_current_state(TASK_RUNNING); 2159 SCSI_LOG_ERROR_RECOVERY(1, 2160 shost_printk(KERN_INFO, shost, 2161 "scsi_eh_%d: waking up %d/%d/%d\n", 2162 shost->host_no, shost->host_eh_scheduled, 2163 shost->host_failed, 2164 atomic_read(&shost->host_busy))); 2165 2166 /* 2167 * We have a host that is failing for some reason. Figure out 2168 * what we need to do to get it up and online again (if we can). 2169 * If we fail, we end up taking the thing offline. 2170 */ 2171 if (!shost->eh_noresume && scsi_autopm_get_host(shost) != 0) { 2172 SCSI_LOG_ERROR_RECOVERY(1, 2173 shost_printk(KERN_ERR, shost, 2174 "scsi_eh_%d: unable to autoresume\n", 2175 shost->host_no)); 2176 continue; 2177 } 2178 2179 if (shost->transportt->eh_strategy_handler) 2180 shost->transportt->eh_strategy_handler(shost); 2181 else 2182 scsi_unjam_host(shost); 2183 2184 /* All scmds have been handled */ 2185 shost->host_failed = 0; 2186 2187 /* 2188 * Note - if the above fails completely, the action is to take 2189 * individual devices offline and flush the queue of any 2190 * outstanding requests that may have been pending. When we 2191 * restart, we restart any I/O to any other devices on the bus 2192 * which are still online. 2193 */ 2194 scsi_restart_operations(shost); 2195 if (!shost->eh_noresume) 2196 scsi_autopm_put_host(shost); 2197 } 2198 __set_current_state(TASK_RUNNING); 2199 2200 SCSI_LOG_ERROR_RECOVERY(1, 2201 shost_printk(KERN_INFO, shost, 2202 "Error handler scsi_eh_%d exiting\n", 2203 shost->host_no)); 2204 shost->ehandler = NULL; 2205 return 0; 2206 } 2207 2208 /* 2209 * Function: scsi_report_bus_reset() 2210 * 2211 * Purpose: Utility function used by low-level drivers to report that 2212 * they have observed a bus reset on the bus being handled. 2213 * 2214 * Arguments: shost - Host in question 2215 * channel - channel on which reset was observed. 2216 * 2217 * Returns: Nothing 2218 * 2219 * Lock status: Host lock must be held. 2220 * 2221 * Notes: This only needs to be called if the reset is one which 2222 * originates from an unknown location. Resets originated 2223 * by the mid-level itself don't need to call this, but there 2224 * should be no harm. 2225 * 2226 * The main purpose of this is to make sure that a CHECK_CONDITION 2227 * is properly treated. 2228 */ 2229 void scsi_report_bus_reset(struct Scsi_Host *shost, int channel) 2230 { 2231 struct scsi_device *sdev; 2232 2233 __shost_for_each_device(sdev, shost) { 2234 if (channel == sdev_channel(sdev)) 2235 __scsi_report_device_reset(sdev, NULL); 2236 } 2237 } 2238 EXPORT_SYMBOL(scsi_report_bus_reset); 2239 2240 /* 2241 * Function: scsi_report_device_reset() 2242 * 2243 * Purpose: Utility function used by low-level drivers to report that 2244 * they have observed a device reset on the device being handled. 2245 * 2246 * Arguments: shost - Host in question 2247 * channel - channel on which reset was observed 2248 * target - target on which reset was observed 2249 * 2250 * Returns: Nothing 2251 * 2252 * Lock status: Host lock must be held 2253 * 2254 * Notes: This only needs to be called if the reset is one which 2255 * originates from an unknown location. Resets originated 2256 * by the mid-level itself don't need to call this, but there 2257 * should be no harm. 2258 * 2259 * The main purpose of this is to make sure that a CHECK_CONDITION 2260 * is properly treated. 2261 */ 2262 void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target) 2263 { 2264 struct scsi_device *sdev; 2265 2266 __shost_for_each_device(sdev, shost) { 2267 if (channel == sdev_channel(sdev) && 2268 target == sdev_id(sdev)) 2269 __scsi_report_device_reset(sdev, NULL); 2270 } 2271 } 2272 EXPORT_SYMBOL(scsi_report_device_reset); 2273 2274 static void 2275 scsi_reset_provider_done_command(struct scsi_cmnd *scmd) 2276 { 2277 } 2278 2279 /** 2280 * scsi_ioctl_reset: explicitly reset a host/bus/target/device 2281 * @dev: scsi_device to operate on 2282 * @arg: reset type (see sg.h) 2283 */ 2284 int 2285 scsi_ioctl_reset(struct scsi_device *dev, int __user *arg) 2286 { 2287 struct scsi_cmnd *scmd; 2288 struct Scsi_Host *shost = dev->host; 2289 struct request *rq; 2290 unsigned long flags; 2291 int error = 0, rtn, val; 2292 2293 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2294 return -EACCES; 2295 2296 error = get_user(val, arg); 2297 if (error) 2298 return error; 2299 2300 if (scsi_autopm_get_host(shost) < 0) 2301 return -EIO; 2302 2303 error = -EIO; 2304 rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) + 2305 shost->hostt->cmd_size, GFP_KERNEL); 2306 if (!rq) 2307 goto out_put_autopm_host; 2308 blk_rq_init(NULL, rq); 2309 2310 scmd = (struct scsi_cmnd *)(rq + 1); 2311 scsi_init_command(dev, scmd); 2312 scmd->request = rq; 2313 scmd->cmnd = scsi_req(rq)->cmd; 2314 2315 scmd->scsi_done = scsi_reset_provider_done_command; 2316 memset(&scmd->sdb, 0, sizeof(scmd->sdb)); 2317 2318 scmd->cmd_len = 0; 2319 2320 scmd->sc_data_direction = DMA_BIDIRECTIONAL; 2321 2322 spin_lock_irqsave(shost->host_lock, flags); 2323 shost->tmf_in_progress = 1; 2324 spin_unlock_irqrestore(shost->host_lock, flags); 2325 2326 switch (val & ~SG_SCSI_RESET_NO_ESCALATE) { 2327 case SG_SCSI_RESET_NOTHING: 2328 rtn = SUCCESS; 2329 break; 2330 case SG_SCSI_RESET_DEVICE: 2331 rtn = scsi_try_bus_device_reset(scmd); 2332 if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE)) 2333 break; 2334 /* FALLTHROUGH */ 2335 case SG_SCSI_RESET_TARGET: 2336 rtn = scsi_try_target_reset(scmd); 2337 if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE)) 2338 break; 2339 /* FALLTHROUGH */ 2340 case SG_SCSI_RESET_BUS: 2341 rtn = scsi_try_bus_reset(scmd); 2342 if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE)) 2343 break; 2344 /* FALLTHROUGH */ 2345 case SG_SCSI_RESET_HOST: 2346 rtn = scsi_try_host_reset(scmd); 2347 if (rtn == SUCCESS) 2348 break; 2349 /* FALLTHROUGH */ 2350 default: 2351 rtn = FAILED; 2352 break; 2353 } 2354 2355 error = (rtn == SUCCESS) ? 0 : -EIO; 2356 2357 spin_lock_irqsave(shost->host_lock, flags); 2358 shost->tmf_in_progress = 0; 2359 spin_unlock_irqrestore(shost->host_lock, flags); 2360 2361 /* 2362 * be sure to wake up anyone who was sleeping or had their queue 2363 * suspended while we performed the TMF. 2364 */ 2365 SCSI_LOG_ERROR_RECOVERY(3, 2366 shost_printk(KERN_INFO, shost, 2367 "waking up host to restart after TMF\n")); 2368 2369 wake_up(&shost->host_wait); 2370 scsi_run_host_queues(shost); 2371 2372 scsi_put_command(scmd); 2373 kfree(rq); 2374 2375 out_put_autopm_host: 2376 scsi_autopm_put_host(shost); 2377 return error; 2378 } 2379 EXPORT_SYMBOL(scsi_ioctl_reset); 2380 2381 bool scsi_command_normalize_sense(const struct scsi_cmnd *cmd, 2382 struct scsi_sense_hdr *sshdr) 2383 { 2384 return scsi_normalize_sense(cmd->sense_buffer, 2385 SCSI_SENSE_BUFFERSIZE, sshdr); 2386 } 2387 EXPORT_SYMBOL(scsi_command_normalize_sense); 2388 2389 /** 2390 * scsi_get_sense_info_fld - get information field from sense data (either fixed or descriptor format) 2391 * @sense_buffer: byte array of sense data 2392 * @sb_len: number of valid bytes in sense_buffer 2393 * @info_out: pointer to 64 integer where 8 or 4 byte information 2394 * field will be placed if found. 2395 * 2396 * Return value: 2397 * true if information field found, false if not found. 2398 */ 2399 bool scsi_get_sense_info_fld(const u8 *sense_buffer, int sb_len, 2400 u64 *info_out) 2401 { 2402 const u8 * ucp; 2403 2404 if (sb_len < 7) 2405 return false; 2406 switch (sense_buffer[0] & 0x7f) { 2407 case 0x70: 2408 case 0x71: 2409 if (sense_buffer[0] & 0x80) { 2410 *info_out = get_unaligned_be32(&sense_buffer[3]); 2411 return true; 2412 } 2413 return false; 2414 case 0x72: 2415 case 0x73: 2416 ucp = scsi_sense_desc_find(sense_buffer, sb_len, 2417 0 /* info desc */); 2418 if (ucp && (0xa == ucp[1])) { 2419 *info_out = get_unaligned_be64(&ucp[4]); 2420 return true; 2421 } 2422 return false; 2423 default: 2424 return false; 2425 } 2426 } 2427 EXPORT_SYMBOL(scsi_get_sense_info_fld); 2428