1 /* 2 * libata-eh.c - libata error handling 3 * 4 * Maintained by: Jeff Garzik <jgarzik@pobox.com> 5 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * on emails. 7 * 8 * Copyright 2006 Tejun Heo <htejun@gmail.com> 9 * 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; see the file COPYING. If not, write to 23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, 24 * USA. 25 * 26 * 27 * libata documentation is available via 'make {ps|pdf}docs', 28 * as Documentation/DocBook/libata.* 29 * 30 * Hardware documentation available from http://www.t13.org/ and 31 * http://www.sata-io.org/ 32 * 33 */ 34 35 #include <linux/kernel.h> 36 #include <scsi/scsi.h> 37 #include <scsi/scsi_host.h> 38 #include <scsi/scsi_eh.h> 39 #include <scsi/scsi_device.h> 40 #include <scsi/scsi_cmnd.h> 41 #include "../scsi/scsi_transport_api.h" 42 43 #include <linux/libata.h> 44 45 #include "libata.h" 46 47 enum { 48 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 49 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 50 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 51 }; 52 53 static void __ata_port_freeze(struct ata_port *ap); 54 static void ata_eh_finish(struct ata_port *ap); 55 #ifdef CONFIG_PM 56 static void ata_eh_handle_port_suspend(struct ata_port *ap); 57 static void ata_eh_handle_port_resume(struct ata_port *ap); 58 static int ata_eh_suspend(struct ata_port *ap, 59 struct ata_device **r_failed_dev); 60 static void ata_eh_prep_resume(struct ata_port *ap); 61 static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev); 62 #else /* CONFIG_PM */ 63 static void ata_eh_handle_port_suspend(struct ata_port *ap) 64 { } 65 66 static void ata_eh_handle_port_resume(struct ata_port *ap) 67 { } 68 69 static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev) 70 { 71 return 0; 72 } 73 74 static void ata_eh_prep_resume(struct ata_port *ap) 75 { } 76 77 static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev) 78 { 79 return 0; 80 } 81 #endif /* CONFIG_PM */ 82 83 static void ata_ering_record(struct ata_ering *ering, int is_io, 84 unsigned int err_mask) 85 { 86 struct ata_ering_entry *ent; 87 88 WARN_ON(!err_mask); 89 90 ering->cursor++; 91 ering->cursor %= ATA_ERING_SIZE; 92 93 ent = &ering->ring[ering->cursor]; 94 ent->is_io = is_io; 95 ent->err_mask = err_mask; 96 ent->timestamp = get_jiffies_64(); 97 } 98 99 static void ata_ering_clear(struct ata_ering *ering) 100 { 101 memset(ering, 0, sizeof(*ering)); 102 } 103 104 static int ata_ering_map(struct ata_ering *ering, 105 int (*map_fn)(struct ata_ering_entry *, void *), 106 void *arg) 107 { 108 int idx, rc = 0; 109 struct ata_ering_entry *ent; 110 111 idx = ering->cursor; 112 do { 113 ent = &ering->ring[idx]; 114 if (!ent->err_mask) 115 break; 116 rc = map_fn(ent, arg); 117 if (rc) 118 break; 119 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 120 } while (idx != ering->cursor); 121 122 return rc; 123 } 124 125 static unsigned int ata_eh_dev_action(struct ata_device *dev) 126 { 127 struct ata_eh_context *ehc = &dev->ap->eh_context; 128 129 return ehc->i.action | ehc->i.dev_action[dev->devno]; 130 } 131 132 static void ata_eh_clear_action(struct ata_device *dev, 133 struct ata_eh_info *ehi, unsigned int action) 134 { 135 int i; 136 137 if (!dev) { 138 ehi->action &= ~action; 139 for (i = 0; i < ATA_MAX_DEVICES; i++) 140 ehi->dev_action[i] &= ~action; 141 } else { 142 /* doesn't make sense for port-wide EH actions */ 143 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 144 145 /* break ehi->action into ehi->dev_action */ 146 if (ehi->action & action) { 147 for (i = 0; i < ATA_MAX_DEVICES; i++) 148 ehi->dev_action[i] |= ehi->action & action; 149 ehi->action &= ~action; 150 } 151 152 /* turn off the specified per-dev action */ 153 ehi->dev_action[dev->devno] &= ~action; 154 } 155 } 156 157 /** 158 * ata_scsi_timed_out - SCSI layer time out callback 159 * @cmd: timed out SCSI command 160 * 161 * Handles SCSI layer timeout. We race with normal completion of 162 * the qc for @cmd. If the qc is already gone, we lose and let 163 * the scsi command finish (EH_HANDLED). Otherwise, the qc has 164 * timed out and EH should be invoked. Prevent ata_qc_complete() 165 * from finishing it by setting EH_SCHEDULED and return 166 * EH_NOT_HANDLED. 167 * 168 * TODO: kill this function once old EH is gone. 169 * 170 * LOCKING: 171 * Called from timer context 172 * 173 * RETURNS: 174 * EH_HANDLED or EH_NOT_HANDLED 175 */ 176 enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 177 { 178 struct Scsi_Host *host = cmd->device->host; 179 struct ata_port *ap = ata_shost_to_port(host); 180 unsigned long flags; 181 struct ata_queued_cmd *qc; 182 enum scsi_eh_timer_return ret; 183 184 DPRINTK("ENTER\n"); 185 186 if (ap->ops->error_handler) { 187 ret = EH_NOT_HANDLED; 188 goto out; 189 } 190 191 ret = EH_HANDLED; 192 spin_lock_irqsave(ap->lock, flags); 193 qc = ata_qc_from_tag(ap, ap->active_tag); 194 if (qc) { 195 WARN_ON(qc->scsicmd != cmd); 196 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 197 qc->err_mask |= AC_ERR_TIMEOUT; 198 ret = EH_NOT_HANDLED; 199 } 200 spin_unlock_irqrestore(ap->lock, flags); 201 202 out: 203 DPRINTK("EXIT, ret=%d\n", ret); 204 return ret; 205 } 206 207 /** 208 * ata_scsi_error - SCSI layer error handler callback 209 * @host: SCSI host on which error occurred 210 * 211 * Handles SCSI-layer-thrown error events. 212 * 213 * LOCKING: 214 * Inherited from SCSI layer (none, can sleep) 215 * 216 * RETURNS: 217 * Zero. 218 */ 219 void ata_scsi_error(struct Scsi_Host *host) 220 { 221 struct ata_port *ap = ata_shost_to_port(host); 222 int i, repeat_cnt = ATA_EH_MAX_REPEAT; 223 unsigned long flags; 224 225 DPRINTK("ENTER\n"); 226 227 /* synchronize with port task */ 228 ata_port_flush_task(ap); 229 230 /* synchronize with host lock and sort out timeouts */ 231 232 /* For new EH, all qcs are finished in one of three ways - 233 * normal completion, error completion, and SCSI timeout. 234 * Both cmpletions can race against SCSI timeout. When normal 235 * completion wins, the qc never reaches EH. When error 236 * completion wins, the qc has ATA_QCFLAG_FAILED set. 237 * 238 * When SCSI timeout wins, things are a bit more complex. 239 * Normal or error completion can occur after the timeout but 240 * before this point. In such cases, both types of 241 * completions are honored. A scmd is determined to have 242 * timed out iff its associated qc is active and not failed. 243 */ 244 if (ap->ops->error_handler) { 245 struct scsi_cmnd *scmd, *tmp; 246 int nr_timedout = 0; 247 248 spin_lock_irqsave(ap->lock, flags); 249 250 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { 251 struct ata_queued_cmd *qc; 252 253 for (i = 0; i < ATA_MAX_QUEUE; i++) { 254 qc = __ata_qc_from_tag(ap, i); 255 if (qc->flags & ATA_QCFLAG_ACTIVE && 256 qc->scsicmd == scmd) 257 break; 258 } 259 260 if (i < ATA_MAX_QUEUE) { 261 /* the scmd has an associated qc */ 262 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 263 /* which hasn't failed yet, timeout */ 264 qc->err_mask |= AC_ERR_TIMEOUT; 265 qc->flags |= ATA_QCFLAG_FAILED; 266 nr_timedout++; 267 } 268 } else { 269 /* Normal completion occurred after 270 * SCSI timeout but before this point. 271 * Successfully complete it. 272 */ 273 scmd->retries = scmd->allowed; 274 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 275 } 276 } 277 278 /* If we have timed out qcs. They belong to EH from 279 * this point but the state of the controller is 280 * unknown. Freeze the port to make sure the IRQ 281 * handler doesn't diddle with those qcs. This must 282 * be done atomically w.r.t. setting QCFLAG_FAILED. 283 */ 284 if (nr_timedout) 285 __ata_port_freeze(ap); 286 287 spin_unlock_irqrestore(ap->lock, flags); 288 } else 289 spin_unlock_wait(ap->lock); 290 291 repeat: 292 /* invoke error handler */ 293 if (ap->ops->error_handler) { 294 /* process port resume request */ 295 ata_eh_handle_port_resume(ap); 296 297 /* fetch & clear EH info */ 298 spin_lock_irqsave(ap->lock, flags); 299 300 memset(&ap->eh_context, 0, sizeof(ap->eh_context)); 301 ap->eh_context.i = ap->eh_info; 302 memset(&ap->eh_info, 0, sizeof(ap->eh_info)); 303 304 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 305 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 306 307 spin_unlock_irqrestore(ap->lock, flags); 308 309 /* invoke EH, skip if unloading or suspended */ 310 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 311 ap->ops->error_handler(ap); 312 else 313 ata_eh_finish(ap); 314 315 /* process port suspend request */ 316 ata_eh_handle_port_suspend(ap); 317 318 /* Exception might have happend after ->error_handler 319 * recovered the port but before this point. Repeat 320 * EH in such case. 321 */ 322 spin_lock_irqsave(ap->lock, flags); 323 324 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 325 if (--repeat_cnt) { 326 ata_port_printk(ap, KERN_INFO, 327 "EH pending after completion, " 328 "repeating EH (cnt=%d)\n", repeat_cnt); 329 spin_unlock_irqrestore(ap->lock, flags); 330 goto repeat; 331 } 332 ata_port_printk(ap, KERN_ERR, "EH pending after %d " 333 "tries, giving up\n", ATA_EH_MAX_REPEAT); 334 } 335 336 /* this run is complete, make sure EH info is clear */ 337 memset(&ap->eh_info, 0, sizeof(ap->eh_info)); 338 339 /* Clear host_eh_scheduled while holding ap->lock such 340 * that if exception occurs after this point but 341 * before EH completion, SCSI midlayer will 342 * re-initiate EH. 343 */ 344 host->host_eh_scheduled = 0; 345 346 spin_unlock_irqrestore(ap->lock, flags); 347 } else { 348 WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL); 349 ap->ops->eng_timeout(ap); 350 } 351 352 /* finish or retry handled scmd's and clean up */ 353 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); 354 355 scsi_eh_flush_done_q(&ap->eh_done_q); 356 357 /* clean up */ 358 spin_lock_irqsave(ap->lock, flags); 359 360 if (ap->pflags & ATA_PFLAG_LOADING) 361 ap->pflags &= ~ATA_PFLAG_LOADING; 362 else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) 363 queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); 364 365 if (ap->pflags & ATA_PFLAG_RECOVERED) 366 ata_port_printk(ap, KERN_INFO, "EH complete\n"); 367 368 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 369 370 /* tell wait_eh that we're done */ 371 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 372 wake_up_all(&ap->eh_wait_q); 373 374 spin_unlock_irqrestore(ap->lock, flags); 375 376 DPRINTK("EXIT\n"); 377 } 378 379 /** 380 * ata_port_wait_eh - Wait for the currently pending EH to complete 381 * @ap: Port to wait EH for 382 * 383 * Wait until the currently pending EH is complete. 384 * 385 * LOCKING: 386 * Kernel thread context (may sleep). 387 */ 388 void ata_port_wait_eh(struct ata_port *ap) 389 { 390 unsigned long flags; 391 DEFINE_WAIT(wait); 392 393 retry: 394 spin_lock_irqsave(ap->lock, flags); 395 396 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 397 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 398 spin_unlock_irqrestore(ap->lock, flags); 399 schedule(); 400 spin_lock_irqsave(ap->lock, flags); 401 } 402 finish_wait(&ap->eh_wait_q, &wait); 403 404 spin_unlock_irqrestore(ap->lock, flags); 405 406 /* make sure SCSI EH is complete */ 407 if (scsi_host_in_recovery(ap->scsi_host)) { 408 msleep(10); 409 goto retry; 410 } 411 } 412 413 /** 414 * ata_qc_timeout - Handle timeout of queued command 415 * @qc: Command that timed out 416 * 417 * Some part of the kernel (currently, only the SCSI layer) 418 * has noticed that the active command on port @ap has not 419 * completed after a specified length of time. Handle this 420 * condition by disabling DMA (if necessary) and completing 421 * transactions, with error if necessary. 422 * 423 * This also handles the case of the "lost interrupt", where 424 * for some reason (possibly hardware bug, possibly driver bug) 425 * an interrupt was not delivered to the driver, even though the 426 * transaction completed successfully. 427 * 428 * TODO: kill this function once old EH is gone. 429 * 430 * LOCKING: 431 * Inherited from SCSI layer (none, can sleep) 432 */ 433 static void ata_qc_timeout(struct ata_queued_cmd *qc) 434 { 435 struct ata_port *ap = qc->ap; 436 u8 host_stat = 0, drv_stat; 437 unsigned long flags; 438 439 DPRINTK("ENTER\n"); 440 441 ap->hsm_task_state = HSM_ST_IDLE; 442 443 spin_lock_irqsave(ap->lock, flags); 444 445 switch (qc->tf.protocol) { 446 447 case ATA_PROT_DMA: 448 case ATA_PROT_ATAPI_DMA: 449 host_stat = ap->ops->bmdma_status(ap); 450 451 /* before we do anything else, clear DMA-Start bit */ 452 ap->ops->bmdma_stop(qc); 453 454 /* fall through */ 455 456 default: 457 ata_altstatus(ap); 458 drv_stat = ata_chk_status(ap); 459 460 /* ack bmdma irq events */ 461 ap->ops->irq_clear(ap); 462 463 ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, " 464 "stat 0x%x host_stat 0x%x\n", 465 qc->tf.command, drv_stat, host_stat); 466 467 /* complete taskfile transaction */ 468 qc->err_mask |= AC_ERR_TIMEOUT; 469 break; 470 } 471 472 spin_unlock_irqrestore(ap->lock, flags); 473 474 ata_eh_qc_complete(qc); 475 476 DPRINTK("EXIT\n"); 477 } 478 479 /** 480 * ata_eng_timeout - Handle timeout of queued command 481 * @ap: Port on which timed-out command is active 482 * 483 * Some part of the kernel (currently, only the SCSI layer) 484 * has noticed that the active command on port @ap has not 485 * completed after a specified length of time. Handle this 486 * condition by disabling DMA (if necessary) and completing 487 * transactions, with error if necessary. 488 * 489 * This also handles the case of the "lost interrupt", where 490 * for some reason (possibly hardware bug, possibly driver bug) 491 * an interrupt was not delivered to the driver, even though the 492 * transaction completed successfully. 493 * 494 * TODO: kill this function once old EH is gone. 495 * 496 * LOCKING: 497 * Inherited from SCSI layer (none, can sleep) 498 */ 499 void ata_eng_timeout(struct ata_port *ap) 500 { 501 DPRINTK("ENTER\n"); 502 503 ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag)); 504 505 DPRINTK("EXIT\n"); 506 } 507 508 /** 509 * ata_qc_schedule_eh - schedule qc for error handling 510 * @qc: command to schedule error handling for 511 * 512 * Schedule error handling for @qc. EH will kick in as soon as 513 * other commands are drained. 514 * 515 * LOCKING: 516 * spin_lock_irqsave(host lock) 517 */ 518 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 519 { 520 struct ata_port *ap = qc->ap; 521 522 WARN_ON(!ap->ops->error_handler); 523 524 qc->flags |= ATA_QCFLAG_FAILED; 525 qc->ap->pflags |= ATA_PFLAG_EH_PENDING; 526 527 /* The following will fail if timeout has already expired. 528 * ata_scsi_error() takes care of such scmds on EH entry. 529 * Note that ATA_QCFLAG_FAILED is unconditionally set after 530 * this function completes. 531 */ 532 scsi_req_abort_cmd(qc->scsicmd); 533 } 534 535 /** 536 * ata_port_schedule_eh - schedule error handling without a qc 537 * @ap: ATA port to schedule EH for 538 * 539 * Schedule error handling for @ap. EH will kick in as soon as 540 * all commands are drained. 541 * 542 * LOCKING: 543 * spin_lock_irqsave(host lock) 544 */ 545 void ata_port_schedule_eh(struct ata_port *ap) 546 { 547 WARN_ON(!ap->ops->error_handler); 548 549 ap->pflags |= ATA_PFLAG_EH_PENDING; 550 scsi_schedule_eh(ap->scsi_host); 551 552 DPRINTK("port EH scheduled\n"); 553 } 554 555 /** 556 * ata_port_abort - abort all qc's on the port 557 * @ap: ATA port to abort qc's for 558 * 559 * Abort all active qc's of @ap and schedule EH. 560 * 561 * LOCKING: 562 * spin_lock_irqsave(host lock) 563 * 564 * RETURNS: 565 * Number of aborted qc's. 566 */ 567 int ata_port_abort(struct ata_port *ap) 568 { 569 int tag, nr_aborted = 0; 570 571 WARN_ON(!ap->ops->error_handler); 572 573 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 574 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 575 576 if (qc) { 577 qc->flags |= ATA_QCFLAG_FAILED; 578 ata_qc_complete(qc); 579 nr_aborted++; 580 } 581 } 582 583 if (!nr_aborted) 584 ata_port_schedule_eh(ap); 585 586 return nr_aborted; 587 } 588 589 /** 590 * __ata_port_freeze - freeze port 591 * @ap: ATA port to freeze 592 * 593 * This function is called when HSM violation or some other 594 * condition disrupts normal operation of the port. Frozen port 595 * is not allowed to perform any operation until the port is 596 * thawed, which usually follows a successful reset. 597 * 598 * ap->ops->freeze() callback can be used for freezing the port 599 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 600 * port cannot be frozen hardware-wise, the interrupt handler 601 * must ack and clear interrupts unconditionally while the port 602 * is frozen. 603 * 604 * LOCKING: 605 * spin_lock_irqsave(host lock) 606 */ 607 static void __ata_port_freeze(struct ata_port *ap) 608 { 609 WARN_ON(!ap->ops->error_handler); 610 611 if (ap->ops->freeze) 612 ap->ops->freeze(ap); 613 614 ap->pflags |= ATA_PFLAG_FROZEN; 615 616 DPRINTK("ata%u port frozen\n", ap->print_id); 617 } 618 619 /** 620 * ata_port_freeze - abort & freeze port 621 * @ap: ATA port to freeze 622 * 623 * Abort and freeze @ap. 624 * 625 * LOCKING: 626 * spin_lock_irqsave(host lock) 627 * 628 * RETURNS: 629 * Number of aborted commands. 630 */ 631 int ata_port_freeze(struct ata_port *ap) 632 { 633 int nr_aborted; 634 635 WARN_ON(!ap->ops->error_handler); 636 637 nr_aborted = ata_port_abort(ap); 638 __ata_port_freeze(ap); 639 640 return nr_aborted; 641 } 642 643 /** 644 * ata_eh_freeze_port - EH helper to freeze port 645 * @ap: ATA port to freeze 646 * 647 * Freeze @ap. 648 * 649 * LOCKING: 650 * None. 651 */ 652 void ata_eh_freeze_port(struct ata_port *ap) 653 { 654 unsigned long flags; 655 656 if (!ap->ops->error_handler) 657 return; 658 659 spin_lock_irqsave(ap->lock, flags); 660 __ata_port_freeze(ap); 661 spin_unlock_irqrestore(ap->lock, flags); 662 } 663 664 /** 665 * ata_port_thaw_port - EH helper to thaw port 666 * @ap: ATA port to thaw 667 * 668 * Thaw frozen port @ap. 669 * 670 * LOCKING: 671 * None. 672 */ 673 void ata_eh_thaw_port(struct ata_port *ap) 674 { 675 unsigned long flags; 676 677 if (!ap->ops->error_handler) 678 return; 679 680 spin_lock_irqsave(ap->lock, flags); 681 682 ap->pflags &= ~ATA_PFLAG_FROZEN; 683 684 if (ap->ops->thaw) 685 ap->ops->thaw(ap); 686 687 spin_unlock_irqrestore(ap->lock, flags); 688 689 DPRINTK("ata%u port thawed\n", ap->print_id); 690 } 691 692 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 693 { 694 /* nada */ 695 } 696 697 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 698 { 699 struct ata_port *ap = qc->ap; 700 struct scsi_cmnd *scmd = qc->scsicmd; 701 unsigned long flags; 702 703 spin_lock_irqsave(ap->lock, flags); 704 qc->scsidone = ata_eh_scsidone; 705 __ata_qc_complete(qc); 706 WARN_ON(ata_tag_valid(qc->tag)); 707 spin_unlock_irqrestore(ap->lock, flags); 708 709 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 710 } 711 712 /** 713 * ata_eh_qc_complete - Complete an active ATA command from EH 714 * @qc: Command to complete 715 * 716 * Indicate to the mid and upper layers that an ATA command has 717 * completed. To be used from EH. 718 */ 719 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 720 { 721 struct scsi_cmnd *scmd = qc->scsicmd; 722 scmd->retries = scmd->allowed; 723 __ata_eh_qc_complete(qc); 724 } 725 726 /** 727 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 728 * @qc: Command to retry 729 * 730 * Indicate to the mid and upper layers that an ATA command 731 * should be retried. To be used from EH. 732 * 733 * SCSI midlayer limits the number of retries to scmd->allowed. 734 * scmd->retries is decremented for commands which get retried 735 * due to unrelated failures (qc->err_mask is zero). 736 */ 737 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 738 { 739 struct scsi_cmnd *scmd = qc->scsicmd; 740 if (!qc->err_mask && scmd->retries) 741 scmd->retries--; 742 __ata_eh_qc_complete(qc); 743 } 744 745 /** 746 * ata_eh_detach_dev - detach ATA device 747 * @dev: ATA device to detach 748 * 749 * Detach @dev. 750 * 751 * LOCKING: 752 * None. 753 */ 754 static void ata_eh_detach_dev(struct ata_device *dev) 755 { 756 struct ata_port *ap = dev->ap; 757 unsigned long flags; 758 759 ata_dev_disable(dev); 760 761 spin_lock_irqsave(ap->lock, flags); 762 763 dev->flags &= ~ATA_DFLAG_DETACH; 764 765 if (ata_scsi_offline_dev(dev)) { 766 dev->flags |= ATA_DFLAG_DETACHED; 767 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 768 } 769 770 /* clear per-dev EH actions */ 771 ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK); 772 ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK); 773 774 spin_unlock_irqrestore(ap->lock, flags); 775 } 776 777 /** 778 * ata_eh_about_to_do - about to perform eh_action 779 * @ap: target ATA port 780 * @dev: target ATA dev for per-dev action (can be NULL) 781 * @action: action about to be performed 782 * 783 * Called just before performing EH actions to clear related bits 784 * in @ap->eh_info such that eh actions are not unnecessarily 785 * repeated. 786 * 787 * LOCKING: 788 * None. 789 */ 790 static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev, 791 unsigned int action) 792 { 793 unsigned long flags; 794 struct ata_eh_info *ehi = &ap->eh_info; 795 struct ata_eh_context *ehc = &ap->eh_context; 796 797 spin_lock_irqsave(ap->lock, flags); 798 799 /* Reset is represented by combination of actions and EHI 800 * flags. Suck in all related bits before clearing eh_info to 801 * avoid losing requested action. 802 */ 803 if (action & ATA_EH_RESET_MASK) { 804 ehc->i.action |= ehi->action & ATA_EH_RESET_MASK; 805 ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK; 806 807 /* make sure all reset actions are cleared & clear EHI flags */ 808 action |= ATA_EH_RESET_MASK; 809 ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK; 810 } 811 812 ata_eh_clear_action(dev, ehi, action); 813 814 if (!(ehc->i.flags & ATA_EHI_QUIET)) 815 ap->pflags |= ATA_PFLAG_RECOVERED; 816 817 spin_unlock_irqrestore(ap->lock, flags); 818 } 819 820 /** 821 * ata_eh_done - EH action complete 822 * @ap: target ATA port 823 * @dev: target ATA dev for per-dev action (can be NULL) 824 * @action: action just completed 825 * 826 * Called right after performing EH actions to clear related bits 827 * in @ap->eh_context. 828 * 829 * LOCKING: 830 * None. 831 */ 832 static void ata_eh_done(struct ata_port *ap, struct ata_device *dev, 833 unsigned int action) 834 { 835 /* if reset is complete, clear all reset actions & reset modifier */ 836 if (action & ATA_EH_RESET_MASK) { 837 action |= ATA_EH_RESET_MASK; 838 ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK; 839 } 840 841 ata_eh_clear_action(dev, &ap->eh_context.i, action); 842 } 843 844 /** 845 * ata_err_string - convert err_mask to descriptive string 846 * @err_mask: error mask to convert to string 847 * 848 * Convert @err_mask to descriptive string. Errors are 849 * prioritized according to severity and only the most severe 850 * error is reported. 851 * 852 * LOCKING: 853 * None. 854 * 855 * RETURNS: 856 * Descriptive string for @err_mask 857 */ 858 static const char * ata_err_string(unsigned int err_mask) 859 { 860 if (err_mask & AC_ERR_HOST_BUS) 861 return "host bus error"; 862 if (err_mask & AC_ERR_ATA_BUS) 863 return "ATA bus error"; 864 if (err_mask & AC_ERR_TIMEOUT) 865 return "timeout"; 866 if (err_mask & AC_ERR_HSM) 867 return "HSM violation"; 868 if (err_mask & AC_ERR_SYSTEM) 869 return "internal error"; 870 if (err_mask & AC_ERR_MEDIA) 871 return "media error"; 872 if (err_mask & AC_ERR_INVALID) 873 return "invalid argument"; 874 if (err_mask & AC_ERR_DEV) 875 return "device error"; 876 return "unknown error"; 877 } 878 879 /** 880 * ata_read_log_page - read a specific log page 881 * @dev: target device 882 * @page: page to read 883 * @buf: buffer to store read page 884 * @sectors: number of sectors to read 885 * 886 * Read log page using READ_LOG_EXT command. 887 * 888 * LOCKING: 889 * Kernel thread context (may sleep). 890 * 891 * RETURNS: 892 * 0 on success, AC_ERR_* mask otherwise. 893 */ 894 static unsigned int ata_read_log_page(struct ata_device *dev, 895 u8 page, void *buf, unsigned int sectors) 896 { 897 struct ata_taskfile tf; 898 unsigned int err_mask; 899 900 DPRINTK("read log page - page %d\n", page); 901 902 ata_tf_init(dev, &tf); 903 tf.command = ATA_CMD_READ_LOG_EXT; 904 tf.lbal = page; 905 tf.nsect = sectors; 906 tf.hob_nsect = sectors >> 8; 907 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE; 908 tf.protocol = ATA_PROT_PIO; 909 910 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, 911 buf, sectors * ATA_SECT_SIZE); 912 913 DPRINTK("EXIT, err_mask=%x\n", err_mask); 914 return err_mask; 915 } 916 917 /** 918 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 919 * @dev: Device to read log page 10h from 920 * @tag: Resulting tag of the failed command 921 * @tf: Resulting taskfile registers of the failed command 922 * 923 * Read log page 10h to obtain NCQ error details and clear error 924 * condition. 925 * 926 * LOCKING: 927 * Kernel thread context (may sleep). 928 * 929 * RETURNS: 930 * 0 on success, -errno otherwise. 931 */ 932 static int ata_eh_read_log_10h(struct ata_device *dev, 933 int *tag, struct ata_taskfile *tf) 934 { 935 u8 *buf = dev->ap->sector_buf; 936 unsigned int err_mask; 937 u8 csum; 938 int i; 939 940 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); 941 if (err_mask) 942 return -EIO; 943 944 csum = 0; 945 for (i = 0; i < ATA_SECT_SIZE; i++) 946 csum += buf[i]; 947 if (csum) 948 ata_dev_printk(dev, KERN_WARNING, 949 "invalid checksum 0x%x on log page 10h\n", csum); 950 951 if (buf[0] & 0x80) 952 return -ENOENT; 953 954 *tag = buf[0] & 0x1f; 955 956 tf->command = buf[2]; 957 tf->feature = buf[3]; 958 tf->lbal = buf[4]; 959 tf->lbam = buf[5]; 960 tf->lbah = buf[6]; 961 tf->device = buf[7]; 962 tf->hob_lbal = buf[8]; 963 tf->hob_lbam = buf[9]; 964 tf->hob_lbah = buf[10]; 965 tf->nsect = buf[12]; 966 tf->hob_nsect = buf[13]; 967 968 return 0; 969 } 970 971 /** 972 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 973 * @dev: device to perform REQUEST_SENSE to 974 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 975 * 976 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 977 * SENSE. This function is EH helper. 978 * 979 * LOCKING: 980 * Kernel thread context (may sleep). 981 * 982 * RETURNS: 983 * 0 on success, AC_ERR_* mask on failure 984 */ 985 static unsigned int atapi_eh_request_sense(struct ata_device *dev, 986 unsigned char *sense_buf) 987 { 988 struct ata_port *ap = dev->ap; 989 struct ata_taskfile tf; 990 u8 cdb[ATAPI_CDB_LEN]; 991 992 DPRINTK("ATAPI request sense\n"); 993 994 ata_tf_init(dev, &tf); 995 996 /* FIXME: is this needed? */ 997 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 998 999 /* XXX: why tf_read here? */ 1000 ap->ops->tf_read(ap, &tf); 1001 1002 /* fill these in, for the case where they are -not- overwritten */ 1003 sense_buf[0] = 0x70; 1004 sense_buf[2] = tf.feature >> 4; 1005 1006 memset(cdb, 0, ATAPI_CDB_LEN); 1007 cdb[0] = REQUEST_SENSE; 1008 cdb[4] = SCSI_SENSE_BUFFERSIZE; 1009 1010 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1011 tf.command = ATA_CMD_PACKET; 1012 1013 /* is it pointless to prefer PIO for "safety reasons"? */ 1014 if (ap->flags & ATA_FLAG_PIO_DMA) { 1015 tf.protocol = ATA_PROT_ATAPI_DMA; 1016 tf.feature |= ATAPI_PKT_DMA; 1017 } else { 1018 tf.protocol = ATA_PROT_ATAPI; 1019 tf.lbam = (8 * 1024) & 0xff; 1020 tf.lbah = (8 * 1024) >> 8; 1021 } 1022 1023 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1024 sense_buf, SCSI_SENSE_BUFFERSIZE); 1025 } 1026 1027 /** 1028 * ata_eh_analyze_serror - analyze SError for a failed port 1029 * @ap: ATA port to analyze SError for 1030 * 1031 * Analyze SError if available and further determine cause of 1032 * failure. 1033 * 1034 * LOCKING: 1035 * None. 1036 */ 1037 static void ata_eh_analyze_serror(struct ata_port *ap) 1038 { 1039 struct ata_eh_context *ehc = &ap->eh_context; 1040 u32 serror = ehc->i.serror; 1041 unsigned int err_mask = 0, action = 0; 1042 1043 if (serror & SERR_PERSISTENT) { 1044 err_mask |= AC_ERR_ATA_BUS; 1045 action |= ATA_EH_HARDRESET; 1046 } 1047 if (serror & 1048 (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) { 1049 err_mask |= AC_ERR_ATA_BUS; 1050 action |= ATA_EH_SOFTRESET; 1051 } 1052 if (serror & SERR_PROTOCOL) { 1053 err_mask |= AC_ERR_HSM; 1054 action |= ATA_EH_SOFTRESET; 1055 } 1056 if (serror & SERR_INTERNAL) { 1057 err_mask |= AC_ERR_SYSTEM; 1058 action |= ATA_EH_SOFTRESET; 1059 } 1060 if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) 1061 ata_ehi_hotplugged(&ehc->i); 1062 1063 ehc->i.err_mask |= err_mask; 1064 ehc->i.action |= action; 1065 } 1066 1067 /** 1068 * ata_eh_analyze_ncq_error - analyze NCQ error 1069 * @ap: ATA port to analyze NCQ error for 1070 * 1071 * Read log page 10h, determine the offending qc and acquire 1072 * error status TF. For NCQ device errors, all LLDDs have to do 1073 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1074 * care of the rest. 1075 * 1076 * LOCKING: 1077 * Kernel thread context (may sleep). 1078 */ 1079 static void ata_eh_analyze_ncq_error(struct ata_port *ap) 1080 { 1081 struct ata_eh_context *ehc = &ap->eh_context; 1082 struct ata_device *dev = ap->device; 1083 struct ata_queued_cmd *qc; 1084 struct ata_taskfile tf; 1085 int tag, rc; 1086 1087 /* if frozen, we can't do much */ 1088 if (ap->pflags & ATA_PFLAG_FROZEN) 1089 return; 1090 1091 /* is it NCQ device error? */ 1092 if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1093 return; 1094 1095 /* has LLDD analyzed already? */ 1096 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1097 qc = __ata_qc_from_tag(ap, tag); 1098 1099 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1100 continue; 1101 1102 if (qc->err_mask) 1103 return; 1104 } 1105 1106 /* okay, this error is ours */ 1107 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1108 if (rc) { 1109 ata_port_printk(ap, KERN_ERR, "failed to read log page 10h " 1110 "(errno=%d)\n", rc); 1111 return; 1112 } 1113 1114 if (!(ap->sactive & (1 << tag))) { 1115 ata_port_printk(ap, KERN_ERR, "log page 10h reported " 1116 "inactive tag %d\n", tag); 1117 return; 1118 } 1119 1120 /* we've got the perpetrator, condemn it */ 1121 qc = __ata_qc_from_tag(ap, tag); 1122 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1123 qc->err_mask |= AC_ERR_DEV; 1124 ehc->i.err_mask &= ~AC_ERR_DEV; 1125 } 1126 1127 /** 1128 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1129 * @qc: qc to analyze 1130 * @tf: Taskfile registers to analyze 1131 * 1132 * Analyze taskfile of @qc and further determine cause of 1133 * failure. This function also requests ATAPI sense data if 1134 * avaliable. 1135 * 1136 * LOCKING: 1137 * Kernel thread context (may sleep). 1138 * 1139 * RETURNS: 1140 * Determined recovery action 1141 */ 1142 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1143 const struct ata_taskfile *tf) 1144 { 1145 unsigned int tmp, action = 0; 1146 u8 stat = tf->command, err = tf->feature; 1147 1148 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1149 qc->err_mask |= AC_ERR_HSM; 1150 return ATA_EH_SOFTRESET; 1151 } 1152 1153 if (!(qc->err_mask & AC_ERR_DEV)) 1154 return 0; 1155 1156 switch (qc->dev->class) { 1157 case ATA_DEV_ATA: 1158 if (err & ATA_ICRC) 1159 qc->err_mask |= AC_ERR_ATA_BUS; 1160 if (err & ATA_UNC) 1161 qc->err_mask |= AC_ERR_MEDIA; 1162 if (err & ATA_IDNF) 1163 qc->err_mask |= AC_ERR_INVALID; 1164 break; 1165 1166 case ATA_DEV_ATAPI: 1167 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1168 tmp = atapi_eh_request_sense(qc->dev, 1169 qc->scsicmd->sense_buffer); 1170 if (!tmp) { 1171 /* ATA_QCFLAG_SENSE_VALID is used to 1172 * tell atapi_qc_complete() that sense 1173 * data is already valid. 1174 * 1175 * TODO: interpret sense data and set 1176 * appropriate err_mask. 1177 */ 1178 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1179 } else 1180 qc->err_mask |= tmp; 1181 } 1182 } 1183 1184 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1185 action |= ATA_EH_SOFTRESET; 1186 1187 return action; 1188 } 1189 1190 static int ata_eh_categorize_error(int is_io, unsigned int err_mask) 1191 { 1192 if (err_mask & AC_ERR_ATA_BUS) 1193 return 1; 1194 1195 if (err_mask & AC_ERR_TIMEOUT) 1196 return 2; 1197 1198 if (is_io) { 1199 if (err_mask & AC_ERR_HSM) 1200 return 2; 1201 if ((err_mask & 1202 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1203 return 3; 1204 } 1205 1206 return 0; 1207 } 1208 1209 struct speed_down_verdict_arg { 1210 u64 since; 1211 int nr_errors[4]; 1212 }; 1213 1214 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1215 { 1216 struct speed_down_verdict_arg *arg = void_arg; 1217 int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask); 1218 1219 if (ent->timestamp < arg->since) 1220 return -1; 1221 1222 arg->nr_errors[cat]++; 1223 return 0; 1224 } 1225 1226 /** 1227 * ata_eh_speed_down_verdict - Determine speed down verdict 1228 * @dev: Device of interest 1229 * 1230 * This function examines error ring of @dev and determines 1231 * whether NCQ needs to be turned off, transfer speed should be 1232 * stepped down, or falling back to PIO is necessary. 1233 * 1234 * Cat-1 is ATA_BUS error for any command. 1235 * 1236 * Cat-2 is TIMEOUT for any command or HSM violation for known 1237 * supported commands. 1238 * 1239 * Cat-3 is is unclassified DEV error for known supported 1240 * command. 1241 * 1242 * NCQ needs to be turned off if there have been more than 3 1243 * Cat-2 + Cat-3 errors during last 10 minutes. 1244 * 1245 * Speed down is necessary if there have been more than 3 Cat-1 + 1246 * Cat-2 errors or 10 Cat-3 errors during last 10 minutes. 1247 * 1248 * Falling back to PIO mode is necessary if there have been more 1249 * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. 1250 * 1251 * LOCKING: 1252 * Inherited from caller. 1253 * 1254 * RETURNS: 1255 * OR of ATA_EH_SPDN_* flags. 1256 */ 1257 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1258 { 1259 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1260 u64 j64 = get_jiffies_64(); 1261 struct speed_down_verdict_arg arg; 1262 unsigned int verdict = 0; 1263 1264 /* scan past 10 mins of error history */ 1265 memset(&arg, 0, sizeof(arg)); 1266 arg.since = j64 - min(j64, j10mins); 1267 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1268 1269 if (arg.nr_errors[2] + arg.nr_errors[3] > 3) 1270 verdict |= ATA_EH_SPDN_NCQ_OFF; 1271 if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10) 1272 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1273 1274 /* scan past 3 mins of error history */ 1275 memset(&arg, 0, sizeof(arg)); 1276 arg.since = j64 - min(j64, j5mins); 1277 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1278 1279 if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10) 1280 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1281 1282 return verdict; 1283 } 1284 1285 /** 1286 * ata_eh_speed_down - record error and speed down if necessary 1287 * @dev: Failed device 1288 * @is_io: Did the device fail during normal IO? 1289 * @err_mask: err_mask of the error 1290 * 1291 * Record error and examine error history to determine whether 1292 * adjusting transmission speed is necessary. It also sets 1293 * transmission limits appropriately if such adjustment is 1294 * necessary. 1295 * 1296 * LOCKING: 1297 * Kernel thread context (may sleep). 1298 * 1299 * RETURNS: 1300 * Determined recovery action. 1301 */ 1302 static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, 1303 unsigned int err_mask) 1304 { 1305 unsigned int verdict; 1306 unsigned int action = 0; 1307 1308 /* don't bother if Cat-0 error */ 1309 if (ata_eh_categorize_error(is_io, err_mask) == 0) 1310 return 0; 1311 1312 /* record error and determine whether speed down is necessary */ 1313 ata_ering_record(&dev->ering, is_io, err_mask); 1314 verdict = ata_eh_speed_down_verdict(dev); 1315 1316 /* turn off NCQ? */ 1317 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && 1318 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | 1319 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { 1320 dev->flags |= ATA_DFLAG_NCQ_OFF; 1321 ata_dev_printk(dev, KERN_WARNING, 1322 "NCQ disabled due to excessive errors\n"); 1323 goto done; 1324 } 1325 1326 /* speed down? */ 1327 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 1328 /* speed down SATA link speed if possible */ 1329 if (sata_down_spd_limit(dev->ap) == 0) { 1330 action |= ATA_EH_HARDRESET; 1331 goto done; 1332 } 1333 1334 /* lower transfer mode */ 1335 if (dev->spdn_cnt < 2) { 1336 static const int dma_dnxfer_sel[] = 1337 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 1338 static const int pio_dnxfer_sel[] = 1339 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 1340 int sel; 1341 1342 if (dev->xfer_shift != ATA_SHIFT_PIO) 1343 sel = dma_dnxfer_sel[dev->spdn_cnt]; 1344 else 1345 sel = pio_dnxfer_sel[dev->spdn_cnt]; 1346 1347 dev->spdn_cnt++; 1348 1349 if (ata_down_xfermask_limit(dev, sel) == 0) { 1350 action |= ATA_EH_SOFTRESET; 1351 goto done; 1352 } 1353 } 1354 } 1355 1356 /* Fall back to PIO? Slowing down to PIO is meaningless for 1357 * SATA. Consider it only for PATA. 1358 */ 1359 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 1360 (dev->ap->cbl != ATA_CBL_SATA) && 1361 (dev->xfer_shift != ATA_SHIFT_PIO)) { 1362 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 1363 dev->spdn_cnt = 0; 1364 action |= ATA_EH_SOFTRESET; 1365 goto done; 1366 } 1367 } 1368 1369 return 0; 1370 done: 1371 /* device has been slowed down, blow error history */ 1372 ata_ering_clear(&dev->ering); 1373 return action; 1374 } 1375 1376 /** 1377 * ata_eh_autopsy - analyze error and determine recovery action 1378 * @ap: ATA port to perform autopsy on 1379 * 1380 * Analyze why @ap failed and determine which recovery action is 1381 * needed. This function also sets more detailed AC_ERR_* values 1382 * and fills sense data for ATAPI CHECK SENSE. 1383 * 1384 * LOCKING: 1385 * Kernel thread context (may sleep). 1386 */ 1387 static void ata_eh_autopsy(struct ata_port *ap) 1388 { 1389 struct ata_eh_context *ehc = &ap->eh_context; 1390 unsigned int all_err_mask = 0; 1391 int tag, is_io = 0; 1392 u32 serror; 1393 int rc; 1394 1395 DPRINTK("ENTER\n"); 1396 1397 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 1398 return; 1399 1400 /* obtain and analyze SError */ 1401 rc = sata_scr_read(ap, SCR_ERROR, &serror); 1402 if (rc == 0) { 1403 ehc->i.serror |= serror; 1404 ata_eh_analyze_serror(ap); 1405 } else if (rc != -EOPNOTSUPP) 1406 ehc->i.action |= ATA_EH_HARDRESET; 1407 1408 /* analyze NCQ failure */ 1409 ata_eh_analyze_ncq_error(ap); 1410 1411 /* any real error trumps AC_ERR_OTHER */ 1412 if (ehc->i.err_mask & ~AC_ERR_OTHER) 1413 ehc->i.err_mask &= ~AC_ERR_OTHER; 1414 1415 all_err_mask |= ehc->i.err_mask; 1416 1417 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1418 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1419 1420 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1421 continue; 1422 1423 /* inherit upper level err_mask */ 1424 qc->err_mask |= ehc->i.err_mask; 1425 1426 /* analyze TF */ 1427 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 1428 1429 /* DEV errors are probably spurious in case of ATA_BUS error */ 1430 if (qc->err_mask & AC_ERR_ATA_BUS) 1431 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 1432 AC_ERR_INVALID); 1433 1434 /* any real error trumps unknown error */ 1435 if (qc->err_mask & ~AC_ERR_OTHER) 1436 qc->err_mask &= ~AC_ERR_OTHER; 1437 1438 /* SENSE_VALID trumps dev/unknown error and revalidation */ 1439 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 1440 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 1441 ehc->i.action &= ~ATA_EH_REVALIDATE; 1442 } 1443 1444 /* accumulate error info */ 1445 ehc->i.dev = qc->dev; 1446 all_err_mask |= qc->err_mask; 1447 if (qc->flags & ATA_QCFLAG_IO) 1448 is_io = 1; 1449 } 1450 1451 /* enforce default EH actions */ 1452 if (ap->pflags & ATA_PFLAG_FROZEN || 1453 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 1454 ehc->i.action |= ATA_EH_SOFTRESET; 1455 else if (all_err_mask) 1456 ehc->i.action |= ATA_EH_REVALIDATE; 1457 1458 /* if we have offending qcs and the associated failed device */ 1459 if (ehc->i.dev) { 1460 /* speed down */ 1461 ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io, 1462 all_err_mask); 1463 1464 /* perform per-dev EH action only on the offending device */ 1465 ehc->i.dev_action[ehc->i.dev->devno] |= 1466 ehc->i.action & ATA_EH_PERDEV_MASK; 1467 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 1468 } 1469 1470 DPRINTK("EXIT\n"); 1471 } 1472 1473 /** 1474 * ata_eh_report - report error handling to user 1475 * @ap: ATA port EH is going on 1476 * 1477 * Report EH to user. 1478 * 1479 * LOCKING: 1480 * None. 1481 */ 1482 static void ata_eh_report(struct ata_port *ap) 1483 { 1484 struct ata_eh_context *ehc = &ap->eh_context; 1485 const char *frozen, *desc; 1486 int tag, nr_failed = 0; 1487 1488 desc = NULL; 1489 if (ehc->i.desc[0] != '\0') 1490 desc = ehc->i.desc; 1491 1492 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1493 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1494 1495 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1496 continue; 1497 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 1498 continue; 1499 1500 nr_failed++; 1501 } 1502 1503 if (!nr_failed && !ehc->i.err_mask) 1504 return; 1505 1506 frozen = ""; 1507 if (ap->pflags & ATA_PFLAG_FROZEN) 1508 frozen = " frozen"; 1509 1510 if (ehc->i.dev) { 1511 ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x " 1512 "SAct 0x%x SErr 0x%x action 0x%x%s\n", 1513 ehc->i.err_mask, ap->sactive, ehc->i.serror, 1514 ehc->i.action, frozen); 1515 if (desc) 1516 ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc); 1517 } else { 1518 ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x " 1519 "SAct 0x%x SErr 0x%x action 0x%x%s\n", 1520 ehc->i.err_mask, ap->sactive, ehc->i.serror, 1521 ehc->i.action, frozen); 1522 if (desc) 1523 ata_port_printk(ap, KERN_ERR, "(%s)\n", desc); 1524 } 1525 1526 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1527 static const char *dma_str[] = { 1528 [DMA_BIDIRECTIONAL] = "bidi", 1529 [DMA_TO_DEVICE] = "out", 1530 [DMA_FROM_DEVICE] = "in", 1531 [DMA_NONE] = "", 1532 }; 1533 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1534 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 1535 1536 if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask) 1537 continue; 1538 1539 ata_dev_printk(qc->dev, KERN_ERR, 1540 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 1541 "tag %d cdb 0x%x data %u %s\n " 1542 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 1543 "Emask 0x%x (%s)\n", 1544 cmd->command, cmd->feature, cmd->nsect, 1545 cmd->lbal, cmd->lbam, cmd->lbah, 1546 cmd->hob_feature, cmd->hob_nsect, 1547 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 1548 cmd->device, qc->tag, qc->cdb[0], qc->nbytes, 1549 dma_str[qc->dma_dir], 1550 res->command, res->feature, res->nsect, 1551 res->lbal, res->lbam, res->lbah, 1552 res->hob_feature, res->hob_nsect, 1553 res->hob_lbal, res->hob_lbam, res->hob_lbah, 1554 res->device, qc->err_mask, ata_err_string(qc->err_mask)); 1555 } 1556 } 1557 1558 static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset, 1559 unsigned int *classes) 1560 { 1561 int i, rc; 1562 1563 for (i = 0; i < ATA_MAX_DEVICES; i++) 1564 classes[i] = ATA_DEV_UNKNOWN; 1565 1566 rc = reset(ap, classes); 1567 if (rc) 1568 return rc; 1569 1570 /* If any class isn't ATA_DEV_UNKNOWN, consider classification 1571 * is complete and convert all ATA_DEV_UNKNOWN to 1572 * ATA_DEV_NONE. 1573 */ 1574 for (i = 0; i < ATA_MAX_DEVICES; i++) 1575 if (classes[i] != ATA_DEV_UNKNOWN) 1576 break; 1577 1578 if (i < ATA_MAX_DEVICES) 1579 for (i = 0; i < ATA_MAX_DEVICES; i++) 1580 if (classes[i] == ATA_DEV_UNKNOWN) 1581 classes[i] = ATA_DEV_NONE; 1582 1583 return 0; 1584 } 1585 1586 static int ata_eh_followup_srst_needed(int rc, int classify, 1587 const unsigned int *classes) 1588 { 1589 if (rc == -EAGAIN) 1590 return 1; 1591 if (rc != 0) 1592 return 0; 1593 if (classify && classes[0] == ATA_DEV_UNKNOWN) 1594 return 1; 1595 return 0; 1596 } 1597 1598 static int ata_eh_reset(struct ata_port *ap, int classify, 1599 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 1600 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 1601 { 1602 struct ata_eh_context *ehc = &ap->eh_context; 1603 unsigned int *classes = ehc->classes; 1604 int tries = ATA_EH_RESET_TRIES; 1605 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 1606 unsigned int action; 1607 ata_reset_fn_t reset; 1608 int i, did_followup_srst, rc; 1609 1610 /* about to reset */ 1611 ata_eh_about_to_do(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK); 1612 1613 /* Determine which reset to use and record in ehc->i.action. 1614 * prereset() may examine and modify it. 1615 */ 1616 action = ehc->i.action; 1617 ehc->i.action &= ~ATA_EH_RESET_MASK; 1618 if (softreset && (!hardreset || (!sata_set_spd_needed(ap) && 1619 !(action & ATA_EH_HARDRESET)))) 1620 ehc->i.action |= ATA_EH_SOFTRESET; 1621 else 1622 ehc->i.action |= ATA_EH_HARDRESET; 1623 1624 if (prereset) { 1625 rc = prereset(ap); 1626 if (rc) { 1627 if (rc == -ENOENT) { 1628 ata_port_printk(ap, KERN_DEBUG, "port disabled. ignoring.\n"); 1629 ap->eh_context.i.action &= ~ATA_EH_RESET_MASK; 1630 } else 1631 ata_port_printk(ap, KERN_ERR, 1632 "prereset failed (errno=%d)\n", rc); 1633 return rc; 1634 } 1635 } 1636 1637 /* prereset() might have modified ehc->i.action */ 1638 if (ehc->i.action & ATA_EH_HARDRESET) 1639 reset = hardreset; 1640 else if (ehc->i.action & ATA_EH_SOFTRESET) 1641 reset = softreset; 1642 else { 1643 /* prereset told us not to reset, bang classes and return */ 1644 for (i = 0; i < ATA_MAX_DEVICES; i++) 1645 classes[i] = ATA_DEV_NONE; 1646 return 0; 1647 } 1648 1649 /* did prereset() screw up? if so, fix up to avoid oopsing */ 1650 if (!reset) { 1651 ata_port_printk(ap, KERN_ERR, "BUG: prereset() requested " 1652 "invalid reset type\n"); 1653 if (softreset) 1654 reset = softreset; 1655 else 1656 reset = hardreset; 1657 } 1658 1659 retry: 1660 /* shut up during boot probing */ 1661 if (verbose) 1662 ata_port_printk(ap, KERN_INFO, "%s resetting port\n", 1663 reset == softreset ? "soft" : "hard"); 1664 1665 /* mark that this EH session started with reset */ 1666 ehc->i.flags |= ATA_EHI_DID_RESET; 1667 1668 rc = ata_do_reset(ap, reset, classes); 1669 1670 did_followup_srst = 0; 1671 if (reset == hardreset && 1672 ata_eh_followup_srst_needed(rc, classify, classes)) { 1673 /* okay, let's do follow-up softreset */ 1674 did_followup_srst = 1; 1675 reset = softreset; 1676 1677 if (!reset) { 1678 ata_port_printk(ap, KERN_ERR, 1679 "follow-up softreset required " 1680 "but no softreset avaliable\n"); 1681 return -EINVAL; 1682 } 1683 1684 ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); 1685 rc = ata_do_reset(ap, reset, classes); 1686 1687 if (rc == 0 && classify && 1688 classes[0] == ATA_DEV_UNKNOWN) { 1689 ata_port_printk(ap, KERN_ERR, 1690 "classification failed\n"); 1691 return -EINVAL; 1692 } 1693 } 1694 1695 if (rc && --tries) { 1696 const char *type; 1697 1698 if (reset == softreset) { 1699 if (did_followup_srst) 1700 type = "follow-up soft"; 1701 else 1702 type = "soft"; 1703 } else 1704 type = "hard"; 1705 1706 ata_port_printk(ap, KERN_WARNING, 1707 "%sreset failed, retrying in 5 secs\n", type); 1708 ssleep(5); 1709 1710 if (reset == hardreset) 1711 sata_down_spd_limit(ap); 1712 if (hardreset) 1713 reset = hardreset; 1714 goto retry; 1715 } 1716 1717 if (rc == 0) { 1718 /* After the reset, the device state is PIO 0 and the 1719 * controller state is undefined. Record the mode. 1720 */ 1721 for (i = 0; i < ATA_MAX_DEVICES; i++) 1722 ap->device[i].pio_mode = XFER_PIO_0; 1723 1724 if (postreset) 1725 postreset(ap, classes); 1726 1727 /* reset successful, schedule revalidation */ 1728 ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK); 1729 ehc->i.action |= ATA_EH_REVALIDATE; 1730 } 1731 1732 return rc; 1733 } 1734 1735 static int ata_eh_revalidate_and_attach(struct ata_port *ap, 1736 struct ata_device **r_failed_dev) 1737 { 1738 struct ata_eh_context *ehc = &ap->eh_context; 1739 struct ata_device *dev; 1740 unsigned long flags; 1741 int i, rc = 0; 1742 1743 DPRINTK("ENTER\n"); 1744 1745 for (i = 0; i < ATA_MAX_DEVICES; i++) { 1746 unsigned int action, readid_flags = 0; 1747 1748 dev = &ap->device[i]; 1749 action = ata_eh_dev_action(dev); 1750 1751 if (ehc->i.flags & ATA_EHI_DID_RESET) 1752 readid_flags |= ATA_READID_POSTRESET; 1753 1754 if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) { 1755 if (ata_port_offline(ap)) { 1756 rc = -EIO; 1757 break; 1758 } 1759 1760 ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE); 1761 rc = ata_dev_revalidate(dev, readid_flags); 1762 if (rc) 1763 break; 1764 1765 ata_eh_done(ap, dev, ATA_EH_REVALIDATE); 1766 1767 /* Configuration may have changed, reconfigure 1768 * transfer mode. 1769 */ 1770 ehc->i.flags |= ATA_EHI_SETMODE; 1771 1772 /* schedule the scsi_rescan_device() here */ 1773 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 1774 } else if (dev->class == ATA_DEV_UNKNOWN && 1775 ehc->tries[dev->devno] && 1776 ata_class_enabled(ehc->classes[dev->devno])) { 1777 dev->class = ehc->classes[dev->devno]; 1778 1779 rc = ata_dev_read_id(dev, &dev->class, readid_flags, 1780 dev->id); 1781 if (rc == 0) { 1782 ehc->i.flags |= ATA_EHI_PRINTINFO; 1783 rc = ata_dev_configure(dev); 1784 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 1785 } else if (rc == -ENOENT) { 1786 /* IDENTIFY was issued to non-existent 1787 * device. No need to reset. Just 1788 * thaw and kill the device. 1789 */ 1790 ata_eh_thaw_port(ap); 1791 dev->class = ATA_DEV_UNKNOWN; 1792 rc = 0; 1793 } 1794 1795 if (rc) { 1796 dev->class = ATA_DEV_UNKNOWN; 1797 break; 1798 } 1799 1800 if (ata_dev_enabled(dev)) { 1801 spin_lock_irqsave(ap->lock, flags); 1802 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1803 spin_unlock_irqrestore(ap->lock, flags); 1804 1805 /* new device discovered, configure xfermode */ 1806 ehc->i.flags |= ATA_EHI_SETMODE; 1807 } 1808 } 1809 } 1810 1811 if (rc) 1812 *r_failed_dev = dev; 1813 1814 DPRINTK("EXIT\n"); 1815 return rc; 1816 } 1817 1818 #ifdef CONFIG_PM 1819 /** 1820 * ata_eh_suspend - handle suspend EH action 1821 * @ap: target host port 1822 * @r_failed_dev: result parameter to indicate failing device 1823 * 1824 * Handle suspend EH action. Disk devices are spinned down and 1825 * other types of devices are just marked suspended. Once 1826 * suspended, no EH action to the device is allowed until it is 1827 * resumed. 1828 * 1829 * LOCKING: 1830 * Kernel thread context (may sleep). 1831 * 1832 * RETURNS: 1833 * 0 on success, -errno otherwise 1834 */ 1835 static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev) 1836 { 1837 struct ata_device *dev; 1838 int i, rc = 0; 1839 1840 DPRINTK("ENTER\n"); 1841 1842 for (i = 0; i < ATA_MAX_DEVICES; i++) { 1843 unsigned long flags; 1844 unsigned int action, err_mask; 1845 1846 dev = &ap->device[i]; 1847 action = ata_eh_dev_action(dev); 1848 1849 if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND)) 1850 continue; 1851 1852 WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED); 1853 1854 ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND); 1855 1856 if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) { 1857 /* flush cache */ 1858 rc = ata_flush_cache(dev); 1859 if (rc) 1860 break; 1861 1862 /* spin down */ 1863 err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1); 1864 if (err_mask) { 1865 ata_dev_printk(dev, KERN_ERR, "failed to " 1866 "spin down (err_mask=0x%x)\n", 1867 err_mask); 1868 rc = -EIO; 1869 break; 1870 } 1871 } 1872 1873 spin_lock_irqsave(ap->lock, flags); 1874 dev->flags |= ATA_DFLAG_SUSPENDED; 1875 spin_unlock_irqrestore(ap->lock, flags); 1876 1877 ata_eh_done(ap, dev, ATA_EH_SUSPEND); 1878 } 1879 1880 if (rc) 1881 *r_failed_dev = dev; 1882 1883 DPRINTK("EXIT\n"); 1884 return rc; 1885 } 1886 1887 /** 1888 * ata_eh_prep_resume - prep for resume EH action 1889 * @ap: target host port 1890 * 1891 * Clear SUSPENDED in preparation for scheduled resume actions. 1892 * This allows other parts of EH to access the devices being 1893 * resumed. 1894 * 1895 * LOCKING: 1896 * Kernel thread context (may sleep). 1897 */ 1898 static void ata_eh_prep_resume(struct ata_port *ap) 1899 { 1900 struct ata_device *dev; 1901 unsigned long flags; 1902 int i; 1903 1904 DPRINTK("ENTER\n"); 1905 1906 for (i = 0; i < ATA_MAX_DEVICES; i++) { 1907 unsigned int action; 1908 1909 dev = &ap->device[i]; 1910 action = ata_eh_dev_action(dev); 1911 1912 if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME)) 1913 continue; 1914 1915 spin_lock_irqsave(ap->lock, flags); 1916 dev->flags &= ~ATA_DFLAG_SUSPENDED; 1917 spin_unlock_irqrestore(ap->lock, flags); 1918 } 1919 1920 DPRINTK("EXIT\n"); 1921 } 1922 1923 /** 1924 * ata_eh_resume - handle resume EH action 1925 * @ap: target host port 1926 * @r_failed_dev: result parameter to indicate failing device 1927 * 1928 * Handle resume EH action. Target devices are already reset and 1929 * revalidated. Spinning up is the only operation left. 1930 * 1931 * LOCKING: 1932 * Kernel thread context (may sleep). 1933 * 1934 * RETURNS: 1935 * 0 on success, -errno otherwise 1936 */ 1937 static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev) 1938 { 1939 struct ata_device *dev; 1940 int i, rc = 0; 1941 1942 DPRINTK("ENTER\n"); 1943 1944 for (i = 0; i < ATA_MAX_DEVICES; i++) { 1945 unsigned int action, err_mask; 1946 1947 dev = &ap->device[i]; 1948 action = ata_eh_dev_action(dev); 1949 1950 if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME)) 1951 continue; 1952 1953 ata_eh_about_to_do(ap, dev, ATA_EH_RESUME); 1954 1955 if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) { 1956 err_mask = ata_do_simple_cmd(dev, 1957 ATA_CMD_IDLEIMMEDIATE); 1958 if (err_mask) { 1959 ata_dev_printk(dev, KERN_ERR, "failed to " 1960 "spin up (err_mask=0x%x)\n", 1961 err_mask); 1962 rc = -EIO; 1963 break; 1964 } 1965 } 1966 1967 ata_eh_done(ap, dev, ATA_EH_RESUME); 1968 } 1969 1970 if (rc) 1971 *r_failed_dev = dev; 1972 1973 DPRINTK("EXIT\n"); 1974 return 0; 1975 } 1976 #endif /* CONFIG_PM */ 1977 1978 static int ata_port_nr_enabled(struct ata_port *ap) 1979 { 1980 int i, cnt = 0; 1981 1982 for (i = 0; i < ATA_MAX_DEVICES; i++) 1983 if (ata_dev_enabled(&ap->device[i])) 1984 cnt++; 1985 return cnt; 1986 } 1987 1988 static int ata_port_nr_vacant(struct ata_port *ap) 1989 { 1990 int i, cnt = 0; 1991 1992 for (i = 0; i < ATA_MAX_DEVICES; i++) 1993 if (ap->device[i].class == ATA_DEV_UNKNOWN) 1994 cnt++; 1995 return cnt; 1996 } 1997 1998 static int ata_eh_skip_recovery(struct ata_port *ap) 1999 { 2000 struct ata_eh_context *ehc = &ap->eh_context; 2001 int i; 2002 2003 /* skip if all possible devices are suspended */ 2004 for (i = 0; i < ata_port_max_devices(ap); i++) { 2005 struct ata_device *dev = &ap->device[i]; 2006 2007 if (!(dev->flags & ATA_DFLAG_SUSPENDED)) 2008 break; 2009 } 2010 2011 if (i == ata_port_max_devices(ap)) 2012 return 1; 2013 2014 /* thaw frozen port, resume link and recover failed devices */ 2015 if ((ap->pflags & ATA_PFLAG_FROZEN) || 2016 (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap)) 2017 return 0; 2018 2019 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 2020 for (i = 0; i < ATA_MAX_DEVICES; i++) { 2021 struct ata_device *dev = &ap->device[i]; 2022 2023 if (dev->class == ATA_DEV_UNKNOWN && 2024 ehc->classes[dev->devno] != ATA_DEV_NONE) 2025 return 0; 2026 } 2027 2028 return 1; 2029 } 2030 2031 /** 2032 * ata_eh_recover - recover host port after error 2033 * @ap: host port to recover 2034 * @prereset: prereset method (can be NULL) 2035 * @softreset: softreset method (can be NULL) 2036 * @hardreset: hardreset method (can be NULL) 2037 * @postreset: postreset method (can be NULL) 2038 * 2039 * This is the alpha and omega, eum and yang, heart and soul of 2040 * libata exception handling. On entry, actions required to 2041 * recover the port and hotplug requests are recorded in 2042 * eh_context. This function executes all the operations with 2043 * appropriate retrials and fallbacks to resurrect failed 2044 * devices, detach goners and greet newcomers. 2045 * 2046 * LOCKING: 2047 * Kernel thread context (may sleep). 2048 * 2049 * RETURNS: 2050 * 0 on success, -errno on failure. 2051 */ 2052 static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 2053 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2054 ata_postreset_fn_t postreset) 2055 { 2056 struct ata_eh_context *ehc = &ap->eh_context; 2057 struct ata_device *dev; 2058 int i, rc; 2059 2060 DPRINTK("ENTER\n"); 2061 2062 /* prep for recovery */ 2063 for (i = 0; i < ATA_MAX_DEVICES; i++) { 2064 dev = &ap->device[i]; 2065 2066 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2067 2068 /* collect port action mask recorded in dev actions */ 2069 ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK; 2070 ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK; 2071 2072 /* process hotplug request */ 2073 if (dev->flags & ATA_DFLAG_DETACH) 2074 ata_eh_detach_dev(dev); 2075 2076 if (!ata_dev_enabled(dev) && 2077 ((ehc->i.probe_mask & (1 << dev->devno)) && 2078 !(ehc->did_probe_mask & (1 << dev->devno)))) { 2079 ata_eh_detach_dev(dev); 2080 ata_dev_init(dev); 2081 ehc->did_probe_mask |= (1 << dev->devno); 2082 ehc->i.action |= ATA_EH_SOFTRESET; 2083 } 2084 } 2085 2086 retry: 2087 rc = 0; 2088 2089 /* if UNLOADING, finish immediately */ 2090 if (ap->pflags & ATA_PFLAG_UNLOADING) 2091 goto out; 2092 2093 /* prep for resume */ 2094 ata_eh_prep_resume(ap); 2095 2096 /* skip EH if possible. */ 2097 if (ata_eh_skip_recovery(ap)) 2098 ehc->i.action = 0; 2099 2100 for (i = 0; i < ATA_MAX_DEVICES; i++) 2101 ehc->classes[i] = ATA_DEV_UNKNOWN; 2102 2103 /* reset */ 2104 if (ehc->i.action & ATA_EH_RESET_MASK) { 2105 ata_eh_freeze_port(ap); 2106 2107 rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset, 2108 softreset, hardreset, postreset); 2109 if (rc) { 2110 ata_port_printk(ap, KERN_ERR, 2111 "reset failed, giving up\n"); 2112 goto out; 2113 } 2114 2115 ata_eh_thaw_port(ap); 2116 } 2117 2118 /* revalidate existing devices and attach new ones */ 2119 rc = ata_eh_revalidate_and_attach(ap, &dev); 2120 if (rc) 2121 goto dev_fail; 2122 2123 /* resume devices */ 2124 rc = ata_eh_resume(ap, &dev); 2125 if (rc) 2126 goto dev_fail; 2127 2128 /* configure transfer mode if necessary */ 2129 if (ehc->i.flags & ATA_EHI_SETMODE) { 2130 rc = ata_set_mode(ap, &dev); 2131 if (rc) 2132 goto dev_fail; 2133 ehc->i.flags &= ~ATA_EHI_SETMODE; 2134 } 2135 2136 /* suspend devices */ 2137 rc = ata_eh_suspend(ap, &dev); 2138 if (rc) 2139 goto dev_fail; 2140 2141 goto out; 2142 2143 dev_fail: 2144 ehc->tries[dev->devno]--; 2145 2146 switch (rc) { 2147 case -EINVAL: 2148 /* eeek, something went very wrong, give up */ 2149 ehc->tries[dev->devno] = 0; 2150 break; 2151 2152 case -ENODEV: 2153 /* device missing or wrong IDENTIFY data, schedule probing */ 2154 ehc->i.probe_mask |= (1 << dev->devno); 2155 /* give it just one more chance */ 2156 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 2157 case -EIO: 2158 if (ehc->tries[dev->devno] == 1) { 2159 /* This is the last chance, better to slow 2160 * down than lose it. 2161 */ 2162 sata_down_spd_limit(ap); 2163 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 2164 } 2165 } 2166 2167 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 2168 /* disable device if it has used up all its chances */ 2169 ata_dev_disable(dev); 2170 2171 /* detach if offline */ 2172 if (ata_port_offline(ap)) 2173 ata_eh_detach_dev(dev); 2174 2175 /* probe if requested */ 2176 if ((ehc->i.probe_mask & (1 << dev->devno)) && 2177 !(ehc->did_probe_mask & (1 << dev->devno))) { 2178 ata_eh_detach_dev(dev); 2179 ata_dev_init(dev); 2180 2181 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2182 ehc->did_probe_mask |= (1 << dev->devno); 2183 ehc->i.action |= ATA_EH_SOFTRESET; 2184 } 2185 } else { 2186 /* soft didn't work? be haaaaard */ 2187 if (ehc->i.flags & ATA_EHI_DID_RESET) 2188 ehc->i.action |= ATA_EH_HARDRESET; 2189 else 2190 ehc->i.action |= ATA_EH_SOFTRESET; 2191 } 2192 2193 if (ata_port_nr_enabled(ap)) { 2194 ata_port_printk(ap, KERN_WARNING, "failed to recover some " 2195 "devices, retrying in 5 secs\n"); 2196 ssleep(5); 2197 } else { 2198 /* no device left, repeat fast */ 2199 msleep(500); 2200 } 2201 2202 goto retry; 2203 2204 out: 2205 if (rc) { 2206 for (i = 0; i < ATA_MAX_DEVICES; i++) 2207 ata_dev_disable(&ap->device[i]); 2208 } 2209 2210 DPRINTK("EXIT, rc=%d\n", rc); 2211 return rc; 2212 } 2213 2214 /** 2215 * ata_eh_finish - finish up EH 2216 * @ap: host port to finish EH for 2217 * 2218 * Recovery is complete. Clean up EH states and retry or finish 2219 * failed qcs. 2220 * 2221 * LOCKING: 2222 * None. 2223 */ 2224 static void ata_eh_finish(struct ata_port *ap) 2225 { 2226 int tag; 2227 2228 /* retry or finish qcs */ 2229 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2230 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2231 2232 if (!(qc->flags & ATA_QCFLAG_FAILED)) 2233 continue; 2234 2235 if (qc->err_mask) { 2236 /* FIXME: Once EH migration is complete, 2237 * generate sense data in this function, 2238 * considering both err_mask and tf. 2239 */ 2240 if (qc->err_mask & AC_ERR_INVALID) 2241 ata_eh_qc_complete(qc); 2242 else 2243 ata_eh_qc_retry(qc); 2244 } else { 2245 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 2246 ata_eh_qc_complete(qc); 2247 } else { 2248 /* feed zero TF to sense generation */ 2249 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 2250 ata_eh_qc_retry(qc); 2251 } 2252 } 2253 } 2254 } 2255 2256 /** 2257 * ata_do_eh - do standard error handling 2258 * @ap: host port to handle error for 2259 * @prereset: prereset method (can be NULL) 2260 * @softreset: softreset method (can be NULL) 2261 * @hardreset: hardreset method (can be NULL) 2262 * @postreset: postreset method (can be NULL) 2263 * 2264 * Perform standard error handling sequence. 2265 * 2266 * LOCKING: 2267 * Kernel thread context (may sleep). 2268 */ 2269 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 2270 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2271 ata_postreset_fn_t postreset) 2272 { 2273 ata_eh_autopsy(ap); 2274 ata_eh_report(ap); 2275 ata_eh_recover(ap, prereset, softreset, hardreset, postreset); 2276 ata_eh_finish(ap); 2277 } 2278 2279 #ifdef CONFIG_PM 2280 /** 2281 * ata_eh_handle_port_suspend - perform port suspend operation 2282 * @ap: port to suspend 2283 * 2284 * Suspend @ap. 2285 * 2286 * LOCKING: 2287 * Kernel thread context (may sleep). 2288 */ 2289 static void ata_eh_handle_port_suspend(struct ata_port *ap) 2290 { 2291 unsigned long flags; 2292 int rc = 0; 2293 2294 /* are we suspending? */ 2295 spin_lock_irqsave(ap->lock, flags); 2296 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2297 ap->pm_mesg.event == PM_EVENT_ON) { 2298 spin_unlock_irqrestore(ap->lock, flags); 2299 return; 2300 } 2301 spin_unlock_irqrestore(ap->lock, flags); 2302 2303 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 2304 2305 /* suspend */ 2306 ata_eh_freeze_port(ap); 2307 2308 if (ap->ops->port_suspend) 2309 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 2310 2311 /* report result */ 2312 spin_lock_irqsave(ap->lock, flags); 2313 2314 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 2315 if (rc == 0) 2316 ap->pflags |= ATA_PFLAG_SUSPENDED; 2317 else 2318 ata_port_schedule_eh(ap); 2319 2320 if (ap->pm_result) { 2321 *ap->pm_result = rc; 2322 ap->pm_result = NULL; 2323 } 2324 2325 spin_unlock_irqrestore(ap->lock, flags); 2326 2327 return; 2328 } 2329 2330 /** 2331 * ata_eh_handle_port_resume - perform port resume operation 2332 * @ap: port to resume 2333 * 2334 * Resume @ap. 2335 * 2336 * This function also waits upto one second until all devices 2337 * hanging off this port requests resume EH action. This is to 2338 * prevent invoking EH and thus reset multiple times on resume. 2339 * 2340 * On DPM resume, where some of devices might not be resumed 2341 * together, this may delay port resume upto one second, but such 2342 * DPM resumes are rare and 1 sec delay isn't too bad. 2343 * 2344 * LOCKING: 2345 * Kernel thread context (may sleep). 2346 */ 2347 static void ata_eh_handle_port_resume(struct ata_port *ap) 2348 { 2349 unsigned long timeout; 2350 unsigned long flags; 2351 int i, rc = 0; 2352 2353 /* are we resuming? */ 2354 spin_lock_irqsave(ap->lock, flags); 2355 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2356 ap->pm_mesg.event != PM_EVENT_ON) { 2357 spin_unlock_irqrestore(ap->lock, flags); 2358 return; 2359 } 2360 spin_unlock_irqrestore(ap->lock, flags); 2361 2362 /* spurious? */ 2363 if (!(ap->pflags & ATA_PFLAG_SUSPENDED)) 2364 goto done; 2365 2366 if (ap->ops->port_resume) 2367 rc = ap->ops->port_resume(ap); 2368 2369 /* give devices time to request EH */ 2370 timeout = jiffies + HZ; /* 1s max */ 2371 while (1) { 2372 for (i = 0; i < ATA_MAX_DEVICES; i++) { 2373 struct ata_device *dev = &ap->device[i]; 2374 unsigned int action = ata_eh_dev_action(dev); 2375 2376 if ((dev->flags & ATA_DFLAG_SUSPENDED) && 2377 !(action & ATA_EH_RESUME)) 2378 break; 2379 } 2380 2381 if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout)) 2382 break; 2383 msleep(10); 2384 } 2385 2386 done: 2387 spin_lock_irqsave(ap->lock, flags); 2388 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 2389 if (ap->pm_result) { 2390 *ap->pm_result = rc; 2391 ap->pm_result = NULL; 2392 } 2393 spin_unlock_irqrestore(ap->lock, flags); 2394 } 2395 #endif /* CONFIG_PM */ 2396