/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/DocBook/libata.*
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include "libata.h"

enum {
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
};

static void __ata_port_freeze(struct ata_port *ap);
static void ata_eh_finish(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
static int ata_eh_suspend(struct ata_port *ap,
			  struct ata_device **r_failed_dev);
static void ata_eh_prep_resume(struct ata_port *ap);
static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }

static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	return 0;
}

static void ata_eh_prep_resume(struct ata_port *ap)
{ }

static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	return 0;
}
#endif /* CONFIG_PM */

static void ata_ering_record(struct ata_ering *ering, int is_io,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->is_io = is_io;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static void ata_ering_clear(struct ata_ering *ering)
{
	memset(ering, 0, sizeof(*ering));
}

static int ata_ering_map(struct ata_ering *ering,
			 int (*map_fn)(struct ata_ering_entry *, void *),
			 void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}
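/* Illustrative sketch (not part of the original file): ata_ering_map()
 * walks entries from the most recent backwards and stops early when the
 * callback returns non-zero.  A hypothetical callback that counts I/O
 * errors could look like this:
 *
 *	static int count_io_errors(struct ata_ering_entry *ent, void *arg)
 *	{
 *		if (ent->is_io)
 *			(*(int *)arg)++;
 *		return 0;	// 0 == keep iterating
 *	}
 *
 *	int nr_io_errors = 0;
 *	ata_ering_map(&dev->ering, count_io_errors, &nr_io_errors);
 *
 * speed_down_verdict_cb() below is the real in-tree user of this pattern.
 */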
static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->ap->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	int i;

	if (!dev) {
		ehi->action &= ~action;
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ehi->dev_action[i] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			for (i = 0; i < ATA_MAX_DEVICES; i++)
				ehi->dev_action[i] |= ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}
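/* Worked example (illustrative, not from the original file): suppose
 * ehi->action has ATA_EH_REVALIDATE set port-wide and we call
 * ata_eh_clear_action(dev0, ehi, ATA_EH_REVALIDATE).  The port-wide bit
 * is first fanned out to every dev_action[] slot, then cleared only for
 * dev0 -- so device 1 still has REVALIDATE pending afterwards:
 *
 *	before:	action = REVALIDATE, dev_action = { 0, 0 }
 *	after:	action = 0,          dev_action = { 0, REVALIDATE }
 *
 * This is what lets per-device EH actions complete independently.
 */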
/**
 * ata_scsi_timed_out - SCSI layer time out callback
 * @cmd: timed out SCSI command
 *
 * Handles SCSI layer timeout.  We race with normal completion of
 * the qc for @cmd.  If the qc is already gone, we lose and let
 * the scsi command finish (EH_HANDLED).  Otherwise, the qc has
 * timed out and EH should be invoked.  Prevent ata_qc_complete()
 * from finishing it by setting EH_SCHEDULED and return
 * EH_NOT_HANDLED.
 *
 * TODO: kill this function once old EH is gone.
 *
 * LOCKING:
 * Called from timer context
 *
 * RETURNS:
 * EH_HANDLED or EH_NOT_HANDLED
 */
enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	struct ata_queued_cmd *qc;
	enum scsi_eh_timer_return ret;

	DPRINTK("ENTER\n");

	if (ap->ops->error_handler) {
		ret = EH_NOT_HANDLED;
		goto out;
	}

	ret = EH_HANDLED;
	spin_lock_irqsave(ap->lock, flags);
	qc = ata_qc_from_tag(ap, ap->active_tag);
	if (qc) {
		WARN_ON(qc->scsicmd != cmd);
		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
		qc->err_mask |= AC_ERR_TIMEOUT;
		ret = EH_NOT_HANDLED;
	}
	spin_unlock_irqrestore(ap->lock, flags);

 out:
	DPRINTK("EXIT, ret=%d\n", ret);
	return ret;
}

/**
 * ata_scsi_error - SCSI layer error handler callback
 * @host: SCSI host on which error occurred
 *
 * Handles SCSI-layer-thrown error events.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 *
 * RETURNS:
 * Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(ap->lock, flags);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(ap->lock, flags);
	} else
		spin_unlock_wait(ap->lock);

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
		ap->eh_context.i = ap->eh_info;
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else
			ata_eh_finish(ap);

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--repeat_cnt) {
				ata_port_printk(ap, KERN_INFO,
					"EH pending after completion, "
					"repeating EH (cnt=%d)\n", repeat_cnt);
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_REPEAT);
		}

		/* this run is complete, make sure EH info is clear */
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		/* Clear host_eh_scheduled while holding ap->lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(ap->lock, flags);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("EXIT\n");
}

/**
 * ata_port_wait_eh - Wait for the currently pending EH to complete
 * @ap: Port to wait EH for
 *
 * Wait until the currently pending EH is complete.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		msleep(10);
		goto retry;
	}
}
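/* Usage sketch (illustrative, not from the original file): a caller that
 * needs the port quiesced -- e.g. teardown or a polling hotplug path --
 * typically schedules EH and then waits for it to finish:
 *
 *	spin_lock_irqsave(ap->lock, flags);
 *	ata_port_schedule_eh(ap);		// defined below
 *	spin_unlock_irqrestore(ap->lock, flags);
 *
 *	ata_port_wait_eh(ap);	// sleeps until EH_PENDING/IN_PROGRESS clear
 *
 * Note that ata_port_schedule_eh() requires the host lock while
 * ata_port_wait_eh() must be called from sleepable context without it.
 */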
/**
 * ata_qc_timeout - Handle timeout of queued command
 * @qc: Command that timed out
 *
 * Some part of the kernel (currently, only the SCSI layer)
 * has noticed that the active command on port @ap has not
 * completed after a specified length of time.  Handle this
 * condition by disabling DMA (if necessary) and completing
 * transactions, with error if necessary.
 *
 * This also handles the case of the "lost interrupt", where
 * for some reason (possibly hardware bug, possibly driver bug)
 * an interrupt was not delivered to the driver, even though the
 * transaction completed successfully.
 *
 * TODO: kill this function once old EH is gone.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 */
static void ata_qc_timeout(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	u8 host_stat = 0, drv_stat;
	unsigned long flags;

	DPRINTK("ENTER\n");

	ap->hsm_task_state = HSM_ST_IDLE;

	spin_lock_irqsave(ap->lock, flags);

	switch (qc->tf.protocol) {

	case ATA_PROT_DMA:
	case ATA_PROT_ATAPI_DMA:
		host_stat = ap->ops->bmdma_status(ap);

		/* before we do anything else, clear DMA-Start bit */
		ap->ops->bmdma_stop(qc);

		/* fall through */

	default:
		ata_altstatus(ap);
		drv_stat = ata_chk_status(ap);

		/* ack bmdma irq events */
		ap->ops->irq_clear(ap);

		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
			       "stat 0x%x host_stat 0x%x\n",
			       qc->tf.command, drv_stat, host_stat);

		/* complete taskfile transaction */
		qc->err_mask |= AC_ERR_TIMEOUT;
		break;
	}

	spin_unlock_irqrestore(ap->lock, flags);

	ata_eh_qc_complete(qc);

	DPRINTK("EXIT\n");
}

/**
 * ata_eng_timeout - Handle timeout of queued command
 * @ap: Port on which timed-out command is active
 *
 * Some part of the kernel (currently, only the SCSI layer)
 * has noticed that the active command on port @ap has not
 * completed after a specified length of time.  Handle this
 * condition by disabling DMA (if necessary) and completing
 * transactions, with error if necessary.
 *
 * This also handles the case of the "lost interrupt", where
 * for some reason (possibly hardware bug, possibly driver bug)
 * an interrupt was not delivered to the driver, even though the
 * transaction completed successfully.
 *
 * TODO: kill this function once old EH is gone.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 */
void ata_eng_timeout(struct ata_port *ap)
{
	DPRINTK("ENTER\n");

	ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));

	DPRINTK("EXIT\n");
}

/**
 * ata_qc_schedule_eh - schedule qc for error handling
 * @qc: command to schedule error handling for
 *
 * Schedule error handling for @qc.  EH will kick in as soon as
 * other commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	qc->ap->pflags |= ATA_PFLAG_EH_PENDING;

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	scsi_req_abort_cmd(qc->scsicmd);
}

/**
 * ata_port_schedule_eh - schedule error handling without a qc
 * @ap: ATA port to schedule EH for
 *
 * Schedule error handling for @ap.  EH will kick in as soon as
 * all commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	ap->pflags |= ATA_PFLAG_EH_PENDING;
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}
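/* Illustrative sketch (not from the original file): a low-level driver's
 * interrupt handler usually reports an exception by filling ap->eh_info
 * and then invoking one of the scheduling helpers, all under the host
 * lock.  A hypothetical error-IRQ path:
 *
 *	struct ata_eh_info *ehi = &ap->eh_info;
 *
 *	ehi->err_mask |= AC_ERR_HSM;		// describe what went wrong
 *	ehi->action |= ATA_EH_SOFTRESET;	// request recovery action
 *	ata_port_schedule_eh(ap);		// or ata_port_abort(ap) /
 *						// ata_port_freeze(ap) below
 *
 * ata_port_abort() fails all active qcs first; ata_port_freeze()
 * additionally freezes the port for controllers left in an unknown state.
 */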
/**
 * ata_port_abort - abort all qc's on the port
 * @ap: ATA port to abort qc's for
 *
 * Abort all active qc's of @ap and schedule EH.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 * __ata_port_freeze - freeze port
 * @ap: ATA port to freeze
 *
 * This function is called when HSM violation or some other
 * condition disrupts normal operation of the port.  Frozen port
 * is not allowed to perform any operation until the port is
 * thawed, which usually follows a successful reset.
 *
 * ap->ops->freeze() callback can be used for freezing the port
 * hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 * port cannot be frozen hardware-wise, the interrupt handler
 * must ack and clear interrupts unconditionally while the port
 * is frozen.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 * ata_port_freeze - abort & freeze port
 * @ap: ATA port to freeze
 *
 * Abort and freeze @ap.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	nr_aborted = ata_port_abort(ap);
	__ata_port_freeze(ap);

	return nr_aborted;
}

/**
 * ata_eh_freeze_port - EH helper to freeze port
 * @ap: ATA port to freeze
 *
 * Freeze @ap.
 *
 * LOCKING:
 * None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}
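/* Illustrative note (not from the original file): ->freeze/->thaw are
 * paired driver callbacks.  SFF/BMDMA drivers of this era typically just
 * point them at the generic helpers in their ata_port_operations, e.g.:
 *
 *	.freeze		= ata_bmdma_freeze,	// mask device interrupts
 *	.thaw		= ata_bmdma_thaw,	// clear and unmask interrupts
 *
 * A driver whose controller cannot mask interrupts instead checks
 * ATA_PFLAG_FROZEN in its IRQ handler and acks/discards events while the
 * flag is set, as the __ata_port_freeze() comment above requires.
 */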
/**
 * ata_eh_thaw_port - EH helper to thaw port
 * @ap: ATA port to thaw
 *
 * Thaw frozen port @ap.
 *
 * LOCKING:
 * None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 * ata_eh_qc_complete - Complete an active ATA command from EH
 * @qc: Command to complete
 *
 * Indicate to the mid and upper layers that an ATA command has
 * completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 * @qc: Command to retry
 *
 * Indicate to the mid and upper layers that an ATA command
 * should be retried.  To be used from EH.
 *
 * SCSI midlayer limits the number of retries to scmd->allowed.
 * scmd->retries is decremented for commands which get retried
 * due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	if (!qc->err_mask && scmd->retries)
		scmd->retries--;
	__ata_eh_qc_complete(qc);
}

/**
 * ata_eh_detach_dev - detach ATA device
 * @dev: ATA device to detach
 *
 * Detach @dev.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_port *ap = dev->ap;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH actions */
	ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_about_to_do - about to perform eh_action
 * @ap: target ATA port
 * @dev: target ATA dev for per-dev action (can be NULL)
 * @action: action about to be performed
 *
 * Called just before performing EH actions to clear related bits
 * in @ap->eh_info such that eh actions are not unnecessarily
 * repeated.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev,
			       unsigned int action)
{
	unsigned long flags;
	struct ata_eh_info *ehi = &ap->eh_info;
	struct ata_eh_context *ehc = &ap->eh_context;

	spin_lock_irqsave(ap->lock, flags);

	/* Reset is represented by combination of actions and EHI
	 * flags.  Suck in all related bits before clearing eh_info to
	 * avoid losing requested action.
	 */
	if (action & ATA_EH_RESET_MASK) {
		ehc->i.action |= ehi->action & ATA_EH_RESET_MASK;
		ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK;

		/* make sure all reset actions are cleared & clear EHI flags */
		action |= ATA_EH_RESET_MASK;
		ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
	}

	ata_eh_clear_action(dev, ehi, action);

	if (!(ehc->i.flags & ATA_EHI_QUIET))
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_done - EH action complete
 * @ap: target ATA port
 * @dev: target ATA dev for per-dev action (can be NULL)
 * @action: action just completed
 *
 * Called right after performing EH actions to clear related bits
 * in @ap->eh_context.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_done(struct ata_port *ap, struct ata_device *dev,
			unsigned int action)
{
	/* if reset is complete, clear all reset actions & reset modifier */
	if (action & ATA_EH_RESET_MASK) {
		action |= ATA_EH_RESET_MASK;
		ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
	}

	ata_eh_clear_action(dev, &ap->eh_context.i, action);
}

/**
 * ata_err_string - convert err_mask to descriptive string
 * @err_mask: error mask to convert to string
 *
 * Convert @err_mask to descriptive string.  Errors are
 * prioritized according to severity and only the most severe
 * error is reported.
 *
 * LOCKING:
 * None.
 *
 * RETURNS:
 * Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	return "unknown error";
}

/**
 * ata_read_log_page - read a specific log page
 * @dev: target device
 * @page: page to read
 * @buf: buffer to store read page
 * @sectors: number of sectors to read
 *
 * Read log page using READ_LOG_EXT command.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
				      u8 page, void *buf, unsigned int sectors)
{
	struct ata_taskfile tf;
	unsigned int err_mask;

	DPRINTK("read log page - page %d\n", page);

	ata_tf_init(dev, &tf);
	tf.command = ATA_CMD_READ_LOG_EXT;
	tf.lbal = page;
	tf.nsect = sectors;
	tf.hob_nsect = sectors >> 8;
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
	tf.protocol = ATA_PROT_PIO;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
				     buf, sectors * ATA_SECT_SIZE);

	DPRINTK("EXIT, err_mask=%x\n", err_mask);
	return err_mask;
}

/**
 * ata_eh_read_log_10h - Read log page 10h for NCQ error details
 * @dev: Device to read log page 10h from
 * @tag: Resulting tag of the failed command
 * @tf: Resulting taskfile registers of the failed command
 *
 * Read log page 10h to obtain NCQ error details and clear error
 * condition.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_printk(dev, KERN_WARNING,
			       "invalid checksum 0x%x on log page 10h\n", csum);

	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];

	return 0;
}

/**
 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 * @qc: qc to perform REQUEST_SENSE for (sense data is stored in
 *	qc->scsicmd->sense_buffer, SCSI_SENSE_BUFFERSIZE bytes long)
 *
 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
 * SENSE.  This function is an EH helper.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc)
{
	struct ata_device *dev = qc->dev;
	unsigned char *sense_buf = qc->scsicmd->sense_buffer;
	struct ata_port *ap = dev->ap;
	struct ata_taskfile tf;
	u8 cdb[ATAPI_CDB_LEN];

	DPRINTK("ATAPI request sense\n");

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = qc->result_tf.feature >> 4;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	memset(cdb, 0, ATAPI_CDB_LEN);
	cdb[0] = REQUEST_SENSE;
	cdb[4] = SCSI_SENSE_BUFFERSIZE;

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATA_PROT_ATAPI_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATA_PROT_ATAPI;
		tf.lbam = (8 * 1024) & 0xff;
		tf.lbah = (8 * 1024) >> 8;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE);
}

/**
 * ata_eh_analyze_serror - analyze SError for a failed port
 * @ap: ATA port to analyze SError for
 *
 * Analyze SError if available and further determine cause of
 * failure.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_analyze_serror(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;

	if (serror & SERR_PERSISTENT) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_HARDRESET;
	}
	if (serror &
	    (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG))
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 * ata_eh_analyze_ncq_error - analyze NCQ error
 * @ap: ATA port to analyze NCQ error for
 *
 * Read log page 10h, determine the offending qc and acquire
 * error status TF.  For NCQ device errors, all LLDDs have to do
 * is set AC_ERR_DEV in ehi->err_mask.  This function takes
 * care of the rest.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_analyze_ncq_error(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev = ap->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it an NCQ device error? */
	if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	if (!(ap->sactive & (1 << tag))) {
		ata_port_printk(ap, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->err_mask |= AC_ERR_DEV;
	ehc->i.err_mask &= ~AC_ERR_DEV;
}
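/* Illustrative sketch (not from the original file): per the comment
 * above, a hypothetical NCQ-capable LLDD only needs to flag the device
 * error; ata_eh_analyze_ncq_error() finds the offending tag itself:
 *
 *	// in the driver's error interrupt path, host lock held
 *	struct ata_eh_info *ehi = &ap->eh_info;
 *
 *	ehi->err_mask |= AC_ERR_DEV;
 *	ata_port_abort(ap);	// fail active qcs but do not freeze, so
 *				// EH can still read log page 10h
 */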
/**
 * ata_eh_analyze_tf - analyze taskfile of a failed qc
 * @qc: qc to analyze
 * @tf: Taskfile registers to analyze
 *
 * Analyze taskfile of @qc and further determine cause of
 * failure.  This function also requests ATAPI sense data if
 * available.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_SOFTRESET;
	}

	if (!(qc->err_mask & AC_ERR_DEV))
		return 0;

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & ATA_UNC)
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc);
			if (!tmp) {
				/* ATA_QCFLAG_SENSE_VALID is used to
				 * tell atapi_qc_complete() that sense
				 * data is already valid.
				 *
				 * TODO: interpret sense data and set
				 * appropriate err_mask.
				 */
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			} else
				qc->err_mask |= tmp;
		}
	}

	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_SOFTRESET;

	return action;
}

static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
{
	if (err_mask & AC_ERR_ATA_BUS)
		return 1;

	if (err_mask & AC_ERR_TIMEOUT)
		return 2;

	if (is_io) {
		if (err_mask & AC_ERR_HSM)
			return 2;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return 3;
	}

	return 0;
}

struct speed_down_verdict_arg {
	u64 since;
	int nr_errors[4];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);

	if (ent->timestamp < arg->since)
		return -1;

	arg->nr_errors[cat]++;
	return 0;
}

/**
 * ata_eh_speed_down_verdict - Determine speed down verdict
 * @dev: Device of interest
 *
 * This function examines error ring of @dev and determines
 * whether NCQ needs to be turned off, transfer speed should be
 * stepped down, or falling back to PIO is necessary.
 *
 * Cat-1 is ATA_BUS error for any command.
 *
 * Cat-2 is TIMEOUT for any command or HSM violation for known
 * supported commands.
 *
 * Cat-3 is unclassified DEV error for known supported
 * commands.
 *
 * NCQ needs to be turned off if there have been more than 3
 * Cat-2 + Cat-3 errors during last 10 minutes.
 *
 * Speed down is necessary if there have been more than 3 Cat-1 +
 * Cat-2 errors or 10 Cat-3 errors during last 10 minutes.
 *
 * Falling back to PIO mode is necessary if there have been more
 * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes.
 *
 * LOCKING:
 * Inherited from caller.
 *
 * RETURNS:
 * OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[2] + arg.nr_errors[3] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;
	if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	return verdict;
}
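/* Worked example (illustrative, not from the original file): suppose the
 * ring for a device holds, all within the last 10 minutes, 2 timeouts
 * (Cat-2) and 2 unclassified device errors on I/O commands (Cat-3).
 * Then nr_errors[2] + nr_errors[3] == 4 > 3, so the verdict includes
 * ATA_EH_SPDN_NCQ_OFF, while nr_errors[1] + nr_errors[2] == 2 <= 3 and
 * nr_errors[3] == 2 <= 10, so ATA_EH_SPDN_SPEED_DOWN is not set yet.
 */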
/**
 * ata_eh_speed_down - record error and speed down if necessary
 * @dev: Failed device
 * @is_io: Did the device fail during normal IO?
 * @err_mask: err_mask of the error
 *
 * Record error and examine error history to determine whether
 * adjusting transmission speed is necessary.  It also sets
 * transmission limits appropriately if such adjustment is
 * necessary.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
				      unsigned int err_mask)
{
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(is_io, err_mask) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, is_io, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ? */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_printk(dev, KERN_WARNING,
			       "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(dev->ap) == 0) {
			action |= ATA_EH_HARDRESET;
			goto done;
		}

		/* lower transfer mode */
		if (dev->spdn_cnt < 2) {
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_SOFTRESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA.  Consider it only for PATA.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (dev->ap->cbl != ATA_CBL_SATA) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_SOFTRESET;
			goto done;
		}
	}

	return 0;
 done:
	/* device has been slowed down, blow error history */
	ata_ering_clear(&dev->ering);
	return action;
}

/**
 * ata_eh_autopsy - analyze error and determine recovery action
 * @ap: ATA port to perform autopsy on
 *
 * Analyze why @ap failed and determine which recovery action is
 * needed.  This function also sets more detailed AC_ERR_* values
 * and fills sense data for ATAPI CHECK SENSE.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int all_err_mask = 0;
	int tag, is_io = 0;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError */
	rc = sata_scr_read(ap, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(ap);
	} else if (rc != -EOPNOTSUPP)
		ehc->i.action |= ATA_EH_HARDRESET;

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(ap);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
			ehc->i.action &= ~ATA_EH_REVALIDATE;
		}

		/* accumulate error info */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			is_io = 1;
	}

	/* enforce default EH actions */
	if (ap->pflags & ATA_PFLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_SOFTRESET;
	else if (all_err_mask)
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* if we have offending qcs and the associated failed device */
	if (ehc->i.dev) {
		/* speed down */
		ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io,
						   all_err_mask);

		/* perform per-dev EH action only on the offending device */
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	DPRINTK("EXIT\n");
}

/**
 * ata_eh_report - report error handling to user
 * @ap: ATA port EH is going on
 *
 * Report EH to user.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_report(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	const char *frozen, *desc;
	int tag, nr_failed = 0;

	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->pflags & ATA_PFLAG_FROZEN)
		frozen = " frozen";

	if (ehc->i.dev) {
		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
			       "SAct 0x%x SErr 0x%x action 0x%x%s\n",
			       ehc->i.err_mask, ap->sactive, ehc->i.serror,
			       ehc->i.action, frozen);
		if (desc)
			ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
	} else {
		ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
				"SAct 0x%x SErr 0x%x action 0x%x%s\n",
				ehc->i.err_mask, ap->sactive, ehc->i.serror,
				ehc->i.action, frozen);
		if (desc)
			ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
	}

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		static const char *dma_str[] = {
			[DMA_BIDIRECTIONAL]	= "bidi",
			[DMA_TO_DEVICE]		= "out",
			[DMA_FROM_DEVICE]	= "in",
			[DMA_NONE]		= "",
		};
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;

		if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
			continue;

		ata_dev_printk(qc->dev, KERN_ERR,
			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"tag %d cdb 0x%x data %u %s\n"
			"         res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"Emask 0x%x (%s)\n",
			cmd->command, cmd->feature, cmd->nsect,
			cmd->lbal, cmd->lbam, cmd->lbah,
			cmd->hob_feature, cmd->hob_nsect,
			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
			cmd->device, qc->tag, qc->cdb[0], qc->nbytes,
			dma_str[qc->dma_dir],
			res->command, res->feature, res->nsect,
			res->lbal, res->lbam, res->lbah,
			res->hob_feature, res->hob_nsect,
			res->hob_lbal, res->hob_lbam, res->hob_lbah,
			res->device, qc->err_mask, ata_err_string(qc->err_mask));
	}
}

static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset,
			unsigned int *classes)
{
	int i, rc;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		classes[i] = ATA_DEV_UNKNOWN;

	rc = reset(ap, classes);
	if (rc)
		return rc;

	/* If any class isn't ATA_DEV_UNKNOWN, consider classification
	 * is complete and convert all ATA_DEV_UNKNOWN to
	 * ATA_DEV_NONE.
	 */
	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (classes[i] != ATA_DEV_UNKNOWN)
			break;

	if (i < ATA_MAX_DEVICES)
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			if (classes[i] == ATA_DEV_UNKNOWN)
				classes[i] = ATA_DEV_NONE;

	return 0;
}

static int ata_eh_followup_srst_needed(int rc, int classify,
				       const unsigned int *classes)
{
	if (rc == -EAGAIN)
		return 1;
	if (rc != 0)
		return 0;
	if (classify && classes[0] == ATA_DEV_UNKNOWN)
		return 1;
	return 0;
}

static int ata_eh_reset(struct ata_port *ap, int classify,
			ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
			ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int *classes = ehc->classes;
	int tries = ATA_EH_RESET_TRIES;
	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
	unsigned int action;
	ata_reset_fn_t reset;
	int i, did_followup_srst, rc;

	/* about to reset */
	ata_eh_about_to_do(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);

	/* Determine which reset to use and record in ehc->i.action.
	 * prereset() may examine and modify it.
	 */
	action = ehc->i.action;
	ehc->i.action &= ~ATA_EH_RESET_MASK;
	if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
					 !(action & ATA_EH_HARDRESET))))
		ehc->i.action |= ATA_EH_SOFTRESET;
	else
		ehc->i.action |= ATA_EH_HARDRESET;

	if (prereset) {
		rc = prereset(ap);
		if (rc) {
			if (rc == -ENOENT) {
				ata_port_printk(ap, KERN_DEBUG,
						"port disabled. ignoring.\n");
				ap->eh_context.i.action &= ~ATA_EH_RESET_MASK;

				for (i = 0; i < ATA_MAX_DEVICES; i++)
					classes[i] = ATA_DEV_NONE;

				rc = 0;
			} else
				ata_port_printk(ap, KERN_ERR,
					"prereset failed (errno=%d)\n", rc);
			return rc;
		}
	}

	/* prereset() might have modified ehc->i.action */
	if (ehc->i.action & ATA_EH_HARDRESET)
		reset = hardreset;
	else if (ehc->i.action & ATA_EH_SOFTRESET)
		reset = softreset;
	else {
		/* prereset told us not to reset, bang classes and return */
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			classes[i] = ATA_DEV_NONE;
		return 0;
	}

	/* did prereset() screw up?  if so, fix up to avoid oopsing */
	if (!reset) {
		ata_port_printk(ap, KERN_ERR, "BUG: prereset() requested "
				"invalid reset type\n");
		if (softreset)
			reset = softreset;
		else
			reset = hardreset;
	}

 retry:
	/* shut up during boot probing */
	if (verbose)
		ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
				reset == softreset ?
"soft" : "hard"); 1670 1671 /* mark that this EH session started with reset */ 1672 ehc->i.flags |= ATA_EHI_DID_RESET; 1673 1674 rc = ata_do_reset(ap, reset, classes); 1675 1676 did_followup_srst = 0; 1677 if (reset == hardreset && 1678 ata_eh_followup_srst_needed(rc, classify, classes)) { 1679 /* okay, let's do follow-up softreset */ 1680 did_followup_srst = 1; 1681 reset = softreset; 1682 1683 if (!reset) { 1684 ata_port_printk(ap, KERN_ERR, 1685 "follow-up softreset required " 1686 "but no softreset avaliable\n"); 1687 return -EINVAL; 1688 } 1689 1690 ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); 1691 rc = ata_do_reset(ap, reset, classes); 1692 1693 if (rc == 0 && classify && 1694 classes[0] == ATA_DEV_UNKNOWN) { 1695 ata_port_printk(ap, KERN_ERR, 1696 "classification failed\n"); 1697 return -EINVAL; 1698 } 1699 } 1700 1701 if (rc && --tries) { 1702 const char *type; 1703 1704 if (reset == softreset) { 1705 if (did_followup_srst) 1706 type = "follow-up soft"; 1707 else 1708 type = "soft"; 1709 } else 1710 type = "hard"; 1711 1712 ata_port_printk(ap, KERN_WARNING, 1713 "%sreset failed, retrying in 5 secs\n", type); 1714 ssleep(5); 1715 1716 if (reset == hardreset) 1717 sata_down_spd_limit(ap); 1718 if (hardreset) 1719 reset = hardreset; 1720 goto retry; 1721 } 1722 1723 if (rc == 0) { 1724 /* After the reset, the device state is PIO 0 and the 1725 * controller state is undefined. Record the mode. 1726 */ 1727 for (i = 0; i < ATA_MAX_DEVICES; i++) 1728 ap->device[i].pio_mode = XFER_PIO_0; 1729 1730 if (postreset) 1731 postreset(ap, classes); 1732 1733 /* reset successful, schedule revalidation */ 1734 ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK); 1735 ehc->i.action |= ATA_EH_REVALIDATE; 1736 } 1737 1738 return rc; 1739 } 1740 1741 static int ata_eh_revalidate_and_attach(struct ata_port *ap, 1742 struct ata_device **r_failed_dev) 1743 { 1744 struct ata_eh_context *ehc = &ap->eh_context; 1745 struct ata_device *dev; 1746 unsigned int new_mask = 0; 1747 unsigned long flags; 1748 int i, rc = 0; 1749 1750 DPRINTK("ENTER\n"); 1751 1752 /* For PATA drive side cable detection to work, IDENTIFY must 1753 * be done backwards such that PDIAG- is released by the slave 1754 * device before the master device is identified. 1755 */ 1756 for (i = ATA_MAX_DEVICES - 1; i >= 0; i--) { 1757 unsigned int action, readid_flags = 0; 1758 1759 dev = &ap->device[i]; 1760 action = ata_eh_dev_action(dev); 1761 1762 if (ehc->i.flags & ATA_EHI_DID_RESET) 1763 readid_flags |= ATA_READID_POSTRESET; 1764 1765 if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) { 1766 if (ata_port_offline(ap)) { 1767 rc = -EIO; 1768 goto err; 1769 } 1770 1771 ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE); 1772 rc = ata_dev_revalidate(dev, readid_flags); 1773 if (rc) 1774 goto err; 1775 1776 ata_eh_done(ap, dev, ATA_EH_REVALIDATE); 1777 1778 /* Configuration may have changed, reconfigure 1779 * transfer mode. 1780 */ 1781 ehc->i.flags |= ATA_EHI_SETMODE; 1782 1783 /* schedule the scsi_rescan_device() here */ 1784 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 1785 } else if (dev->class == ATA_DEV_UNKNOWN && 1786 ehc->tries[dev->devno] && 1787 ata_class_enabled(ehc->classes[dev->devno])) { 1788 dev->class = ehc->classes[dev->devno]; 1789 1790 rc = ata_dev_read_id(dev, &dev->class, readid_flags, 1791 dev->id); 1792 switch (rc) { 1793 case 0: 1794 new_mask |= 1 << i; 1795 break; 1796 case -ENOENT: 1797 /* IDENTIFY was issued to non-existent 1798 * device. No need to reset. Just 1799 * thaw and kill the device. 
				 */
				ata_eh_thaw_port(ap);
				dev->class = ATA_DEV_UNKNOWN;
				break;
			default:
				dev->class = ATA_DEV_UNKNOWN;
				goto err;
			}
		}
	}

	/* Configure new devices forward such that user doesn't see
	 * device detection messages backwards.
	 */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		if (!(new_mask & (1 << i)))
			continue;

		ehc->i.flags |= ATA_EHI_PRINTINFO;
		rc = ata_dev_configure(dev);
		ehc->i.flags &= ~ATA_EHI_PRINTINFO;
		if (rc)
			goto err;

		spin_lock_irqsave(ap->lock, flags);
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
		spin_unlock_irqrestore(ap->lock, flags);

		/* new device discovered, configure xfermode */
		ehc->i.flags |= ATA_EHI_SETMODE;
	}

	return 0;

 err:
	*r_failed_dev = dev;
	DPRINTK("EXIT rc=%d\n", rc);
	return rc;
}

#ifdef CONFIG_PM
/**
 * ata_eh_suspend - handle suspend EH action
 * @ap: target host port
 * @r_failed_dev: result parameter to indicate failing device
 *
 * Handle suspend EH action.  Disk devices are spun down and
 * other types of devices are just marked suspended.  Once
 * suspended, no EH action to the device is allowed until it is
 * resumed.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno otherwise
 */
static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned long flags;
		unsigned int action, err_mask;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND))
			continue;

		WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED);

		ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND);

		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
			/* flush cache */
			rc = ata_flush_cache(dev);
			if (rc)
				break;

			/* spin down */
			err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1);
			if (err_mask) {
				ata_dev_printk(dev, KERN_ERR, "failed to "
					       "spin down (err_mask=0x%x)\n",
					       err_mask);
				rc = -EIO;
				break;
			}
		}

		spin_lock_irqsave(ap->lock, flags);
		dev->flags |= ATA_DFLAG_SUSPENDED;
		spin_unlock_irqrestore(ap->lock, flags);

		ata_eh_done(ap, dev, ATA_EH_SUSPEND);
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}

/**
 * ata_eh_prep_resume - prep for resume EH action
 * @ap: target host port
 *
 * Clear SUSPENDED in preparation for scheduled resume actions.
 * This allows other parts of EH to access the devices being
 * resumed.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_prep_resume(struct ata_port *ap)
{
	struct ata_device *dev;
	unsigned long flags;
	int i;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned int action;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
			continue;

		spin_lock_irqsave(ap->lock, flags);
		dev->flags &= ~ATA_DFLAG_SUSPENDED;
		spin_unlock_irqrestore(ap->lock, flags);
	}

	DPRINTK("EXIT\n");
}

/**
 * ata_eh_resume - handle resume EH action
 * @ap: target host port
 * @r_failed_dev: result parameter to indicate failing device
 *
 * Handle resume EH action.  Target devices are already reset and
 * revalidated.  Spinning up is the only operation left.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno otherwise
 */
static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned int action, err_mask;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
			continue;

		ata_eh_about_to_do(ap, dev, ATA_EH_RESUME);

		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
			err_mask = ata_do_simple_cmd(dev,
						     ATA_CMD_IDLEIMMEDIATE);
			if (err_mask) {
				ata_dev_printk(dev, KERN_ERR, "failed to "
					       "spin up (err_mask=0x%x)\n",
					       err_mask);
				rc = -EIO;
				break;
			}
		}

		ata_eh_done(ap, dev, ATA_EH_RESUME);
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}
#endif /* CONFIG_PM */

static int ata_port_nr_enabled(struct ata_port *ap)
{
	int i, cnt = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (ata_dev_enabled(&ap->device[i]))
			cnt++;
	return cnt;
}

static int ata_port_nr_vacant(struct ata_port *ap)
{
	int i, cnt = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (ap->device[i].class == ATA_DEV_UNKNOWN)
			cnt++;
	return cnt;
}

static int ata_eh_skip_recovery(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	int i;

	/* skip if all possible devices are suspended */
	for (i = 0; i < ata_port_max_devices(ap); i++) {
		struct ata_device *dev = &ap->device[i];

		if (!(dev->flags & ATA_DFLAG_SUSPENDED))
			break;
	}

	if (i == ata_port_max_devices(ap))
		return 1;

	/* thaw frozen port, resume link and recover failed devices */
	if ((ap->pflags & ATA_PFLAG_FROZEN) ||
	    (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap))
		return 0;

	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		struct ata_device *dev = &ap->device[i];

		if (dev->class == ATA_DEV_UNKNOWN &&
		    ehc->classes[dev->devno] != ATA_DEV_NONE)
			return 0;
	}

	return 1;
}

/**
 * ata_eh_recover - recover host port after error
 * @ap: host port to recover
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
/**
 *	ata_eh_recover - recover host port after error
 *	@ap: host port to recover
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	This is the alpha and omega, yin and yang, heart and soul of
 *	libata exception handling.  On entry, actions required to
 *	recover the port and hotplug requests are recorded in
 *	eh_context.  This function executes all the operations with
 *	appropriate retries and fallbacks to resurrect failed
 *	devices, detach goners and greet newcomers.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
			  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
			  ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int i, rc;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

		/* collect port action mask recorded in dev actions */
		ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK;
		ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK;

		/* process hotplug request */
		if (dev->flags & ATA_DFLAG_DETACH)
			ata_eh_detach_dev(dev);

		if (!ata_dev_enabled(dev) &&
		    ((ehc->i.probe_mask & (1 << dev->devno)) &&
		     !(ehc->did_probe_mask & (1 << dev->devno)))) {
			ata_eh_detach_dev(dev);
			ata_dev_init(dev);
			ehc->did_probe_mask |= (1 << dev->devno);
			ehc->i.action |= ATA_EH_SOFTRESET;
		}
	}

 retry:
	rc = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for resume */
	ata_eh_prep_resume(ap);

	/* skip EH if possible. */
	if (ata_eh_skip_recovery(ap))
		ehc->i.action = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		ehc->classes[i] = ATA_DEV_UNKNOWN;

	/* reset */
	if (ehc->i.action & ATA_EH_RESET_MASK) {
		ata_eh_freeze_port(ap);

		rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset,
				  softreset, hardreset, postreset);
		if (rc) {
			ata_port_printk(ap, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}

		ata_eh_thaw_port(ap);
	}

	/* revalidate existing devices and attach new ones */
	rc = ata_eh_revalidate_and_attach(ap, &dev);
	if (rc)
		goto dev_fail;

	/* resume devices */
	rc = ata_eh_resume(ap, &dev);
	if (rc)
		goto dev_fail;

	/* configure transfer mode if necessary */
	if (ehc->i.flags & ATA_EHI_SETMODE) {
		rc = ata_set_mode(ap, &dev);
		if (rc)
			goto dev_fail;
		ehc->i.flags &= ~ATA_EHI_SETMODE;
	}

	/* suspend devices */
	rc = ata_eh_suspend(ap, &dev);
	if (rc)
		goto dev_fail;

	goto out;

 dev_fail:
	ehc->tries[dev->devno]--;

	switch (rc) {
	case -EINVAL:
		/* eeek, something went very wrong, give up */
		ehc->tries[dev->devno] = 0;
		break;

	case -ENODEV:
		/* device missing or wrong IDENTIFY data, schedule probing */
		ehc->i.probe_mask |= (1 << dev->devno);
		/* give it just one more chance */
		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
		/* fall through */
	case -EIO:
		if (ehc->tries[dev->devno] == 1) {
			/* This is the last chance, better to slow
			 * down than lose it.
			 */
			sata_down_spd_limit(ap);
			ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
		}
	}

	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
		/* disable device if it has used up all its chances */
		ata_dev_disable(dev);

		/* detach if offline */
		if (ata_port_offline(ap))
			ata_eh_detach_dev(dev);

		/* probe if requested */
		if ((ehc->i.probe_mask & (1 << dev->devno)) &&
		    !(ehc->did_probe_mask & (1 << dev->devno))) {
			ata_eh_detach_dev(dev);
			ata_dev_init(dev);

			ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
			ehc->did_probe_mask |= (1 << dev->devno);
			ehc->i.action |= ATA_EH_SOFTRESET;
		}
	} else {
		/* soft didn't work?  be haaaaard */
		if (ehc->i.flags & ATA_EHI_DID_RESET)
			ehc->i.action |= ATA_EH_HARDRESET;
		else
			ehc->i.action |= ATA_EH_SOFTRESET;
	}

	if (ata_port_nr_enabled(ap)) {
		ata_port_printk(ap, KERN_WARNING, "failed to recover some "
				"devices, retrying in 5 secs\n");
		ssleep(5);
	} else {
		/* no device left, repeat fast */
		msleep(500);
	}

	goto retry;

 out:
	if (rc) {
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ata_dev_disable(&ap->device[i]);
	}

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}
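/*
 * Illustrative sketch (disabled, not part of libata): the probe
 * bookkeeping in ata_eh_recover() pairs two bitmasks - ehc->i.probe_mask
 * requests a probe, ehc->did_probe_mask remembers that one was already
 * attempted - so each device is probed at most once per recovery run.
 * The helper name ata_eh_probe_pending() is hypothetical and only
 * restates the test used twice above.
 */
#if 0
static int ata_eh_probe_pending(struct ata_eh_context *ehc,
				struct ata_device *dev)
{
	unsigned int bit = 1 << dev->devno;

	/* probe requested and not yet attempted in this EH invocation */
	return (ehc->i.probe_mask & bit) && !(ehc->did_probe_mask & bit);
}
#endif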
/**
 *	ata_eh_finish - finish up EH
 *	@ap: host port to finish EH for
 *
 *	Recovery is complete.  Clean up EH states and retry or finish
 *	failed qcs.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_finish(struct ata_port *ap)
{
	int tag;

	/* retry or finish qcs */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask) {
			/* FIXME: Once EH migration is complete,
			 * generate sense data in this function,
			 * considering both err_mask and tf.
			 */
			if (qc->err_mask & AC_ERR_INVALID)
				ata_eh_qc_complete(qc);
			else
				ata_eh_qc_retry(qc);
		} else {
			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
				ata_eh_qc_complete(qc);
			} else {
				/* feed zero TF to sense generation */
				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
				ata_eh_qc_retry(qc);
			}
		}
	}
}

/**
 *	ata_do_eh - do standard error handling
 *	@ap: host port to handle error for
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	Perform standard error handling sequence.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
	       ata_postreset_fn_t postreset)
{
	ata_eh_autopsy(ap);
	ata_eh_report(ap);
	ata_eh_recover(ap, prereset, softreset, hardreset, postreset);
	ata_eh_finish(ap);
}
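/*
 * Illustrative sketch (disabled, not part of this file): a low-level
 * driver's ->error_handler typically just forwards its reset methods to
 * ata_do_eh() above, passing NULL for any step it does not implement.
 * The my_* names below are hypothetical placeholders, not real libata
 * symbols.
 */
#if 0
static void my_error_handler(struct ata_port *ap)
{
	/* autopsy, report, recover and finish, using this driver's
	 * prereset/softreset/hardreset/postreset methods
	 */
	ata_do_eh(ap, my_prereset, my_softreset, my_hardreset, my_postreset);
}
#endif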
#ifdef CONFIG_PM
/**
 *	ata_eh_handle_port_suspend - perform port suspend operation
 *	@ap: port to suspend
 *
 *	Suspend @ap.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
	unsigned long flags;
	int rc = 0;

	/* are we suspending? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event == PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

	/* suspend */
	ata_eh_freeze_port(ap);

	if (ap->ops->port_suspend)
		rc = ap->ops->port_suspend(ap, ap->pm_mesg);

	/* report result */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
	if (rc == 0)
		ap->pflags |= ATA_PFLAG_SUSPENDED;
	else
		ata_port_schedule_eh(ap);

	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_handle_port_resume - perform port resume operation
 *	@ap: port to resume
 *
 *	Resume @ap.
 *
 *	This function also waits up to one second until all devices
 *	hanging off this port request the resume EH action.  This is
 *	to prevent invoking EH, and thus reset, multiple times on
 *	resume.
 *
 *	On DPM resume, where some of the devices might not be resumed
 *	together, this may delay port resume by up to one second, but
 *	such DPM resumes are rare and a one second delay isn't too bad.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	unsigned long timeout;
	unsigned long flags;
	int i, rc = 0;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event != PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	/* spurious? */
	if (!(ap->pflags & ATA_PFLAG_SUSPENDED))
		goto done;

	if (ap->ops->port_resume)
		rc = ap->ops->port_resume(ap);

	/* give devices time to request EH */
	timeout = jiffies + HZ; /* 1s max */
	while (1) {
		for (i = 0; i < ATA_MAX_DEVICES; i++) {
			struct ata_device *dev = &ap->device[i];
			unsigned int action = ata_eh_dev_action(dev);

			if ((dev->flags & ATA_DFLAG_SUSPENDED) &&
			    !(action & ATA_EH_RESUME))
				break;
		}

		if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout))
			break;
		msleep(10);
	}

 done:
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}
	spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */
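/*
 * Illustrative sketch (disabled, not part of libata): both CONFIG_PM
 * handlers above complete a PM request by writing the outcome through
 * ap->pm_result and clearing the pointer, so the requesting side can
 * wait on a stack variable.  The requester below is hypothetical (the
 * name my_port_request_pm and the elided wait are placeholders); it only
 * shows the handshake those handlers expect.
 */
#if 0
static int my_port_request_pm(struct ata_port *ap, pm_message_t mesg)
{
	unsigned long flags;
	int rc = -1;	/* overwritten by EH via ap->pm_result */

	/* record the request and kick EH under ap->lock */
	spin_lock_irqsave(ap->lock, flags);
	ap->pm_mesg = mesg;
	ap->pm_result = &rc;
	ap->pflags |= ATA_PFLAG_PM_PENDING;
	ata_port_schedule_eh(ap);
	spin_unlock_irqrestore(ap->lock, flags);

	/* ... wait for EH to run and clear ap->pm_result before
	 * reading rc ...
	 */
	return rc;
}
#endif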